1 //===- LoopVectorizationLegality.cpp --------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file provides loop vectorization legality analysis. Original code 10 // resided in LoopVectorize.cpp for a long time. 11 // 12 // At this point, it is implemented as a utility class, not as an analysis 13 // pass. It should be easy to create an analysis pass around it if there 14 // is a need (but D45420 needs to happen first). 15 // 16 #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h" 17 #include "llvm/Analysis/Loads.h" 18 #include "llvm/Analysis/ValueTracking.h" 19 #include "llvm/Analysis/VectorUtils.h" 20 #include "llvm/IR/IntrinsicInst.h" 21 #include "llvm/IR/PatternMatch.h" 22 #include "llvm/Transforms/Vectorize/LoopVectorize.h" 23 24 using namespace llvm; 25 using namespace PatternMatch; 26 27 #define LV_NAME "loop-vectorize" 28 #define DEBUG_TYPE LV_NAME 29 30 extern cl::opt<bool> EnableVPlanPredication; 31 32 static cl::opt<bool> 33 EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, 34 cl::desc("Enable if-conversion during vectorization.")); 35 36 static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold( 37 "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden, 38 cl::desc("The maximum allowed number of runtime memory checks with a " 39 "vectorize(enable) pragma.")); 40 41 static cl::opt<unsigned> VectorizeSCEVCheckThreshold( 42 "vectorize-scev-check-threshold", cl::init(16), cl::Hidden, 43 cl::desc("The maximum number of SCEV checks allowed.")); 44 45 static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold( 46 "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, 47 cl::desc("The maximum number of SCEV checks allowed with a " 48 "vectorize(enable) pragma")); 49 50 /// Maximum vectorization interleave count. 51 static const unsigned MaxInterleaveFactor = 16; 52 53 namespace llvm { 54 55 bool LoopVectorizeHints::Hint::validate(unsigned Val) { 56 switch (Kind) { 57 case HK_WIDTH: 58 return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth; 59 case HK_UNROLL: 60 return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor; 61 case HK_FORCE: 62 return (Val <= 1); 63 case HK_ISVECTORIZED: 64 case HK_PREDICATE: 65 return (Val == 0 || Val == 1); 66 } 67 return false; 68 } 69 70 LoopVectorizeHints::LoopVectorizeHints(const Loop *L, 71 bool InterleaveOnlyWhenForced, 72 OptimizationRemarkEmitter &ORE) 73 : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH), 74 Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL), 75 Force("vectorize.enable", FK_Undefined, HK_FORCE), 76 IsVectorized("isvectorized", 0, HK_ISVECTORIZED), 77 Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE), TheLoop(L), 78 ORE(ORE) { 79 // Populate values with existing loop metadata. 80 getHintsFromMetadata(); 81 82 // force-vector-interleave overrides DisableInterleaving. 83 if (VectorizerParams::isInterleaveForced()) 84 Interleave.Value = VectorizerParams::VectorizationInterleave; 85 86 if (IsVectorized.Value != 1) 87 // If the vectorization width and interleaving count are both 1 then 88 // consider the loop to have been already vectorized because there's 89 // nothing more that we can do. 90 IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1; 91 LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs() 92 << "LV: Interleaving disabled by the pass manager\n"); 93 } 94 95 void LoopVectorizeHints::setAlreadyVectorized() { 96 LLVMContext &Context = TheLoop->getHeader()->getContext(); 97 98 MDNode *IsVectorizedMD = MDNode::get( 99 Context, 100 {MDString::get(Context, "llvm.loop.isvectorized"), 101 ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))}); 102 MDNode *LoopID = TheLoop->getLoopID(); 103 MDNode *NewLoopID = 104 makePostTransformationMetadata(Context, LoopID, 105 {Twine(Prefix(), "vectorize.").str(), 106 Twine(Prefix(), "interleave.").str()}, 107 {IsVectorizedMD}); 108 TheLoop->setLoopID(NewLoopID); 109 110 // Update internal cache. 111 IsVectorized.Value = 1; 112 } 113 114 bool LoopVectorizeHints::allowVectorization( 115 Function *F, Loop *L, bool VectorizeOnlyWhenForced) const { 116 if (getForce() == LoopVectorizeHints::FK_Disabled) { 117 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n"); 118 emitRemarkWithHints(); 119 return false; 120 } 121 122 if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) { 123 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n"); 124 emitRemarkWithHints(); 125 return false; 126 } 127 128 if (getIsVectorized() == 1) { 129 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n"); 130 // FIXME: Add interleave.disable metadata. This will allow 131 // vectorize.disable to be used without disabling the pass and errors 132 // to differentiate between disabled vectorization and a width of 1. 133 ORE.emit([&]() { 134 return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(), 135 "AllDisabled", L->getStartLoc(), 136 L->getHeader()) 137 << "loop not vectorized: vectorization and interleaving are " 138 "explicitly disabled, or the loop has already been " 139 "vectorized"; 140 }); 141 return false; 142 } 143 144 return true; 145 } 146 147 void LoopVectorizeHints::emitRemarkWithHints() const { 148 using namespace ore; 149 150 ORE.emit([&]() { 151 if (Force.Value == LoopVectorizeHints::FK_Disabled) 152 return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled", 153 TheLoop->getStartLoc(), 154 TheLoop->getHeader()) 155 << "loop not vectorized: vectorization is explicitly disabled"; 156 else { 157 OptimizationRemarkMissed R(LV_NAME, "MissedDetails", 158 TheLoop->getStartLoc(), TheLoop->getHeader()); 159 R << "loop not vectorized"; 160 if (Force.Value == LoopVectorizeHints::FK_Enabled) { 161 R << " (Force=" << NV("Force", true); 162 if (Width.Value != 0) 163 R << ", Vector Width=" << NV("VectorWidth", Width.Value); 164 if (Interleave.Value != 0) 165 R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value); 166 R << ")"; 167 } 168 return R; 169 } 170 }); 171 } 172 173 const char *LoopVectorizeHints::vectorizeAnalysisPassName() const { 174 if (getWidth() == 1) 175 return LV_NAME; 176 if (getForce() == LoopVectorizeHints::FK_Disabled) 177 return LV_NAME; 178 if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0) 179 return LV_NAME; 180 return OptimizationRemarkAnalysis::AlwaysPrint; 181 } 182 183 void LoopVectorizeHints::getHintsFromMetadata() { 184 MDNode *LoopID = TheLoop->getLoopID(); 185 if (!LoopID) 186 return; 187 188 // First operand should refer to the loop id itself. 189 assert(LoopID->getNumOperands() > 0 && "requires at least one operand"); 190 assert(LoopID->getOperand(0) == LoopID && "invalid loop id"); 191 192 for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { 193 const MDString *S = nullptr; 194 SmallVector<Metadata *, 4> Args; 195 196 // The expected hint is either a MDString or a MDNode with the first 197 // operand a MDString. 198 if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) { 199 if (!MD || MD->getNumOperands() == 0) 200 continue; 201 S = dyn_cast<MDString>(MD->getOperand(0)); 202 for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i) 203 Args.push_back(MD->getOperand(i)); 204 } else { 205 S = dyn_cast<MDString>(LoopID->getOperand(i)); 206 assert(Args.size() == 0 && "too many arguments for MDString"); 207 } 208 209 if (!S) 210 continue; 211 212 // Check if the hint starts with the loop metadata prefix. 213 StringRef Name = S->getString(); 214 if (Args.size() == 1) 215 setHint(Name, Args[0]); 216 } 217 } 218 219 void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) { 220 if (!Name.startswith(Prefix())) 221 return; 222 Name = Name.substr(Prefix().size(), StringRef::npos); 223 224 const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg); 225 if (!C) 226 return; 227 unsigned Val = C->getZExtValue(); 228 229 Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate}; 230 for (auto H : Hints) { 231 if (Name == H->Name) { 232 if (H->validate(Val)) 233 H->Value = Val; 234 else 235 LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n"); 236 break; 237 } 238 } 239 } 240 241 bool LoopVectorizationRequirements::doesNotMeet( 242 Function *F, Loop *L, const LoopVectorizeHints &Hints) { 243 const char *PassName = Hints.vectorizeAnalysisPassName(); 244 bool Failed = false; 245 if (UnsafeAlgebraInst && !Hints.allowReordering()) { 246 ORE.emit([&]() { 247 return OptimizationRemarkAnalysisFPCommute( 248 PassName, "CantReorderFPOps", UnsafeAlgebraInst->getDebugLoc(), 249 UnsafeAlgebraInst->getParent()) 250 << "loop not vectorized: cannot prove it is safe to reorder " 251 "floating-point operations"; 252 }); 253 Failed = true; 254 } 255 256 // Test if runtime memcheck thresholds are exceeded. 257 bool PragmaThresholdReached = 258 NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold; 259 bool ThresholdReached = 260 NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold; 261 if ((ThresholdReached && !Hints.allowReordering()) || 262 PragmaThresholdReached) { 263 ORE.emit([&]() { 264 return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps", 265 L->getStartLoc(), 266 L->getHeader()) 267 << "loop not vectorized: cannot prove it is safe to reorder " 268 "memory operations"; 269 }); 270 LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n"); 271 Failed = true; 272 } 273 274 return Failed; 275 } 276 277 // Return true if the inner loop \p Lp is uniform with regard to the outer loop 278 // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes 279 // executing the inner loop will execute the same iterations). This check is 280 // very constrained for now but it will be relaxed in the future. \p Lp is 281 // considered uniform if it meets all the following conditions: 282 // 1) it has a canonical IV (starting from 0 and with stride 1), 283 // 2) its latch terminator is a conditional branch and, 284 // 3) its latch condition is a compare instruction whose operands are the 285 // canonical IV and an OuterLp invariant. 286 // This check doesn't take into account the uniformity of other conditions not 287 // related to the loop latch because they don't affect the loop uniformity. 288 // 289 // NOTE: We decided to keep all these checks and its associated documentation 290 // together so that we can easily have a picture of the current supported loop 291 // nests. However, some of the current checks don't depend on \p OuterLp and 292 // would be redundantly executed for each \p Lp if we invoked this function for 293 // different candidate outer loops. This is not the case for now because we 294 // don't currently have the infrastructure to evaluate multiple candidate outer 295 // loops and \p OuterLp will be a fixed parameter while we only support explicit 296 // outer loop vectorization. It's also very likely that these checks go away 297 // before introducing the aforementioned infrastructure. However, if this is not 298 // the case, we should move the \p OuterLp independent checks to a separate 299 // function that is only executed once for each \p Lp. 300 static bool isUniformLoop(Loop *Lp, Loop *OuterLp) { 301 assert(Lp->getLoopLatch() && "Expected loop with a single latch."); 302 303 // If Lp is the outer loop, it's uniform by definition. 304 if (Lp == OuterLp) 305 return true; 306 assert(OuterLp->contains(Lp) && "OuterLp must contain Lp."); 307 308 // 1. 309 PHINode *IV = Lp->getCanonicalInductionVariable(); 310 if (!IV) { 311 LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n"); 312 return false; 313 } 314 315 // 2. 316 BasicBlock *Latch = Lp->getLoopLatch(); 317 auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator()); 318 if (!LatchBr || LatchBr->isUnconditional()) { 319 LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n"); 320 return false; 321 } 322 323 // 3. 324 auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition()); 325 if (!LatchCmp) { 326 LLVM_DEBUG( 327 dbgs() << "LV: Loop latch condition is not a compare instruction.\n"); 328 return false; 329 } 330 331 Value *CondOp0 = LatchCmp->getOperand(0); 332 Value *CondOp1 = LatchCmp->getOperand(1); 333 Value *IVUpdate = IV->getIncomingValueForBlock(Latch); 334 if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) && 335 !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) { 336 LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n"); 337 return false; 338 } 339 340 return true; 341 } 342 343 // Return true if \p Lp and all its nested loops are uniform with regard to \p 344 // OuterLp. 345 static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) { 346 if (!isUniformLoop(Lp, OuterLp)) 347 return false; 348 349 // Check if nested loops are uniform. 350 for (Loop *SubLp : *Lp) 351 if (!isUniformLoopNest(SubLp, OuterLp)) 352 return false; 353 354 return true; 355 } 356 357 /// Check whether it is safe to if-convert this phi node. 358 /// 359 /// Phi nodes with constant expressions that can trap are not safe to if 360 /// convert. 361 static bool canIfConvertPHINodes(BasicBlock *BB) { 362 for (PHINode &Phi : BB->phis()) { 363 for (Value *V : Phi.incoming_values()) 364 if (auto *C = dyn_cast<Constant>(V)) 365 if (C->canTrap()) 366 return false; 367 } 368 return true; 369 } 370 371 static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) { 372 if (Ty->isPointerTy()) 373 return DL.getIntPtrType(Ty); 374 375 // It is possible that char's or short's overflow when we ask for the loop's 376 // trip count, work around this by changing the type size. 377 if (Ty->getScalarSizeInBits() < 32) 378 return Type::getInt32Ty(Ty->getContext()); 379 380 return Ty; 381 } 382 383 static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) { 384 Ty0 = convertPointerToIntegerType(DL, Ty0); 385 Ty1 = convertPointerToIntegerType(DL, Ty1); 386 if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits()) 387 return Ty0; 388 return Ty1; 389 } 390 391 /// Check that the instruction has outside loop users and is not an 392 /// identified reduction variable. 393 static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, 394 SmallPtrSetImpl<Value *> &AllowedExit) { 395 // Reductions, Inductions and non-header phis are allowed to have exit users. All 396 // other instructions must not have external users. 397 if (!AllowedExit.count(Inst)) 398 // Check that all of the users of the loop are inside the BB. 399 for (User *U : Inst->users()) { 400 Instruction *UI = cast<Instruction>(U); 401 // This user may be a reduction exit value. 402 if (!TheLoop->contains(UI)) { 403 LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n'); 404 return true; 405 } 406 } 407 return false; 408 } 409 410 int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { 411 const ValueToValueMap &Strides = 412 getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap(); 413 414 bool CanAddPredicate = !TheLoop->getHeader()->getParent()->hasOptSize(); 415 int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, CanAddPredicate, false); 416 if (Stride == 1 || Stride == -1) 417 return Stride; 418 return 0; 419 } 420 421 bool LoopVectorizationLegality::isUniform(Value *V) { 422 return LAI->isUniform(V); 423 } 424 425 bool LoopVectorizationLegality::canVectorizeOuterLoop() { 426 assert(!TheLoop->empty() && "We are not vectorizing an outer loop."); 427 // Store the result and return it at the end instead of exiting early, in case 428 // allowExtraAnalysis is used to report multiple reasons for not vectorizing. 429 bool Result = true; 430 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); 431 432 for (BasicBlock *BB : TheLoop->blocks()) { 433 // Check whether the BB terminator is a BranchInst. Any other terminator is 434 // not supported yet. 435 auto *Br = dyn_cast<BranchInst>(BB->getTerminator()); 436 if (!Br) { 437 reportVectorizationFailure("Unsupported basic block terminator", 438 "loop control flow is not understood by vectorizer", 439 "CFGNotUnderstood", ORE, TheLoop); 440 if (DoExtraAnalysis) 441 Result = false; 442 else 443 return false; 444 } 445 446 // Check whether the BranchInst is a supported one. Only unconditional 447 // branches, conditional branches with an outer loop invariant condition or 448 // backedges are supported. 449 // FIXME: We skip these checks when VPlan predication is enabled as we 450 // want to allow divergent branches. This whole check will be removed 451 // once VPlan predication is on by default. 452 if (!EnableVPlanPredication && Br && Br->isConditional() && 453 !TheLoop->isLoopInvariant(Br->getCondition()) && 454 !LI->isLoopHeader(Br->getSuccessor(0)) && 455 !LI->isLoopHeader(Br->getSuccessor(1))) { 456 reportVectorizationFailure("Unsupported conditional branch", 457 "loop control flow is not understood by vectorizer", 458 "CFGNotUnderstood", ORE, TheLoop); 459 if (DoExtraAnalysis) 460 Result = false; 461 else 462 return false; 463 } 464 } 465 466 // Check whether inner loops are uniform. At this point, we only support 467 // simple outer loops scenarios with uniform nested loops. 468 if (!isUniformLoopNest(TheLoop /*loop nest*/, 469 TheLoop /*context outer loop*/)) { 470 reportVectorizationFailure("Outer loop contains divergent loops", 471 "loop control flow is not understood by vectorizer", 472 "CFGNotUnderstood", ORE, TheLoop); 473 if (DoExtraAnalysis) 474 Result = false; 475 else 476 return false; 477 } 478 479 // Check whether we are able to set up outer loop induction. 480 if (!setupOuterLoopInductions()) { 481 reportVectorizationFailure("Unsupported outer loop Phi(s)", 482 "Unsupported outer loop Phi(s)", 483 "UnsupportedPhi", ORE, TheLoop); 484 if (DoExtraAnalysis) 485 Result = false; 486 else 487 return false; 488 } 489 490 return Result; 491 } 492 493 void LoopVectorizationLegality::addInductionPhi( 494 PHINode *Phi, const InductionDescriptor &ID, 495 SmallPtrSetImpl<Value *> &AllowedExit) { 496 Inductions[Phi] = ID; 497 498 // In case this induction also comes with casts that we know we can ignore 499 // in the vectorized loop body, record them here. All casts could be recorded 500 // here for ignoring, but suffices to record only the first (as it is the 501 // only one that may bw used outside the cast sequence). 502 const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts(); 503 if (!Casts.empty()) 504 InductionCastsToIgnore.insert(*Casts.begin()); 505 506 Type *PhiTy = Phi->getType(); 507 const DataLayout &DL = Phi->getModule()->getDataLayout(); 508 509 // Get the widest type. 510 if (!PhiTy->isFloatingPointTy()) { 511 if (!WidestIndTy) 512 WidestIndTy = convertPointerToIntegerType(DL, PhiTy); 513 else 514 WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy); 515 } 516 517 // Int inductions are special because we only allow one IV. 518 if (ID.getKind() == InductionDescriptor::IK_IntInduction && 519 ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() && 520 isa<Constant>(ID.getStartValue()) && 521 cast<Constant>(ID.getStartValue())->isNullValue()) { 522 523 // Use the phi node with the widest type as induction. Use the last 524 // one if there are multiple (no good reason for doing this other 525 // than it is expedient). We've checked that it begins at zero and 526 // steps by one, so this is a canonical induction variable. 527 if (!PrimaryInduction || PhiTy == WidestIndTy) 528 PrimaryInduction = Phi; 529 } 530 531 // Both the PHI node itself, and the "post-increment" value feeding 532 // back into the PHI node may have external users. 533 // We can allow those uses, except if the SCEVs we have for them rely 534 // on predicates that only hold within the loop, since allowing the exit 535 // currently means re-using this SCEV outside the loop (see PR33706 for more 536 // details). 537 if (PSE.getUnionPredicate().isAlwaysTrue()) { 538 AllowedExit.insert(Phi); 539 AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch())); 540 } 541 542 LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n"); 543 } 544 545 bool LoopVectorizationLegality::setupOuterLoopInductions() { 546 BasicBlock *Header = TheLoop->getHeader(); 547 548 // Returns true if a given Phi is a supported induction. 549 auto isSupportedPhi = [&](PHINode &Phi) -> bool { 550 InductionDescriptor ID; 551 if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) && 552 ID.getKind() == InductionDescriptor::IK_IntInduction) { 553 addInductionPhi(&Phi, ID, AllowedExit); 554 return true; 555 } else { 556 // Bail out for any Phi in the outer loop header that is not a supported 557 // induction. 558 LLVM_DEBUG( 559 dbgs() 560 << "LV: Found unsupported PHI for outer loop vectorization.\n"); 561 return false; 562 } 563 }; 564 565 if (llvm::all_of(Header->phis(), isSupportedPhi)) 566 return true; 567 else 568 return false; 569 } 570 571 /// Checks if a function is scalarizable according to the TLI, in 572 /// the sense that it should be vectorized and then expanded in 573 /// multiple scalarcalls. This is represented in the 574 /// TLI via mappings that do not specify a vector name, as in the 575 /// following example: 576 /// 577 /// const VecDesc VecIntrinsics[] = { 578 /// {"llvm.phx.abs.i32", "", 4} 579 /// }; 580 static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) { 581 const StringRef ScalarName = CI.getCalledFunction()->getName(); 582 bool Scalarize = TLI.isFunctionVectorizable(ScalarName); 583 // Check that all known VFs are not associated to a vector 584 // function, i.e. the vector name is emty. 585 if (Scalarize) 586 for (unsigned VF = 2, WidestVF = TLI.getWidestVF(ScalarName); 587 VF <= WidestVF; VF *= 2) { 588 Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF); 589 } 590 return Scalarize; 591 } 592 593 bool LoopVectorizationLegality::canVectorizeInstrs() { 594 BasicBlock *Header = TheLoop->getHeader(); 595 596 // Look for the attribute signaling the absence of NaNs. 597 Function &F = *Header->getParent(); 598 HasFunNoNaNAttr = 599 F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true"; 600 601 // For each block in the loop. 602 for (BasicBlock *BB : TheLoop->blocks()) { 603 // Scan the instructions in the block and look for hazards. 604 for (Instruction &I : *BB) { 605 if (auto *Phi = dyn_cast<PHINode>(&I)) { 606 Type *PhiTy = Phi->getType(); 607 // Check that this PHI type is allowed. 608 if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() && 609 !PhiTy->isPointerTy()) { 610 reportVectorizationFailure("Found a non-int non-pointer PHI", 611 "loop control flow is not understood by vectorizer", 612 "CFGNotUnderstood", ORE, TheLoop); 613 return false; 614 } 615 616 // If this PHINode is not in the header block, then we know that we 617 // can convert it to select during if-conversion. No need to check if 618 // the PHIs in this block are induction or reduction variables. 619 if (BB != Header) { 620 // Non-header phi nodes that have outside uses can be vectorized. Add 621 // them to the list of allowed exits. 622 // Unsafe cyclic dependencies with header phis are identified during 623 // legalization for reduction, induction and first order 624 // recurrences. 625 AllowedExit.insert(&I); 626 continue; 627 } 628 629 // We only allow if-converted PHIs with exactly two incoming values. 630 if (Phi->getNumIncomingValues() != 2) { 631 reportVectorizationFailure("Found an invalid PHI", 632 "loop control flow is not understood by vectorizer", 633 "CFGNotUnderstood", ORE, TheLoop, Phi); 634 return false; 635 } 636 637 RecurrenceDescriptor RedDes; 638 if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC, 639 DT)) { 640 if (RedDes.hasUnsafeAlgebra()) 641 Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst()); 642 AllowedExit.insert(RedDes.getLoopExitInstr()); 643 Reductions[Phi] = RedDes; 644 continue; 645 } 646 647 // TODO: Instead of recording the AllowedExit, it would be good to record the 648 // complementary set: NotAllowedExit. These include (but may not be 649 // limited to): 650 // 1. Reduction phis as they represent the one-before-last value, which 651 // is not available when vectorized 652 // 2. Induction phis and increment when SCEV predicates cannot be used 653 // outside the loop - see addInductionPhi 654 // 3. Non-Phis with outside uses when SCEV predicates cannot be used 655 // outside the loop - see call to hasOutsideLoopUser in the non-phi 656 // handling below 657 // 4. FirstOrderRecurrence phis that can possibly be handled by 658 // extraction. 659 // By recording these, we can then reason about ways to vectorize each 660 // of these NotAllowedExit. 661 InductionDescriptor ID; 662 if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) { 663 addInductionPhi(Phi, ID, AllowedExit); 664 if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr) 665 Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst()); 666 continue; 667 } 668 669 if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop, 670 SinkAfter, DT)) { 671 AllowedExit.insert(Phi); 672 FirstOrderRecurrences.insert(Phi); 673 continue; 674 } 675 676 // As a last resort, coerce the PHI to a AddRec expression 677 // and re-try classifying it a an induction PHI. 678 if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) { 679 addInductionPhi(Phi, ID, AllowedExit); 680 continue; 681 } 682 683 reportVectorizationFailure("Found an unidentified PHI", 684 "value that could not be identified as " 685 "reduction is used outside the loop", 686 "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi); 687 return false; 688 } // end of PHI handling 689 690 // We handle calls that: 691 // * Are debug info intrinsics. 692 // * Have a mapping to an IR intrinsic. 693 // * Have a vector version available. 694 auto *CI = dyn_cast<CallInst>(&I); 695 696 if (CI && !getVectorIntrinsicIDForCall(CI, TLI) && 697 !isa<DbgInfoIntrinsic>(CI) && 698 !(CI->getCalledFunction() && TLI && 699 (!VFDatabase::getMappings(*CI).empty() || 700 isTLIScalarize(*TLI, *CI)))) { 701 // If the call is a recognized math libary call, it is likely that 702 // we can vectorize it given loosened floating-point constraints. 703 LibFunc Func; 704 bool IsMathLibCall = 705 TLI && CI->getCalledFunction() && 706 CI->getType()->isFloatingPointTy() && 707 TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) && 708 TLI->hasOptimizedCodeGen(Func); 709 710 if (IsMathLibCall) { 711 // TODO: Ideally, we should not use clang-specific language here, 712 // but it's hard to provide meaningful yet generic advice. 713 // Also, should this be guarded by allowExtraAnalysis() and/or be part 714 // of the returned info from isFunctionVectorizable()? 715 reportVectorizationFailure( 716 "Found a non-intrinsic callsite", 717 "library call cannot be vectorized. " 718 "Try compiling with -fno-math-errno, -ffast-math, " 719 "or similar flags", 720 "CantVectorizeLibcall", ORE, TheLoop, CI); 721 } else { 722 reportVectorizationFailure("Found a non-intrinsic callsite", 723 "call instruction cannot be vectorized", 724 "CantVectorizeLibcall", ORE, TheLoop, CI); 725 } 726 return false; 727 } 728 729 // Some intrinsics have scalar arguments and should be same in order for 730 // them to be vectorized (i.e. loop invariant). 731 if (CI) { 732 auto *SE = PSE.getSE(); 733 Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI); 734 for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) 735 if (hasVectorInstrinsicScalarOpd(IntrinID, i)) { 736 if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) { 737 reportVectorizationFailure("Found unvectorizable intrinsic", 738 "intrinsic instruction cannot be vectorized", 739 "CantVectorizeIntrinsic", ORE, TheLoop, CI); 740 return false; 741 } 742 } 743 } 744 745 // Check that the instruction return type is vectorizable. 746 // Also, we can't vectorize extractelement instructions. 747 if ((!VectorType::isValidElementType(I.getType()) && 748 !I.getType()->isVoidTy()) || 749 isa<ExtractElementInst>(I)) { 750 reportVectorizationFailure("Found unvectorizable type", 751 "instruction return type cannot be vectorized", 752 "CantVectorizeInstructionReturnType", ORE, TheLoop, &I); 753 return false; 754 } 755 756 // Check that the stored type is vectorizable. 757 if (auto *ST = dyn_cast<StoreInst>(&I)) { 758 Type *T = ST->getValueOperand()->getType(); 759 if (!VectorType::isValidElementType(T)) { 760 reportVectorizationFailure("Store instruction cannot be vectorized", 761 "store instruction cannot be vectorized", 762 "CantVectorizeStore", ORE, TheLoop, ST); 763 return false; 764 } 765 766 // For nontemporal stores, check that a nontemporal vector version is 767 // supported on the target. 768 if (ST->getMetadata(LLVMContext::MD_nontemporal)) { 769 // Arbitrarily try a vector of 2 elements. 770 Type *VecTy = VectorType::get(T, /*NumElements=*/2); 771 assert(VecTy && "did not find vectorized version of stored type"); 772 const MaybeAlign Alignment = getLoadStoreAlignment(ST); 773 assert(Alignment && "Alignment should be set"); 774 if (!TTI->isLegalNTStore(VecTy, *Alignment)) { 775 reportVectorizationFailure( 776 "nontemporal store instruction cannot be vectorized", 777 "nontemporal store instruction cannot be vectorized", 778 "CantVectorizeNontemporalStore", ORE, TheLoop, ST); 779 return false; 780 } 781 } 782 783 } else if (auto *LD = dyn_cast<LoadInst>(&I)) { 784 if (LD->getMetadata(LLVMContext::MD_nontemporal)) { 785 // For nontemporal loads, check that a nontemporal vector version is 786 // supported on the target (arbitrarily try a vector of 2 elements). 787 Type *VecTy = VectorType::get(I.getType(), /*NumElements=*/2); 788 assert(VecTy && "did not find vectorized version of load type"); 789 const MaybeAlign Alignment = getLoadStoreAlignment(LD); 790 assert(Alignment && "Alignment should be set"); 791 if (!TTI->isLegalNTLoad(VecTy, *Alignment)) { 792 reportVectorizationFailure( 793 "nontemporal load instruction cannot be vectorized", 794 "nontemporal load instruction cannot be vectorized", 795 "CantVectorizeNontemporalLoad", ORE, TheLoop, LD); 796 return false; 797 } 798 } 799 800 // FP instructions can allow unsafe algebra, thus vectorizable by 801 // non-IEEE-754 compliant SIMD units. 802 // This applies to floating-point math operations and calls, not memory 803 // operations, shuffles, or casts, as they don't change precision or 804 // semantics. 805 } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) && 806 !I.isFast()) { 807 LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n"); 808 Hints->setPotentiallyUnsafe(); 809 } 810 811 // Reduction instructions are allowed to have exit users. 812 // All other instructions must not have external users. 813 if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) { 814 // We can safely vectorize loops where instructions within the loop are 815 // used outside the loop only if the SCEV predicates within the loop is 816 // same as outside the loop. Allowing the exit means reusing the SCEV 817 // outside the loop. 818 if (PSE.getUnionPredicate().isAlwaysTrue()) { 819 AllowedExit.insert(&I); 820 continue; 821 } 822 reportVectorizationFailure("Value cannot be used outside the loop", 823 "value cannot be used outside the loop", 824 "ValueUsedOutsideLoop", ORE, TheLoop, &I); 825 return false; 826 } 827 } // next instr. 828 } 829 830 if (!PrimaryInduction) { 831 if (Inductions.empty()) { 832 reportVectorizationFailure("Did not find one integer induction var", 833 "loop induction variable could not be identified", 834 "NoInductionVariable", ORE, TheLoop); 835 return false; 836 } else if (!WidestIndTy) { 837 reportVectorizationFailure("Did not find one integer induction var", 838 "integer loop induction variable could not be identified", 839 "NoIntegerInductionVariable", ORE, TheLoop); 840 return false; 841 } else { 842 LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n"); 843 } 844 } 845 846 // For first order recurrences, we use the previous value (incoming value from 847 // the latch) to check if it dominates all users of the recurrence. Bail out 848 // if we have to sink such an instruction for another recurrence, as the 849 // dominance requirement may not hold after sinking. 850 BasicBlock *LoopLatch = TheLoop->getLoopLatch(); 851 if (any_of(FirstOrderRecurrences, [LoopLatch, this](const PHINode *Phi) { 852 Instruction *V = 853 cast<Instruction>(Phi->getIncomingValueForBlock(LoopLatch)); 854 return SinkAfter.find(V) != SinkAfter.end(); 855 })) 856 return false; 857 858 // Now we know the widest induction type, check if our found induction 859 // is the same size. If it's not, unset it here and InnerLoopVectorizer 860 // will create another. 861 if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType()) 862 PrimaryInduction = nullptr; 863 864 return true; 865 } 866 867 bool LoopVectorizationLegality::canVectorizeMemory() { 868 LAI = &(*GetLAA)(*TheLoop); 869 const OptimizationRemarkAnalysis *LAR = LAI->getReport(); 870 if (LAR) { 871 ORE->emit([&]() { 872 return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(), 873 "loop not vectorized: ", *LAR); 874 }); 875 } 876 if (!LAI->canVectorizeMemory()) 877 return false; 878 879 if (LAI->hasDependenceInvolvingLoopInvariantAddress()) { 880 reportVectorizationFailure("Stores to a uniform address", 881 "write to a loop invariant address could not be vectorized", 882 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop); 883 return false; 884 } 885 Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks()); 886 PSE.addPredicate(LAI->getPSE().getUnionPredicate()); 887 888 return true; 889 } 890 891 bool LoopVectorizationLegality::isInductionPhi(const Value *V) { 892 Value *In0 = const_cast<Value *>(V); 893 PHINode *PN = dyn_cast_or_null<PHINode>(In0); 894 if (!PN) 895 return false; 896 897 return Inductions.count(PN); 898 } 899 900 bool LoopVectorizationLegality::isCastedInductionVariable(const Value *V) { 901 auto *Inst = dyn_cast<Instruction>(V); 902 return (Inst && InductionCastsToIgnore.count(Inst)); 903 } 904 905 bool LoopVectorizationLegality::isInductionVariable(const Value *V) { 906 return isInductionPhi(V) || isCastedInductionVariable(V); 907 } 908 909 bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) { 910 return FirstOrderRecurrences.count(Phi); 911 } 912 913 bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) { 914 return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT); 915 } 916 917 bool LoopVectorizationLegality::blockCanBePredicated( 918 BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs, bool PreserveGuards) { 919 const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel(); 920 921 for (Instruction &I : *BB) { 922 // Check that we don't have a constant expression that can trap as operand. 923 for (Value *Operand : I.operands()) { 924 if (auto *C = dyn_cast<Constant>(Operand)) 925 if (C->canTrap()) 926 return false; 927 } 928 929 // We can predicate blocks with calls to assume, as long as we drop them in 930 // case we flatten the CFG via predication. 931 if (match(&I, m_Intrinsic<Intrinsic::assume>())) { 932 ConditionalAssumes.insert(&I); 933 continue; 934 } 935 936 // We might be able to hoist the load. 937 if (I.mayReadFromMemory()) { 938 auto *LI = dyn_cast<LoadInst>(&I); 939 if (!LI) 940 return false; 941 if (!SafePtrs.count(LI->getPointerOperand())) { 942 // !llvm.mem.parallel_loop_access implies if-conversion safety. 943 // Otherwise, record that the load needs (real or emulated) masking 944 // and let the cost model decide. 945 if (!IsAnnotatedParallel || PreserveGuards) 946 MaskedOp.insert(LI); 947 continue; 948 } 949 } 950 951 if (I.mayWriteToMemory()) { 952 auto *SI = dyn_cast<StoreInst>(&I); 953 if (!SI) 954 return false; 955 // Predicated store requires some form of masking: 956 // 1) masked store HW instruction, 957 // 2) emulation via load-blend-store (only if safe and legal to do so, 958 // be aware on the race conditions), or 959 // 3) element-by-element predicate check and scalar store. 960 MaskedOp.insert(SI); 961 continue; 962 } 963 if (I.mayThrow()) 964 return false; 965 } 966 967 return true; 968 } 969 970 bool LoopVectorizationLegality::canVectorizeWithIfConvert() { 971 if (!EnableIfConversion) { 972 reportVectorizationFailure("If-conversion is disabled", 973 "if-conversion is disabled", 974 "IfConversionDisabled", 975 ORE, TheLoop); 976 return false; 977 } 978 979 assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable"); 980 981 // A list of pointers which are known to be dereferenceable within scope of 982 // the loop body for each iteration of the loop which executes. That is, 983 // the memory pointed to can be dereferenced (with the access size implied by 984 // the value's type) unconditionally within the loop header without 985 // introducing a new fault. 986 SmallPtrSet<Value *, 8> SafePointes; 987 988 // Collect safe addresses. 989 for (BasicBlock *BB : TheLoop->blocks()) { 990 if (!blockNeedsPredication(BB)) { 991 for (Instruction &I : *BB) 992 if (auto *Ptr = getLoadStorePointerOperand(&I)) 993 SafePointes.insert(Ptr); 994 continue; 995 } 996 997 // For a block which requires predication, a address may be safe to access 998 // in the loop w/o predication if we can prove dereferenceability facts 999 // sufficient to ensure it'll never fault within the loop. For the moment, 1000 // we restrict this to loads; stores are more complicated due to 1001 // concurrency restrictions. 1002 ScalarEvolution &SE = *PSE.getSE(); 1003 for (Instruction &I : *BB) { 1004 LoadInst *LI = dyn_cast<LoadInst>(&I); 1005 if (LI && !mustSuppressSpeculation(*LI) && 1006 isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT)) 1007 SafePointes.insert(LI->getPointerOperand()); 1008 } 1009 } 1010 1011 // Collect the blocks that need predication. 1012 BasicBlock *Header = TheLoop->getHeader(); 1013 for (BasicBlock *BB : TheLoop->blocks()) { 1014 // We don't support switch statements inside loops. 1015 if (!isa<BranchInst>(BB->getTerminator())) { 1016 reportVectorizationFailure("Loop contains a switch statement", 1017 "loop contains a switch statement", 1018 "LoopContainsSwitch", ORE, TheLoop, 1019 BB->getTerminator()); 1020 return false; 1021 } 1022 1023 // We must be able to predicate all blocks that need to be predicated. 1024 if (blockNeedsPredication(BB)) { 1025 if (!blockCanBePredicated(BB, SafePointes)) { 1026 reportVectorizationFailure( 1027 "Control flow cannot be substituted for a select", 1028 "control flow cannot be substituted for a select", 1029 "NoCFGForSelect", ORE, TheLoop, 1030 BB->getTerminator()); 1031 return false; 1032 } 1033 } else if (BB != Header && !canIfConvertPHINodes(BB)) { 1034 reportVectorizationFailure( 1035 "Control flow cannot be substituted for a select", 1036 "control flow cannot be substituted for a select", 1037 "NoCFGForSelect", ORE, TheLoop, 1038 BB->getTerminator()); 1039 return false; 1040 } 1041 } 1042 1043 // We can if-convert this loop. 1044 return true; 1045 } 1046 1047 // Helper function to canVectorizeLoopNestCFG. 1048 bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp, 1049 bool UseVPlanNativePath) { 1050 assert((UseVPlanNativePath || Lp->empty()) && 1051 "VPlan-native path is not enabled."); 1052 1053 // TODO: ORE should be improved to show more accurate information when an 1054 // outer loop can't be vectorized because a nested loop is not understood or 1055 // legal. Something like: "outer_loop_location: loop not vectorized: 1056 // (inner_loop_location) loop control flow is not understood by vectorizer". 1057 1058 // Store the result and return it at the end instead of exiting early, in case 1059 // allowExtraAnalysis is used to report multiple reasons for not vectorizing. 1060 bool Result = true; 1061 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); 1062 1063 // We must have a loop in canonical form. Loops with indirectbr in them cannot 1064 // be canonicalized. 1065 if (!Lp->getLoopPreheader()) { 1066 reportVectorizationFailure("Loop doesn't have a legal pre-header", 1067 "loop control flow is not understood by vectorizer", 1068 "CFGNotUnderstood", ORE, TheLoop); 1069 if (DoExtraAnalysis) 1070 Result = false; 1071 else 1072 return false; 1073 } 1074 1075 // We must have a single backedge. 1076 if (Lp->getNumBackEdges() != 1) { 1077 reportVectorizationFailure("The loop must have a single backedge", 1078 "loop control flow is not understood by vectorizer", 1079 "CFGNotUnderstood", ORE, TheLoop); 1080 if (DoExtraAnalysis) 1081 Result = false; 1082 else 1083 return false; 1084 } 1085 1086 // We must have a single exiting block. 1087 if (!Lp->getExitingBlock()) { 1088 reportVectorizationFailure("The loop must have an exiting block", 1089 "loop control flow is not understood by vectorizer", 1090 "CFGNotUnderstood", ORE, TheLoop); 1091 if (DoExtraAnalysis) 1092 Result = false; 1093 else 1094 return false; 1095 } 1096 1097 // We only handle bottom-tested loops, i.e. loop in which the condition is 1098 // checked at the end of each iteration. With that we can assume that all 1099 // instructions in the loop are executed the same number of times. 1100 if (Lp->getExitingBlock() != Lp->getLoopLatch()) { 1101 reportVectorizationFailure("The exiting block is not the loop latch", 1102 "loop control flow is not understood by vectorizer", 1103 "CFGNotUnderstood", ORE, TheLoop); 1104 if (DoExtraAnalysis) 1105 Result = false; 1106 else 1107 return false; 1108 } 1109 1110 return Result; 1111 } 1112 1113 bool LoopVectorizationLegality::canVectorizeLoopNestCFG( 1114 Loop *Lp, bool UseVPlanNativePath) { 1115 // Store the result and return it at the end instead of exiting early, in case 1116 // allowExtraAnalysis is used to report multiple reasons for not vectorizing. 1117 bool Result = true; 1118 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); 1119 if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) { 1120 if (DoExtraAnalysis) 1121 Result = false; 1122 else 1123 return false; 1124 } 1125 1126 // Recursively check whether the loop control flow of nested loops is 1127 // understood. 1128 for (Loop *SubLp : *Lp) 1129 if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) { 1130 if (DoExtraAnalysis) 1131 Result = false; 1132 else 1133 return false; 1134 } 1135 1136 return Result; 1137 } 1138 1139 bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) { 1140 // Store the result and return it at the end instead of exiting early, in case 1141 // allowExtraAnalysis is used to report multiple reasons for not vectorizing. 1142 bool Result = true; 1143 1144 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); 1145 // Check whether the loop-related control flow in the loop nest is expected by 1146 // vectorizer. 1147 if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) { 1148 if (DoExtraAnalysis) 1149 Result = false; 1150 else 1151 return false; 1152 } 1153 1154 // We need to have a loop header. 1155 LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName() 1156 << '\n'); 1157 1158 // Specific checks for outer loops. We skip the remaining legal checks at this 1159 // point because they don't support outer loops. 1160 if (!TheLoop->empty()) { 1161 assert(UseVPlanNativePath && "VPlan-native path is not enabled."); 1162 1163 if (!canVectorizeOuterLoop()) { 1164 reportVectorizationFailure("Unsupported outer loop", 1165 "unsupported outer loop", 1166 "UnsupportedOuterLoop", 1167 ORE, TheLoop); 1168 // TODO: Implement DoExtraAnalysis when subsequent legal checks support 1169 // outer loops. 1170 return false; 1171 } 1172 1173 LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n"); 1174 return Result; 1175 } 1176 1177 assert(TheLoop->empty() && "Inner loop expected."); 1178 // Check if we can if-convert non-single-bb loops. 1179 unsigned NumBlocks = TheLoop->getNumBlocks(); 1180 if (NumBlocks != 1 && !canVectorizeWithIfConvert()) { 1181 LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n"); 1182 if (DoExtraAnalysis) 1183 Result = false; 1184 else 1185 return false; 1186 } 1187 1188 // Check if we can vectorize the instructions and CFG in this loop. 1189 if (!canVectorizeInstrs()) { 1190 LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n"); 1191 if (DoExtraAnalysis) 1192 Result = false; 1193 else 1194 return false; 1195 } 1196 1197 // Go over each instruction and look at memory deps. 1198 if (!canVectorizeMemory()) { 1199 LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n"); 1200 if (DoExtraAnalysis) 1201 Result = false; 1202 else 1203 return false; 1204 } 1205 1206 LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop" 1207 << (LAI->getRuntimePointerChecking()->Need 1208 ? " (with a runtime bound check)" 1209 : "") 1210 << "!\n"); 1211 1212 unsigned SCEVThreshold = VectorizeSCEVCheckThreshold; 1213 if (Hints->getForce() == LoopVectorizeHints::FK_Enabled) 1214 SCEVThreshold = PragmaVectorizeSCEVCheckThreshold; 1215 1216 if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) { 1217 reportVectorizationFailure("Too many SCEV checks needed", 1218 "Too many SCEV assumptions need to be made and checked at runtime", 1219 "TooManySCEVRunTimeChecks", ORE, TheLoop); 1220 if (DoExtraAnalysis) 1221 Result = false; 1222 else 1223 return false; 1224 } 1225 1226 // Okay! We've done all the tests. If any have failed, return false. Otherwise 1227 // we can vectorize, and at this point we don't have any other mem analysis 1228 // which may limit our maximum vectorization factor, so just return true with 1229 // no restrictions. 1230 return Result; 1231 } 1232 1233 bool LoopVectorizationLegality::prepareToFoldTailByMasking() { 1234 1235 LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n"); 1236 1237 SmallPtrSet<const Value *, 8> ReductionLiveOuts; 1238 1239 for (auto &Reduction : getReductionVars()) 1240 ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr()); 1241 1242 // TODO: handle non-reduction outside users when tail is folded by masking. 1243 for (auto *AE : AllowedExit) { 1244 // Check that all users of allowed exit values are inside the loop or 1245 // are the live-out of a reduction. 1246 if (ReductionLiveOuts.count(AE)) 1247 continue; 1248 for (User *U : AE->users()) { 1249 Instruction *UI = cast<Instruction>(U); 1250 if (TheLoop->contains(UI)) 1251 continue; 1252 reportVectorizationFailure( 1253 "Cannot fold tail by masking, loop has an outside user for", 1254 "Cannot fold tail by masking in the presence of live outs.", 1255 "LiveOutFoldingTailByMasking", ORE, TheLoop, UI); 1256 return false; 1257 } 1258 } 1259 1260 // The list of pointers that we can safely read and write to remains empty. 1261 SmallPtrSet<Value *, 8> SafePointers; 1262 1263 // Check and mark all blocks for predication, including those that ordinarily 1264 // do not need predication such as the header block. 1265 for (BasicBlock *BB : TheLoop->blocks()) { 1266 if (!blockCanBePredicated(BB, SafePointers, /* MaskAllLoads= */ true)) { 1267 reportVectorizationFailure( 1268 "Cannot fold tail by masking as required", 1269 "control flow cannot be substituted for a select", 1270 "NoCFGForSelect", ORE, TheLoop, 1271 BB->getTerminator()); 1272 return false; 1273 } 1274 } 1275 1276 LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n"); 1277 return true; 1278 } 1279 1280 } // namespace llvm 1281