1 //===-- LoopUtils.cpp - Loop Utility functions -------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines common loop utility functions. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Transforms/Utils/LoopUtils.h" 15 #include "llvm/Analysis/AliasAnalysis.h" 16 #include "llvm/Analysis/BasicAliasAnalysis.h" 17 #include "llvm/Analysis/GlobalsModRef.h" 18 #include "llvm/Analysis/GlobalsModRef.h" 19 #include "llvm/Analysis/LoopInfo.h" 20 #include "llvm/Analysis/LoopPass.h" 21 #include "llvm/Analysis/ScalarEvolution.h" 22 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" 23 #include "llvm/Analysis/ScalarEvolutionExpander.h" 24 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 25 #include "llvm/IR/Dominators.h" 26 #include "llvm/IR/Instructions.h" 27 #include "llvm/IR/Module.h" 28 #include "llvm/IR/PatternMatch.h" 29 #include "llvm/IR/ValueHandle.h" 30 #include "llvm/Pass.h" 31 #include "llvm/Support/Debug.h" 32 33 using namespace llvm; 34 using namespace llvm::PatternMatch; 35 36 #define DEBUG_TYPE "loop-utils" 37 38 bool RecurrenceDescriptor::areAllUsesIn(Instruction *I, 39 SmallPtrSetImpl<Instruction *> &Set) { 40 for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use) 41 if (!Set.count(dyn_cast<Instruction>(*Use))) 42 return false; 43 return true; 44 } 45 46 bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurrenceKind Kind) { 47 switch (Kind) { 48 default: 49 break; 50 case RK_IntegerAdd: 51 case RK_IntegerMult: 52 case RK_IntegerOr: 53 case RK_IntegerAnd: 54 case RK_IntegerXor: 55 case RK_IntegerMinMax: 56 return true; 57 } 58 return false; 59 } 60 61 bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurrenceKind Kind) { 62 return (Kind != RK_NoRecurrence) && !isIntegerRecurrenceKind(Kind); 63 } 64 65 bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurrenceKind Kind) { 66 switch (Kind) { 67 default: 68 break; 69 case RK_IntegerAdd: 70 case RK_IntegerMult: 71 case RK_FloatAdd: 72 case RK_FloatMult: 73 return true; 74 } 75 return false; 76 } 77 78 Instruction * 79 RecurrenceDescriptor::lookThroughAnd(PHINode *Phi, Type *&RT, 80 SmallPtrSetImpl<Instruction *> &Visited, 81 SmallPtrSetImpl<Instruction *> &CI) { 82 if (!Phi->hasOneUse()) 83 return Phi; 84 85 const APInt *M = nullptr; 86 Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser()); 87 88 // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT 89 // with a new integer type of the corresponding bit width. 90 if (match(J, m_CombineOr(m_And(m_Instruction(I), m_APInt(M)), 91 m_And(m_APInt(M), m_Instruction(I))))) { 92 int32_t Bits = (*M + 1).exactLogBase2(); 93 if (Bits > 0) { 94 RT = IntegerType::get(Phi->getContext(), Bits); 95 Visited.insert(Phi); 96 CI.insert(J); 97 return J; 98 } 99 } 100 return Phi; 101 } 102 103 bool RecurrenceDescriptor::getSourceExtensionKind( 104 Instruction *Start, Instruction *Exit, Type *RT, bool &IsSigned, 105 SmallPtrSetImpl<Instruction *> &Visited, 106 SmallPtrSetImpl<Instruction *> &CI) { 107 108 SmallVector<Instruction *, 8> Worklist; 109 bool FoundOneOperand = false; 110 unsigned DstSize = RT->getPrimitiveSizeInBits(); 111 Worklist.push_back(Exit); 112 113 // Traverse the instructions in the reduction expression, beginning with the 114 // exit value. 115 while (!Worklist.empty()) { 116 Instruction *I = Worklist.pop_back_val(); 117 for (Use &U : I->operands()) { 118 119 // Terminate the traversal if the operand is not an instruction, or we 120 // reach the starting value. 121 Instruction *J = dyn_cast<Instruction>(U.get()); 122 if (!J || J == Start) 123 continue; 124 125 // Otherwise, investigate the operation if it is also in the expression. 126 if (Visited.count(J)) { 127 Worklist.push_back(J); 128 continue; 129 } 130 131 // If the operand is not in Visited, it is not a reduction operation, but 132 // it does feed into one. Make sure it is either a single-use sign- or 133 // zero-extend instruction. 134 CastInst *Cast = dyn_cast<CastInst>(J); 135 bool IsSExtInst = isa<SExtInst>(J); 136 if (!Cast || !Cast->hasOneUse() || !(isa<ZExtInst>(J) || IsSExtInst)) 137 return false; 138 139 // Ensure the source type of the extend is no larger than the reduction 140 // type. It is not necessary for the types to be identical. 141 unsigned SrcSize = Cast->getSrcTy()->getPrimitiveSizeInBits(); 142 if (SrcSize > DstSize) 143 return false; 144 145 // Furthermore, ensure that all such extends are of the same kind. 146 if (FoundOneOperand) { 147 if (IsSigned != IsSExtInst) 148 return false; 149 } else { 150 FoundOneOperand = true; 151 IsSigned = IsSExtInst; 152 } 153 154 // Lastly, if the source type of the extend matches the reduction type, 155 // add the extend to CI so that we can avoid accounting for it in the 156 // cost model. 157 if (SrcSize == DstSize) 158 CI.insert(Cast); 159 } 160 } 161 return true; 162 } 163 164 bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, 165 Loop *TheLoop, bool HasFunNoNaNAttr, 166 RecurrenceDescriptor &RedDes) { 167 if (Phi->getNumIncomingValues() != 2) 168 return false; 169 170 // Reduction variables are only found in the loop header block. 171 if (Phi->getParent() != TheLoop->getHeader()) 172 return false; 173 174 // Obtain the reduction start value from the value that comes from the loop 175 // preheader. 176 Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader()); 177 178 // ExitInstruction is the single value which is used outside the loop. 179 // We only allow for a single reduction value to be used outside the loop. 180 // This includes users of the reduction, variables (which form a cycle 181 // which ends in the phi node). 182 Instruction *ExitInstruction = nullptr; 183 // Indicates that we found a reduction operation in our scan. 184 bool FoundReduxOp = false; 185 186 // We start with the PHI node and scan for all of the users of this 187 // instruction. All users must be instructions that can be used as reduction 188 // variables (such as ADD). We must have a single out-of-block user. The cycle 189 // must include the original PHI. 190 bool FoundStartPHI = false; 191 192 // To recognize min/max patterns formed by a icmp select sequence, we store 193 // the number of instruction we saw from the recognized min/max pattern, 194 // to make sure we only see exactly the two instructions. 195 unsigned NumCmpSelectPatternInst = 0; 196 InstDesc ReduxDesc(false, nullptr); 197 198 // Data used for determining if the recurrence has been type-promoted. 199 Type *RecurrenceType = Phi->getType(); 200 SmallPtrSet<Instruction *, 4> CastInsts; 201 Instruction *Start = Phi; 202 bool IsSigned = false; 203 204 SmallPtrSet<Instruction *, 8> VisitedInsts; 205 SmallVector<Instruction *, 8> Worklist; 206 207 // Return early if the recurrence kind does not match the type of Phi. If the 208 // recurrence kind is arithmetic, we attempt to look through AND operations 209 // resulting from the type promotion performed by InstCombine. Vector 210 // operations are not limited to the legal integer widths, so we may be able 211 // to evaluate the reduction in the narrower width. 212 if (RecurrenceType->isFloatingPointTy()) { 213 if (!isFloatingPointRecurrenceKind(Kind)) 214 return false; 215 } else { 216 if (!isIntegerRecurrenceKind(Kind)) 217 return false; 218 if (isArithmeticRecurrenceKind(Kind)) 219 Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts); 220 } 221 222 Worklist.push_back(Start); 223 VisitedInsts.insert(Start); 224 225 // A value in the reduction can be used: 226 // - By the reduction: 227 // - Reduction operation: 228 // - One use of reduction value (safe). 229 // - Multiple use of reduction value (not safe). 230 // - PHI: 231 // - All uses of the PHI must be the reduction (safe). 232 // - Otherwise, not safe. 233 // - By instructions outside of the loop (safe). 234 // * One value may have several outside users, but all outside 235 // uses must be of the same value. 236 // - By an instruction that is not part of the reduction (not safe). 237 // This is either: 238 // * An instruction type other than PHI or the reduction operation. 239 // * A PHI in the header other than the initial PHI. 240 while (!Worklist.empty()) { 241 Instruction *Cur = Worklist.back(); 242 Worklist.pop_back(); 243 244 // No Users. 245 // If the instruction has no users then this is a broken chain and can't be 246 // a reduction variable. 247 if (Cur->use_empty()) 248 return false; 249 250 bool IsAPhi = isa<PHINode>(Cur); 251 252 // A header PHI use other than the original PHI. 253 if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent()) 254 return false; 255 256 // Reductions of instructions such as Div, and Sub is only possible if the 257 // LHS is the reduction variable. 258 if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) && 259 !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) && 260 !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0)))) 261 return false; 262 263 // Any reduction instruction must be of one of the allowed kinds. We ignore 264 // the starting value (the Phi or an AND instruction if the Phi has been 265 // type-promoted). 266 if (Cur != Start) { 267 ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr); 268 if (!ReduxDesc.isRecurrence()) 269 return false; 270 } 271 272 // A reduction operation must only have one use of the reduction value. 273 if (!IsAPhi && Kind != RK_IntegerMinMax && Kind != RK_FloatMinMax && 274 hasMultipleUsesOf(Cur, VisitedInsts)) 275 return false; 276 277 // All inputs to a PHI node must be a reduction value. 278 if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts)) 279 return false; 280 281 if (Kind == RK_IntegerMinMax && 282 (isa<ICmpInst>(Cur) || isa<SelectInst>(Cur))) 283 ++NumCmpSelectPatternInst; 284 if (Kind == RK_FloatMinMax && (isa<FCmpInst>(Cur) || isa<SelectInst>(Cur))) 285 ++NumCmpSelectPatternInst; 286 287 // Check whether we found a reduction operator. 288 FoundReduxOp |= !IsAPhi && Cur != Start; 289 290 // Process users of current instruction. Push non-PHI nodes after PHI nodes 291 // onto the stack. This way we are going to have seen all inputs to PHI 292 // nodes once we get to them. 293 SmallVector<Instruction *, 8> NonPHIs; 294 SmallVector<Instruction *, 8> PHIs; 295 for (User *U : Cur->users()) { 296 Instruction *UI = cast<Instruction>(U); 297 298 // Check if we found the exit user. 299 BasicBlock *Parent = UI->getParent(); 300 if (!TheLoop->contains(Parent)) { 301 // If we already know this instruction is used externally, move on to 302 // the next user. 303 if (ExitInstruction == Cur) 304 continue; 305 306 // Exit if you find multiple values used outside or if the header phi 307 // node is being used. In this case the user uses the value of the 308 // previous iteration, in which case we would loose "VF-1" iterations of 309 // the reduction operation if we vectorize. 310 if (ExitInstruction != nullptr || Cur == Phi) 311 return false; 312 313 // The instruction used by an outside user must be the last instruction 314 // before we feed back to the reduction phi. Otherwise, we loose VF-1 315 // operations on the value. 316 if (!is_contained(Phi->operands(), Cur)) 317 return false; 318 319 ExitInstruction = Cur; 320 continue; 321 } 322 323 // Process instructions only once (termination). Each reduction cycle 324 // value must only be used once, except by phi nodes and min/max 325 // reductions which are represented as a cmp followed by a select. 326 InstDesc IgnoredVal(false, nullptr); 327 if (VisitedInsts.insert(UI).second) { 328 if (isa<PHINode>(UI)) 329 PHIs.push_back(UI); 330 else 331 NonPHIs.push_back(UI); 332 } else if (!isa<PHINode>(UI) && 333 ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) && 334 !isa<SelectInst>(UI)) || 335 !isMinMaxSelectCmpPattern(UI, IgnoredVal).isRecurrence())) 336 return false; 337 338 // Remember that we completed the cycle. 339 if (UI == Phi) 340 FoundStartPHI = true; 341 } 342 Worklist.append(PHIs.begin(), PHIs.end()); 343 Worklist.append(NonPHIs.begin(), NonPHIs.end()); 344 } 345 346 // This means we have seen one but not the other instruction of the 347 // pattern or more than just a select and cmp. 348 if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) && 349 NumCmpSelectPatternInst != 2) 350 return false; 351 352 if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction) 353 return false; 354 355 // If we think Phi may have been type-promoted, we also need to ensure that 356 // all source operands of the reduction are either SExtInsts or ZEstInsts. If 357 // so, we will be able to evaluate the reduction in the narrower bit width. 358 if (Start != Phi) 359 if (!getSourceExtensionKind(Start, ExitInstruction, RecurrenceType, 360 IsSigned, VisitedInsts, CastInsts)) 361 return false; 362 363 // We found a reduction var if we have reached the original phi node and we 364 // only have a single instruction with out-of-loop users. 365 366 // The ExitInstruction(Instruction which is allowed to have out-of-loop users) 367 // is saved as part of the RecurrenceDescriptor. 368 369 // Save the description of this reduction variable. 370 RecurrenceDescriptor RD( 371 RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(), 372 ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts); 373 RedDes = RD; 374 375 return true; 376 } 377 378 /// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction 379 /// pattern corresponding to a min(X, Y) or max(X, Y). 380 RecurrenceDescriptor::InstDesc 381 RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev) { 382 383 assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) && 384 "Expect a select instruction"); 385 Instruction *Cmp = nullptr; 386 SelectInst *Select = nullptr; 387 388 // We must handle the select(cmp()) as a single instruction. Advance to the 389 // select. 390 if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) { 391 if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->user_begin()))) 392 return InstDesc(false, I); 393 return InstDesc(Select, Prev.getMinMaxKind()); 394 } 395 396 // Only handle single use cases for now. 397 if (!(Select = dyn_cast<SelectInst>(I))) 398 return InstDesc(false, I); 399 if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) && 400 !(Cmp = dyn_cast<FCmpInst>(I->getOperand(0)))) 401 return InstDesc(false, I); 402 if (!Cmp->hasOneUse()) 403 return InstDesc(false, I); 404 405 Value *CmpLeft; 406 Value *CmpRight; 407 408 // Look for a min/max pattern. 409 if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 410 return InstDesc(Select, MRK_UIntMin); 411 else if (m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 412 return InstDesc(Select, MRK_UIntMax); 413 else if (m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 414 return InstDesc(Select, MRK_SIntMax); 415 else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 416 return InstDesc(Select, MRK_SIntMin); 417 else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 418 return InstDesc(Select, MRK_FloatMin); 419 else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 420 return InstDesc(Select, MRK_FloatMax); 421 else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 422 return InstDesc(Select, MRK_FloatMin); 423 else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 424 return InstDesc(Select, MRK_FloatMax); 425 426 return InstDesc(false, I); 427 } 428 429 RecurrenceDescriptor::InstDesc 430 RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind, 431 InstDesc &Prev, bool HasFunNoNaNAttr) { 432 bool FP = I->getType()->isFloatingPointTy(); 433 Instruction *UAI = Prev.getUnsafeAlgebraInst(); 434 if (!UAI && FP && !I->hasUnsafeAlgebra()) 435 UAI = I; // Found an unsafe (unvectorizable) algebra instruction. 436 437 switch (I->getOpcode()) { 438 default: 439 return InstDesc(false, I); 440 case Instruction::PHI: 441 return InstDesc(I, Prev.getMinMaxKind(), Prev.getUnsafeAlgebraInst()); 442 case Instruction::Sub: 443 case Instruction::Add: 444 return InstDesc(Kind == RK_IntegerAdd, I); 445 case Instruction::Mul: 446 return InstDesc(Kind == RK_IntegerMult, I); 447 case Instruction::And: 448 return InstDesc(Kind == RK_IntegerAnd, I); 449 case Instruction::Or: 450 return InstDesc(Kind == RK_IntegerOr, I); 451 case Instruction::Xor: 452 return InstDesc(Kind == RK_IntegerXor, I); 453 case Instruction::FMul: 454 return InstDesc(Kind == RK_FloatMult, I, UAI); 455 case Instruction::FSub: 456 case Instruction::FAdd: 457 return InstDesc(Kind == RK_FloatAdd, I, UAI); 458 case Instruction::FCmp: 459 case Instruction::ICmp: 460 case Instruction::Select: 461 if (Kind != RK_IntegerMinMax && 462 (!HasFunNoNaNAttr || Kind != RK_FloatMinMax)) 463 return InstDesc(false, I); 464 return isMinMaxSelectCmpPattern(I, Prev); 465 } 466 } 467 468 bool RecurrenceDescriptor::hasMultipleUsesOf( 469 Instruction *I, SmallPtrSetImpl<Instruction *> &Insts) { 470 unsigned NumUses = 0; 471 for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; 472 ++Use) { 473 if (Insts.count(dyn_cast<Instruction>(*Use))) 474 ++NumUses; 475 if (NumUses > 1) 476 return true; 477 } 478 479 return false; 480 } 481 bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop, 482 RecurrenceDescriptor &RedDes) { 483 484 BasicBlock *Header = TheLoop->getHeader(); 485 Function &F = *Header->getParent(); 486 bool HasFunNoNaNAttr = 487 F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true"; 488 489 if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes)) { 490 DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n"); 491 return true; 492 } 493 if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes)) { 494 DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n"); 495 return true; 496 } 497 if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes)) { 498 DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n"); 499 return true; 500 } 501 if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes)) { 502 DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n"); 503 return true; 504 } 505 if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes)) { 506 DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n"); 507 return true; 508 } 509 if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr, 510 RedDes)) { 511 DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n"); 512 return true; 513 } 514 if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes)) { 515 DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n"); 516 return true; 517 } 518 if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes)) { 519 DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n"); 520 return true; 521 } 522 if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes)) { 523 DEBUG(dbgs() << "Found an float MINMAX reduction PHI." << *Phi << "\n"); 524 return true; 525 } 526 // Not a reduction of known type. 527 return false; 528 } 529 530 bool RecurrenceDescriptor::isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop, 531 DominatorTree *DT) { 532 533 // Ensure the phi node is in the loop header and has two incoming values. 534 if (Phi->getParent() != TheLoop->getHeader() || 535 Phi->getNumIncomingValues() != 2) 536 return false; 537 538 // Ensure the loop has a preheader and a single latch block. The loop 539 // vectorizer will need the latch to set up the next iteration of the loop. 540 auto *Preheader = TheLoop->getLoopPreheader(); 541 auto *Latch = TheLoop->getLoopLatch(); 542 if (!Preheader || !Latch) 543 return false; 544 545 // Ensure the phi node's incoming blocks are the loop preheader and latch. 546 if (Phi->getBasicBlockIndex(Preheader) < 0 || 547 Phi->getBasicBlockIndex(Latch) < 0) 548 return false; 549 550 // Get the previous value. The previous value comes from the latch edge while 551 // the initial value comes form the preheader edge. 552 auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch)); 553 if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous)) 554 return false; 555 556 for (User *U : Phi->users()) 557 if (auto *I = dyn_cast<Instruction>(U)) { 558 // Ensure every user of the phi node is dominated by the previous value. 559 // The dominance requirement ensures the loop vectorizer will not need to 560 // vectorize the initial value prior to the first iteration of the loop. 561 if (!DT->dominates(Previous, I)) 562 return false; 563 // When the phi node has users outside the loop, the current logic for 564 // fixFirstOrderRecurrences may generate incorrect code. Specifically, we 565 // extract the last element from the vectorized phi, which would be the 566 // update to the phi before exiting the loop. However, what we want is the 567 // previous phi value before the update (i.e. the second last update 568 // before end of the vectorized loop). 569 // See added test cases in first-order-recurrence.ll 570 if (!TheLoop->contains(I)) 571 return false; 572 } 573 574 return true; 575 } 576 577 /// This function returns the identity element (or neutral element) for 578 /// the operation K. 579 Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurrenceKind K, 580 Type *Tp) { 581 switch (K) { 582 case RK_IntegerXor: 583 case RK_IntegerAdd: 584 case RK_IntegerOr: 585 // Adding, Xoring, Oring zero to a number does not change it. 586 return ConstantInt::get(Tp, 0); 587 case RK_IntegerMult: 588 // Multiplying a number by 1 does not change it. 589 return ConstantInt::get(Tp, 1); 590 case RK_IntegerAnd: 591 // AND-ing a number with an all-1 value does not change it. 592 return ConstantInt::get(Tp, -1, true); 593 case RK_FloatMult: 594 // Multiplying a number by 1 does not change it. 595 return ConstantFP::get(Tp, 1.0L); 596 case RK_FloatAdd: 597 // Adding zero to a number does not change it. 598 return ConstantFP::get(Tp, 0.0L); 599 default: 600 llvm_unreachable("Unknown recurrence kind"); 601 } 602 } 603 604 /// This function translates the recurrence kind to an LLVM binary operator. 605 unsigned RecurrenceDescriptor::getRecurrenceBinOp(RecurrenceKind Kind) { 606 switch (Kind) { 607 case RK_IntegerAdd: 608 return Instruction::Add; 609 case RK_IntegerMult: 610 return Instruction::Mul; 611 case RK_IntegerOr: 612 return Instruction::Or; 613 case RK_IntegerAnd: 614 return Instruction::And; 615 case RK_IntegerXor: 616 return Instruction::Xor; 617 case RK_FloatMult: 618 return Instruction::FMul; 619 case RK_FloatAdd: 620 return Instruction::FAdd; 621 case RK_IntegerMinMax: 622 return Instruction::ICmp; 623 case RK_FloatMinMax: 624 return Instruction::FCmp; 625 default: 626 llvm_unreachable("Unknown recurrence operation"); 627 } 628 } 629 630 Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder, 631 MinMaxRecurrenceKind RK, 632 Value *Left, Value *Right) { 633 CmpInst::Predicate P = CmpInst::ICMP_NE; 634 switch (RK) { 635 default: 636 llvm_unreachable("Unknown min/max recurrence kind"); 637 case MRK_UIntMin: 638 P = CmpInst::ICMP_ULT; 639 break; 640 case MRK_UIntMax: 641 P = CmpInst::ICMP_UGT; 642 break; 643 case MRK_SIntMin: 644 P = CmpInst::ICMP_SLT; 645 break; 646 case MRK_SIntMax: 647 P = CmpInst::ICMP_SGT; 648 break; 649 case MRK_FloatMin: 650 P = CmpInst::FCMP_OLT; 651 break; 652 case MRK_FloatMax: 653 P = CmpInst::FCMP_OGT; 654 break; 655 } 656 657 // We only match FP sequences with unsafe algebra, so we can unconditionally 658 // set it on any generated instructions. 659 IRBuilder<>::FastMathFlagGuard FMFG(Builder); 660 FastMathFlags FMF; 661 FMF.setUnsafeAlgebra(); 662 Builder.setFastMathFlags(FMF); 663 664 Value *Cmp; 665 if (RK == MRK_FloatMin || RK == MRK_FloatMax) 666 Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp"); 667 else 668 Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp"); 669 670 Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select"); 671 return Select; 672 } 673 674 InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K, 675 const SCEV *Step, BinaryOperator *BOp) 676 : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) { 677 assert(IK != IK_NoInduction && "Not an induction"); 678 679 // Start value type should match the induction kind and the value 680 // itself should not be null. 681 assert(StartValue && "StartValue is null"); 682 assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) && 683 "StartValue is not a pointer for pointer induction"); 684 assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) && 685 "StartValue is not an integer for integer induction"); 686 687 // Check the Step Value. It should be non-zero integer value. 688 assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) && 689 "Step value is zero"); 690 691 assert((IK != IK_PtrInduction || getConstIntStepValue()) && 692 "Step value should be constant for pointer induction"); 693 assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) && 694 "StepValue is not an integer"); 695 696 assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) && 697 "StepValue is not FP for FpInduction"); 698 assert((IK != IK_FpInduction || (InductionBinOp && 699 (InductionBinOp->getOpcode() == Instruction::FAdd || 700 InductionBinOp->getOpcode() == Instruction::FSub))) && 701 "Binary opcode should be specified for FP induction"); 702 } 703 704 int InductionDescriptor::getConsecutiveDirection() const { 705 ConstantInt *ConstStep = getConstIntStepValue(); 706 if (ConstStep && (ConstStep->isOne() || ConstStep->isMinusOne())) 707 return ConstStep->getSExtValue(); 708 return 0; 709 } 710 711 ConstantInt *InductionDescriptor::getConstIntStepValue() const { 712 if (isa<SCEVConstant>(Step)) 713 return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue()); 714 return nullptr; 715 } 716 717 Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index, 718 ScalarEvolution *SE, 719 const DataLayout& DL) const { 720 721 SCEVExpander Exp(*SE, DL, "induction"); 722 assert(Index->getType() == Step->getType() && 723 "Index type does not match StepValue type"); 724 switch (IK) { 725 case IK_IntInduction: { 726 assert(Index->getType() == StartValue->getType() && 727 "Index type does not match StartValue type"); 728 729 // FIXME: Theoretically, we can call getAddExpr() of ScalarEvolution 730 // and calculate (Start + Index * Step) for all cases, without 731 // special handling for "isOne" and "isMinusOne". 732 // But in the real life the result code getting worse. We mix SCEV 733 // expressions and ADD/SUB operations and receive redundant 734 // intermediate values being calculated in different ways and 735 // Instcombine is unable to reduce them all. 736 737 if (getConstIntStepValue() && 738 getConstIntStepValue()->isMinusOne()) 739 return B.CreateSub(StartValue, Index); 740 if (getConstIntStepValue() && 741 getConstIntStepValue()->isOne()) 742 return B.CreateAdd(StartValue, Index); 743 const SCEV *S = SE->getAddExpr(SE->getSCEV(StartValue), 744 SE->getMulExpr(Step, SE->getSCEV(Index))); 745 return Exp.expandCodeFor(S, StartValue->getType(), &*B.GetInsertPoint()); 746 } 747 case IK_PtrInduction: { 748 assert(isa<SCEVConstant>(Step) && 749 "Expected constant step for pointer induction"); 750 const SCEV *S = SE->getMulExpr(SE->getSCEV(Index), Step); 751 Index = Exp.expandCodeFor(S, Index->getType(), &*B.GetInsertPoint()); 752 return B.CreateGEP(nullptr, StartValue, Index); 753 } 754 case IK_FpInduction: { 755 assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value"); 756 assert(InductionBinOp && 757 (InductionBinOp->getOpcode() == Instruction::FAdd || 758 InductionBinOp->getOpcode() == Instruction::FSub) && 759 "Original bin op should be defined for FP induction"); 760 761 Value *StepValue = cast<SCEVUnknown>(Step)->getValue(); 762 763 // Floating point operations had to be 'fast' to enable the induction. 764 FastMathFlags Flags; 765 Flags.setUnsafeAlgebra(); 766 767 Value *MulExp = B.CreateFMul(StepValue, Index); 768 if (isa<Instruction>(MulExp)) 769 // We have to check, the MulExp may be a constant. 770 cast<Instruction>(MulExp)->setFastMathFlags(Flags); 771 772 Value *BOp = B.CreateBinOp(InductionBinOp->getOpcode() , StartValue, 773 MulExp, "induction"); 774 if (isa<Instruction>(BOp)) 775 cast<Instruction>(BOp)->setFastMathFlags(Flags); 776 777 return BOp; 778 } 779 case IK_NoInduction: 780 return nullptr; 781 } 782 llvm_unreachable("invalid enum"); 783 } 784 785 bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop, 786 ScalarEvolution *SE, 787 InductionDescriptor &D) { 788 789 // Here we only handle FP induction variables. 790 assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type"); 791 792 if (TheLoop->getHeader() != Phi->getParent()) 793 return false; 794 795 // The loop may have multiple entrances or multiple exits; we can analyze 796 // this phi if it has a unique entry value and a unique backedge value. 797 if (Phi->getNumIncomingValues() != 2) 798 return false; 799 Value *BEValue = nullptr, *StartValue = nullptr; 800 if (TheLoop->contains(Phi->getIncomingBlock(0))) { 801 BEValue = Phi->getIncomingValue(0); 802 StartValue = Phi->getIncomingValue(1); 803 } else { 804 assert(TheLoop->contains(Phi->getIncomingBlock(1)) && 805 "Unexpected Phi node in the loop"); 806 BEValue = Phi->getIncomingValue(1); 807 StartValue = Phi->getIncomingValue(0); 808 } 809 810 BinaryOperator *BOp = dyn_cast<BinaryOperator>(BEValue); 811 if (!BOp) 812 return false; 813 814 Value *Addend = nullptr; 815 if (BOp->getOpcode() == Instruction::FAdd) { 816 if (BOp->getOperand(0) == Phi) 817 Addend = BOp->getOperand(1); 818 else if (BOp->getOperand(1) == Phi) 819 Addend = BOp->getOperand(0); 820 } else if (BOp->getOpcode() == Instruction::FSub) 821 if (BOp->getOperand(0) == Phi) 822 Addend = BOp->getOperand(1); 823 824 if (!Addend) 825 return false; 826 827 // The addend should be loop invariant 828 if (auto *I = dyn_cast<Instruction>(Addend)) 829 if (TheLoop->contains(I)) 830 return false; 831 832 // FP Step has unknown SCEV 833 const SCEV *Step = SE->getUnknown(Addend); 834 D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp); 835 return true; 836 } 837 838 bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop, 839 PredicatedScalarEvolution &PSE, 840 InductionDescriptor &D, 841 bool Assume) { 842 Type *PhiTy = Phi->getType(); 843 844 // Handle integer and pointer inductions variables. 845 // Now we handle also FP induction but not trying to make a 846 // recurrent expression from the PHI node in-place. 847 848 if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() && 849 !PhiTy->isFloatTy() && !PhiTy->isDoubleTy() && !PhiTy->isHalfTy()) 850 return false; 851 852 if (PhiTy->isFloatingPointTy()) 853 return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D); 854 855 const SCEV *PhiScev = PSE.getSCEV(Phi); 856 const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev); 857 858 // We need this expression to be an AddRecExpr. 859 if (Assume && !AR) 860 AR = PSE.getAsAddRec(Phi); 861 862 if (!AR) { 863 DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); 864 return false; 865 } 866 867 return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR); 868 } 869 870 bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop, 871 ScalarEvolution *SE, 872 InductionDescriptor &D, 873 const SCEV *Expr) { 874 Type *PhiTy = Phi->getType(); 875 // We only handle integer and pointer inductions variables. 876 if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy()) 877 return false; 878 879 // Check that the PHI is consecutive. 880 const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi); 881 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev); 882 883 if (!AR) { 884 DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); 885 return false; 886 } 887 888 if (AR->getLoop() != TheLoop) { 889 // FIXME: We should treat this as a uniform. Unfortunately, we 890 // don't currently know how to handled uniform PHIs. 891 DEBUG(dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n"); 892 return false; 893 } 894 895 Value *StartValue = 896 Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader()); 897 const SCEV *Step = AR->getStepRecurrence(*SE); 898 // Calculate the pointer stride and check if it is consecutive. 899 // The stride may be a constant or a loop invariant integer value. 900 const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step); 901 if (!ConstStep && !SE->isLoopInvariant(Step, TheLoop)) 902 return false; 903 904 if (PhiTy->isIntegerTy()) { 905 D = InductionDescriptor(StartValue, IK_IntInduction, Step); 906 return true; 907 } 908 909 assert(PhiTy->isPointerTy() && "The PHI must be a pointer"); 910 // Pointer induction should be a constant. 911 if (!ConstStep) 912 return false; 913 914 ConstantInt *CV = ConstStep->getValue(); 915 Type *PointerElementType = PhiTy->getPointerElementType(); 916 // The pointer stride cannot be determined if the pointer element type is not 917 // sized. 918 if (!PointerElementType->isSized()) 919 return false; 920 921 const DataLayout &DL = Phi->getModule()->getDataLayout(); 922 int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType)); 923 if (!Size) 924 return false; 925 926 int64_t CVSize = CV->getSExtValue(); 927 if (CVSize % Size) 928 return false; 929 auto *StepValue = SE->getConstant(CV->getType(), CVSize / Size, 930 true /* signed */); 931 D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue); 932 return true; 933 } 934 935 /// \brief Returns the instructions that use values defined in the loop. 936 SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) { 937 SmallVector<Instruction *, 8> UsedOutside; 938 939 for (auto *Block : L->getBlocks()) 940 // FIXME: I believe that this could use copy_if if the Inst reference could 941 // be adapted into a pointer. 942 for (auto &Inst : *Block) { 943 auto Users = Inst.users(); 944 if (any_of(Users, [&](User *U) { 945 auto *Use = cast<Instruction>(U); 946 return !L->contains(Use->getParent()); 947 })) 948 UsedOutside.push_back(&Inst); 949 } 950 951 return UsedOutside; 952 } 953 954 void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) { 955 // By definition, all loop passes need the LoopInfo analysis and the 956 // Dominator tree it depends on. Because they all participate in the loop 957 // pass manager, they must also preserve these. 958 AU.addRequired<DominatorTreeWrapperPass>(); 959 AU.addPreserved<DominatorTreeWrapperPass>(); 960 AU.addRequired<LoopInfoWrapperPass>(); 961 AU.addPreserved<LoopInfoWrapperPass>(); 962 963 // We must also preserve LoopSimplify and LCSSA. We locally access their IDs 964 // here because users shouldn't directly get them from this header. 965 extern char &LoopSimplifyID; 966 extern char &LCSSAID; 967 AU.addRequiredID(LoopSimplifyID); 968 AU.addPreservedID(LoopSimplifyID); 969 AU.addRequiredID(LCSSAID); 970 AU.addPreservedID(LCSSAID); 971 // This is used in the LPPassManager to perform LCSSA verification on passes 972 // which preserve lcssa form 973 AU.addRequired<LCSSAVerificationPass>(); 974 AU.addPreserved<LCSSAVerificationPass>(); 975 976 // Loop passes are designed to run inside of a loop pass manager which means 977 // that any function analyses they require must be required by the first loop 978 // pass in the manager (so that it is computed before the loop pass manager 979 // runs) and preserved by all loop pasess in the manager. To make this 980 // reasonably robust, the set needed for most loop passes is maintained here. 981 // If your loop pass requires an analysis not listed here, you will need to 982 // carefully audit the loop pass manager nesting structure that results. 983 AU.addRequired<AAResultsWrapperPass>(); 984 AU.addPreserved<AAResultsWrapperPass>(); 985 AU.addPreserved<BasicAAWrapperPass>(); 986 AU.addPreserved<GlobalsAAWrapperPass>(); 987 AU.addPreserved<SCEVAAWrapperPass>(); 988 AU.addRequired<ScalarEvolutionWrapperPass>(); 989 AU.addPreserved<ScalarEvolutionWrapperPass>(); 990 } 991 992 /// Manually defined generic "LoopPass" dependency initialization. This is used 993 /// to initialize the exact set of passes from above in \c 994 /// getLoopAnalysisUsage. It can be used within a loop pass's initialization 995 /// with: 996 /// 997 /// INITIALIZE_PASS_DEPENDENCY(LoopPass) 998 /// 999 /// As-if "LoopPass" were a pass. 1000 void llvm::initializeLoopPassPass(PassRegistry &Registry) { 1001 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 1002 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 1003 INITIALIZE_PASS_DEPENDENCY(LoopSimplify) 1004 INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass) 1005 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 1006 INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass) 1007 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) 1008 INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass) 1009 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 1010 } 1011 1012 /// \brief Find string metadata for loop 1013 /// 1014 /// If it has a value (e.g. {"llvm.distribute", 1} return the value as an 1015 /// operand or null otherwise. If the string metadata is not found return 1016 /// Optional's not-a-value. 1017 Optional<const MDOperand *> llvm::findStringMetadataForLoop(Loop *TheLoop, 1018 StringRef Name) { 1019 MDNode *LoopID = TheLoop->getLoopID(); 1020 // Return none if LoopID is false. 1021 if (!LoopID) 1022 return None; 1023 1024 // First operand should refer to the loop id itself. 1025 assert(LoopID->getNumOperands() > 0 && "requires at least one operand"); 1026 assert(LoopID->getOperand(0) == LoopID && "invalid loop id"); 1027 1028 // Iterate over LoopID operands and look for MDString Metadata 1029 for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) { 1030 MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i)); 1031 if (!MD) 1032 continue; 1033 MDString *S = dyn_cast<MDString>(MD->getOperand(0)); 1034 if (!S) 1035 continue; 1036 // Return true if MDString holds expected MetaData. 1037 if (Name.equals(S->getString())) 1038 switch (MD->getNumOperands()) { 1039 case 1: 1040 return nullptr; 1041 case 2: 1042 return &MD->getOperand(1); 1043 default: 1044 llvm_unreachable("loop metadata has 0 or 1 operand"); 1045 } 1046 } 1047 return None; 1048 } 1049 1050 /// Returns true if the instruction in a loop is guaranteed to execute at least 1051 /// once. 1052 bool llvm::isGuaranteedToExecute(const Instruction &Inst, 1053 const DominatorTree *DT, const Loop *CurLoop, 1054 const LoopSafetyInfo *SafetyInfo) { 1055 // We have to check to make sure that the instruction dominates all 1056 // of the exit blocks. If it doesn't, then there is a path out of the loop 1057 // which does not execute this instruction, so we can't hoist it. 1058 1059 // If the instruction is in the header block for the loop (which is very 1060 // common), it is always guaranteed to dominate the exit blocks. Since this 1061 // is a common case, and can save some work, check it now. 1062 if (Inst.getParent() == CurLoop->getHeader()) 1063 // If there's a throw in the header block, we can't guarantee we'll reach 1064 // Inst. 1065 return !SafetyInfo->HeaderMayThrow; 1066 1067 // Somewhere in this loop there is an instruction which may throw and make us 1068 // exit the loop. 1069 if (SafetyInfo->MayThrow) 1070 return false; 1071 1072 // Get the exit blocks for the current loop. 1073 SmallVector<BasicBlock *, 8> ExitBlocks; 1074 CurLoop->getExitBlocks(ExitBlocks); 1075 1076 // Verify that the block dominates each of the exit blocks of the loop. 1077 for (BasicBlock *ExitBlock : ExitBlocks) 1078 if (!DT->dominates(Inst.getParent(), ExitBlock)) 1079 return false; 1080 1081 // As a degenerate case, if the loop is statically infinite then we haven't 1082 // proven anything since there are no exit blocks. 1083 if (ExitBlocks.empty()) 1084 return false; 1085 1086 // FIXME: In general, we have to prove that the loop isn't an infinite loop. 1087 // See http::llvm.org/PR24078 . (The "ExitBlocks.empty()" check above is 1088 // just a special case of this.) 1089 return true; 1090 } 1091 1092 Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) { 1093 // Only support loops with a unique exiting block, and a latch. 1094 if (!L->getExitingBlock()) 1095 return None; 1096 1097 // Get the branch weights for the the loop's backedge. 1098 BranchInst *LatchBR = 1099 dyn_cast<BranchInst>(L->getLoopLatch()->getTerminator()); 1100 if (!LatchBR || LatchBR->getNumSuccessors() != 2) 1101 return None; 1102 1103 assert((LatchBR->getSuccessor(0) == L->getHeader() || 1104 LatchBR->getSuccessor(1) == L->getHeader()) && 1105 "At least one edge out of the latch must go to the header"); 1106 1107 // To estimate the number of times the loop body was executed, we want to 1108 // know the number of times the backedge was taken, vs. the number of times 1109 // we exited the loop. 1110 uint64_t TrueVal, FalseVal; 1111 if (!LatchBR->extractProfMetadata(TrueVal, FalseVal)) 1112 return None; 1113 1114 if (!TrueVal || !FalseVal) 1115 return 0; 1116 1117 // Divide the count of the backedge by the count of the edge exiting the loop, 1118 // rounding to nearest. 1119 if (LatchBR->getSuccessor(0) == L->getHeader()) 1120 return (TrueVal + (FalseVal / 2)) / FalseVal; 1121 else 1122 return (FalseVal + (TrueVal / 2)) / TrueVal; 1123 } 1124