1 //===-- LoopUtils.cpp - Loop Utility functions -------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines common loop utility functions. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Transforms/Utils/LoopUtils.h" 15 #include "llvm/ADT/ScopeExit.h" 16 #include "llvm/Analysis/AliasAnalysis.h" 17 #include "llvm/Analysis/BasicAliasAnalysis.h" 18 #include "llvm/Analysis/GlobalsModRef.h" 19 #include "llvm/Analysis/LoopInfo.h" 20 #include "llvm/Analysis/LoopPass.h" 21 #include "llvm/Analysis/ScalarEvolution.h" 22 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" 23 #include "llvm/Analysis/ScalarEvolutionExpander.h" 24 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 25 #include "llvm/Analysis/TargetTransformInfo.h" 26 #include "llvm/IR/Dominators.h" 27 #include "llvm/IR/Instructions.h" 28 #include "llvm/IR/Module.h" 29 #include "llvm/IR/PatternMatch.h" 30 #include "llvm/IR/ValueHandle.h" 31 #include "llvm/Pass.h" 32 #include "llvm/Support/Debug.h" 33 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 34 35 using namespace llvm; 36 using namespace llvm::PatternMatch; 37 38 #define DEBUG_TYPE "loop-utils" 39 40 bool RecurrenceDescriptor::areAllUsesIn(Instruction *I, 41 SmallPtrSetImpl<Instruction *> &Set) { 42 for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use) 43 if (!Set.count(dyn_cast<Instruction>(*Use))) 44 return false; 45 return true; 46 } 47 48 bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurrenceKind Kind) { 49 switch (Kind) { 50 default: 51 break; 52 case RK_IntegerAdd: 53 case RK_IntegerMult: 54 case RK_IntegerOr: 55 case RK_IntegerAnd: 56 case RK_IntegerXor: 57 case RK_IntegerMinMax: 58 return true; 59 } 60 return false; 61 } 62 63 bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurrenceKind Kind) { 64 return (Kind != RK_NoRecurrence) && !isIntegerRecurrenceKind(Kind); 65 } 66 67 bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurrenceKind Kind) { 68 switch (Kind) { 69 default: 70 break; 71 case RK_IntegerAdd: 72 case RK_IntegerMult: 73 case RK_FloatAdd: 74 case RK_FloatMult: 75 return true; 76 } 77 return false; 78 } 79 80 Instruction * 81 RecurrenceDescriptor::lookThroughAnd(PHINode *Phi, Type *&RT, 82 SmallPtrSetImpl<Instruction *> &Visited, 83 SmallPtrSetImpl<Instruction *> &CI) { 84 if (!Phi->hasOneUse()) 85 return Phi; 86 87 const APInt *M = nullptr; 88 Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser()); 89 90 // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT 91 // with a new integer type of the corresponding bit width. 92 if (match(J, m_c_And(m_Instruction(I), m_APInt(M)))) { 93 int32_t Bits = (*M + 1).exactLogBase2(); 94 if (Bits > 0) { 95 RT = IntegerType::get(Phi->getContext(), Bits); 96 Visited.insert(Phi); 97 CI.insert(J); 98 return J; 99 } 100 } 101 return Phi; 102 } 103 104 bool RecurrenceDescriptor::getSourceExtensionKind( 105 Instruction *Start, Instruction *Exit, Type *RT, bool &IsSigned, 106 SmallPtrSetImpl<Instruction *> &Visited, 107 SmallPtrSetImpl<Instruction *> &CI) { 108 109 SmallVector<Instruction *, 8> Worklist; 110 bool FoundOneOperand = false; 111 unsigned DstSize = RT->getPrimitiveSizeInBits(); 112 Worklist.push_back(Exit); 113 114 // Traverse the instructions in the reduction expression, beginning with the 115 // exit value. 116 while (!Worklist.empty()) { 117 Instruction *I = Worklist.pop_back_val(); 118 for (Use &U : I->operands()) { 119 120 // Terminate the traversal if the operand is not an instruction, or we 121 // reach the starting value. 122 Instruction *J = dyn_cast<Instruction>(U.get()); 123 if (!J || J == Start) 124 continue; 125 126 // Otherwise, investigate the operation if it is also in the expression. 127 if (Visited.count(J)) { 128 Worklist.push_back(J); 129 continue; 130 } 131 132 // If the operand is not in Visited, it is not a reduction operation, but 133 // it does feed into one. Make sure it is either a single-use sign- or 134 // zero-extend instruction. 135 CastInst *Cast = dyn_cast<CastInst>(J); 136 bool IsSExtInst = isa<SExtInst>(J); 137 if (!Cast || !Cast->hasOneUse() || !(isa<ZExtInst>(J) || IsSExtInst)) 138 return false; 139 140 // Ensure the source type of the extend is no larger than the reduction 141 // type. It is not necessary for the types to be identical. 142 unsigned SrcSize = Cast->getSrcTy()->getPrimitiveSizeInBits(); 143 if (SrcSize > DstSize) 144 return false; 145 146 // Furthermore, ensure that all such extends are of the same kind. 147 if (FoundOneOperand) { 148 if (IsSigned != IsSExtInst) 149 return false; 150 } else { 151 FoundOneOperand = true; 152 IsSigned = IsSExtInst; 153 } 154 155 // Lastly, if the source type of the extend matches the reduction type, 156 // add the extend to CI so that we can avoid accounting for it in the 157 // cost model. 158 if (SrcSize == DstSize) 159 CI.insert(Cast); 160 } 161 } 162 return true; 163 } 164 165 bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, 166 Loop *TheLoop, bool HasFunNoNaNAttr, 167 RecurrenceDescriptor &RedDes) { 168 if (Phi->getNumIncomingValues() != 2) 169 return false; 170 171 // Reduction variables are only found in the loop header block. 172 if (Phi->getParent() != TheLoop->getHeader()) 173 return false; 174 175 // Obtain the reduction start value from the value that comes from the loop 176 // preheader. 177 Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader()); 178 179 // ExitInstruction is the single value which is used outside the loop. 180 // We only allow for a single reduction value to be used outside the loop. 181 // This includes users of the reduction, variables (which form a cycle 182 // which ends in the phi node). 183 Instruction *ExitInstruction = nullptr; 184 // Indicates that we found a reduction operation in our scan. 185 bool FoundReduxOp = false; 186 187 // We start with the PHI node and scan for all of the users of this 188 // instruction. All users must be instructions that can be used as reduction 189 // variables (such as ADD). We must have a single out-of-block user. The cycle 190 // must include the original PHI. 191 bool FoundStartPHI = false; 192 193 // To recognize min/max patterns formed by a icmp select sequence, we store 194 // the number of instruction we saw from the recognized min/max pattern, 195 // to make sure we only see exactly the two instructions. 196 unsigned NumCmpSelectPatternInst = 0; 197 InstDesc ReduxDesc(false, nullptr); 198 199 // Data used for determining if the recurrence has been type-promoted. 200 Type *RecurrenceType = Phi->getType(); 201 SmallPtrSet<Instruction *, 4> CastInsts; 202 Instruction *Start = Phi; 203 bool IsSigned = false; 204 205 SmallPtrSet<Instruction *, 8> VisitedInsts; 206 SmallVector<Instruction *, 8> Worklist; 207 208 // Return early if the recurrence kind does not match the type of Phi. If the 209 // recurrence kind is arithmetic, we attempt to look through AND operations 210 // resulting from the type promotion performed by InstCombine. Vector 211 // operations are not limited to the legal integer widths, so we may be able 212 // to evaluate the reduction in the narrower width. 213 if (RecurrenceType->isFloatingPointTy()) { 214 if (!isFloatingPointRecurrenceKind(Kind)) 215 return false; 216 } else { 217 if (!isIntegerRecurrenceKind(Kind)) 218 return false; 219 if (isArithmeticRecurrenceKind(Kind)) 220 Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts); 221 } 222 223 Worklist.push_back(Start); 224 VisitedInsts.insert(Start); 225 226 // A value in the reduction can be used: 227 // - By the reduction: 228 // - Reduction operation: 229 // - One use of reduction value (safe). 230 // - Multiple use of reduction value (not safe). 231 // - PHI: 232 // - All uses of the PHI must be the reduction (safe). 233 // - Otherwise, not safe. 234 // - By instructions outside of the loop (safe). 235 // * One value may have several outside users, but all outside 236 // uses must be of the same value. 237 // - By an instruction that is not part of the reduction (not safe). 238 // This is either: 239 // * An instruction type other than PHI or the reduction operation. 240 // * A PHI in the header other than the initial PHI. 241 while (!Worklist.empty()) { 242 Instruction *Cur = Worklist.back(); 243 Worklist.pop_back(); 244 245 // No Users. 246 // If the instruction has no users then this is a broken chain and can't be 247 // a reduction variable. 248 if (Cur->use_empty()) 249 return false; 250 251 bool IsAPhi = isa<PHINode>(Cur); 252 253 // A header PHI use other than the original PHI. 254 if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent()) 255 return false; 256 257 // Reductions of instructions such as Div, and Sub is only possible if the 258 // LHS is the reduction variable. 259 if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) && 260 !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) && 261 !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0)))) 262 return false; 263 264 // Any reduction instruction must be of one of the allowed kinds. We ignore 265 // the starting value (the Phi or an AND instruction if the Phi has been 266 // type-promoted). 267 if (Cur != Start) { 268 ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr); 269 if (!ReduxDesc.isRecurrence()) 270 return false; 271 } 272 273 // A reduction operation must only have one use of the reduction value. 274 if (!IsAPhi && Kind != RK_IntegerMinMax && Kind != RK_FloatMinMax && 275 hasMultipleUsesOf(Cur, VisitedInsts)) 276 return false; 277 278 // All inputs to a PHI node must be a reduction value. 279 if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts)) 280 return false; 281 282 if (Kind == RK_IntegerMinMax && 283 (isa<ICmpInst>(Cur) || isa<SelectInst>(Cur))) 284 ++NumCmpSelectPatternInst; 285 if (Kind == RK_FloatMinMax && (isa<FCmpInst>(Cur) || isa<SelectInst>(Cur))) 286 ++NumCmpSelectPatternInst; 287 288 // Check whether we found a reduction operator. 289 FoundReduxOp |= !IsAPhi && Cur != Start; 290 291 // Process users of current instruction. Push non-PHI nodes after PHI nodes 292 // onto the stack. This way we are going to have seen all inputs to PHI 293 // nodes once we get to them. 294 SmallVector<Instruction *, 8> NonPHIs; 295 SmallVector<Instruction *, 8> PHIs; 296 for (User *U : Cur->users()) { 297 Instruction *UI = cast<Instruction>(U); 298 299 // Check if we found the exit user. 300 BasicBlock *Parent = UI->getParent(); 301 if (!TheLoop->contains(Parent)) { 302 // If we already know this instruction is used externally, move on to 303 // the next user. 304 if (ExitInstruction == Cur) 305 continue; 306 307 // Exit if you find multiple values used outside or if the header phi 308 // node is being used. In this case the user uses the value of the 309 // previous iteration, in which case we would loose "VF-1" iterations of 310 // the reduction operation if we vectorize. 311 if (ExitInstruction != nullptr || Cur == Phi) 312 return false; 313 314 // The instruction used by an outside user must be the last instruction 315 // before we feed back to the reduction phi. Otherwise, we loose VF-1 316 // operations on the value. 317 if (!is_contained(Phi->operands(), Cur)) 318 return false; 319 320 ExitInstruction = Cur; 321 continue; 322 } 323 324 // Process instructions only once (termination). Each reduction cycle 325 // value must only be used once, except by phi nodes and min/max 326 // reductions which are represented as a cmp followed by a select. 327 InstDesc IgnoredVal(false, nullptr); 328 if (VisitedInsts.insert(UI).second) { 329 if (isa<PHINode>(UI)) 330 PHIs.push_back(UI); 331 else 332 NonPHIs.push_back(UI); 333 } else if (!isa<PHINode>(UI) && 334 ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) && 335 !isa<SelectInst>(UI)) || 336 !isMinMaxSelectCmpPattern(UI, IgnoredVal).isRecurrence())) 337 return false; 338 339 // Remember that we completed the cycle. 340 if (UI == Phi) 341 FoundStartPHI = true; 342 } 343 Worklist.append(PHIs.begin(), PHIs.end()); 344 Worklist.append(NonPHIs.begin(), NonPHIs.end()); 345 } 346 347 // This means we have seen one but not the other instruction of the 348 // pattern or more than just a select and cmp. 349 if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) && 350 NumCmpSelectPatternInst != 2) 351 return false; 352 353 if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction) 354 return false; 355 356 // If we think Phi may have been type-promoted, we also need to ensure that 357 // all source operands of the reduction are either SExtInsts or ZEstInsts. If 358 // so, we will be able to evaluate the reduction in the narrower bit width. 359 if (Start != Phi) 360 if (!getSourceExtensionKind(Start, ExitInstruction, RecurrenceType, 361 IsSigned, VisitedInsts, CastInsts)) 362 return false; 363 364 // We found a reduction var if we have reached the original phi node and we 365 // only have a single instruction with out-of-loop users. 366 367 // The ExitInstruction(Instruction which is allowed to have out-of-loop users) 368 // is saved as part of the RecurrenceDescriptor. 369 370 // Save the description of this reduction variable. 371 RecurrenceDescriptor RD( 372 RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(), 373 ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts); 374 RedDes = RD; 375 376 return true; 377 } 378 379 /// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction 380 /// pattern corresponding to a min(X, Y) or max(X, Y). 381 RecurrenceDescriptor::InstDesc 382 RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev) { 383 384 assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) && 385 "Expect a select instruction"); 386 Instruction *Cmp = nullptr; 387 SelectInst *Select = nullptr; 388 389 // We must handle the select(cmp()) as a single instruction. Advance to the 390 // select. 391 if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) { 392 if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->user_begin()))) 393 return InstDesc(false, I); 394 return InstDesc(Select, Prev.getMinMaxKind()); 395 } 396 397 // Only handle single use cases for now. 398 if (!(Select = dyn_cast<SelectInst>(I))) 399 return InstDesc(false, I); 400 if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) && 401 !(Cmp = dyn_cast<FCmpInst>(I->getOperand(0)))) 402 return InstDesc(false, I); 403 if (!Cmp->hasOneUse()) 404 return InstDesc(false, I); 405 406 Value *CmpLeft; 407 Value *CmpRight; 408 409 // Look for a min/max pattern. 410 if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 411 return InstDesc(Select, MRK_UIntMin); 412 else if (m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 413 return InstDesc(Select, MRK_UIntMax); 414 else if (m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 415 return InstDesc(Select, MRK_SIntMax); 416 else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 417 return InstDesc(Select, MRK_SIntMin); 418 else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 419 return InstDesc(Select, MRK_FloatMin); 420 else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 421 return InstDesc(Select, MRK_FloatMax); 422 else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 423 return InstDesc(Select, MRK_FloatMin); 424 else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 425 return InstDesc(Select, MRK_FloatMax); 426 427 return InstDesc(false, I); 428 } 429 430 RecurrenceDescriptor::InstDesc 431 RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind, 432 InstDesc &Prev, bool HasFunNoNaNAttr) { 433 bool FP = I->getType()->isFloatingPointTy(); 434 Instruction *UAI = Prev.getUnsafeAlgebraInst(); 435 if (!UAI && FP && !I->isFast()) 436 UAI = I; // Found an unsafe (unvectorizable) algebra instruction. 437 438 switch (I->getOpcode()) { 439 default: 440 return InstDesc(false, I); 441 case Instruction::PHI: 442 return InstDesc(I, Prev.getMinMaxKind(), Prev.getUnsafeAlgebraInst()); 443 case Instruction::Sub: 444 case Instruction::Add: 445 return InstDesc(Kind == RK_IntegerAdd, I); 446 case Instruction::Mul: 447 return InstDesc(Kind == RK_IntegerMult, I); 448 case Instruction::And: 449 return InstDesc(Kind == RK_IntegerAnd, I); 450 case Instruction::Or: 451 return InstDesc(Kind == RK_IntegerOr, I); 452 case Instruction::Xor: 453 return InstDesc(Kind == RK_IntegerXor, I); 454 case Instruction::FMul: 455 return InstDesc(Kind == RK_FloatMult, I, UAI); 456 case Instruction::FSub: 457 case Instruction::FAdd: 458 return InstDesc(Kind == RK_FloatAdd, I, UAI); 459 case Instruction::FCmp: 460 case Instruction::ICmp: 461 case Instruction::Select: 462 if (Kind != RK_IntegerMinMax && 463 (!HasFunNoNaNAttr || Kind != RK_FloatMinMax)) 464 return InstDesc(false, I); 465 return isMinMaxSelectCmpPattern(I, Prev); 466 } 467 } 468 469 bool RecurrenceDescriptor::hasMultipleUsesOf( 470 Instruction *I, SmallPtrSetImpl<Instruction *> &Insts) { 471 unsigned NumUses = 0; 472 for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; 473 ++Use) { 474 if (Insts.count(dyn_cast<Instruction>(*Use))) 475 ++NumUses; 476 if (NumUses > 1) 477 return true; 478 } 479 480 return false; 481 } 482 bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop, 483 RecurrenceDescriptor &RedDes) { 484 485 BasicBlock *Header = TheLoop->getHeader(); 486 Function &F = *Header->getParent(); 487 bool HasFunNoNaNAttr = 488 F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true"; 489 490 if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes)) { 491 DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n"); 492 return true; 493 } 494 if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes)) { 495 DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n"); 496 return true; 497 } 498 if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes)) { 499 DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n"); 500 return true; 501 } 502 if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes)) { 503 DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n"); 504 return true; 505 } 506 if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes)) { 507 DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n"); 508 return true; 509 } 510 if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr, 511 RedDes)) { 512 DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n"); 513 return true; 514 } 515 if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes)) { 516 DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n"); 517 return true; 518 } 519 if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes)) { 520 DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n"); 521 return true; 522 } 523 if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes)) { 524 DEBUG(dbgs() << "Found an float MINMAX reduction PHI." << *Phi << "\n"); 525 return true; 526 } 527 // Not a reduction of known type. 528 return false; 529 } 530 531 bool RecurrenceDescriptor::isFirstOrderRecurrence( 532 PHINode *Phi, Loop *TheLoop, 533 DenseMap<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT) { 534 535 // Ensure the phi node is in the loop header and has two incoming values. 536 if (Phi->getParent() != TheLoop->getHeader() || 537 Phi->getNumIncomingValues() != 2) 538 return false; 539 540 // Ensure the loop has a preheader and a single latch block. The loop 541 // vectorizer will need the latch to set up the next iteration of the loop. 542 auto *Preheader = TheLoop->getLoopPreheader(); 543 auto *Latch = TheLoop->getLoopLatch(); 544 if (!Preheader || !Latch) 545 return false; 546 547 // Ensure the phi node's incoming blocks are the loop preheader and latch. 548 if (Phi->getBasicBlockIndex(Preheader) < 0 || 549 Phi->getBasicBlockIndex(Latch) < 0) 550 return false; 551 552 // Get the previous value. The previous value comes from the latch edge while 553 // the initial value comes form the preheader edge. 554 auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch)); 555 if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous) || 556 SinkAfter.count(Previous)) // Cannot rely on dominance due to motion. 557 return false; 558 559 // Ensure every user of the phi node is dominated by the previous value. 560 // The dominance requirement ensures the loop vectorizer will not need to 561 // vectorize the initial value prior to the first iteration of the loop. 562 // TODO: Consider extending this sinking to handle other kinds of instructions 563 // and expressions, beyond sinking a single cast past Previous. 564 if (Phi->hasOneUse()) { 565 auto *I = Phi->user_back(); 566 if (I->isCast() && (I->getParent() == Phi->getParent()) && I->hasOneUse() && 567 DT->dominates(Previous, I->user_back())) { 568 if (!DT->dominates(Previous, I)) // Otherwise we're good w/o sinking. 569 SinkAfter[I] = Previous; 570 return true; 571 } 572 } 573 574 for (User *U : Phi->users()) 575 if (auto *I = dyn_cast<Instruction>(U)) { 576 if (!DT->dominates(Previous, I)) 577 return false; 578 } 579 580 return true; 581 } 582 583 /// This function returns the identity element (or neutral element) for 584 /// the operation K. 585 Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurrenceKind K, 586 Type *Tp) { 587 switch (K) { 588 case RK_IntegerXor: 589 case RK_IntegerAdd: 590 case RK_IntegerOr: 591 // Adding, Xoring, Oring zero to a number does not change it. 592 return ConstantInt::get(Tp, 0); 593 case RK_IntegerMult: 594 // Multiplying a number by 1 does not change it. 595 return ConstantInt::get(Tp, 1); 596 case RK_IntegerAnd: 597 // AND-ing a number with an all-1 value does not change it. 598 return ConstantInt::get(Tp, -1, true); 599 case RK_FloatMult: 600 // Multiplying a number by 1 does not change it. 601 return ConstantFP::get(Tp, 1.0L); 602 case RK_FloatAdd: 603 // Adding zero to a number does not change it. 604 return ConstantFP::get(Tp, 0.0L); 605 default: 606 llvm_unreachable("Unknown recurrence kind"); 607 } 608 } 609 610 /// This function translates the recurrence kind to an LLVM binary operator. 611 unsigned RecurrenceDescriptor::getRecurrenceBinOp(RecurrenceKind Kind) { 612 switch (Kind) { 613 case RK_IntegerAdd: 614 return Instruction::Add; 615 case RK_IntegerMult: 616 return Instruction::Mul; 617 case RK_IntegerOr: 618 return Instruction::Or; 619 case RK_IntegerAnd: 620 return Instruction::And; 621 case RK_IntegerXor: 622 return Instruction::Xor; 623 case RK_FloatMult: 624 return Instruction::FMul; 625 case RK_FloatAdd: 626 return Instruction::FAdd; 627 case RK_IntegerMinMax: 628 return Instruction::ICmp; 629 case RK_FloatMinMax: 630 return Instruction::FCmp; 631 default: 632 llvm_unreachable("Unknown recurrence operation"); 633 } 634 } 635 636 Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder, 637 MinMaxRecurrenceKind RK, 638 Value *Left, Value *Right) { 639 CmpInst::Predicate P = CmpInst::ICMP_NE; 640 switch (RK) { 641 default: 642 llvm_unreachable("Unknown min/max recurrence kind"); 643 case MRK_UIntMin: 644 P = CmpInst::ICMP_ULT; 645 break; 646 case MRK_UIntMax: 647 P = CmpInst::ICMP_UGT; 648 break; 649 case MRK_SIntMin: 650 P = CmpInst::ICMP_SLT; 651 break; 652 case MRK_SIntMax: 653 P = CmpInst::ICMP_SGT; 654 break; 655 case MRK_FloatMin: 656 P = CmpInst::FCMP_OLT; 657 break; 658 case MRK_FloatMax: 659 P = CmpInst::FCMP_OGT; 660 break; 661 } 662 663 // We only match FP sequences that are 'fast', so we can unconditionally 664 // set it on any generated instructions. 665 IRBuilder<>::FastMathFlagGuard FMFG(Builder); 666 FastMathFlags FMF; 667 FMF.setFast(); 668 Builder.setFastMathFlags(FMF); 669 670 Value *Cmp; 671 if (RK == MRK_FloatMin || RK == MRK_FloatMax) 672 Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp"); 673 else 674 Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp"); 675 676 Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select"); 677 return Select; 678 } 679 680 InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K, 681 const SCEV *Step, BinaryOperator *BOp, 682 SmallVectorImpl<Instruction *> *Casts) 683 : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) { 684 assert(IK != IK_NoInduction && "Not an induction"); 685 686 // Start value type should match the induction kind and the value 687 // itself should not be null. 688 assert(StartValue && "StartValue is null"); 689 assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) && 690 "StartValue is not a pointer for pointer induction"); 691 assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) && 692 "StartValue is not an integer for integer induction"); 693 694 // Check the Step Value. It should be non-zero integer value. 695 assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) && 696 "Step value is zero"); 697 698 assert((IK != IK_PtrInduction || getConstIntStepValue()) && 699 "Step value should be constant for pointer induction"); 700 assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) && 701 "StepValue is not an integer"); 702 703 assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) && 704 "StepValue is not FP for FpInduction"); 705 assert((IK != IK_FpInduction || (InductionBinOp && 706 (InductionBinOp->getOpcode() == Instruction::FAdd || 707 InductionBinOp->getOpcode() == Instruction::FSub))) && 708 "Binary opcode should be specified for FP induction"); 709 710 if (Casts) { 711 for (auto &Inst : *Casts) { 712 RedundantCasts.push_back(Inst); 713 } 714 } 715 } 716 717 int InductionDescriptor::getConsecutiveDirection() const { 718 ConstantInt *ConstStep = getConstIntStepValue(); 719 if (ConstStep && (ConstStep->isOne() || ConstStep->isMinusOne())) 720 return ConstStep->getSExtValue(); 721 return 0; 722 } 723 724 ConstantInt *InductionDescriptor::getConstIntStepValue() const { 725 if (isa<SCEVConstant>(Step)) 726 return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue()); 727 return nullptr; 728 } 729 730 Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index, 731 ScalarEvolution *SE, 732 const DataLayout& DL) const { 733 734 SCEVExpander Exp(*SE, DL, "induction"); 735 assert(Index->getType() == Step->getType() && 736 "Index type does not match StepValue type"); 737 switch (IK) { 738 case IK_IntInduction: { 739 assert(Index->getType() == StartValue->getType() && 740 "Index type does not match StartValue type"); 741 742 // FIXME: Theoretically, we can call getAddExpr() of ScalarEvolution 743 // and calculate (Start + Index * Step) for all cases, without 744 // special handling for "isOne" and "isMinusOne". 745 // But in the real life the result code getting worse. We mix SCEV 746 // expressions and ADD/SUB operations and receive redundant 747 // intermediate values being calculated in different ways and 748 // Instcombine is unable to reduce them all. 749 750 if (getConstIntStepValue() && 751 getConstIntStepValue()->isMinusOne()) 752 return B.CreateSub(StartValue, Index); 753 if (getConstIntStepValue() && 754 getConstIntStepValue()->isOne()) 755 return B.CreateAdd(StartValue, Index); 756 const SCEV *S = SE->getAddExpr(SE->getSCEV(StartValue), 757 SE->getMulExpr(Step, SE->getSCEV(Index))); 758 return Exp.expandCodeFor(S, StartValue->getType(), &*B.GetInsertPoint()); 759 } 760 case IK_PtrInduction: { 761 assert(isa<SCEVConstant>(Step) && 762 "Expected constant step for pointer induction"); 763 const SCEV *S = SE->getMulExpr(SE->getSCEV(Index), Step); 764 Index = Exp.expandCodeFor(S, Index->getType(), &*B.GetInsertPoint()); 765 return B.CreateGEP(nullptr, StartValue, Index); 766 } 767 case IK_FpInduction: { 768 assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value"); 769 assert(InductionBinOp && 770 (InductionBinOp->getOpcode() == Instruction::FAdd || 771 InductionBinOp->getOpcode() == Instruction::FSub) && 772 "Original bin op should be defined for FP induction"); 773 774 Value *StepValue = cast<SCEVUnknown>(Step)->getValue(); 775 776 // Floating point operations had to be 'fast' to enable the induction. 777 FastMathFlags Flags; 778 Flags.setFast(); 779 780 Value *MulExp = B.CreateFMul(StepValue, Index); 781 if (isa<Instruction>(MulExp)) 782 // We have to check, the MulExp may be a constant. 783 cast<Instruction>(MulExp)->setFastMathFlags(Flags); 784 785 Value *BOp = B.CreateBinOp(InductionBinOp->getOpcode() , StartValue, 786 MulExp, "induction"); 787 if (isa<Instruction>(BOp)) 788 cast<Instruction>(BOp)->setFastMathFlags(Flags); 789 790 return BOp; 791 } 792 case IK_NoInduction: 793 return nullptr; 794 } 795 llvm_unreachable("invalid enum"); 796 } 797 798 bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop, 799 ScalarEvolution *SE, 800 InductionDescriptor &D) { 801 802 // Here we only handle FP induction variables. 803 assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type"); 804 805 if (TheLoop->getHeader() != Phi->getParent()) 806 return false; 807 808 // The loop may have multiple entrances or multiple exits; we can analyze 809 // this phi if it has a unique entry value and a unique backedge value. 810 if (Phi->getNumIncomingValues() != 2) 811 return false; 812 Value *BEValue = nullptr, *StartValue = nullptr; 813 if (TheLoop->contains(Phi->getIncomingBlock(0))) { 814 BEValue = Phi->getIncomingValue(0); 815 StartValue = Phi->getIncomingValue(1); 816 } else { 817 assert(TheLoop->contains(Phi->getIncomingBlock(1)) && 818 "Unexpected Phi node in the loop"); 819 BEValue = Phi->getIncomingValue(1); 820 StartValue = Phi->getIncomingValue(0); 821 } 822 823 BinaryOperator *BOp = dyn_cast<BinaryOperator>(BEValue); 824 if (!BOp) 825 return false; 826 827 Value *Addend = nullptr; 828 if (BOp->getOpcode() == Instruction::FAdd) { 829 if (BOp->getOperand(0) == Phi) 830 Addend = BOp->getOperand(1); 831 else if (BOp->getOperand(1) == Phi) 832 Addend = BOp->getOperand(0); 833 } else if (BOp->getOpcode() == Instruction::FSub) 834 if (BOp->getOperand(0) == Phi) 835 Addend = BOp->getOperand(1); 836 837 if (!Addend) 838 return false; 839 840 // The addend should be loop invariant 841 if (auto *I = dyn_cast<Instruction>(Addend)) 842 if (TheLoop->contains(I)) 843 return false; 844 845 // FP Step has unknown SCEV 846 const SCEV *Step = SE->getUnknown(Addend); 847 D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp); 848 return true; 849 } 850 851 /// This function is called when we suspect that the update-chain of a phi node 852 /// (whose symbolic SCEV expression sin \p PhiScev) contains redundant casts, 853 /// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime 854 /// predicate P under which the SCEV expression for the phi can be the 855 /// AddRecurrence \p AR; See createAddRecFromPHIWithCast). We want to find the 856 /// cast instructions that are involved in the update-chain of this induction. 857 /// A caller that adds the required runtime predicate can be free to drop these 858 /// cast instructions, and compute the phi using \p AR (instead of some scev 859 /// expression with casts). 860 /// 861 /// For example, without a predicate the scev expression can take the following 862 /// form: 863 /// (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy) 864 /// 865 /// It corresponds to the following IR sequence: 866 /// %for.body: 867 /// %x = phi i64 [ 0, %ph ], [ %add, %for.body ] 868 /// %casted_phi = "ExtTrunc i64 %x" 869 /// %add = add i64 %casted_phi, %step 870 /// 871 /// where %x is given in \p PN, 872 /// PSE.getSCEV(%x) is equal to PSE.getSCEV(%casted_phi) under a predicate, 873 /// and the IR sequence that "ExtTrunc i64 %x" represents can take one of 874 /// several forms, for example, such as: 875 /// ExtTrunc1: %casted_phi = and %x, 2^n-1 876 /// or: 877 /// ExtTrunc2: %t = shl %x, m 878 /// %casted_phi = ashr %t, m 879 /// 880 /// If we are able to find such sequence, we return the instructions 881 /// we found, namely %casted_phi and the instructions on its use-def chain up 882 /// to the phi (not including the phi). 883 bool getCastsForInductionPHI( 884 PredicatedScalarEvolution &PSE, const SCEVUnknown *PhiScev, 885 const SCEVAddRecExpr *AR, SmallVectorImpl<Instruction *> &CastInsts) { 886 887 assert(CastInsts.empty() && "CastInsts is expected to be empty."); 888 auto *PN = cast<PHINode>(PhiScev->getValue()); 889 assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression"); 890 const Loop *L = AR->getLoop(); 891 892 // Find any cast instructions that participate in the def-use chain of 893 // PhiScev in the loop. 894 // FORNOW/TODO: We currently expect the def-use chain to include only 895 // two-operand instructions, where one of the operands is an invariant. 896 // createAddRecFromPHIWithCasts() currently does not support anything more 897 // involved than that, so we keep the search simple. This can be 898 // extended/generalized as needed. 899 900 auto getDef = [&](const Value *Val) -> Value * { 901 const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val); 902 if (!BinOp) 903 return nullptr; 904 Value *Op0 = BinOp->getOperand(0); 905 Value *Op1 = BinOp->getOperand(1); 906 Value *Def = nullptr; 907 if (L->isLoopInvariant(Op0)) 908 Def = Op1; 909 else if (L->isLoopInvariant(Op1)) 910 Def = Op0; 911 return Def; 912 }; 913 914 // Look for the instruction that defines the induction via the 915 // loop backedge. 916 BasicBlock *Latch = L->getLoopLatch(); 917 if (!Latch) 918 return false; 919 Value *Val = PN->getIncomingValueForBlock(Latch); 920 if (!Val) 921 return false; 922 923 // Follow the def-use chain until the induction phi is reached. 924 // If on the way we encounter a Value that has the same SCEV Expr as the 925 // phi node, we can consider the instructions we visit from that point 926 // as part of the cast-sequence that can be ignored. 927 bool InCastSequence = false; 928 auto *Inst = dyn_cast<Instruction>(Val); 929 while (Val != PN) { 930 // If we encountered a phi node other than PN, or if we left the loop, 931 // we bail out. 932 if (!Inst || !L->contains(Inst)) { 933 return false; 934 } 935 auto *AddRec = dyn_cast<SCEVAddRecExpr>(PSE.getSCEV(Val)); 936 if (AddRec && PSE.areAddRecsEqualWithPreds(AddRec, AR)) 937 InCastSequence = true; 938 if (InCastSequence) { 939 // Only the last instruction in the cast sequence is expected to have 940 // uses outside the induction def-use chain. 941 if (!CastInsts.empty()) 942 if (!Inst->hasOneUse()) 943 return false; 944 CastInsts.push_back(Inst); 945 } 946 Val = getDef(Val); 947 if (!Val) 948 return false; 949 Inst = dyn_cast<Instruction>(Val); 950 } 951 952 return InCastSequence; 953 } 954 955 bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop, 956 PredicatedScalarEvolution &PSE, 957 InductionDescriptor &D, 958 bool Assume) { 959 Type *PhiTy = Phi->getType(); 960 961 // Handle integer and pointer inductions variables. 962 // Now we handle also FP induction but not trying to make a 963 // recurrent expression from the PHI node in-place. 964 965 if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() && 966 !PhiTy->isFloatTy() && !PhiTy->isDoubleTy() && !PhiTy->isHalfTy()) 967 return false; 968 969 if (PhiTy->isFloatingPointTy()) 970 return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D); 971 972 const SCEV *PhiScev = PSE.getSCEV(Phi); 973 const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev); 974 975 // We need this expression to be an AddRecExpr. 976 if (Assume && !AR) 977 AR = PSE.getAsAddRec(Phi); 978 979 if (!AR) { 980 DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); 981 return false; 982 } 983 984 // Record any Cast instructions that participate in the induction update 985 const auto *SymbolicPhi = dyn_cast<SCEVUnknown>(PhiScev); 986 // If we started from an UnknownSCEV, and managed to build an addRecurrence 987 // only after enabling Assume with PSCEV, this means we may have encountered 988 // cast instructions that required adding a runtime check in order to 989 // guarantee the correctness of the AddRecurence respresentation of the 990 // induction. 991 if (PhiScev != AR && SymbolicPhi) { 992 SmallVector<Instruction *, 2> Casts; 993 if (getCastsForInductionPHI(PSE, SymbolicPhi, AR, Casts)) 994 return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR, &Casts); 995 } 996 997 return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR); 998 } 999 1000 bool InductionDescriptor::isInductionPHI( 1001 PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE, 1002 InductionDescriptor &D, const SCEV *Expr, 1003 SmallVectorImpl<Instruction *> *CastsToIgnore) { 1004 Type *PhiTy = Phi->getType(); 1005 // We only handle integer and pointer inductions variables. 1006 if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy()) 1007 return false; 1008 1009 // Check that the PHI is consecutive. 1010 const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi); 1011 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev); 1012 1013 if (!AR) { 1014 DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); 1015 return false; 1016 } 1017 1018 if (AR->getLoop() != TheLoop) { 1019 // FIXME: We should treat this as a uniform. Unfortunately, we 1020 // don't currently know how to handled uniform PHIs. 1021 DEBUG(dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n"); 1022 return false; 1023 } 1024 1025 Value *StartValue = 1026 Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader()); 1027 const SCEV *Step = AR->getStepRecurrence(*SE); 1028 // Calculate the pointer stride and check if it is consecutive. 1029 // The stride may be a constant or a loop invariant integer value. 1030 const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step); 1031 if (!ConstStep && !SE->isLoopInvariant(Step, TheLoop)) 1032 return false; 1033 1034 if (PhiTy->isIntegerTy()) { 1035 D = InductionDescriptor(StartValue, IK_IntInduction, Step, /*BOp=*/ nullptr, 1036 CastsToIgnore); 1037 return true; 1038 } 1039 1040 assert(PhiTy->isPointerTy() && "The PHI must be a pointer"); 1041 // Pointer induction should be a constant. 1042 if (!ConstStep) 1043 return false; 1044 1045 ConstantInt *CV = ConstStep->getValue(); 1046 Type *PointerElementType = PhiTy->getPointerElementType(); 1047 // The pointer stride cannot be determined if the pointer element type is not 1048 // sized. 1049 if (!PointerElementType->isSized()) 1050 return false; 1051 1052 const DataLayout &DL = Phi->getModule()->getDataLayout(); 1053 int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType)); 1054 if (!Size) 1055 return false; 1056 1057 int64_t CVSize = CV->getSExtValue(); 1058 if (CVSize % Size) 1059 return false; 1060 auto *StepValue = SE->getConstant(CV->getType(), CVSize / Size, 1061 true /* signed */); 1062 D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue); 1063 return true; 1064 } 1065 1066 bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, 1067 bool PreserveLCSSA) { 1068 bool Changed = false; 1069 1070 // We re-use a vector for the in-loop predecesosrs. 1071 SmallVector<BasicBlock *, 4> InLoopPredecessors; 1072 1073 auto RewriteExit = [&](BasicBlock *BB) { 1074 assert(InLoopPredecessors.empty() && 1075 "Must start with an empty predecessors list!"); 1076 auto Cleanup = make_scope_exit([&] { InLoopPredecessors.clear(); }); 1077 1078 // See if there are any non-loop predecessors of this exit block and 1079 // keep track of the in-loop predecessors. 1080 bool IsDedicatedExit = true; 1081 for (auto *PredBB : predecessors(BB)) 1082 if (L->contains(PredBB)) { 1083 if (isa<IndirectBrInst>(PredBB->getTerminator())) 1084 // We cannot rewrite exiting edges from an indirectbr. 1085 return false; 1086 1087 InLoopPredecessors.push_back(PredBB); 1088 } else { 1089 IsDedicatedExit = false; 1090 } 1091 1092 assert(!InLoopPredecessors.empty() && "Must have *some* loop predecessor!"); 1093 1094 // Nothing to do if this is already a dedicated exit. 1095 if (IsDedicatedExit) 1096 return false; 1097 1098 auto *NewExitBB = SplitBlockPredecessors( 1099 BB, InLoopPredecessors, ".loopexit", DT, LI, PreserveLCSSA); 1100 1101 if (!NewExitBB) 1102 DEBUG(dbgs() << "WARNING: Can't create a dedicated exit block for loop: " 1103 << *L << "\n"); 1104 else 1105 DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block " 1106 << NewExitBB->getName() << "\n"); 1107 return true; 1108 }; 1109 1110 // Walk the exit blocks directly rather than building up a data structure for 1111 // them, but only visit each one once. 1112 SmallPtrSet<BasicBlock *, 4> Visited; 1113 for (auto *BB : L->blocks()) 1114 for (auto *SuccBB : successors(BB)) { 1115 // We're looking for exit blocks so skip in-loop successors. 1116 if (L->contains(SuccBB)) 1117 continue; 1118 1119 // Visit each exit block exactly once. 1120 if (!Visited.insert(SuccBB).second) 1121 continue; 1122 1123 Changed |= RewriteExit(SuccBB); 1124 } 1125 1126 return Changed; 1127 } 1128 1129 /// \brief Returns the instructions that use values defined in the loop. 1130 SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) { 1131 SmallVector<Instruction *, 8> UsedOutside; 1132 1133 for (auto *Block : L->getBlocks()) 1134 // FIXME: I believe that this could use copy_if if the Inst reference could 1135 // be adapted into a pointer. 1136 for (auto &Inst : *Block) { 1137 auto Users = Inst.users(); 1138 if (any_of(Users, [&](User *U) { 1139 auto *Use = cast<Instruction>(U); 1140 return !L->contains(Use->getParent()); 1141 })) 1142 UsedOutside.push_back(&Inst); 1143 } 1144 1145 return UsedOutside; 1146 } 1147 1148 void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) { 1149 // By definition, all loop passes need the LoopInfo analysis and the 1150 // Dominator tree it depends on. Because they all participate in the loop 1151 // pass manager, they must also preserve these. 1152 AU.addRequired<DominatorTreeWrapperPass>(); 1153 AU.addPreserved<DominatorTreeWrapperPass>(); 1154 AU.addRequired<LoopInfoWrapperPass>(); 1155 AU.addPreserved<LoopInfoWrapperPass>(); 1156 1157 // We must also preserve LoopSimplify and LCSSA. We locally access their IDs 1158 // here because users shouldn't directly get them from this header. 1159 extern char &LoopSimplifyID; 1160 extern char &LCSSAID; 1161 AU.addRequiredID(LoopSimplifyID); 1162 AU.addPreservedID(LoopSimplifyID); 1163 AU.addRequiredID(LCSSAID); 1164 AU.addPreservedID(LCSSAID); 1165 // This is used in the LPPassManager to perform LCSSA verification on passes 1166 // which preserve lcssa form 1167 AU.addRequired<LCSSAVerificationPass>(); 1168 AU.addPreserved<LCSSAVerificationPass>(); 1169 1170 // Loop passes are designed to run inside of a loop pass manager which means 1171 // that any function analyses they require must be required by the first loop 1172 // pass in the manager (so that it is computed before the loop pass manager 1173 // runs) and preserved by all loop pasess in the manager. To make this 1174 // reasonably robust, the set needed for most loop passes is maintained here. 1175 // If your loop pass requires an analysis not listed here, you will need to 1176 // carefully audit the loop pass manager nesting structure that results. 1177 AU.addRequired<AAResultsWrapperPass>(); 1178 AU.addPreserved<AAResultsWrapperPass>(); 1179 AU.addPreserved<BasicAAWrapperPass>(); 1180 AU.addPreserved<GlobalsAAWrapperPass>(); 1181 AU.addPreserved<SCEVAAWrapperPass>(); 1182 AU.addRequired<ScalarEvolutionWrapperPass>(); 1183 AU.addPreserved<ScalarEvolutionWrapperPass>(); 1184 } 1185 1186 /// Manually defined generic "LoopPass" dependency initialization. This is used 1187 /// to initialize the exact set of passes from above in \c 1188 /// getLoopAnalysisUsage. It can be used within a loop pass's initialization 1189 /// with: 1190 /// 1191 /// INITIALIZE_PASS_DEPENDENCY(LoopPass) 1192 /// 1193 /// As-if "LoopPass" were a pass. 1194 void llvm::initializeLoopPassPass(PassRegistry &Registry) { 1195 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 1196 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 1197 INITIALIZE_PASS_DEPENDENCY(LoopSimplify) 1198 INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass) 1199 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 1200 INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass) 1201 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) 1202 INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass) 1203 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 1204 } 1205 1206 /// \brief Find string metadata for loop 1207 /// 1208 /// If it has a value (e.g. {"llvm.distribute", 1} return the value as an 1209 /// operand or null otherwise. If the string metadata is not found return 1210 /// Optional's not-a-value. 1211 Optional<const MDOperand *> llvm::findStringMetadataForLoop(Loop *TheLoop, 1212 StringRef Name) { 1213 MDNode *LoopID = TheLoop->getLoopID(); 1214 // Return none if LoopID is false. 1215 if (!LoopID) 1216 return None; 1217 1218 // First operand should refer to the loop id itself. 1219 assert(LoopID->getNumOperands() > 0 && "requires at least one operand"); 1220 assert(LoopID->getOperand(0) == LoopID && "invalid loop id"); 1221 1222 // Iterate over LoopID operands and look for MDString Metadata 1223 for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) { 1224 MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i)); 1225 if (!MD) 1226 continue; 1227 MDString *S = dyn_cast<MDString>(MD->getOperand(0)); 1228 if (!S) 1229 continue; 1230 // Return true if MDString holds expected MetaData. 1231 if (Name.equals(S->getString())) 1232 switch (MD->getNumOperands()) { 1233 case 1: 1234 return nullptr; 1235 case 2: 1236 return &MD->getOperand(1); 1237 default: 1238 llvm_unreachable("loop metadata has 0 or 1 operand"); 1239 } 1240 } 1241 return None; 1242 } 1243 1244 /// Does a BFS from a given node to all of its children inside a given loop. 1245 /// The returned vector of nodes includes the starting point. 1246 SmallVector<DomTreeNode *, 16> 1247 llvm::collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop) { 1248 SmallVector<DomTreeNode *, 16> Worklist; 1249 auto AddRegionToWorklist = [&](DomTreeNode *DTN) { 1250 // Only include subregions in the top level loop. 1251 BasicBlock *BB = DTN->getBlock(); 1252 if (CurLoop->contains(BB)) 1253 Worklist.push_back(DTN); 1254 }; 1255 1256 AddRegionToWorklist(N); 1257 1258 for (size_t I = 0; I < Worklist.size(); I++) 1259 for (DomTreeNode *Child : Worklist[I]->getChildren()) 1260 AddRegionToWorklist(Child); 1261 1262 return Worklist; 1263 } 1264 1265 void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr, 1266 ScalarEvolution *SE = nullptr, 1267 LoopInfo *LI = nullptr) { 1268 assert((!DT || L->isLCSSAForm(*DT)) && "Expected LCSSA!"); 1269 auto *Preheader = L->getLoopPreheader(); 1270 assert(Preheader && "Preheader should exist!"); 1271 1272 // Now that we know the removal is safe, remove the loop by changing the 1273 // branch from the preheader to go to the single exit block. 1274 // 1275 // Because we're deleting a large chunk of code at once, the sequence in which 1276 // we remove things is very important to avoid invalidation issues. 1277 1278 // Tell ScalarEvolution that the loop is deleted. Do this before 1279 // deleting the loop so that ScalarEvolution can look at the loop 1280 // to determine what it needs to clean up. 1281 if (SE) 1282 SE->forgetLoop(L); 1283 1284 auto *ExitBlock = L->getUniqueExitBlock(); 1285 assert(ExitBlock && "Should have a unique exit block!"); 1286 assert(L->hasDedicatedExits() && "Loop should have dedicated exits!"); 1287 1288 auto *OldBr = dyn_cast<BranchInst>(Preheader->getTerminator()); 1289 assert(OldBr && "Preheader must end with a branch"); 1290 assert(OldBr->isUnconditional() && "Preheader must have a single successor"); 1291 // Connect the preheader to the exit block. Keep the old edge to the header 1292 // around to perform the dominator tree update in two separate steps 1293 // -- #1 insertion of the edge preheader -> exit and #2 deletion of the edge 1294 // preheader -> header. 1295 // 1296 // 1297 // 0. Preheader 1. Preheader 2. Preheader 1298 // | | | | 1299 // V | V | 1300 // Header <--\ | Header <--\ | Header <--\ 1301 // | | | | | | | | | | | 1302 // | V | | | V | | | V | 1303 // | Body --/ | | Body --/ | | Body --/ 1304 // V V V V V 1305 // Exit Exit Exit 1306 // 1307 // By doing this is two separate steps we can perform the dominator tree 1308 // update without using the batch update API. 1309 // 1310 // Even when the loop is never executed, we cannot remove the edge from the 1311 // source block to the exit block. Consider the case where the unexecuted loop 1312 // branches back to an outer loop. If we deleted the loop and removed the edge 1313 // coming to this inner loop, this will break the outer loop structure (by 1314 // deleting the backedge of the outer loop). If the outer loop is indeed a 1315 // non-loop, it will be deleted in a future iteration of loop deletion pass. 1316 IRBuilder<> Builder(OldBr); 1317 Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock); 1318 // Remove the old branch. The conditional branch becomes a new terminator. 1319 OldBr->eraseFromParent(); 1320 1321 // Rewrite phis in the exit block to get their inputs from the Preheader 1322 // instead of the exiting block. 1323 BasicBlock::iterator BI = ExitBlock->begin(); 1324 while (PHINode *P = dyn_cast<PHINode>(BI)) { 1325 // Set the zero'th element of Phi to be from the preheader and remove all 1326 // other incoming values. Given the loop has dedicated exits, all other 1327 // incoming values must be from the exiting blocks. 1328 int PredIndex = 0; 1329 P->setIncomingBlock(PredIndex, Preheader); 1330 // Removes all incoming values from all other exiting blocks (including 1331 // duplicate values from an exiting block). 1332 // Nuke all entries except the zero'th entry which is the preheader entry. 1333 // NOTE! We need to remove Incoming Values in the reverse order as done 1334 // below, to keep the indices valid for deletion (removeIncomingValues 1335 // updates getNumIncomingValues and shifts all values down into the operand 1336 // being deleted). 1337 for (unsigned i = 0, e = P->getNumIncomingValues() - 1; i != e; ++i) 1338 P->removeIncomingValue(e - i, false); 1339 1340 assert((P->getNumIncomingValues() == 1 && 1341 P->getIncomingBlock(PredIndex) == Preheader) && 1342 "Should have exactly one value and that's from the preheader!"); 1343 ++BI; 1344 } 1345 1346 // Disconnect the loop body by branching directly to its exit. 1347 Builder.SetInsertPoint(Preheader->getTerminator()); 1348 Builder.CreateBr(ExitBlock); 1349 // Remove the old branch. 1350 Preheader->getTerminator()->eraseFromParent(); 1351 1352 if (DT) { 1353 // Update the dominator tree by informing it about the new edge from the 1354 // preheader to the exit. 1355 DT->insertEdge(Preheader, ExitBlock); 1356 // Inform the dominator tree about the removed edge. 1357 DT->deleteEdge(Preheader, L->getHeader()); 1358 } 1359 1360 // Remove the block from the reference counting scheme, so that we can 1361 // delete it freely later. 1362 for (auto *Block : L->blocks()) 1363 Block->dropAllReferences(); 1364 1365 if (LI) { 1366 // Erase the instructions and the blocks without having to worry 1367 // about ordering because we already dropped the references. 1368 // NOTE: This iteration is safe because erasing the block does not remove 1369 // its entry from the loop's block list. We do that in the next section. 1370 for (Loop::block_iterator LpI = L->block_begin(), LpE = L->block_end(); 1371 LpI != LpE; ++LpI) 1372 (*LpI)->eraseFromParent(); 1373 1374 // Finally, the blocks from loopinfo. This has to happen late because 1375 // otherwise our loop iterators won't work. 1376 1377 SmallPtrSet<BasicBlock *, 8> blocks; 1378 blocks.insert(L->block_begin(), L->block_end()); 1379 for (BasicBlock *BB : blocks) 1380 LI->removeBlock(BB); 1381 1382 // The last step is to update LoopInfo now that we've eliminated this loop. 1383 LI->erase(L); 1384 } 1385 } 1386 1387 /// Returns true if the instruction in a loop is guaranteed to execute at least 1388 /// once. 1389 bool llvm::isGuaranteedToExecute(const Instruction &Inst, 1390 const DominatorTree *DT, const Loop *CurLoop, 1391 const LoopSafetyInfo *SafetyInfo) { 1392 // We have to check to make sure that the instruction dominates all 1393 // of the exit blocks. If it doesn't, then there is a path out of the loop 1394 // which does not execute this instruction, so we can't hoist it. 1395 1396 // If the instruction is in the header block for the loop (which is very 1397 // common), it is always guaranteed to dominate the exit blocks. Since this 1398 // is a common case, and can save some work, check it now. 1399 if (Inst.getParent() == CurLoop->getHeader()) 1400 // If there's a throw in the header block, we can't guarantee we'll reach 1401 // Inst. 1402 return !SafetyInfo->HeaderMayThrow; 1403 1404 // Somewhere in this loop there is an instruction which may throw and make us 1405 // exit the loop. 1406 if (SafetyInfo->MayThrow) 1407 return false; 1408 1409 // Get the exit blocks for the current loop. 1410 SmallVector<BasicBlock *, 8> ExitBlocks; 1411 CurLoop->getExitBlocks(ExitBlocks); 1412 1413 // Verify that the block dominates each of the exit blocks of the loop. 1414 for (BasicBlock *ExitBlock : ExitBlocks) 1415 if (!DT->dominates(Inst.getParent(), ExitBlock)) 1416 return false; 1417 1418 // As a degenerate case, if the loop is statically infinite then we haven't 1419 // proven anything since there are no exit blocks. 1420 if (ExitBlocks.empty()) 1421 return false; 1422 1423 // FIXME: In general, we have to prove that the loop isn't an infinite loop. 1424 // See http::llvm.org/PR24078 . (The "ExitBlocks.empty()" check above is 1425 // just a special case of this.) 1426 return true; 1427 } 1428 1429 Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) { 1430 // Only support loops with a unique exiting block, and a latch. 1431 if (!L->getExitingBlock()) 1432 return None; 1433 1434 // Get the branch weights for the the loop's backedge. 1435 BranchInst *LatchBR = 1436 dyn_cast<BranchInst>(L->getLoopLatch()->getTerminator()); 1437 if (!LatchBR || LatchBR->getNumSuccessors() != 2) 1438 return None; 1439 1440 assert((LatchBR->getSuccessor(0) == L->getHeader() || 1441 LatchBR->getSuccessor(1) == L->getHeader()) && 1442 "At least one edge out of the latch must go to the header"); 1443 1444 // To estimate the number of times the loop body was executed, we want to 1445 // know the number of times the backedge was taken, vs. the number of times 1446 // we exited the loop. 1447 uint64_t TrueVal, FalseVal; 1448 if (!LatchBR->extractProfMetadata(TrueVal, FalseVal)) 1449 return None; 1450 1451 if (!TrueVal || !FalseVal) 1452 return 0; 1453 1454 // Divide the count of the backedge by the count of the edge exiting the loop, 1455 // rounding to nearest. 1456 if (LatchBR->getSuccessor(0) == L->getHeader()) 1457 return (TrueVal + (FalseVal / 2)) / FalseVal; 1458 else 1459 return (FalseVal + (TrueVal / 2)) / TrueVal; 1460 } 1461 1462 /// \brief Adds a 'fast' flag to floating point operations. 1463 static Value *addFastMathFlag(Value *V) { 1464 if (isa<FPMathOperator>(V)) { 1465 FastMathFlags Flags; 1466 Flags.setFast(); 1467 cast<Instruction>(V)->setFastMathFlags(Flags); 1468 } 1469 return V; 1470 } 1471 1472 // Helper to generate a log2 shuffle reduction. 1473 Value * 1474 llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op, 1475 RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind, 1476 ArrayRef<Value *> RedOps) { 1477 unsigned VF = Src->getType()->getVectorNumElements(); 1478 // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles 1479 // and vector ops, reducing the set of values being computed by half each 1480 // round. 1481 assert(isPowerOf2_32(VF) && 1482 "Reduction emission only supported for pow2 vectors!"); 1483 Value *TmpVec = Src; 1484 SmallVector<Constant *, 32> ShuffleMask(VF, nullptr); 1485 for (unsigned i = VF; i != 1; i >>= 1) { 1486 // Move the upper half of the vector to the lower half. 1487 for (unsigned j = 0; j != i / 2; ++j) 1488 ShuffleMask[j] = Builder.getInt32(i / 2 + j); 1489 1490 // Fill the rest of the mask with undef. 1491 std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), 1492 UndefValue::get(Builder.getInt32Ty())); 1493 1494 Value *Shuf = Builder.CreateShuffleVector( 1495 TmpVec, UndefValue::get(TmpVec->getType()), 1496 ConstantVector::get(ShuffleMask), "rdx.shuf"); 1497 1498 if (Op != Instruction::ICmp && Op != Instruction::FCmp) { 1499 // Floating point operations had to be 'fast' to enable the reduction. 1500 TmpVec = addFastMathFlag(Builder.CreateBinOp((Instruction::BinaryOps)Op, 1501 TmpVec, Shuf, "bin.rdx")); 1502 } else { 1503 assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid && 1504 "Invalid min/max"); 1505 TmpVec = RecurrenceDescriptor::createMinMaxOp(Builder, MinMaxKind, TmpVec, 1506 Shuf); 1507 } 1508 if (!RedOps.empty()) 1509 propagateIRFlags(TmpVec, RedOps); 1510 } 1511 // The result is in the first element of the vector. 1512 return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0)); 1513 } 1514 1515 /// Create a simple vector reduction specified by an opcode and some 1516 /// flags (if generating min/max reductions). 1517 Value *llvm::createSimpleTargetReduction( 1518 IRBuilder<> &Builder, const TargetTransformInfo *TTI, unsigned Opcode, 1519 Value *Src, TargetTransformInfo::ReductionFlags Flags, 1520 ArrayRef<Value *> RedOps) { 1521 assert(isa<VectorType>(Src->getType()) && "Type must be a vector"); 1522 1523 Value *ScalarUdf = UndefValue::get(Src->getType()->getVectorElementType()); 1524 std::function<Value*()> BuildFunc; 1525 using RD = RecurrenceDescriptor; 1526 RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid; 1527 // TODO: Support creating ordered reductions. 1528 FastMathFlags FMFFast; 1529 FMFFast.setFast(); 1530 1531 switch (Opcode) { 1532 case Instruction::Add: 1533 BuildFunc = [&]() { return Builder.CreateAddReduce(Src); }; 1534 break; 1535 case Instruction::Mul: 1536 BuildFunc = [&]() { return Builder.CreateMulReduce(Src); }; 1537 break; 1538 case Instruction::And: 1539 BuildFunc = [&]() { return Builder.CreateAndReduce(Src); }; 1540 break; 1541 case Instruction::Or: 1542 BuildFunc = [&]() { return Builder.CreateOrReduce(Src); }; 1543 break; 1544 case Instruction::Xor: 1545 BuildFunc = [&]() { return Builder.CreateXorReduce(Src); }; 1546 break; 1547 case Instruction::FAdd: 1548 BuildFunc = [&]() { 1549 auto Rdx = Builder.CreateFAddReduce(ScalarUdf, Src); 1550 cast<CallInst>(Rdx)->setFastMathFlags(FMFFast); 1551 return Rdx; 1552 }; 1553 break; 1554 case Instruction::FMul: 1555 BuildFunc = [&]() { 1556 auto Rdx = Builder.CreateFMulReduce(ScalarUdf, Src); 1557 cast<CallInst>(Rdx)->setFastMathFlags(FMFFast); 1558 return Rdx; 1559 }; 1560 break; 1561 case Instruction::ICmp: 1562 if (Flags.IsMaxOp) { 1563 MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMax : RD::MRK_UIntMax; 1564 BuildFunc = [&]() { 1565 return Builder.CreateIntMaxReduce(Src, Flags.IsSigned); 1566 }; 1567 } else { 1568 MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMin : RD::MRK_UIntMin; 1569 BuildFunc = [&]() { 1570 return Builder.CreateIntMinReduce(Src, Flags.IsSigned); 1571 }; 1572 } 1573 break; 1574 case Instruction::FCmp: 1575 if (Flags.IsMaxOp) { 1576 MinMaxKind = RD::MRK_FloatMax; 1577 BuildFunc = [&]() { return Builder.CreateFPMaxReduce(Src, Flags.NoNaN); }; 1578 } else { 1579 MinMaxKind = RD::MRK_FloatMin; 1580 BuildFunc = [&]() { return Builder.CreateFPMinReduce(Src, Flags.NoNaN); }; 1581 } 1582 break; 1583 default: 1584 llvm_unreachable("Unhandled opcode"); 1585 break; 1586 } 1587 if (TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags)) 1588 return BuildFunc(); 1589 return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, RedOps); 1590 } 1591 1592 /// Create a vector reduction using a given recurrence descriptor. 1593 Value *llvm::createTargetReduction(IRBuilder<> &B, 1594 const TargetTransformInfo *TTI, 1595 RecurrenceDescriptor &Desc, Value *Src, 1596 bool NoNaN) { 1597 // TODO: Support in-order reductions based on the recurrence descriptor. 1598 using RD = RecurrenceDescriptor; 1599 RD::RecurrenceKind RecKind = Desc.getRecurrenceKind(); 1600 TargetTransformInfo::ReductionFlags Flags; 1601 Flags.NoNaN = NoNaN; 1602 switch (RecKind) { 1603 case RD::RK_FloatAdd: 1604 return createSimpleTargetReduction(B, TTI, Instruction::FAdd, Src, Flags); 1605 case RD::RK_FloatMult: 1606 return createSimpleTargetReduction(B, TTI, Instruction::FMul, Src, Flags); 1607 case RD::RK_IntegerAdd: 1608 return createSimpleTargetReduction(B, TTI, Instruction::Add, Src, Flags); 1609 case RD::RK_IntegerMult: 1610 return createSimpleTargetReduction(B, TTI, Instruction::Mul, Src, Flags); 1611 case RD::RK_IntegerAnd: 1612 return createSimpleTargetReduction(B, TTI, Instruction::And, Src, Flags); 1613 case RD::RK_IntegerOr: 1614 return createSimpleTargetReduction(B, TTI, Instruction::Or, Src, Flags); 1615 case RD::RK_IntegerXor: 1616 return createSimpleTargetReduction(B, TTI, Instruction::Xor, Src, Flags); 1617 case RD::RK_IntegerMinMax: { 1618 RD::MinMaxRecurrenceKind MMKind = Desc.getMinMaxRecurrenceKind(); 1619 Flags.IsMaxOp = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_UIntMax); 1620 Flags.IsSigned = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_SIntMin); 1621 return createSimpleTargetReduction(B, TTI, Instruction::ICmp, Src, Flags); 1622 } 1623 case RD::RK_FloatMinMax: { 1624 Flags.IsMaxOp = Desc.getMinMaxRecurrenceKind() == RD::MRK_FloatMax; 1625 return createSimpleTargetReduction(B, TTI, Instruction::FCmp, Src, Flags); 1626 } 1627 default: 1628 llvm_unreachable("Unhandled RecKind"); 1629 } 1630 } 1631 1632 void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) { 1633 auto *VecOp = dyn_cast<Instruction>(I); 1634 if (!VecOp) 1635 return; 1636 auto *Intersection = (OpValue == nullptr) ? dyn_cast<Instruction>(VL[0]) 1637 : dyn_cast<Instruction>(OpValue); 1638 if (!Intersection) 1639 return; 1640 const unsigned Opcode = Intersection->getOpcode(); 1641 VecOp->copyIRFlags(Intersection); 1642 for (auto *V : VL) { 1643 auto *Instr = dyn_cast<Instruction>(V); 1644 if (!Instr) 1645 continue; 1646 if (OpValue == nullptr || Opcode == Instr->getOpcode()) 1647 VecOp->andIRFlags(V); 1648 } 1649 } 1650