1 //===-- LoopUtils.cpp - Loop Utility functions -------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines common loop utility functions. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Transforms/Utils/LoopUtils.h" 15 #include "llvm/ADT/ScopeExit.h" 16 #include "llvm/Analysis/AliasAnalysis.h" 17 #include "llvm/Analysis/BasicAliasAnalysis.h" 18 #include "llvm/Analysis/GlobalsModRef.h" 19 #include "llvm/Analysis/LoopInfo.h" 20 #include "llvm/Analysis/LoopPass.h" 21 #include "llvm/Analysis/ScalarEvolution.h" 22 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" 23 #include "llvm/Analysis/ScalarEvolutionExpander.h" 24 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 25 #include "llvm/Analysis/TargetTransformInfo.h" 26 #include "llvm/IR/Dominators.h" 27 #include "llvm/IR/Instructions.h" 28 #include "llvm/IR/Module.h" 29 #include "llvm/IR/PatternMatch.h" 30 #include "llvm/IR/ValueHandle.h" 31 #include "llvm/Pass.h" 32 #include "llvm/Support/Debug.h" 33 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 34 35 using namespace llvm; 36 using namespace llvm::PatternMatch; 37 38 #define DEBUG_TYPE "loop-utils" 39 40 bool RecurrenceDescriptor::areAllUsesIn(Instruction *I, 41 SmallPtrSetImpl<Instruction *> &Set) { 42 for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use) 43 if (!Set.count(dyn_cast<Instruction>(*Use))) 44 return false; 45 return true; 46 } 47 48 bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurrenceKind Kind) { 49 switch (Kind) { 50 default: 51 break; 52 case RK_IntegerAdd: 53 case RK_IntegerMult: 54 case RK_IntegerOr: 55 case RK_IntegerAnd: 56 case RK_IntegerXor: 57 case RK_IntegerMinMax: 58 return true; 59 } 60 return false; 61 } 62 63 bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurrenceKind Kind) { 64 return (Kind != RK_NoRecurrence) && !isIntegerRecurrenceKind(Kind); 65 } 66 67 bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurrenceKind Kind) { 68 switch (Kind) { 69 default: 70 break; 71 case RK_IntegerAdd: 72 case RK_IntegerMult: 73 case RK_FloatAdd: 74 case RK_FloatMult: 75 return true; 76 } 77 return false; 78 } 79 80 Instruction * 81 RecurrenceDescriptor::lookThroughAnd(PHINode *Phi, Type *&RT, 82 SmallPtrSetImpl<Instruction *> &Visited, 83 SmallPtrSetImpl<Instruction *> &CI) { 84 if (!Phi->hasOneUse()) 85 return Phi; 86 87 const APInt *M = nullptr; 88 Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser()); 89 90 // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT 91 // with a new integer type of the corresponding bit width. 92 if (match(J, m_c_And(m_Instruction(I), m_APInt(M)))) { 93 int32_t Bits = (*M + 1).exactLogBase2(); 94 if (Bits > 0) { 95 RT = IntegerType::get(Phi->getContext(), Bits); 96 Visited.insert(Phi); 97 CI.insert(J); 98 return J; 99 } 100 } 101 return Phi; 102 } 103 104 bool RecurrenceDescriptor::getSourceExtensionKind( 105 Instruction *Start, Instruction *Exit, Type *RT, bool &IsSigned, 106 SmallPtrSetImpl<Instruction *> &Visited, 107 SmallPtrSetImpl<Instruction *> &CI) { 108 109 SmallVector<Instruction *, 8> Worklist; 110 bool FoundOneOperand = false; 111 unsigned DstSize = RT->getPrimitiveSizeInBits(); 112 Worklist.push_back(Exit); 113 114 // Traverse the instructions in the reduction expression, beginning with the 115 // exit value. 116 while (!Worklist.empty()) { 117 Instruction *I = Worklist.pop_back_val(); 118 for (Use &U : I->operands()) { 119 120 // Terminate the traversal if the operand is not an instruction, or we 121 // reach the starting value. 122 Instruction *J = dyn_cast<Instruction>(U.get()); 123 if (!J || J == Start) 124 continue; 125 126 // Otherwise, investigate the operation if it is also in the expression. 127 if (Visited.count(J)) { 128 Worklist.push_back(J); 129 continue; 130 } 131 132 // If the operand is not in Visited, it is not a reduction operation, but 133 // it does feed into one. Make sure it is either a single-use sign- or 134 // zero-extend instruction. 135 CastInst *Cast = dyn_cast<CastInst>(J); 136 bool IsSExtInst = isa<SExtInst>(J); 137 if (!Cast || !Cast->hasOneUse() || !(isa<ZExtInst>(J) || IsSExtInst)) 138 return false; 139 140 // Ensure the source type of the extend is no larger than the reduction 141 // type. It is not necessary for the types to be identical. 142 unsigned SrcSize = Cast->getSrcTy()->getPrimitiveSizeInBits(); 143 if (SrcSize > DstSize) 144 return false; 145 146 // Furthermore, ensure that all such extends are of the same kind. 147 if (FoundOneOperand) { 148 if (IsSigned != IsSExtInst) 149 return false; 150 } else { 151 FoundOneOperand = true; 152 IsSigned = IsSExtInst; 153 } 154 155 // Lastly, if the source type of the extend matches the reduction type, 156 // add the extend to CI so that we can avoid accounting for it in the 157 // cost model. 158 if (SrcSize == DstSize) 159 CI.insert(Cast); 160 } 161 } 162 return true; 163 } 164 165 bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, 166 Loop *TheLoop, bool HasFunNoNaNAttr, 167 RecurrenceDescriptor &RedDes) { 168 if (Phi->getNumIncomingValues() != 2) 169 return false; 170 171 // Reduction variables are only found in the loop header block. 172 if (Phi->getParent() != TheLoop->getHeader()) 173 return false; 174 175 // Obtain the reduction start value from the value that comes from the loop 176 // preheader. 177 Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader()); 178 179 // ExitInstruction is the single value which is used outside the loop. 180 // We only allow for a single reduction value to be used outside the loop. 181 // This includes users of the reduction, variables (which form a cycle 182 // which ends in the phi node). 183 Instruction *ExitInstruction = nullptr; 184 // Indicates that we found a reduction operation in our scan. 185 bool FoundReduxOp = false; 186 187 // We start with the PHI node and scan for all of the users of this 188 // instruction. All users must be instructions that can be used as reduction 189 // variables (such as ADD). We must have a single out-of-block user. The cycle 190 // must include the original PHI. 191 bool FoundStartPHI = false; 192 193 // To recognize min/max patterns formed by a icmp select sequence, we store 194 // the number of instruction we saw from the recognized min/max pattern, 195 // to make sure we only see exactly the two instructions. 196 unsigned NumCmpSelectPatternInst = 0; 197 InstDesc ReduxDesc(false, nullptr); 198 199 // Data used for determining if the recurrence has been type-promoted. 200 Type *RecurrenceType = Phi->getType(); 201 SmallPtrSet<Instruction *, 4> CastInsts; 202 Instruction *Start = Phi; 203 bool IsSigned = false; 204 205 SmallPtrSet<Instruction *, 8> VisitedInsts; 206 SmallVector<Instruction *, 8> Worklist; 207 208 // Return early if the recurrence kind does not match the type of Phi. If the 209 // recurrence kind is arithmetic, we attempt to look through AND operations 210 // resulting from the type promotion performed by InstCombine. Vector 211 // operations are not limited to the legal integer widths, so we may be able 212 // to evaluate the reduction in the narrower width. 213 if (RecurrenceType->isFloatingPointTy()) { 214 if (!isFloatingPointRecurrenceKind(Kind)) 215 return false; 216 } else { 217 if (!isIntegerRecurrenceKind(Kind)) 218 return false; 219 if (isArithmeticRecurrenceKind(Kind)) 220 Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts); 221 } 222 223 Worklist.push_back(Start); 224 VisitedInsts.insert(Start); 225 226 // A value in the reduction can be used: 227 // - By the reduction: 228 // - Reduction operation: 229 // - One use of reduction value (safe). 230 // - Multiple use of reduction value (not safe). 231 // - PHI: 232 // - All uses of the PHI must be the reduction (safe). 233 // - Otherwise, not safe. 234 // - By instructions outside of the loop (safe). 235 // * One value may have several outside users, but all outside 236 // uses must be of the same value. 237 // - By an instruction that is not part of the reduction (not safe). 238 // This is either: 239 // * An instruction type other than PHI or the reduction operation. 240 // * A PHI in the header other than the initial PHI. 241 while (!Worklist.empty()) { 242 Instruction *Cur = Worklist.back(); 243 Worklist.pop_back(); 244 245 // No Users. 246 // If the instruction has no users then this is a broken chain and can't be 247 // a reduction variable. 248 if (Cur->use_empty()) 249 return false; 250 251 bool IsAPhi = isa<PHINode>(Cur); 252 253 // A header PHI use other than the original PHI. 254 if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent()) 255 return false; 256 257 // Reductions of instructions such as Div, and Sub is only possible if the 258 // LHS is the reduction variable. 259 if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) && 260 !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) && 261 !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0)))) 262 return false; 263 264 // Any reduction instruction must be of one of the allowed kinds. We ignore 265 // the starting value (the Phi or an AND instruction if the Phi has been 266 // type-promoted). 267 if (Cur != Start) { 268 ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr); 269 if (!ReduxDesc.isRecurrence()) 270 return false; 271 } 272 273 // A reduction operation must only have one use of the reduction value. 274 if (!IsAPhi && Kind != RK_IntegerMinMax && Kind != RK_FloatMinMax && 275 hasMultipleUsesOf(Cur, VisitedInsts)) 276 return false; 277 278 // All inputs to a PHI node must be a reduction value. 279 if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts)) 280 return false; 281 282 if (Kind == RK_IntegerMinMax && 283 (isa<ICmpInst>(Cur) || isa<SelectInst>(Cur))) 284 ++NumCmpSelectPatternInst; 285 if (Kind == RK_FloatMinMax && (isa<FCmpInst>(Cur) || isa<SelectInst>(Cur))) 286 ++NumCmpSelectPatternInst; 287 288 // Check whether we found a reduction operator. 289 FoundReduxOp |= !IsAPhi && Cur != Start; 290 291 // Process users of current instruction. Push non-PHI nodes after PHI nodes 292 // onto the stack. This way we are going to have seen all inputs to PHI 293 // nodes once we get to them. 294 SmallVector<Instruction *, 8> NonPHIs; 295 SmallVector<Instruction *, 8> PHIs; 296 for (User *U : Cur->users()) { 297 Instruction *UI = cast<Instruction>(U); 298 299 // Check if we found the exit user. 300 BasicBlock *Parent = UI->getParent(); 301 if (!TheLoop->contains(Parent)) { 302 // If we already know this instruction is used externally, move on to 303 // the next user. 304 if (ExitInstruction == Cur) 305 continue; 306 307 // Exit if you find multiple values used outside or if the header phi 308 // node is being used. In this case the user uses the value of the 309 // previous iteration, in which case we would loose "VF-1" iterations of 310 // the reduction operation if we vectorize. 311 if (ExitInstruction != nullptr || Cur == Phi) 312 return false; 313 314 // The instruction used by an outside user must be the last instruction 315 // before we feed back to the reduction phi. Otherwise, we loose VF-1 316 // operations on the value. 317 if (!is_contained(Phi->operands(), Cur)) 318 return false; 319 320 ExitInstruction = Cur; 321 continue; 322 } 323 324 // Process instructions only once (termination). Each reduction cycle 325 // value must only be used once, except by phi nodes and min/max 326 // reductions which are represented as a cmp followed by a select. 327 InstDesc IgnoredVal(false, nullptr); 328 if (VisitedInsts.insert(UI).second) { 329 if (isa<PHINode>(UI)) 330 PHIs.push_back(UI); 331 else 332 NonPHIs.push_back(UI); 333 } else if (!isa<PHINode>(UI) && 334 ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) && 335 !isa<SelectInst>(UI)) || 336 !isMinMaxSelectCmpPattern(UI, IgnoredVal).isRecurrence())) 337 return false; 338 339 // Remember that we completed the cycle. 340 if (UI == Phi) 341 FoundStartPHI = true; 342 } 343 Worklist.append(PHIs.begin(), PHIs.end()); 344 Worklist.append(NonPHIs.begin(), NonPHIs.end()); 345 } 346 347 // This means we have seen one but not the other instruction of the 348 // pattern or more than just a select and cmp. 349 if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) && 350 NumCmpSelectPatternInst != 2) 351 return false; 352 353 if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction) 354 return false; 355 356 // If we think Phi may have been type-promoted, we also need to ensure that 357 // all source operands of the reduction are either SExtInsts or ZEstInsts. If 358 // so, we will be able to evaluate the reduction in the narrower bit width. 359 if (Start != Phi) 360 if (!getSourceExtensionKind(Start, ExitInstruction, RecurrenceType, 361 IsSigned, VisitedInsts, CastInsts)) 362 return false; 363 364 // We found a reduction var if we have reached the original phi node and we 365 // only have a single instruction with out-of-loop users. 366 367 // The ExitInstruction(Instruction which is allowed to have out-of-loop users) 368 // is saved as part of the RecurrenceDescriptor. 369 370 // Save the description of this reduction variable. 371 RecurrenceDescriptor RD( 372 RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(), 373 ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts); 374 RedDes = RD; 375 376 return true; 377 } 378 379 /// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction 380 /// pattern corresponding to a min(X, Y) or max(X, Y). 381 RecurrenceDescriptor::InstDesc 382 RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev) { 383 384 assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) && 385 "Expect a select instruction"); 386 Instruction *Cmp = nullptr; 387 SelectInst *Select = nullptr; 388 389 // We must handle the select(cmp()) as a single instruction. Advance to the 390 // select. 391 if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) { 392 if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->user_begin()))) 393 return InstDesc(false, I); 394 return InstDesc(Select, Prev.getMinMaxKind()); 395 } 396 397 // Only handle single use cases for now. 398 if (!(Select = dyn_cast<SelectInst>(I))) 399 return InstDesc(false, I); 400 if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) && 401 !(Cmp = dyn_cast<FCmpInst>(I->getOperand(0)))) 402 return InstDesc(false, I); 403 if (!Cmp->hasOneUse()) 404 return InstDesc(false, I); 405 406 Value *CmpLeft; 407 Value *CmpRight; 408 409 // Look for a min/max pattern. 410 if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 411 return InstDesc(Select, MRK_UIntMin); 412 else if (m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 413 return InstDesc(Select, MRK_UIntMax); 414 else if (m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 415 return InstDesc(Select, MRK_SIntMax); 416 else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 417 return InstDesc(Select, MRK_SIntMin); 418 else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 419 return InstDesc(Select, MRK_FloatMin); 420 else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 421 return InstDesc(Select, MRK_FloatMax); 422 else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 423 return InstDesc(Select, MRK_FloatMin); 424 else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 425 return InstDesc(Select, MRK_FloatMax); 426 427 return InstDesc(false, I); 428 } 429 430 RecurrenceDescriptor::InstDesc 431 RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind, 432 InstDesc &Prev, bool HasFunNoNaNAttr) { 433 bool FP = I->getType()->isFloatingPointTy(); 434 Instruction *UAI = Prev.getUnsafeAlgebraInst(); 435 if (!UAI && FP && !I->hasUnsafeAlgebra()) 436 UAI = I; // Found an unsafe (unvectorizable) algebra instruction. 437 438 switch (I->getOpcode()) { 439 default: 440 return InstDesc(false, I); 441 case Instruction::PHI: 442 return InstDesc(I, Prev.getMinMaxKind(), Prev.getUnsafeAlgebraInst()); 443 case Instruction::Sub: 444 case Instruction::Add: 445 return InstDesc(Kind == RK_IntegerAdd, I); 446 case Instruction::Mul: 447 return InstDesc(Kind == RK_IntegerMult, I); 448 case Instruction::And: 449 return InstDesc(Kind == RK_IntegerAnd, I); 450 case Instruction::Or: 451 return InstDesc(Kind == RK_IntegerOr, I); 452 case Instruction::Xor: 453 return InstDesc(Kind == RK_IntegerXor, I); 454 case Instruction::FMul: 455 return InstDesc(Kind == RK_FloatMult, I, UAI); 456 case Instruction::FSub: 457 case Instruction::FAdd: 458 return InstDesc(Kind == RK_FloatAdd, I, UAI); 459 case Instruction::FCmp: 460 case Instruction::ICmp: 461 case Instruction::Select: 462 if (Kind != RK_IntegerMinMax && 463 (!HasFunNoNaNAttr || Kind != RK_FloatMinMax)) 464 return InstDesc(false, I); 465 return isMinMaxSelectCmpPattern(I, Prev); 466 } 467 } 468 469 bool RecurrenceDescriptor::hasMultipleUsesOf( 470 Instruction *I, SmallPtrSetImpl<Instruction *> &Insts) { 471 unsigned NumUses = 0; 472 for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; 473 ++Use) { 474 if (Insts.count(dyn_cast<Instruction>(*Use))) 475 ++NumUses; 476 if (NumUses > 1) 477 return true; 478 } 479 480 return false; 481 } 482 bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop, 483 RecurrenceDescriptor &RedDes) { 484 485 BasicBlock *Header = TheLoop->getHeader(); 486 Function &F = *Header->getParent(); 487 bool HasFunNoNaNAttr = 488 F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true"; 489 490 if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes)) { 491 DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n"); 492 return true; 493 } 494 if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes)) { 495 DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n"); 496 return true; 497 } 498 if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes)) { 499 DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n"); 500 return true; 501 } 502 if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes)) { 503 DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n"); 504 return true; 505 } 506 if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes)) { 507 DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n"); 508 return true; 509 } 510 if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr, 511 RedDes)) { 512 DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n"); 513 return true; 514 } 515 if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes)) { 516 DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n"); 517 return true; 518 } 519 if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes)) { 520 DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n"); 521 return true; 522 } 523 if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes)) { 524 DEBUG(dbgs() << "Found an float MINMAX reduction PHI." << *Phi << "\n"); 525 return true; 526 } 527 // Not a reduction of known type. 528 return false; 529 } 530 531 bool RecurrenceDescriptor::isFirstOrderRecurrence( 532 PHINode *Phi, Loop *TheLoop, 533 DenseMap<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT) { 534 535 // Ensure the phi node is in the loop header and has two incoming values. 536 if (Phi->getParent() != TheLoop->getHeader() || 537 Phi->getNumIncomingValues() != 2) 538 return false; 539 540 // Ensure the loop has a preheader and a single latch block. The loop 541 // vectorizer will need the latch to set up the next iteration of the loop. 542 auto *Preheader = TheLoop->getLoopPreheader(); 543 auto *Latch = TheLoop->getLoopLatch(); 544 if (!Preheader || !Latch) 545 return false; 546 547 // Ensure the phi node's incoming blocks are the loop preheader and latch. 548 if (Phi->getBasicBlockIndex(Preheader) < 0 || 549 Phi->getBasicBlockIndex(Latch) < 0) 550 return false; 551 552 // Get the previous value. The previous value comes from the latch edge while 553 // the initial value comes form the preheader edge. 554 auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch)); 555 if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous) || 556 SinkAfter.count(Previous)) // Cannot rely on dominance due to motion. 557 return false; 558 559 // Ensure every user of the phi node is dominated by the previous value. 560 // The dominance requirement ensures the loop vectorizer will not need to 561 // vectorize the initial value prior to the first iteration of the loop. 562 // TODO: Consider extending this sinking to handle other kinds of instructions 563 // and expressions, beyond sinking a single cast past Previous. 564 if (Phi->hasOneUse()) { 565 auto *I = Phi->user_back(); 566 if (I->isCast() && (I->getParent() == Phi->getParent()) && I->hasOneUse() && 567 DT->dominates(Previous, I->user_back())) { 568 SinkAfter[I] = Previous; 569 return true; 570 } 571 } 572 573 for (User *U : Phi->users()) 574 if (auto *I = dyn_cast<Instruction>(U)) { 575 if (!DT->dominates(Previous, I)) 576 return false; 577 } 578 579 return true; 580 } 581 582 /// This function returns the identity element (or neutral element) for 583 /// the operation K. 584 Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurrenceKind K, 585 Type *Tp) { 586 switch (K) { 587 case RK_IntegerXor: 588 case RK_IntegerAdd: 589 case RK_IntegerOr: 590 // Adding, Xoring, Oring zero to a number does not change it. 591 return ConstantInt::get(Tp, 0); 592 case RK_IntegerMult: 593 // Multiplying a number by 1 does not change it. 594 return ConstantInt::get(Tp, 1); 595 case RK_IntegerAnd: 596 // AND-ing a number with an all-1 value does not change it. 597 return ConstantInt::get(Tp, -1, true); 598 case RK_FloatMult: 599 // Multiplying a number by 1 does not change it. 600 return ConstantFP::get(Tp, 1.0L); 601 case RK_FloatAdd: 602 // Adding zero to a number does not change it. 603 return ConstantFP::get(Tp, 0.0L); 604 default: 605 llvm_unreachable("Unknown recurrence kind"); 606 } 607 } 608 609 /// This function translates the recurrence kind to an LLVM binary operator. 610 unsigned RecurrenceDescriptor::getRecurrenceBinOp(RecurrenceKind Kind) { 611 switch (Kind) { 612 case RK_IntegerAdd: 613 return Instruction::Add; 614 case RK_IntegerMult: 615 return Instruction::Mul; 616 case RK_IntegerOr: 617 return Instruction::Or; 618 case RK_IntegerAnd: 619 return Instruction::And; 620 case RK_IntegerXor: 621 return Instruction::Xor; 622 case RK_FloatMult: 623 return Instruction::FMul; 624 case RK_FloatAdd: 625 return Instruction::FAdd; 626 case RK_IntegerMinMax: 627 return Instruction::ICmp; 628 case RK_FloatMinMax: 629 return Instruction::FCmp; 630 default: 631 llvm_unreachable("Unknown recurrence operation"); 632 } 633 } 634 635 Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder, 636 MinMaxRecurrenceKind RK, 637 Value *Left, Value *Right) { 638 CmpInst::Predicate P = CmpInst::ICMP_NE; 639 switch (RK) { 640 default: 641 llvm_unreachable("Unknown min/max recurrence kind"); 642 case MRK_UIntMin: 643 P = CmpInst::ICMP_ULT; 644 break; 645 case MRK_UIntMax: 646 P = CmpInst::ICMP_UGT; 647 break; 648 case MRK_SIntMin: 649 P = CmpInst::ICMP_SLT; 650 break; 651 case MRK_SIntMax: 652 P = CmpInst::ICMP_SGT; 653 break; 654 case MRK_FloatMin: 655 P = CmpInst::FCMP_OLT; 656 break; 657 case MRK_FloatMax: 658 P = CmpInst::FCMP_OGT; 659 break; 660 } 661 662 // We only match FP sequences with unsafe algebra, so we can unconditionally 663 // set it on any generated instructions. 664 IRBuilder<>::FastMathFlagGuard FMFG(Builder); 665 FastMathFlags FMF; 666 FMF.setUnsafeAlgebra(); 667 Builder.setFastMathFlags(FMF); 668 669 Value *Cmp; 670 if (RK == MRK_FloatMin || RK == MRK_FloatMax) 671 Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp"); 672 else 673 Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp"); 674 675 Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select"); 676 return Select; 677 } 678 679 InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K, 680 const SCEV *Step, BinaryOperator *BOp) 681 : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) { 682 assert(IK != IK_NoInduction && "Not an induction"); 683 684 // Start value type should match the induction kind and the value 685 // itself should not be null. 686 assert(StartValue && "StartValue is null"); 687 assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) && 688 "StartValue is not a pointer for pointer induction"); 689 assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) && 690 "StartValue is not an integer for integer induction"); 691 692 // Check the Step Value. It should be non-zero integer value. 693 assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) && 694 "Step value is zero"); 695 696 assert((IK != IK_PtrInduction || getConstIntStepValue()) && 697 "Step value should be constant for pointer induction"); 698 assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) && 699 "StepValue is not an integer"); 700 701 assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) && 702 "StepValue is not FP for FpInduction"); 703 assert((IK != IK_FpInduction || (InductionBinOp && 704 (InductionBinOp->getOpcode() == Instruction::FAdd || 705 InductionBinOp->getOpcode() == Instruction::FSub))) && 706 "Binary opcode should be specified for FP induction"); 707 } 708 709 int InductionDescriptor::getConsecutiveDirection() const { 710 ConstantInt *ConstStep = getConstIntStepValue(); 711 if (ConstStep && (ConstStep->isOne() || ConstStep->isMinusOne())) 712 return ConstStep->getSExtValue(); 713 return 0; 714 } 715 716 ConstantInt *InductionDescriptor::getConstIntStepValue() const { 717 if (isa<SCEVConstant>(Step)) 718 return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue()); 719 return nullptr; 720 } 721 722 Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index, 723 ScalarEvolution *SE, 724 const DataLayout& DL) const { 725 726 SCEVExpander Exp(*SE, DL, "induction"); 727 assert(Index->getType() == Step->getType() && 728 "Index type does not match StepValue type"); 729 switch (IK) { 730 case IK_IntInduction: { 731 assert(Index->getType() == StartValue->getType() && 732 "Index type does not match StartValue type"); 733 734 // FIXME: Theoretically, we can call getAddExpr() of ScalarEvolution 735 // and calculate (Start + Index * Step) for all cases, without 736 // special handling for "isOne" and "isMinusOne". 737 // But in the real life the result code getting worse. We mix SCEV 738 // expressions and ADD/SUB operations and receive redundant 739 // intermediate values being calculated in different ways and 740 // Instcombine is unable to reduce them all. 741 742 if (getConstIntStepValue() && 743 getConstIntStepValue()->isMinusOne()) 744 return B.CreateSub(StartValue, Index); 745 if (getConstIntStepValue() && 746 getConstIntStepValue()->isOne()) 747 return B.CreateAdd(StartValue, Index); 748 const SCEV *S = SE->getAddExpr(SE->getSCEV(StartValue), 749 SE->getMulExpr(Step, SE->getSCEV(Index))); 750 return Exp.expandCodeFor(S, StartValue->getType(), &*B.GetInsertPoint()); 751 } 752 case IK_PtrInduction: { 753 assert(isa<SCEVConstant>(Step) && 754 "Expected constant step for pointer induction"); 755 const SCEV *S = SE->getMulExpr(SE->getSCEV(Index), Step); 756 Index = Exp.expandCodeFor(S, Index->getType(), &*B.GetInsertPoint()); 757 return B.CreateGEP(nullptr, StartValue, Index); 758 } 759 case IK_FpInduction: { 760 assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value"); 761 assert(InductionBinOp && 762 (InductionBinOp->getOpcode() == Instruction::FAdd || 763 InductionBinOp->getOpcode() == Instruction::FSub) && 764 "Original bin op should be defined for FP induction"); 765 766 Value *StepValue = cast<SCEVUnknown>(Step)->getValue(); 767 768 // Floating point operations had to be 'fast' to enable the induction. 769 FastMathFlags Flags; 770 Flags.setUnsafeAlgebra(); 771 772 Value *MulExp = B.CreateFMul(StepValue, Index); 773 if (isa<Instruction>(MulExp)) 774 // We have to check, the MulExp may be a constant. 775 cast<Instruction>(MulExp)->setFastMathFlags(Flags); 776 777 Value *BOp = B.CreateBinOp(InductionBinOp->getOpcode() , StartValue, 778 MulExp, "induction"); 779 if (isa<Instruction>(BOp)) 780 cast<Instruction>(BOp)->setFastMathFlags(Flags); 781 782 return BOp; 783 } 784 case IK_NoInduction: 785 return nullptr; 786 } 787 llvm_unreachable("invalid enum"); 788 } 789 790 bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop, 791 ScalarEvolution *SE, 792 InductionDescriptor &D) { 793 794 // Here we only handle FP induction variables. 795 assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type"); 796 797 if (TheLoop->getHeader() != Phi->getParent()) 798 return false; 799 800 // The loop may have multiple entrances or multiple exits; we can analyze 801 // this phi if it has a unique entry value and a unique backedge value. 802 if (Phi->getNumIncomingValues() != 2) 803 return false; 804 Value *BEValue = nullptr, *StartValue = nullptr; 805 if (TheLoop->contains(Phi->getIncomingBlock(0))) { 806 BEValue = Phi->getIncomingValue(0); 807 StartValue = Phi->getIncomingValue(1); 808 } else { 809 assert(TheLoop->contains(Phi->getIncomingBlock(1)) && 810 "Unexpected Phi node in the loop"); 811 BEValue = Phi->getIncomingValue(1); 812 StartValue = Phi->getIncomingValue(0); 813 } 814 815 BinaryOperator *BOp = dyn_cast<BinaryOperator>(BEValue); 816 if (!BOp) 817 return false; 818 819 Value *Addend = nullptr; 820 if (BOp->getOpcode() == Instruction::FAdd) { 821 if (BOp->getOperand(0) == Phi) 822 Addend = BOp->getOperand(1); 823 else if (BOp->getOperand(1) == Phi) 824 Addend = BOp->getOperand(0); 825 } else if (BOp->getOpcode() == Instruction::FSub) 826 if (BOp->getOperand(0) == Phi) 827 Addend = BOp->getOperand(1); 828 829 if (!Addend) 830 return false; 831 832 // The addend should be loop invariant 833 if (auto *I = dyn_cast<Instruction>(Addend)) 834 if (TheLoop->contains(I)) 835 return false; 836 837 // FP Step has unknown SCEV 838 const SCEV *Step = SE->getUnknown(Addend); 839 D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp); 840 return true; 841 } 842 843 bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop, 844 PredicatedScalarEvolution &PSE, 845 InductionDescriptor &D, 846 bool Assume) { 847 Type *PhiTy = Phi->getType(); 848 849 // Handle integer and pointer inductions variables. 850 // Now we handle also FP induction but not trying to make a 851 // recurrent expression from the PHI node in-place. 852 853 if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() && 854 !PhiTy->isFloatTy() && !PhiTy->isDoubleTy() && !PhiTy->isHalfTy()) 855 return false; 856 857 if (PhiTy->isFloatingPointTy()) 858 return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D); 859 860 const SCEV *PhiScev = PSE.getSCEV(Phi); 861 const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev); 862 863 // We need this expression to be an AddRecExpr. 864 if (Assume && !AR) 865 AR = PSE.getAsAddRec(Phi); 866 867 if (!AR) { 868 DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); 869 return false; 870 } 871 872 return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR); 873 } 874 875 bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop, 876 ScalarEvolution *SE, 877 InductionDescriptor &D, 878 const SCEV *Expr) { 879 Type *PhiTy = Phi->getType(); 880 // We only handle integer and pointer inductions variables. 881 if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy()) 882 return false; 883 884 // Check that the PHI is consecutive. 885 const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi); 886 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev); 887 888 if (!AR) { 889 DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); 890 return false; 891 } 892 893 if (AR->getLoop() != TheLoop) { 894 // FIXME: We should treat this as a uniform. Unfortunately, we 895 // don't currently know how to handled uniform PHIs. 896 DEBUG(dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n"); 897 return false; 898 } 899 900 Value *StartValue = 901 Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader()); 902 const SCEV *Step = AR->getStepRecurrence(*SE); 903 // Calculate the pointer stride and check if it is consecutive. 904 // The stride may be a constant or a loop invariant integer value. 905 const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step); 906 if (!ConstStep && !SE->isLoopInvariant(Step, TheLoop)) 907 return false; 908 909 if (PhiTy->isIntegerTy()) { 910 D = InductionDescriptor(StartValue, IK_IntInduction, Step); 911 return true; 912 } 913 914 assert(PhiTy->isPointerTy() && "The PHI must be a pointer"); 915 // Pointer induction should be a constant. 916 if (!ConstStep) 917 return false; 918 919 ConstantInt *CV = ConstStep->getValue(); 920 Type *PointerElementType = PhiTy->getPointerElementType(); 921 // The pointer stride cannot be determined if the pointer element type is not 922 // sized. 923 if (!PointerElementType->isSized()) 924 return false; 925 926 const DataLayout &DL = Phi->getModule()->getDataLayout(); 927 int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType)); 928 if (!Size) 929 return false; 930 931 int64_t CVSize = CV->getSExtValue(); 932 if (CVSize % Size) 933 return false; 934 auto *StepValue = SE->getConstant(CV->getType(), CVSize / Size, 935 true /* signed */); 936 D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue); 937 return true; 938 } 939 940 bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, 941 bool PreserveLCSSA) { 942 bool Changed = false; 943 944 // We re-use a vector for the in-loop predecesosrs. 945 SmallVector<BasicBlock *, 4> InLoopPredecessors; 946 947 auto RewriteExit = [&](BasicBlock *BB) { 948 assert(InLoopPredecessors.empty() && 949 "Must start with an empty predecessors list!"); 950 auto Cleanup = make_scope_exit([&] { InLoopPredecessors.clear(); }); 951 952 // See if there are any non-loop predecessors of this exit block and 953 // keep track of the in-loop predecessors. 954 bool IsDedicatedExit = true; 955 for (auto *PredBB : predecessors(BB)) 956 if (L->contains(PredBB)) { 957 if (isa<IndirectBrInst>(PredBB->getTerminator())) 958 // We cannot rewrite exiting edges from an indirectbr. 959 return false; 960 961 InLoopPredecessors.push_back(PredBB); 962 } else { 963 IsDedicatedExit = false; 964 } 965 966 assert(!InLoopPredecessors.empty() && "Must have *some* loop predecessor!"); 967 968 // Nothing to do if this is already a dedicated exit. 969 if (IsDedicatedExit) 970 return false; 971 972 auto *NewExitBB = SplitBlockPredecessors( 973 BB, InLoopPredecessors, ".loopexit", DT, LI, PreserveLCSSA); 974 975 if (!NewExitBB) 976 DEBUG(dbgs() << "WARNING: Can't create a dedicated exit block for loop: " 977 << *L << "\n"); 978 else 979 DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block " 980 << NewExitBB->getName() << "\n"); 981 return true; 982 }; 983 984 // Walk the exit blocks directly rather than building up a data structure for 985 // them, but only visit each one once. 986 SmallPtrSet<BasicBlock *, 4> Visited; 987 for (auto *BB : L->blocks()) 988 for (auto *SuccBB : successors(BB)) { 989 // We're looking for exit blocks so skip in-loop successors. 990 if (L->contains(SuccBB)) 991 continue; 992 993 // Visit each exit block exactly once. 994 if (!Visited.insert(SuccBB).second) 995 continue; 996 997 Changed |= RewriteExit(SuccBB); 998 } 999 1000 return Changed; 1001 } 1002 1003 /// \brief Returns the instructions that use values defined in the loop. 1004 SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) { 1005 SmallVector<Instruction *, 8> UsedOutside; 1006 1007 for (auto *Block : L->getBlocks()) 1008 // FIXME: I believe that this could use copy_if if the Inst reference could 1009 // be adapted into a pointer. 1010 for (auto &Inst : *Block) { 1011 auto Users = Inst.users(); 1012 if (any_of(Users, [&](User *U) { 1013 auto *Use = cast<Instruction>(U); 1014 return !L->contains(Use->getParent()); 1015 })) 1016 UsedOutside.push_back(&Inst); 1017 } 1018 1019 return UsedOutside; 1020 } 1021 1022 void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) { 1023 // By definition, all loop passes need the LoopInfo analysis and the 1024 // Dominator tree it depends on. Because they all participate in the loop 1025 // pass manager, they must also preserve these. 1026 AU.addRequired<DominatorTreeWrapperPass>(); 1027 AU.addPreserved<DominatorTreeWrapperPass>(); 1028 AU.addRequired<LoopInfoWrapperPass>(); 1029 AU.addPreserved<LoopInfoWrapperPass>(); 1030 1031 // We must also preserve LoopSimplify and LCSSA. We locally access their IDs 1032 // here because users shouldn't directly get them from this header. 1033 extern char &LoopSimplifyID; 1034 extern char &LCSSAID; 1035 AU.addRequiredID(LoopSimplifyID); 1036 AU.addPreservedID(LoopSimplifyID); 1037 AU.addRequiredID(LCSSAID); 1038 AU.addPreservedID(LCSSAID); 1039 // This is used in the LPPassManager to perform LCSSA verification on passes 1040 // which preserve lcssa form 1041 AU.addRequired<LCSSAVerificationPass>(); 1042 AU.addPreserved<LCSSAVerificationPass>(); 1043 1044 // Loop passes are designed to run inside of a loop pass manager which means 1045 // that any function analyses they require must be required by the first loop 1046 // pass in the manager (so that it is computed before the loop pass manager 1047 // runs) and preserved by all loop pasess in the manager. To make this 1048 // reasonably robust, the set needed for most loop passes is maintained here. 1049 // If your loop pass requires an analysis not listed here, you will need to 1050 // carefully audit the loop pass manager nesting structure that results. 1051 AU.addRequired<AAResultsWrapperPass>(); 1052 AU.addPreserved<AAResultsWrapperPass>(); 1053 AU.addPreserved<BasicAAWrapperPass>(); 1054 AU.addPreserved<GlobalsAAWrapperPass>(); 1055 AU.addPreserved<SCEVAAWrapperPass>(); 1056 AU.addRequired<ScalarEvolutionWrapperPass>(); 1057 AU.addPreserved<ScalarEvolutionWrapperPass>(); 1058 } 1059 1060 /// Manually defined generic "LoopPass" dependency initialization. This is used 1061 /// to initialize the exact set of passes from above in \c 1062 /// getLoopAnalysisUsage. It can be used within a loop pass's initialization 1063 /// with: 1064 /// 1065 /// INITIALIZE_PASS_DEPENDENCY(LoopPass) 1066 /// 1067 /// As-if "LoopPass" were a pass. 1068 void llvm::initializeLoopPassPass(PassRegistry &Registry) { 1069 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 1070 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 1071 INITIALIZE_PASS_DEPENDENCY(LoopSimplify) 1072 INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass) 1073 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 1074 INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass) 1075 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) 1076 INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass) 1077 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 1078 } 1079 1080 /// \brief Find string metadata for loop 1081 /// 1082 /// If it has a value (e.g. {"llvm.distribute", 1} return the value as an 1083 /// operand or null otherwise. If the string metadata is not found return 1084 /// Optional's not-a-value. 1085 Optional<const MDOperand *> llvm::findStringMetadataForLoop(Loop *TheLoop, 1086 StringRef Name) { 1087 MDNode *LoopID = TheLoop->getLoopID(); 1088 // Return none if LoopID is false. 1089 if (!LoopID) 1090 return None; 1091 1092 // First operand should refer to the loop id itself. 1093 assert(LoopID->getNumOperands() > 0 && "requires at least one operand"); 1094 assert(LoopID->getOperand(0) == LoopID && "invalid loop id"); 1095 1096 // Iterate over LoopID operands and look for MDString Metadata 1097 for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) { 1098 MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i)); 1099 if (!MD) 1100 continue; 1101 MDString *S = dyn_cast<MDString>(MD->getOperand(0)); 1102 if (!S) 1103 continue; 1104 // Return true if MDString holds expected MetaData. 1105 if (Name.equals(S->getString())) 1106 switch (MD->getNumOperands()) { 1107 case 1: 1108 return nullptr; 1109 case 2: 1110 return &MD->getOperand(1); 1111 default: 1112 llvm_unreachable("loop metadata has 0 or 1 operand"); 1113 } 1114 } 1115 return None; 1116 } 1117 1118 /// Returns true if the instruction in a loop is guaranteed to execute at least 1119 /// once. 1120 bool llvm::isGuaranteedToExecute(const Instruction &Inst, 1121 const DominatorTree *DT, const Loop *CurLoop, 1122 const LoopSafetyInfo *SafetyInfo) { 1123 // We have to check to make sure that the instruction dominates all 1124 // of the exit blocks. If it doesn't, then there is a path out of the loop 1125 // which does not execute this instruction, so we can't hoist it. 1126 1127 // If the instruction is in the header block for the loop (which is very 1128 // common), it is always guaranteed to dominate the exit blocks. Since this 1129 // is a common case, and can save some work, check it now. 1130 if (Inst.getParent() == CurLoop->getHeader()) 1131 // If there's a throw in the header block, we can't guarantee we'll reach 1132 // Inst. 1133 return !SafetyInfo->HeaderMayThrow; 1134 1135 // Somewhere in this loop there is an instruction which may throw and make us 1136 // exit the loop. 1137 if (SafetyInfo->MayThrow) 1138 return false; 1139 1140 // Get the exit blocks for the current loop. 1141 SmallVector<BasicBlock *, 8> ExitBlocks; 1142 CurLoop->getExitBlocks(ExitBlocks); 1143 1144 // Verify that the block dominates each of the exit blocks of the loop. 1145 for (BasicBlock *ExitBlock : ExitBlocks) 1146 if (!DT->dominates(Inst.getParent(), ExitBlock)) 1147 return false; 1148 1149 // As a degenerate case, if the loop is statically infinite then we haven't 1150 // proven anything since there are no exit blocks. 1151 if (ExitBlocks.empty()) 1152 return false; 1153 1154 // FIXME: In general, we have to prove that the loop isn't an infinite loop. 1155 // See http::llvm.org/PR24078 . (The "ExitBlocks.empty()" check above is 1156 // just a special case of this.) 1157 return true; 1158 } 1159 1160 Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) { 1161 // Only support loops with a unique exiting block, and a latch. 1162 if (!L->getExitingBlock()) 1163 return None; 1164 1165 // Get the branch weights for the the loop's backedge. 1166 BranchInst *LatchBR = 1167 dyn_cast<BranchInst>(L->getLoopLatch()->getTerminator()); 1168 if (!LatchBR || LatchBR->getNumSuccessors() != 2) 1169 return None; 1170 1171 assert((LatchBR->getSuccessor(0) == L->getHeader() || 1172 LatchBR->getSuccessor(1) == L->getHeader()) && 1173 "At least one edge out of the latch must go to the header"); 1174 1175 // To estimate the number of times the loop body was executed, we want to 1176 // know the number of times the backedge was taken, vs. the number of times 1177 // we exited the loop. 1178 uint64_t TrueVal, FalseVal; 1179 if (!LatchBR->extractProfMetadata(TrueVal, FalseVal)) 1180 return None; 1181 1182 if (!TrueVal || !FalseVal) 1183 return 0; 1184 1185 // Divide the count of the backedge by the count of the edge exiting the loop, 1186 // rounding to nearest. 1187 if (LatchBR->getSuccessor(0) == L->getHeader()) 1188 return (TrueVal + (FalseVal / 2)) / FalseVal; 1189 else 1190 return (FalseVal + (TrueVal / 2)) / TrueVal; 1191 } 1192 1193 /// \brief Adds a 'fast' flag to floating point operations. 1194 static Value *addFastMathFlag(Value *V) { 1195 if (isa<FPMathOperator>(V)) { 1196 FastMathFlags Flags; 1197 Flags.setUnsafeAlgebra(); 1198 cast<Instruction>(V)->setFastMathFlags(Flags); 1199 } 1200 return V; 1201 } 1202 1203 // Helper to generate a log2 shuffle reduction. 1204 Value * 1205 llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op, 1206 RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind, 1207 ArrayRef<Value *> RedOps) { 1208 unsigned VF = Src->getType()->getVectorNumElements(); 1209 // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles 1210 // and vector ops, reducing the set of values being computed by half each 1211 // round. 1212 assert(isPowerOf2_32(VF) && 1213 "Reduction emission only supported for pow2 vectors!"); 1214 Value *TmpVec = Src; 1215 SmallVector<Constant *, 32> ShuffleMask(VF, nullptr); 1216 for (unsigned i = VF; i != 1; i >>= 1) { 1217 // Move the upper half of the vector to the lower half. 1218 for (unsigned j = 0; j != i / 2; ++j) 1219 ShuffleMask[j] = Builder.getInt32(i / 2 + j); 1220 1221 // Fill the rest of the mask with undef. 1222 std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), 1223 UndefValue::get(Builder.getInt32Ty())); 1224 1225 Value *Shuf = Builder.CreateShuffleVector( 1226 TmpVec, UndefValue::get(TmpVec->getType()), 1227 ConstantVector::get(ShuffleMask), "rdx.shuf"); 1228 1229 if (Op != Instruction::ICmp && Op != Instruction::FCmp) { 1230 // Floating point operations had to be 'fast' to enable the reduction. 1231 TmpVec = addFastMathFlag(Builder.CreateBinOp((Instruction::BinaryOps)Op, 1232 TmpVec, Shuf, "bin.rdx")); 1233 } else { 1234 assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid && 1235 "Invalid min/max"); 1236 TmpVec = RecurrenceDescriptor::createMinMaxOp(Builder, MinMaxKind, TmpVec, 1237 Shuf); 1238 } 1239 if (!RedOps.empty()) 1240 propagateIRFlags(TmpVec, RedOps); 1241 } 1242 // The result is in the first element of the vector. 1243 return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0)); 1244 } 1245 1246 /// Create a simple vector reduction specified by an opcode and some 1247 /// flags (if generating min/max reductions). 1248 Value *llvm::createSimpleTargetReduction( 1249 IRBuilder<> &Builder, const TargetTransformInfo *TTI, unsigned Opcode, 1250 Value *Src, TargetTransformInfo::ReductionFlags Flags, 1251 ArrayRef<Value *> RedOps) { 1252 assert(isa<VectorType>(Src->getType()) && "Type must be a vector"); 1253 1254 Value *ScalarUdf = UndefValue::get(Src->getType()->getVectorElementType()); 1255 std::function<Value*()> BuildFunc; 1256 using RD = RecurrenceDescriptor; 1257 RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid; 1258 // TODO: Support creating ordered reductions. 1259 FastMathFlags FMFUnsafe; 1260 FMFUnsafe.setUnsafeAlgebra(); 1261 1262 switch (Opcode) { 1263 case Instruction::Add: 1264 BuildFunc = [&]() { return Builder.CreateAddReduce(Src); }; 1265 break; 1266 case Instruction::Mul: 1267 BuildFunc = [&]() { return Builder.CreateMulReduce(Src); }; 1268 break; 1269 case Instruction::And: 1270 BuildFunc = [&]() { return Builder.CreateAndReduce(Src); }; 1271 break; 1272 case Instruction::Or: 1273 BuildFunc = [&]() { return Builder.CreateOrReduce(Src); }; 1274 break; 1275 case Instruction::Xor: 1276 BuildFunc = [&]() { return Builder.CreateXorReduce(Src); }; 1277 break; 1278 case Instruction::FAdd: 1279 BuildFunc = [&]() { 1280 auto Rdx = Builder.CreateFAddReduce(ScalarUdf, Src); 1281 cast<CallInst>(Rdx)->setFastMathFlags(FMFUnsafe); 1282 return Rdx; 1283 }; 1284 break; 1285 case Instruction::FMul: 1286 BuildFunc = [&]() { 1287 auto Rdx = Builder.CreateFMulReduce(ScalarUdf, Src); 1288 cast<CallInst>(Rdx)->setFastMathFlags(FMFUnsafe); 1289 return Rdx; 1290 }; 1291 break; 1292 case Instruction::ICmp: 1293 if (Flags.IsMaxOp) { 1294 MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMax : RD::MRK_UIntMax; 1295 BuildFunc = [&]() { 1296 return Builder.CreateIntMaxReduce(Src, Flags.IsSigned); 1297 }; 1298 } else { 1299 MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMin : RD::MRK_UIntMin; 1300 BuildFunc = [&]() { 1301 return Builder.CreateIntMinReduce(Src, Flags.IsSigned); 1302 }; 1303 } 1304 break; 1305 case Instruction::FCmp: 1306 if (Flags.IsMaxOp) { 1307 MinMaxKind = RD::MRK_FloatMax; 1308 BuildFunc = [&]() { return Builder.CreateFPMaxReduce(Src, Flags.NoNaN); }; 1309 } else { 1310 MinMaxKind = RD::MRK_FloatMin; 1311 BuildFunc = [&]() { return Builder.CreateFPMinReduce(Src, Flags.NoNaN); }; 1312 } 1313 break; 1314 default: 1315 llvm_unreachable("Unhandled opcode"); 1316 break; 1317 } 1318 if (TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags)) 1319 return BuildFunc(); 1320 return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, RedOps); 1321 } 1322 1323 /// Create a vector reduction using a given recurrence descriptor. 1324 Value *llvm::createTargetReduction(IRBuilder<> &Builder, 1325 const TargetTransformInfo *TTI, 1326 RecurrenceDescriptor &Desc, Value *Src, 1327 bool NoNaN) { 1328 // TODO: Support in-order reductions based on the recurrence descriptor. 1329 RecurrenceDescriptor::RecurrenceKind RecKind = Desc.getRecurrenceKind(); 1330 TargetTransformInfo::ReductionFlags Flags; 1331 Flags.NoNaN = NoNaN; 1332 auto getSimpleRdx = [&](unsigned Opc) { 1333 return createSimpleTargetReduction(Builder, TTI, Opc, Src, Flags); 1334 }; 1335 switch (RecKind) { 1336 case RecurrenceDescriptor::RK_FloatAdd: 1337 return getSimpleRdx(Instruction::FAdd); 1338 case RecurrenceDescriptor::RK_FloatMult: 1339 return getSimpleRdx(Instruction::FMul); 1340 case RecurrenceDescriptor::RK_IntegerAdd: 1341 return getSimpleRdx(Instruction::Add); 1342 case RecurrenceDescriptor::RK_IntegerMult: 1343 return getSimpleRdx(Instruction::Mul); 1344 case RecurrenceDescriptor::RK_IntegerAnd: 1345 return getSimpleRdx(Instruction::And); 1346 case RecurrenceDescriptor::RK_IntegerOr: 1347 return getSimpleRdx(Instruction::Or); 1348 case RecurrenceDescriptor::RK_IntegerXor: 1349 return getSimpleRdx(Instruction::Xor); 1350 case RecurrenceDescriptor::RK_IntegerMinMax: { 1351 switch (Desc.getMinMaxRecurrenceKind()) { 1352 case RecurrenceDescriptor::MRK_SIntMax: 1353 Flags.IsSigned = true; 1354 Flags.IsMaxOp = true; 1355 break; 1356 case RecurrenceDescriptor::MRK_UIntMax: 1357 Flags.IsMaxOp = true; 1358 break; 1359 case RecurrenceDescriptor::MRK_SIntMin: 1360 Flags.IsSigned = true; 1361 break; 1362 case RecurrenceDescriptor::MRK_UIntMin: 1363 break; 1364 default: 1365 llvm_unreachable("Unhandled MRK"); 1366 } 1367 return getSimpleRdx(Instruction::ICmp); 1368 } 1369 case RecurrenceDescriptor::RK_FloatMinMax: { 1370 Flags.IsMaxOp = 1371 Desc.getMinMaxRecurrenceKind() == RecurrenceDescriptor::MRK_FloatMax; 1372 return getSimpleRdx(Instruction::FCmp); 1373 } 1374 default: 1375 llvm_unreachable("Unhandled RecKind"); 1376 } 1377 } 1378 1379 void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) { 1380 auto *VecOp = dyn_cast<Instruction>(I); 1381 if (!VecOp) 1382 return; 1383 auto *Intersection = (OpValue == nullptr) ? dyn_cast<Instruction>(VL[0]) 1384 : dyn_cast<Instruction>(OpValue); 1385 if (!Intersection) 1386 return; 1387 const unsigned Opcode = Intersection->getOpcode(); 1388 VecOp->copyIRFlags(Intersection); 1389 for (auto *V : VL) { 1390 auto *Instr = dyn_cast<Instruction>(V); 1391 if (!Instr) 1392 continue; 1393 if (OpValue == nullptr || Opcode == Instr->getOpcode()) 1394 VecOp->andIRFlags(V); 1395 } 1396 } 1397