//===-- LoopUtils.cpp - Loop Utility functions -------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines common loop utility functions.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "loop-utils"

bool RecurrenceDescriptor::areAllUsesIn(Instruction *I,
                                        SmallPtrSetImpl<Instruction *> &Set) {
  for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use)
    if (!Set.count(dyn_cast<Instruction>(*Use)))
      return false;
  return true;
}

bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurrenceKind Kind) {
  switch (Kind) {
  default:
    break;
  case RK_IntegerAdd:
  case RK_IntegerMult:
  case RK_IntegerOr:
  case RK_IntegerAnd:
  case RK_IntegerXor:
  case RK_IntegerMinMax:
    return true;
  }
  return false;
}

bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurrenceKind Kind) {
  return (Kind != RK_NoRecurrence) && !isIntegerRecurrenceKind(Kind);
}

bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurrenceKind Kind) {
  switch (Kind) {
  default:
    break;
  case RK_IntegerAdd:
  case RK_IntegerMult:
  case RK_FloatAdd:
  case RK_FloatMult:
    return true;
  }
  return false;
}

Instruction *
RecurrenceDescriptor::lookThroughAnd(PHINode *Phi, Type *&RT,
                                     SmallPtrSetImpl<Instruction *> &Visited,
                                     SmallPtrSetImpl<Instruction *> &CI) {
  if (!Phi->hasOneUse())
    return Phi;

  const APInt *M = nullptr;
  Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser());

  // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT
  // with a new integer type of the corresponding bit width.
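  // For example, if the phi's single user is an instruction of the form
  // "%a = and i32 %phi, 255" (illustrative IR), the mask is 2^8-1, so Bits
  // becomes 8 and RT becomes i8.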
  if (match(J, m_c_And(m_Instruction(I), m_APInt(M)))) {
    int32_t Bits = (*M + 1).exactLogBase2();
    if (Bits > 0) {
      RT = IntegerType::get(Phi->getContext(), Bits);
      Visited.insert(Phi);
      CI.insert(J);
      return J;
    }
  }
  return Phi;
}

bool RecurrenceDescriptor::getSourceExtensionKind(
    Instruction *Start, Instruction *Exit, Type *RT, bool &IsSigned,
    SmallPtrSetImpl<Instruction *> &Visited,
    SmallPtrSetImpl<Instruction *> &CI) {

  SmallVector<Instruction *, 8> Worklist;
  bool FoundOneOperand = false;
  unsigned DstSize = RT->getPrimitiveSizeInBits();
  Worklist.push_back(Exit);

  // Traverse the instructions in the reduction expression, beginning with the
  // exit value.
  while (!Worklist.empty()) {
    Instruction *I = Worklist.pop_back_val();
    for (Use &U : I->operands()) {

      // Terminate the traversal if the operand is not an instruction, or we
      // reach the starting value.
      Instruction *J = dyn_cast<Instruction>(U.get());
      if (!J || J == Start)
        continue;

      // Otherwise, investigate the operation if it is also in the expression.
      if (Visited.count(J)) {
        Worklist.push_back(J);
        continue;
      }

      // If the operand is not in Visited, it is not a reduction operation, but
      // it does feed into one. Make sure it is either a single-use sign- or
      // zero-extend instruction.
      CastInst *Cast = dyn_cast<CastInst>(J);
      bool IsSExtInst = isa<SExtInst>(J);
      if (!Cast || !Cast->hasOneUse() || !(isa<ZExtInst>(J) || IsSExtInst))
        return false;

      // Ensure the source type of the extend is no larger than the reduction
      // type. It is not necessary for the types to be identical.
      unsigned SrcSize = Cast->getSrcTy()->getPrimitiveSizeInBits();
      if (SrcSize > DstSize)
        return false;

      // Furthermore, ensure that all such extends are of the same kind.
      if (FoundOneOperand) {
        if (IsSigned != IsSExtInst)
          return false;
      } else {
        FoundOneOperand = true;
        IsSigned = IsSExtInst;
      }

      // Lastly, if the source type of the extend matches the reduction type,
      // add the extend to CI so that we can avoid accounting for it in the
      // cost model.
      if (SrcSize == DstSize)
        CI.insert(Cast);
    }
  }
  return true;
}

bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
                                           Loop *TheLoop, bool HasFunNoNaNAttr,
                                           RecurrenceDescriptor &RedDes) {
  if (Phi->getNumIncomingValues() != 2)
    return false;

  // Reduction variables are only found in the loop header block.
  if (Phi->getParent() != TheLoop->getHeader())
    return false;

  // Obtain the reduction start value from the value that comes from the loop
  // preheader.
  Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());

  // ExitInstruction is the single value which is used outside the loop.
  // We only allow for a single reduction value to be used outside the loop.
  // This includes users of the reduction, variables (which form a cycle
  // which ends in the phi node).
  Instruction *ExitInstruction = nullptr;
  // Indicates that we found a reduction operation in our scan.
  bool FoundReduxOp = false;

  // We start with the PHI node and scan for all of the users of this
  // instruction. All users must be instructions that can be used as reduction
  // variables (such as ADD). We must have a single out-of-block user. The cycle
  // must include the original PHI.
  bool FoundStartPHI = false;

  // To recognize min/max patterns formed by an icmp select sequence, we store
  // the number of instructions we saw from the recognized min/max pattern,
  // to make sure we only see exactly the two instructions.
  unsigned NumCmpSelectPatternInst = 0;
  InstDesc ReduxDesc(false, nullptr);

  // Data used for determining if the recurrence has been type-promoted.
  Type *RecurrenceType = Phi->getType();
  SmallPtrSet<Instruction *, 4> CastInsts;
  Instruction *Start = Phi;
  bool IsSigned = false;

  SmallPtrSet<Instruction *, 8> VisitedInsts;
  SmallVector<Instruction *, 8> Worklist;

  // Return early if the recurrence kind does not match the type of Phi. If the
  // recurrence kind is arithmetic, we attempt to look through AND operations
  // resulting from the type promotion performed by InstCombine. Vector
  // operations are not limited to the legal integer widths, so we may be able
  // to evaluate the reduction in the narrower width.
  if (RecurrenceType->isFloatingPointTy()) {
    if (!isFloatingPointRecurrenceKind(Kind))
      return false;
  } else {
    if (!isIntegerRecurrenceKind(Kind))
      return false;
    if (isArithmeticRecurrenceKind(Kind))
      Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);
  }

  Worklist.push_back(Start);
  VisitedInsts.insert(Start);

  // A value in the reduction can be used:
  //  - By the reduction:
  //      - Reduction operation:
  //        - One use of reduction value (safe).
  //        - Multiple use of reduction value (not safe).
  //      - PHI:
  //        - All uses of the PHI must be the reduction (safe).
  //        - Otherwise, not safe.
  //  - By instructions outside of the loop (safe).
  //      * One value may have several outside users, but all outside
  //        uses must be of the same value.
  //  - By an instruction that is not part of the reduction (not safe).
  //    This is either:
  //      * An instruction type other than PHI or the reduction operation.
  //      * A PHI in the header other than the initial PHI.
  while (!Worklist.empty()) {
    Instruction *Cur = Worklist.back();
    Worklist.pop_back();

    // No users.
    // If the instruction has no users then this is a broken chain and can't be
    // a reduction variable.
    if (Cur->use_empty())
      return false;

    bool IsAPhi = isa<PHINode>(Cur);

    // A header PHI use other than the original PHI.
    if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent())
      return false;

    // Reductions of instructions such as Div and Sub are only possible if the
    // LHS is the reduction variable.
    if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) &&
        !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) &&
        !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))
      return false;

    // Any reduction instruction must be of one of the allowed kinds. We ignore
    // the starting value (the Phi or an AND instruction if the Phi has been
    // type-promoted).
    if (Cur != Start) {
      ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr);
      if (!ReduxDesc.isRecurrence())
        return false;
    }

    // A reduction operation must only have one use of the reduction value.
    if (!IsAPhi && Kind != RK_IntegerMinMax && Kind != RK_FloatMinMax &&
        hasMultipleUsesOf(Cur, VisitedInsts))
      return false;

    // All inputs to a PHI node must be a reduction value.
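    // (Despite its name, areAllUsesIn checks Cur's operands, i.e. the values
    // feeding this phi, against the reduction instructions visited so far.)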
    if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts))
      return false;

    if (Kind == RK_IntegerMinMax &&
        (isa<ICmpInst>(Cur) || isa<SelectInst>(Cur)))
      ++NumCmpSelectPatternInst;
    if (Kind == RK_FloatMinMax && (isa<FCmpInst>(Cur) || isa<SelectInst>(Cur)))
      ++NumCmpSelectPatternInst;

    // Check whether we found a reduction operator.
    FoundReduxOp |= !IsAPhi && Cur != Start;

    // Process users of current instruction. Push non-PHI nodes after PHI nodes
    // onto the stack. This way we are going to have seen all inputs to PHI
    // nodes once we get to them.
    SmallVector<Instruction *, 8> NonPHIs;
    SmallVector<Instruction *, 8> PHIs;
    for (User *U : Cur->users()) {
      Instruction *UI = cast<Instruction>(U);

      // Check if we found the exit user.
      BasicBlock *Parent = UI->getParent();
      if (!TheLoop->contains(Parent)) {
        // If we already know this instruction is used externally, move on to
        // the next user.
        if (ExitInstruction == Cur)
          continue;

        // Exit if you find multiple values used outside or if the header phi
        // node is being used. In this case the user uses the value of the
        // previous iteration, in which case we would lose "VF-1" iterations of
        // the reduction operation if we vectorize.
        if (ExitInstruction != nullptr || Cur == Phi)
          return false;

        // The instruction used by an outside user must be the last instruction
        // before we feed back to the reduction phi. Otherwise, we lose VF-1
        // operations on the value.
        if (!is_contained(Phi->operands(), Cur))
          return false;

        ExitInstruction = Cur;
        continue;
      }

      // Process instructions only once (termination). Each reduction cycle
      // value must only be used once, except by phi nodes and min/max
      // reductions which are represented as a cmp followed by a select.
      InstDesc IgnoredVal(false, nullptr);
      if (VisitedInsts.insert(UI).second) {
        if (isa<PHINode>(UI))
          PHIs.push_back(UI);
        else
          NonPHIs.push_back(UI);
      } else if (!isa<PHINode>(UI) &&
                 ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) &&
                   !isa<SelectInst>(UI)) ||
                  !isMinMaxSelectCmpPattern(UI, IgnoredVal).isRecurrence()))
        return false;

      // Remember that we completed the cycle.
      if (UI == Phi)
        FoundStartPHI = true;
    }
    Worklist.append(PHIs.begin(), PHIs.end());
    Worklist.append(NonPHIs.begin(), NonPHIs.end());
  }

  // This means we have seen one but not the other instruction of the
  // pattern or more than just a select and cmp.
  if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) &&
      NumCmpSelectPatternInst != 2)
    return false;

  if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
    return false;

  // If we think Phi may have been type-promoted, we also need to ensure that
  // all source operands of the reduction are either SExtInsts or ZExtInsts. If
  // so, we will be able to evaluate the reduction in the narrower bit width.
  if (Start != Phi)
    if (!getSourceExtensionKind(Start, ExitInstruction, RecurrenceType,
                                IsSigned, VisitedInsts, CastInsts))
      return false;

  // We found a reduction var if we have reached the original phi node and we
  // only have a single instruction with out-of-loop users.

  // The ExitInstruction(Instruction which is allowed to have out-of-loop users)
  // is saved as part of the RecurrenceDescriptor.

  // Save the description of this reduction variable.
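  // The min/max kind and any unsafe-algebra instruction found during the scan
  // are carried over from ReduxDesc into the descriptor below.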
  RecurrenceDescriptor RD(
      RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(),
      ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts);
  RedDes = RD;

  return true;
}

/// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction
/// pattern corresponding to a min(X, Y) or max(X, Y).
RecurrenceDescriptor::InstDesc
RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev) {

  assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) &&
         "Expect a select instruction");
  Instruction *Cmp = nullptr;
  SelectInst *Select = nullptr;

  // We must handle the select(cmp()) as a single instruction. Advance to the
  // select.
  if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) {
    if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->user_begin())))
      return InstDesc(false, I);
    return InstDesc(Select, Prev.getMinMaxKind());
  }

  // Only handle single use cases for now.
  if (!(Select = dyn_cast<SelectInst>(I)))
    return InstDesc(false, I);
  if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) &&
      !(Cmp = dyn_cast<FCmpInst>(I->getOperand(0))))
    return InstDesc(false, I);
  if (!Cmp->hasOneUse())
    return InstDesc(false, I);

  Value *CmpLeft;
  Value *CmpRight;

  // Look for a min/max pattern.
  if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_UIntMin);
  else if (m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_UIntMax);
  else if (m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_SIntMax);
  else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_SIntMin);
  else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_FloatMin);
  else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_FloatMax);
  else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_FloatMin);
  else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_FloatMax);

  return InstDesc(false, I);
}

RecurrenceDescriptor::InstDesc
RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
                                        InstDesc &Prev, bool HasFunNoNaNAttr) {
  bool FP = I->getType()->isFloatingPointTy();
  Instruction *UAI = Prev.getUnsafeAlgebraInst();
  if (!UAI && FP && !I->hasUnsafeAlgebra())
    UAI = I; // Found an unsafe (unvectorizable) algebra instruction.
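
  // Map the instruction opcode to the recurrence kind we are looking for; a
  // mismatch produces a descriptor for which isRecurrence() is false.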
  switch (I->getOpcode()) {
  default:
    return InstDesc(false, I);
  case Instruction::PHI:
    return InstDesc(I, Prev.getMinMaxKind(), Prev.getUnsafeAlgebraInst());
  case Instruction::Sub:
  case Instruction::Add:
    return InstDesc(Kind == RK_IntegerAdd, I);
  case Instruction::Mul:
    return InstDesc(Kind == RK_IntegerMult, I);
  case Instruction::And:
    return InstDesc(Kind == RK_IntegerAnd, I);
  case Instruction::Or:
    return InstDesc(Kind == RK_IntegerOr, I);
  case Instruction::Xor:
    return InstDesc(Kind == RK_IntegerXor, I);
  case Instruction::FMul:
    return InstDesc(Kind == RK_FloatMult, I, UAI);
  case Instruction::FSub:
  case Instruction::FAdd:
    return InstDesc(Kind == RK_FloatAdd, I, UAI);
  case Instruction::FCmp:
  case Instruction::ICmp:
  case Instruction::Select:
    if (Kind != RK_IntegerMinMax &&
        (!HasFunNoNaNAttr || Kind != RK_FloatMinMax))
      return InstDesc(false, I);
    return isMinMaxSelectCmpPattern(I, Prev);
  }
}

bool RecurrenceDescriptor::hasMultipleUsesOf(
    Instruction *I, SmallPtrSetImpl<Instruction *> &Insts) {
  unsigned NumUses = 0;
  for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E;
       ++Use) {
    if (Insts.count(dyn_cast<Instruction>(*Use)))
      ++NumUses;
    if (NumUses > 1)
      return true;
  }

  return false;
}

bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
                                          RecurrenceDescriptor &RedDes) {

  BasicBlock *Header = TheLoop->getHeader();
  Function &F = *Header->getParent();
  bool HasFunNoNaNAttr =
      F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";

  if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes)) {
    DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes)) {
    DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes)) {
    DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes)) {
    DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes)) {
    DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr,
                      RedDes)) {
    DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes)) {
    DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes)) {
    DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes)) {
    DEBUG(dbgs() << "Found a float MINMAX reduction PHI." << *Phi << "\n");
    return true;
  }
  // Not a reduction of known type.
  return false;
}

bool RecurrenceDescriptor::isFirstOrderRecurrence(
    PHINode *Phi, Loop *TheLoop,
    DenseMap<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT) {

  // Ensure the phi node is in the loop header and has two incoming values.
  if (Phi->getParent() != TheLoop->getHeader() ||
      Phi->getNumIncomingValues() != 2)
    return false;

  // Ensure the loop has a preheader and a single latch block. The loop
  // vectorizer will need the latch to set up the next iteration of the loop.
  auto *Preheader = TheLoop->getLoopPreheader();
  auto *Latch = TheLoop->getLoopLatch();
  if (!Preheader || !Latch)
    return false;

  // Ensure the phi node's incoming blocks are the loop preheader and latch.
  if (Phi->getBasicBlockIndex(Preheader) < 0 ||
      Phi->getBasicBlockIndex(Latch) < 0)
    return false;

  // Get the previous value. The previous value comes from the latch edge while
  // the initial value comes from the preheader edge.
  auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch));
  if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous) ||
      SinkAfter.count(Previous)) // Cannot rely on dominance due to motion.
    return false;

  // Ensure every user of the phi node is dominated by the previous value.
  // The dominance requirement ensures the loop vectorizer will not need to
  // vectorize the initial value prior to the first iteration of the loop.
  // TODO: Consider extending this sinking to handle other kinds of instructions
  // and expressions, beyond sinking a single cast past Previous.
  if (Phi->hasOneUse()) {
    auto *I = Phi->user_back();
    if (I->isCast() && (I->getParent() == Phi->getParent()) && I->hasOneUse() &&
        DT->dominates(Previous, I->user_back())) {
      if (!DT->dominates(Previous, I)) // Otherwise we're good w/o sinking.
        SinkAfter[I] = Previous;
      return true;
    }
  }

  for (User *U : Phi->users())
    if (auto *I = dyn_cast<Instruction>(U)) {
      if (!DT->dominates(Previous, I))
        return false;
    }

  return true;
}

/// This function returns the identity element (or neutral element) for
/// the operation K.
Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurrenceKind K,
                                                      Type *Tp) {
  switch (K) {
  case RK_IntegerXor:
  case RK_IntegerAdd:
  case RK_IntegerOr:
    // Adding, Xoring, Oring zero to a number does not change it.
    return ConstantInt::get(Tp, 0);
  case RK_IntegerMult:
    // Multiplying a number by 1 does not change it.
    return ConstantInt::get(Tp, 1);
  case RK_IntegerAnd:
    // AND-ing a number with an all-1 value does not change it.
    return ConstantInt::get(Tp, -1, true);
  case RK_FloatMult:
    // Multiplying a number by 1 does not change it.
    return ConstantFP::get(Tp, 1.0L);
  case RK_FloatAdd:
    // Adding zero to a number does not change it.
    return ConstantFP::get(Tp, 0.0L);
  default:
    llvm_unreachable("Unknown recurrence kind");
  }
}

/// This function translates the recurrence kind to an LLVM binary operator.
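/// Min/max kinds map to the compare opcode; the select half of the pattern is
/// materialized separately (see createMinMaxOp).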
unsigned RecurrenceDescriptor::getRecurrenceBinOp(RecurrenceKind Kind) {
  switch (Kind) {
  case RK_IntegerAdd:
    return Instruction::Add;
  case RK_IntegerMult:
    return Instruction::Mul;
  case RK_IntegerOr:
    return Instruction::Or;
  case RK_IntegerAnd:
    return Instruction::And;
  case RK_IntegerXor:
    return Instruction::Xor;
  case RK_FloatMult:
    return Instruction::FMul;
  case RK_FloatAdd:
    return Instruction::FAdd;
  case RK_IntegerMinMax:
    return Instruction::ICmp;
  case RK_FloatMinMax:
    return Instruction::FCmp;
  default:
    llvm_unreachable("Unknown recurrence operation");
  }
}

Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder,
                                            MinMaxRecurrenceKind RK,
                                            Value *Left, Value *Right) {
  CmpInst::Predicate P = CmpInst::ICMP_NE;
  switch (RK) {
  default:
    llvm_unreachable("Unknown min/max recurrence kind");
  case MRK_UIntMin:
    P = CmpInst::ICMP_ULT;
    break;
  case MRK_UIntMax:
    P = CmpInst::ICMP_UGT;
    break;
  case MRK_SIntMin:
    P = CmpInst::ICMP_SLT;
    break;
  case MRK_SIntMax:
    P = CmpInst::ICMP_SGT;
    break;
  case MRK_FloatMin:
    P = CmpInst::FCMP_OLT;
    break;
  case MRK_FloatMax:
    P = CmpInst::FCMP_OGT;
    break;
  }

  // We only match FP sequences with unsafe algebra, so we can unconditionally
  // set it on any generated instructions.
  IRBuilder<>::FastMathFlagGuard FMFG(Builder);
  FastMathFlags FMF;
  FMF.setUnsafeAlgebra();
  Builder.setFastMathFlags(FMF);

  Value *Cmp;
  if (RK == MRK_FloatMin || RK == MRK_FloatMax)
    Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp");
  else
    Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp");

  Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
  return Select;
}

InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
                                         const SCEV *Step, BinaryOperator *BOp)
    : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) {
  assert(IK != IK_NoInduction && "Not an induction");

  // Start value type should match the induction kind and the value
  // itself should not be null.
  assert(StartValue && "StartValue is null");
  assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
         "StartValue is not a pointer for pointer induction");
  assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
         "StartValue is not an integer for integer induction");

  // Check the Step value. It should be a non-zero integer value.
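  // Pointer inductions additionally require a constant step, and FP inductions
  // an FAdd/FSub update instruction; see the asserts below.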
  assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) &&
         "Step value is zero");

  assert((IK != IK_PtrInduction || getConstIntStepValue()) &&
         "Step value should be constant for pointer induction");
  assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) &&
         "StepValue is not an integer");

  assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) &&
         "StepValue is not FP for FpInduction");
  assert((IK != IK_FpInduction ||
          (InductionBinOp &&
           (InductionBinOp->getOpcode() == Instruction::FAdd ||
            InductionBinOp->getOpcode() == Instruction::FSub))) &&
         "Binary opcode should be specified for FP induction");
}

int InductionDescriptor::getConsecutiveDirection() const {
  ConstantInt *ConstStep = getConstIntStepValue();
  if (ConstStep && (ConstStep->isOne() || ConstStep->isMinusOne()))
    return ConstStep->getSExtValue();
  return 0;
}

ConstantInt *InductionDescriptor::getConstIntStepValue() const {
  if (isa<SCEVConstant>(Step))
    return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue());
  return nullptr;
}

Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index,
                                      ScalarEvolution *SE,
                                      const DataLayout &DL) const {

  SCEVExpander Exp(*SE, DL, "induction");
  assert(Index->getType() == Step->getType() &&
         "Index type does not match StepValue type");
  switch (IK) {
  case IK_IntInduction: {
    assert(Index->getType() == StartValue->getType() &&
           "Index type does not match StartValue type");

    // FIXME: Theoretically, we can call getAddExpr() of ScalarEvolution
    // and calculate (Start + Index * Step) for all cases, without
    // special handling for "isOne" and "isMinusOne". But in practice the
    // resulting code is worse: we mix SCEV expressions and ADD/SUB
    // operations and end up with redundant intermediate values computed in
    // different ways, which InstCombine is unable to reduce.

    if (getConstIntStepValue() && getConstIntStepValue()->isMinusOne())
      return B.CreateSub(StartValue, Index);
    if (getConstIntStepValue() && getConstIntStepValue()->isOne())
      return B.CreateAdd(StartValue, Index);
    const SCEV *S = SE->getAddExpr(SE->getSCEV(StartValue),
                                   SE->getMulExpr(Step, SE->getSCEV(Index)));
    return Exp.expandCodeFor(S, StartValue->getType(), &*B.GetInsertPoint());
  }
  case IK_PtrInduction: {
    assert(isa<SCEVConstant>(Step) &&
           "Expected constant step for pointer induction");
    const SCEV *S = SE->getMulExpr(SE->getSCEV(Index), Step);
    Index = Exp.expandCodeFor(S, Index->getType(), &*B.GetInsertPoint());
    return B.CreateGEP(nullptr, StartValue, Index);
  }
  case IK_FpInduction: {
    assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value");
    assert(InductionBinOp &&
           (InductionBinOp->getOpcode() == Instruction::FAdd ||
            InductionBinOp->getOpcode() == Instruction::FSub) &&
           "Original bin op should be defined for FP induction");

    Value *StepValue = cast<SCEVUnknown>(Step)->getValue();

    // Floating point operations had to be 'fast' to enable the induction.
    FastMathFlags Flags;
    Flags.setUnsafeAlgebra();

    Value *MulExp = B.CreateFMul(StepValue, Index);
    if (isa<Instruction>(MulExp))
      // We have to check: MulExp may be a constant.
      cast<Instruction>(MulExp)->setFastMathFlags(Flags);

    Value *BOp = B.CreateBinOp(InductionBinOp->getOpcode(), StartValue,
                               MulExp, "induction");
    if (isa<Instruction>(BOp))
      cast<Instruction>(BOp)->setFastMathFlags(Flags);

    return BOp;
  }
  case IK_NoInduction:
    return nullptr;
  }
  llvm_unreachable("invalid enum");
}

bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop,
                                           ScalarEvolution *SE,
                                           InductionDescriptor &D) {

  // Here we only handle FP induction variables.
  assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type");

  if (TheLoop->getHeader() != Phi->getParent())
    return false;

  // The loop may have multiple entrances or multiple exits; we can analyze
  // this phi if it has a unique entry value and a unique backedge value.
  if (Phi->getNumIncomingValues() != 2)
    return false;
  Value *BEValue = nullptr, *StartValue = nullptr;
  if (TheLoop->contains(Phi->getIncomingBlock(0))) {
    BEValue = Phi->getIncomingValue(0);
    StartValue = Phi->getIncomingValue(1);
  } else {
    assert(TheLoop->contains(Phi->getIncomingBlock(1)) &&
           "Unexpected Phi node in the loop");
    BEValue = Phi->getIncomingValue(1);
    StartValue = Phi->getIncomingValue(0);
  }

  BinaryOperator *BOp = dyn_cast<BinaryOperator>(BEValue);
  if (!BOp)
    return false;

  Value *Addend = nullptr;
  if (BOp->getOpcode() == Instruction::FAdd) {
    if (BOp->getOperand(0) == Phi)
      Addend = BOp->getOperand(1);
    else if (BOp->getOperand(1) == Phi)
      Addend = BOp->getOperand(0);
  } else if (BOp->getOpcode() == Instruction::FSub)
    if (BOp->getOperand(0) == Phi)
      Addend = BOp->getOperand(1);

  if (!Addend)
    return false;

  // The addend should be loop invariant.
  if (auto *I = dyn_cast<Instruction>(Addend))
    if (TheLoop->contains(I))
      return false;

  // The FP step has an unknown SCEV.
  const SCEV *Step = SE->getUnknown(Addend);
  D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp);
  return true;
}

bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
                                         PredicatedScalarEvolution &PSE,
                                         InductionDescriptor &D, bool Assume) {
  Type *PhiTy = Phi->getType();

  // Handle integer and pointer induction variables. We now also handle FP
  // induction, but we do not try to build a recurrent expression from the
  // PHI node in-place.

  if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() && !PhiTy->isFloatTy() &&
      !PhiTy->isDoubleTy() && !PhiTy->isHalfTy())
    return false;

  if (PhiTy->isFloatingPointTy())
    return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D);

  const SCEV *PhiScev = PSE.getSCEV(Phi);
  const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);

  // We need this expression to be an AddRecExpr.
  if (Assume && !AR)
    AR = PSE.getAsAddRec(Phi);

  if (!AR) {
    DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
    return false;
  }

  return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR);
}

bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
                                         ScalarEvolution *SE,
                                         InductionDescriptor &D,
                                         const SCEV *Expr) {
  Type *PhiTy = Phi->getType();
  // We only handle integer and pointer induction variables.
  if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
    return false;

  // Check that the PHI is consecutive.
  const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi);
  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);

  if (!AR) {
    DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
    return false;
  }

  if (AR->getLoop() != TheLoop) {
    // FIXME: We should treat this as a uniform. Unfortunately, we
    // don't currently know how to handle uniform PHIs.
    DEBUG(dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n");
    return false;
  }

  Value *StartValue =
      Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader());
  const SCEV *Step = AR->getStepRecurrence(*SE);
  // Calculate the pointer stride and check if it is consecutive.
  // The stride may be a constant or a loop invariant integer value.
  const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step);
  if (!ConstStep && !SE->isLoopInvariant(Step, TheLoop))
    return false;

  if (PhiTy->isIntegerTy()) {
    D = InductionDescriptor(StartValue, IK_IntInduction, Step);
    return true;
  }

  assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
  // Pointer induction should be a constant.
  if (!ConstStep)
    return false;

  ConstantInt *CV = ConstStep->getValue();
  Type *PointerElementType = PhiTy->getPointerElementType();
  // The pointer stride cannot be determined if the pointer element type is not
  // sized.
  if (!PointerElementType->isSized())
    return false;

  const DataLayout &DL = Phi->getModule()->getDataLayout();
  int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType));
  if (!Size)
    return false;

  int64_t CVSize = CV->getSExtValue();
  if (CVSize % Size)
    return false;
  auto *StepValue =
      SE->getConstant(CV->getType(), CVSize / Size, true /* signed */);
  D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue);
  return true;
}

bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
                                   bool PreserveLCSSA) {
  bool Changed = false;

  // We re-use a vector for the in-loop predecessors.
  SmallVector<BasicBlock *, 4> InLoopPredecessors;

  auto RewriteExit = [&](BasicBlock *BB) {
    assert(InLoopPredecessors.empty() &&
           "Must start with an empty predecessors list!");
    auto Cleanup = make_scope_exit([&] { InLoopPredecessors.clear(); });

    // See if there are any non-loop predecessors of this exit block and
    // keep track of the in-loop predecessors.
    bool IsDedicatedExit = true;
    for (auto *PredBB : predecessors(BB))
      if (L->contains(PredBB)) {
        if (isa<IndirectBrInst>(PredBB->getTerminator()))
          // We cannot rewrite exiting edges from an indirectbr.
          return false;

        InLoopPredecessors.push_back(PredBB);
      } else {
        IsDedicatedExit = false;
      }

    assert(!InLoopPredecessors.empty() && "Must have *some* loop predecessor!");

    // Nothing to do if this is already a dedicated exit.
    if (IsDedicatedExit)
      return false;

    auto *NewExitBB = SplitBlockPredecessors(
        BB, InLoopPredecessors, ".loopexit", DT, LI, PreserveLCSSA);

    if (!NewExitBB)
      DEBUG(dbgs() << "WARNING: Can't create a dedicated exit block for loop: "
                   << *L << "\n");
    else
      DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
                   << NewExitBB->getName() << "\n");
    return true;
  };

  // Walk the exit blocks directly rather than building up a data structure for
  // them, but only visit each one once.
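  // Visited ensures each exit block is rewritten at most once, even if it is
  // the successor of several exiting blocks.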
  SmallPtrSet<BasicBlock *, 4> Visited;
  for (auto *BB : L->blocks())
    for (auto *SuccBB : successors(BB)) {
      // We're looking for exit blocks so skip in-loop successors.
      if (L->contains(SuccBB))
        continue;

      // Visit each exit block exactly once.
      if (!Visited.insert(SuccBB).second)
        continue;

      Changed |= RewriteExit(SuccBB);
    }

  return Changed;
}

/// \brief Returns the instructions that use values defined in the loop.
SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) {
  SmallVector<Instruction *, 8> UsedOutside;

  for (auto *Block : L->getBlocks())
    // FIXME: I believe that this could use copy_if if the Inst reference could
    // be adapted into a pointer.
    for (auto &Inst : *Block) {
      auto Users = Inst.users();
      if (any_of(Users, [&](User *U) {
            auto *Use = cast<Instruction>(U);
            return !L->contains(Use->getParent());
          }))
        UsedOutside.push_back(&Inst);
    }

  return UsedOutside;
}

void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) {
  // By definition, all loop passes need the LoopInfo analysis and the
  // Dominator tree it depends on. Because they all participate in the loop
  // pass manager, they must also preserve these.
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addPreserved<DominatorTreeWrapperPass>();
  AU.addRequired<LoopInfoWrapperPass>();
  AU.addPreserved<LoopInfoWrapperPass>();

  // We must also preserve LoopSimplify and LCSSA. We locally access their IDs
  // here because users shouldn't directly get them from this header.
  extern char &LoopSimplifyID;
  extern char &LCSSAID;
  AU.addRequiredID(LoopSimplifyID);
  AU.addPreservedID(LoopSimplifyID);
  AU.addRequiredID(LCSSAID);
  AU.addPreservedID(LCSSAID);
  // This is used in the LPPassManager to perform LCSSA verification on passes
  // which preserve LCSSA form.
  AU.addRequired<LCSSAVerificationPass>();
  AU.addPreserved<LCSSAVerificationPass>();

  // Loop passes are designed to run inside of a loop pass manager which means
  // that any function analyses they require must be required by the first loop
  // pass in the manager (so that it is computed before the loop pass manager
  // runs) and preserved by all loop passes in the manager. To make this
  // reasonably robust, the set needed for most loop passes is maintained here.
  // If your loop pass requires an analysis not listed here, you will need to
  // carefully audit the loop pass manager nesting structure that results.
  AU.addRequired<AAResultsWrapperPass>();
  AU.addPreserved<AAResultsWrapperPass>();
  AU.addPreserved<BasicAAWrapperPass>();
  AU.addPreserved<GlobalsAAWrapperPass>();
  AU.addPreserved<SCEVAAWrapperPass>();
  AU.addRequired<ScalarEvolutionWrapperPass>();
  AU.addPreserved<ScalarEvolutionWrapperPass>();
}

/// Manually defined generic "LoopPass" dependency initialization. This is used
/// to initialize the exact set of passes from above in \c
/// getLoopAnalysisUsage. It can be used within a loop pass's initialization
/// with:
///
///   INITIALIZE_PASS_DEPENDENCY(LoopPass)
///
/// As-if "LoopPass" were a pass.
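///
/// Keep this list in sync with the analyses required and preserved in
/// getLoopAnalysisUsage above.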
void llvm::initializeLoopPassPass(PassRegistry &Registry) {
  INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
  INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
}

/// \brief Find string metadata for loop
///
/// If it has a value (e.g. {"llvm.distribute", 1}) return the value as an
/// operand or null otherwise. If the string metadata is not found return
/// Optional's not-a-value.
Optional<const MDOperand *> llvm::findStringMetadataForLoop(Loop *TheLoop,
                                                            StringRef Name) {
  MDNode *LoopID = TheLoop->getLoopID();
  // Return none if LoopID is not set.
  if (!LoopID)
    return None;

  // First operand should refer to the loop id itself.
  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
  assert(LoopID->getOperand(0) == LoopID && "invalid loop id");

  // Iterate over LoopID operands and look for MDString metadata.
  for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
    MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
    if (!MD)
      continue;
    MDString *S = dyn_cast<MDString>(MD->getOperand(0));
    if (!S)
      continue;
    // Return true if MDString holds expected MetaData.
    if (Name.equals(S->getString()))
      switch (MD->getNumOperands()) {
      case 1:
        return nullptr;
      case 2:
        return &MD->getOperand(1);
      default:
        llvm_unreachable("loop metadata has 0 or 1 operand");
      }
  }
  return None;
}

/// Does a BFS from a given node to all of its children inside a given loop.
/// The returned vector of nodes includes the starting point.
SmallVector<DomTreeNode *, 16>
llvm::collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop) {
  SmallVector<DomTreeNode *, 16> Worklist;
  auto AddRegionToWorklist = [&](DomTreeNode *DTN) {
    // Only include subregions in the top level loop.
    BasicBlock *BB = DTN->getBlock();
    if (CurLoop->contains(BB))
      Worklist.push_back(DTN);
  };

  AddRegionToWorklist(N);

  for (size_t I = 0; I < Worklist.size(); I++)
    for (DomTreeNode *Child : Worklist[I]->getChildren())
      AddRegionToWorklist(Child);

  return Worklist;
}

/// Returns true if the instruction in a loop is guaranteed to execute at least
/// once.
bool llvm::isGuaranteedToExecute(const Instruction &Inst,
                                 const DominatorTree *DT, const Loop *CurLoop,
                                 const LoopSafetyInfo *SafetyInfo) {
  // We have to check to make sure that the instruction dominates all
  // of the exit blocks. If it doesn't, then there is a path out of the loop
  // which does not execute this instruction, so we can't hoist it.

  // If the instruction is in the header block for the loop (which is very
  // common), it is always guaranteed to dominate the exit blocks. Since this
  // is a common case, and can save some work, check it now.
  if (Inst.getParent() == CurLoop->getHeader())
    // If there's a throw in the header block, we can't guarantee we'll reach
    // Inst.
    return !SafetyInfo->HeaderMayThrow;

  // Somewhere in this loop there is an instruction which may throw and make us
  // exit the loop.
  if (SafetyInfo->MayThrow)
    return false;

  // Get the exit blocks for the current loop.
  SmallVector<BasicBlock *, 8> ExitBlocks;
  CurLoop->getExitBlocks(ExitBlocks);

  // Verify that the block dominates each of the exit blocks of the loop.
  for (BasicBlock *ExitBlock : ExitBlocks)
    if (!DT->dominates(Inst.getParent(), ExitBlock))
      return false;

  // As a degenerate case, if the loop is statically infinite then we haven't
  // proven anything since there are no exit blocks.
  if (ExitBlocks.empty())
    return false;

  // FIXME: In general, we have to prove that the loop isn't an infinite loop.
  // See http://llvm.org/PR24078 . (The "ExitBlocks.empty()" check above is
  // just a special case of this.)
  return true;
}

Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) {
  // Only support loops with a unique exiting block, and a latch.
  if (!L->getExitingBlock())
    return None;

  // Get the branch weights for the loop's backedge.
  BranchInst *LatchBR =
      dyn_cast<BranchInst>(L->getLoopLatch()->getTerminator());
  if (!LatchBR || LatchBR->getNumSuccessors() != 2)
    return None;

  assert((LatchBR->getSuccessor(0) == L->getHeader() ||
          LatchBR->getSuccessor(1) == L->getHeader()) &&
         "At least one edge out of the latch must go to the header");

  // To estimate the number of times the loop body was executed, we want to
  // know the number of times the backedge was taken, vs. the number of times
  // we exited the loop.
  uint64_t TrueVal, FalseVal;
  if (!LatchBR->extractProfMetadata(TrueVal, FalseVal))
    return None;

  if (!TrueVal || !FalseVal)
    return 0;

  // Divide the count of the backedge by the count of the edge exiting the
  // loop, rounding to nearest.
  if (LatchBR->getSuccessor(0) == L->getHeader())
    return (TrueVal + (FalseVal / 2)) / FalseVal;
  else
    return (FalseVal + (TrueVal / 2)) / TrueVal;
}

/// \brief Adds a 'fast' flag to floating point operations.
static Value *addFastMathFlag(Value *V) {
  if (isa<FPMathOperator>(V)) {
    FastMathFlags Flags;
    Flags.setUnsafeAlgebra();
    cast<Instruction>(V)->setFastMathFlags(Flags);
  }
  return V;
}

// Helper to generate a log2 shuffle reduction.
Value *
llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
                          RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind,
                          ArrayRef<Value *> RedOps) {
  unsigned VF = Src->getType()->getVectorNumElements();
  // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
  // and vector ops, reducing the set of values being computed by half each
  // round.
  assert(isPowerOf2_32(VF) &&
         "Reduction emission only supported for pow2 vectors!");
  Value *TmpVec = Src;
  SmallVector<Constant *, 32> ShuffleMask(VF, nullptr);
  for (unsigned i = VF; i != 1; i >>= 1) {
    // Move the upper half of the vector to the lower half.
    for (unsigned j = 0; j != i / 2; ++j)
      ShuffleMask[j] = Builder.getInt32(i / 2 + j);

    // Fill the rest of the mask with undef.
    std::fill(&ShuffleMask[i / 2], ShuffleMask.end(),
              UndefValue::get(Builder.getInt32Ty()));

    Value *Shuf = Builder.CreateShuffleVector(
        TmpVec, UndefValue::get(TmpVec->getType()),
        ConstantVector::get(ShuffleMask), "rdx.shuf");

    if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
      // Floating point operations had to be 'fast' to enable the reduction.
      TmpVec = addFastMathFlag(Builder.CreateBinOp((Instruction::BinaryOps)Op,
                                                   TmpVec, Shuf, "bin.rdx"));
    } else {
      assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid &&
             "Invalid min/max");
      TmpVec = RecurrenceDescriptor::createMinMaxOp(Builder, MinMaxKind, TmpVec,
                                                    Shuf);
    }
    if (!RedOps.empty())
      propagateIRFlags(TmpVec, RedOps);
  }
  // The result is in the first element of the vector.
  return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
}

/// Create a simple vector reduction specified by an opcode and some
/// flags (if generating min/max reductions).
Value *llvm::createSimpleTargetReduction(
    IRBuilder<> &Builder, const TargetTransformInfo *TTI, unsigned Opcode,
    Value *Src, TargetTransformInfo::ReductionFlags Flags,
    ArrayRef<Value *> RedOps) {
  assert(isa<VectorType>(Src->getType()) && "Type must be a vector");

  Value *ScalarUdf = UndefValue::get(Src->getType()->getVectorElementType());
  std::function<Value *()> BuildFunc;
  using RD = RecurrenceDescriptor;
  RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid;
  // TODO: Support creating ordered reductions.
  FastMathFlags FMFUnsafe;
  FMFUnsafe.setUnsafeAlgebra();

  switch (Opcode) {
  case Instruction::Add:
    BuildFunc = [&]() { return Builder.CreateAddReduce(Src); };
    break;
  case Instruction::Mul:
    BuildFunc = [&]() { return Builder.CreateMulReduce(Src); };
    break;
  case Instruction::And:
    BuildFunc = [&]() { return Builder.CreateAndReduce(Src); };
    break;
  case Instruction::Or:
    BuildFunc = [&]() { return Builder.CreateOrReduce(Src); };
    break;
  case Instruction::Xor:
    BuildFunc = [&]() { return Builder.CreateXorReduce(Src); };
    break;
  case Instruction::FAdd:
    BuildFunc = [&]() {
      auto Rdx = Builder.CreateFAddReduce(ScalarUdf, Src);
      cast<CallInst>(Rdx)->setFastMathFlags(FMFUnsafe);
      return Rdx;
    };
    break;
  case Instruction::FMul:
    BuildFunc = [&]() {
      auto Rdx = Builder.CreateFMulReduce(ScalarUdf, Src);
      cast<CallInst>(Rdx)->setFastMathFlags(FMFUnsafe);
      return Rdx;
    };
    break;
  case Instruction::ICmp:
    if (Flags.IsMaxOp) {
      MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMax : RD::MRK_UIntMax;
      BuildFunc = [&]() {
        return Builder.CreateIntMaxReduce(Src, Flags.IsSigned);
      };
    } else {
      MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMin : RD::MRK_UIntMin;
      BuildFunc = [&]() {
        return Builder.CreateIntMinReduce(Src, Flags.IsSigned);
      };
    }
    break;
  case Instruction::FCmp:
    if (Flags.IsMaxOp) {
      MinMaxKind = RD::MRK_FloatMax;
      BuildFunc = [&]() { return Builder.CreateFPMaxReduce(Src, Flags.NoNaN); };
    } else {
      MinMaxKind = RD::MRK_FloatMin;
      BuildFunc = [&]() { return Builder.CreateFPMinReduce(Src, Flags.NoNaN); };
    }
    break;
  default:
    llvm_unreachable("Unhandled opcode");
    break;
  }
  if (TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags))
    return BuildFunc();
  return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, RedOps);
}

/// Create a vector reduction using a given recurrence descriptor.
Value *llvm::createTargetReduction(IRBuilder<> &Builder,
                                   const TargetTransformInfo *TTI,
                                   RecurrenceDescriptor &Desc, Value *Src,
                                   bool NoNaN) {
  // TODO: Support in-order reductions based on the recurrence descriptor.
  RecurrenceDescriptor::RecurrenceKind RecKind = Desc.getRecurrenceKind();
  TargetTransformInfo::ReductionFlags Flags;
  Flags.NoNaN = NoNaN;
  auto getSimpleRdx = [&](unsigned Opc) {
    return createSimpleTargetReduction(Builder, TTI, Opc, Src, Flags);
  };
  switch (RecKind) {
  case RecurrenceDescriptor::RK_FloatAdd:
    return getSimpleRdx(Instruction::FAdd);
  case RecurrenceDescriptor::RK_FloatMult:
    return getSimpleRdx(Instruction::FMul);
  case RecurrenceDescriptor::RK_IntegerAdd:
    return getSimpleRdx(Instruction::Add);
  case RecurrenceDescriptor::RK_IntegerMult:
    return getSimpleRdx(Instruction::Mul);
  case RecurrenceDescriptor::RK_IntegerAnd:
    return getSimpleRdx(Instruction::And);
  case RecurrenceDescriptor::RK_IntegerOr:
    return getSimpleRdx(Instruction::Or);
  case RecurrenceDescriptor::RK_IntegerXor:
    return getSimpleRdx(Instruction::Xor);
  case RecurrenceDescriptor::RK_IntegerMinMax: {
    switch (Desc.getMinMaxRecurrenceKind()) {
    case RecurrenceDescriptor::MRK_SIntMax:
      Flags.IsSigned = true;
      Flags.IsMaxOp = true;
      break;
    case RecurrenceDescriptor::MRK_UIntMax:
      Flags.IsMaxOp = true;
      break;
    case RecurrenceDescriptor::MRK_SIntMin:
      Flags.IsSigned = true;
      break;
    case RecurrenceDescriptor::MRK_UIntMin:
      break;
    default:
      llvm_unreachable("Unhandled MRK");
    }
    return getSimpleRdx(Instruction::ICmp);
  }
  case RecurrenceDescriptor::RK_FloatMinMax: {
    Flags.IsMaxOp =
        Desc.getMinMaxRecurrenceKind() == RecurrenceDescriptor::MRK_FloatMax;
    return getSimpleRdx(Instruction::FCmp);
  }
  default:
    llvm_unreachable("Unhandled RecKind");
  }
}

void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) {
  auto *VecOp = dyn_cast<Instruction>(I);
  if (!VecOp)
    return;
  auto *Intersection = (OpValue == nullptr) ? dyn_cast<Instruction>(VL[0])
                                            : dyn_cast<Instruction>(OpValue);
  if (!Intersection)
    return;
  const unsigned Opcode = Intersection->getOpcode();
  VecOp->copyIRFlags(Intersection);
  for (auto *V : VL) {
    auto *Instr = dyn_cast<Instruction>(V);
    if (!Instr)
      continue;
    if (OpValue == nullptr || Opcode == Instr->getOpcode())
      VecOp->andIRFlags(V);
  }
}