//===-- LoopUtils.cpp - Loop Utility functions -------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines common loop utility functions.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "loop-utils"

bool RecurrenceDescriptor::areAllUsesIn(Instruction *I,
                                        SmallPtrSetImpl<Instruction *> &Set) {
  for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use)
    if (!Set.count(dyn_cast<Instruction>(*Use)))
      return false;
  return true;
}

bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurrenceKind Kind) {
  switch (Kind) {
  default:
    break;
  case RK_IntegerAdd:
  case RK_IntegerMult:
  case RK_IntegerOr:
  case RK_IntegerAnd:
  case RK_IntegerXor:
  case RK_IntegerMinMax:
    return true;
  }
  return false;
}

bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurrenceKind Kind) {
  return (Kind != RK_NoRecurrence) && !isIntegerRecurrenceKind(Kind);
}

bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurrenceKind Kind) {
  switch (Kind) {
  default:
    break;
  case RK_IntegerAdd:
  case RK_IntegerMult:
  case RK_FloatAdd:
  case RK_FloatMult:
    return true;
  }
  return false;
}

Instruction *
RecurrenceDescriptor::lookThroughAnd(PHINode *Phi, Type *&RT,
                                     SmallPtrSetImpl<Instruction *> &Visited,
                                     SmallPtrSetImpl<Instruction *> &CI) {
  if (!Phi->hasOneUse())
    return Phi;

  const APInt *M = nullptr;
  Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser());

  // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT
  // with a new integer type of the corresponding bit width.
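  // For example, a phi whose only use is 'and i32 %phi, 255' indicates an i8
  // recurrence that InstCombine widened to i32; RT becomes i8 and the AND is
  // treated as the start of the recurrence.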
  if (match(J, m_c_And(m_Instruction(I), m_APInt(M)))) {
    int32_t Bits = (*M + 1).exactLogBase2();
    if (Bits > 0) {
      RT = IntegerType::get(Phi->getContext(), Bits);
      Visited.insert(Phi);
      CI.insert(J);
      return J;
    }
  }
  return Phi;
}

bool RecurrenceDescriptor::getSourceExtensionKind(
    Instruction *Start, Instruction *Exit, Type *RT, bool &IsSigned,
    SmallPtrSetImpl<Instruction *> &Visited,
    SmallPtrSetImpl<Instruction *> &CI) {

  SmallVector<Instruction *, 8> Worklist;
  bool FoundOneOperand = false;
  unsigned DstSize = RT->getPrimitiveSizeInBits();
  Worklist.push_back(Exit);

  // Traverse the instructions in the reduction expression, beginning with the
  // exit value.
  while (!Worklist.empty()) {
    Instruction *I = Worklist.pop_back_val();
    for (Use &U : I->operands()) {

      // Terminate the traversal if the operand is not an instruction, or we
      // reach the starting value.
      Instruction *J = dyn_cast<Instruction>(U.get());
      if (!J || J == Start)
        continue;

      // Otherwise, investigate the operation if it is also in the expression.
      if (Visited.count(J)) {
        Worklist.push_back(J);
        continue;
      }

      // If the operand is not in Visited, it is not a reduction operation, but
      // it does feed into one. Make sure it is either a single-use sign- or
      // zero-extend instruction.
      CastInst *Cast = dyn_cast<CastInst>(J);
      bool IsSExtInst = isa<SExtInst>(J);
      if (!Cast || !Cast->hasOneUse() || !(isa<ZExtInst>(J) || IsSExtInst))
        return false;

      // Ensure the source type of the extend is no larger than the reduction
      // type. It is not necessary for the types to be identical.
      unsigned SrcSize = Cast->getSrcTy()->getPrimitiveSizeInBits();
      if (SrcSize > DstSize)
        return false;

      // Furthermore, ensure that all such extends are of the same kind.
      if (FoundOneOperand) {
        if (IsSigned != IsSExtInst)
          return false;
      } else {
        FoundOneOperand = true;
        IsSigned = IsSExtInst;
      }

      // Lastly, if the source type of the extend matches the reduction type,
      // add the extend to CI so that we can avoid accounting for it in the
      // cost model.
      if (SrcSize == DstSize)
        CI.insert(Cast);
    }
  }
  return true;
}

bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
                                           Loop *TheLoop, bool HasFunNoNaNAttr,
                                           RecurrenceDescriptor &RedDes) {
  if (Phi->getNumIncomingValues() != 2)
    return false;

  // Reduction variables are only found in the loop header block.
  if (Phi->getParent() != TheLoop->getHeader())
    return false;

  // Obtain the reduction start value from the value that comes from the loop
  // preheader.
  Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());

  // ExitInstruction is the single value which is used outside the loop.
  // We only allow for a single reduction value to be used outside the loop.
  // This includes users of the reduction and the recurrence variables that
  // form a cycle ending in the phi node.
  Instruction *ExitInstruction = nullptr;
  // Indicates that we found a reduction operation in our scan.
  bool FoundReduxOp = false;

  // We start with the PHI node and scan for all of the users of this
  // instruction. All users must be instructions that can be used as reduction
  // variables (such as ADD). We must have a single out-of-block user. The cycle
  // must include the original PHI.
  bool FoundStartPHI = false;

  // To recognize min/max patterns formed by an icmp/select sequence, we store
  // the number of instructions we saw from the recognized min/max pattern,
  // to make sure we only see exactly the two instructions.
  unsigned NumCmpSelectPatternInst = 0;
  InstDesc ReduxDesc(false, nullptr);

  // Data used for determining if the recurrence has been type-promoted.
  Type *RecurrenceType = Phi->getType();
  SmallPtrSet<Instruction *, 4> CastInsts;
  Instruction *Start = Phi;
  bool IsSigned = false;

  SmallPtrSet<Instruction *, 8> VisitedInsts;
  SmallVector<Instruction *, 8> Worklist;

  // Return early if the recurrence kind does not match the type of Phi. If the
  // recurrence kind is arithmetic, we attempt to look through AND operations
  // resulting from the type promotion performed by InstCombine. Vector
  // operations are not limited to the legal integer widths, so we may be able
  // to evaluate the reduction in the narrower width.
  if (RecurrenceType->isFloatingPointTy()) {
    if (!isFloatingPointRecurrenceKind(Kind))
      return false;
  } else {
    if (!isIntegerRecurrenceKind(Kind))
      return false;
    if (isArithmeticRecurrenceKind(Kind))
      Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);
  }

  Worklist.push_back(Start);
  VisitedInsts.insert(Start);

  // A value in the reduction can be used:
  //  - By the reduction:
  //      - Reduction operation:
  //        - One use of the reduction value (safe).
  //        - Multiple uses of the reduction value (not safe).
  //      - PHI:
  //        - All uses of the PHI must be the reduction (safe).
  //        - Otherwise, not safe.
  //  - By instructions outside of the loop (safe).
  //      * One value may have several outside users, but all outside
  //        uses must be of the same value.
  //  - By an instruction that is not part of the reduction (not safe).
  //    This is either:
  //      * An instruction type other than PHI or the reduction operation.
  //      * A PHI in the header other than the initial PHI.
  while (!Worklist.empty()) {
    Instruction *Cur = Worklist.back();
    Worklist.pop_back();

    // No Users.
    // If the instruction has no users then this is a broken chain and can't be
    // a reduction variable.
    if (Cur->use_empty())
      return false;

    bool IsAPhi = isa<PHINode>(Cur);

    // A header PHI use other than the original PHI.
    if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent())
      return false;

    // Reductions of instructions such as Div and Sub are only possible if the
    // LHS is the reduction variable.
    if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) &&
        !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) &&
        !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))
      return false;

    // Any reduction instruction must be of one of the allowed kinds. We ignore
    // the starting value (the Phi or an AND instruction if the Phi has been
    // type-promoted).
    if (Cur != Start) {
      ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr);
      if (!ReduxDesc.isRecurrence())
        return false;
    }

    // A reduction operation must only have one use of the reduction value.
    if (!IsAPhi && Kind != RK_IntegerMinMax && Kind != RK_FloatMinMax &&
        hasMultipleUsesOf(Cur, VisitedInsts))
      return false;

    // All inputs to a PHI node must be a reduction value.
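    // (Such a PHI is a non-header phi inside the loop, e.g. one that merges
    // the reduction value across an if-then-else in the loop body; header
    // phis other than the original Phi were already rejected above.)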
    if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts))
      return false;

    if (Kind == RK_IntegerMinMax &&
        (isa<ICmpInst>(Cur) || isa<SelectInst>(Cur)))
      ++NumCmpSelectPatternInst;
    if (Kind == RK_FloatMinMax && (isa<FCmpInst>(Cur) || isa<SelectInst>(Cur)))
      ++NumCmpSelectPatternInst;

    // Check whether we found a reduction operator.
    FoundReduxOp |= !IsAPhi && Cur != Start;

    // Process users of current instruction. Push non-PHI nodes after PHI nodes
    // onto the stack. This way we are going to have seen all inputs to PHI
    // nodes once we get to them.
    SmallVector<Instruction *, 8> NonPHIs;
    SmallVector<Instruction *, 8> PHIs;
    for (User *U : Cur->users()) {
      Instruction *UI = cast<Instruction>(U);

      // Check if we found the exit user.
      BasicBlock *Parent = UI->getParent();
      if (!TheLoop->contains(Parent)) {
        // If we already know this instruction is used externally, move on to
        // the next user.
        if (ExitInstruction == Cur)
          continue;

        // Exit if you find multiple values used outside or if the header phi
        // node is being used. In this case the user uses the value of the
        // previous iteration, in which case we would lose "VF-1" iterations of
        // the reduction operation if we vectorize.
        if (ExitInstruction != nullptr || Cur == Phi)
          return false;

        // The instruction used by an outside user must be the last instruction
        // before we feed back to the reduction phi. Otherwise, we lose VF-1
        // operations on the value.
        if (!is_contained(Phi->operands(), Cur))
          return false;

        ExitInstruction = Cur;
        continue;
      }

      // Process instructions only once (termination). Each reduction cycle
      // value must only be used once, except by phi nodes and min/max
      // reductions which are represented as a cmp followed by a select.
      InstDesc IgnoredVal(false, nullptr);
      if (VisitedInsts.insert(UI).second) {
        if (isa<PHINode>(UI))
          PHIs.push_back(UI);
        else
          NonPHIs.push_back(UI);
      } else if (!isa<PHINode>(UI) &&
                 ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) &&
                   !isa<SelectInst>(UI)) ||
                  !isMinMaxSelectCmpPattern(UI, IgnoredVal).isRecurrence()))
        return false;

      // Remember that we completed the cycle.
      if (UI == Phi)
        FoundStartPHI = true;
    }
    Worklist.append(PHIs.begin(), PHIs.end());
    Worklist.append(NonPHIs.begin(), NonPHIs.end());
  }

  // This means we have seen one but not the other instruction of the
  // pattern or more than just a select and cmp.
  if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) &&
      NumCmpSelectPatternInst != 2)
    return false;

  if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
    return false;

  // If we think Phi may have been type-promoted, we also need to ensure that
  // all source operands of the reduction are either SExtInsts or ZExtInsts. If
  // so, we will be able to evaluate the reduction in the narrower bit width.
  if (Start != Phi)
    if (!getSourceExtensionKind(Start, ExitInstruction, RecurrenceType,
                                IsSigned, VisitedInsts, CastInsts))
      return false;

  // We found a reduction var if we have reached the original phi node and we
  // only have a single instruction with out-of-loop users.

  // The ExitInstruction (the instruction which is allowed to have out-of-loop
  // users) is saved as part of the RecurrenceDescriptor.

  // Save the description of this reduction variable.
  RecurrenceDescriptor RD(
      RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(),
      ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts);
  RedDes = RD;

  return true;
}

/// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction
/// pattern corresponding to a min(X, Y) or max(X, Y).
RecurrenceDescriptor::InstDesc
RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev) {

  assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) &&
         "Expected a cmp or select instruction");
  Instruction *Cmp = nullptr;
  SelectInst *Select = nullptr;

  // We must handle the select(cmp()) as a single instruction. Advance to the
  // select.
  if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) {
    if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->user_begin())))
      return InstDesc(false, I);
    return InstDesc(Select, Prev.getMinMaxKind());
  }

  // Only handle single use cases for now.
  if (!(Select = dyn_cast<SelectInst>(I)))
    return InstDesc(false, I);
  if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) &&
      !(Cmp = dyn_cast<FCmpInst>(I->getOperand(0))))
    return InstDesc(false, I);
  if (!Cmp->hasOneUse())
    return InstDesc(false, I);

  Value *CmpLeft;
  Value *CmpRight;

  // Look for a min/max pattern.
  if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_UIntMin);
  else if (m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_UIntMax);
  else if (m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_SIntMax);
  else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_SIntMin);
  else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_FloatMin);
  else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_FloatMax);
  else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_FloatMin);
  else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_FloatMax);

  return InstDesc(false, I);
}

RecurrenceDescriptor::InstDesc
RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
                                        InstDesc &Prev, bool HasFunNoNaNAttr) {
  bool FP = I->getType()->isFloatingPointTy();
  Instruction *UAI = Prev.getUnsafeAlgebraInst();
  if (!UAI && FP && !I->hasUnsafeAlgebra())
    UAI = I; // Found an unsafe (unvectorizable) algebra instruction.
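
  // Map the instruction opcode to the recurrence kind it may participate in.
  // Note that Sub and FSub are accepted under the Add kinds; the operand-order
  // check in AddReductionVar ensures the recurrence value is on their LHS.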
  switch (I->getOpcode()) {
  default:
    return InstDesc(false, I);
  case Instruction::PHI:
    return InstDesc(I, Prev.getMinMaxKind(), Prev.getUnsafeAlgebraInst());
  case Instruction::Sub:
  case Instruction::Add:
    return InstDesc(Kind == RK_IntegerAdd, I);
  case Instruction::Mul:
    return InstDesc(Kind == RK_IntegerMult, I);
  case Instruction::And:
    return InstDesc(Kind == RK_IntegerAnd, I);
  case Instruction::Or:
    return InstDesc(Kind == RK_IntegerOr, I);
  case Instruction::Xor:
    return InstDesc(Kind == RK_IntegerXor, I);
  case Instruction::FMul:
    return InstDesc(Kind == RK_FloatMult, I, UAI);
  case Instruction::FSub:
  case Instruction::FAdd:
    return InstDesc(Kind == RK_FloatAdd, I, UAI);
  case Instruction::FCmp:
  case Instruction::ICmp:
  case Instruction::Select:
    if (Kind != RK_IntegerMinMax &&
        (!HasFunNoNaNAttr || Kind != RK_FloatMinMax))
      return InstDesc(false, I);
    return isMinMaxSelectCmpPattern(I, Prev);
  }
}

bool RecurrenceDescriptor::hasMultipleUsesOf(
    Instruction *I, SmallPtrSetImpl<Instruction *> &Insts) {
  unsigned NumUses = 0;
  for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E;
       ++Use) {
    if (Insts.count(dyn_cast<Instruction>(*Use)))
      ++NumUses;
    if (NumUses > 1)
      return true;
  }

  return false;
}

bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
                                          RecurrenceDescriptor &RedDes) {

  BasicBlock *Header = TheLoop->getHeader();
  Function &F = *Header->getParent();
  bool HasFunNoNaNAttr =
      F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";

  if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes)) {
    DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes)) {
    DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes)) {
    DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes)) {
    DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes)) {
    DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr,
                      RedDes)) {
    DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes)) {
    DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes)) {
    DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes)) {
    DEBUG(dbgs() << "Found a float MINMAX reduction PHI." << *Phi << "\n");
    return true;
  }
  // Not a reduction of known type.
  return false;
}

bool RecurrenceDescriptor::isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop,
                                                  DominatorTree *DT) {

  // Ensure the phi node is in the loop header and has two incoming values.
  if (Phi->getParent() != TheLoop->getHeader() ||
      Phi->getNumIncomingValues() != 2)
    return false;

  // Ensure the loop has a preheader and a single latch block. The loop
  // vectorizer will need the latch to set up the next iteration of the loop.
  auto *Preheader = TheLoop->getLoopPreheader();
  auto *Latch = TheLoop->getLoopLatch();
  if (!Preheader || !Latch)
    return false;

  // Ensure the phi node's incoming blocks are the loop preheader and latch.
  if (Phi->getBasicBlockIndex(Preheader) < 0 ||
      Phi->getBasicBlockIndex(Latch) < 0)
    return false;

  // Get the previous value. The previous value comes from the latch edge while
  // the initial value comes from the preheader edge.
  auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch));
  if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous))
    return false;

  // Ensure every user of the phi node is dominated by the previous value.
  // The dominance requirement ensures the loop vectorizer will not need to
  // vectorize the initial value prior to the first iteration of the loop.
  for (User *U : Phi->users())
    if (auto *I = dyn_cast<Instruction>(U)) {
      if (!DT->dominates(Previous, I))
        return false;
    }

  return true;
}

/// This function returns the identity element (or neutral element) for
/// the operation K.
Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurrenceKind K,
                                                      Type *Tp) {
  switch (K) {
  case RK_IntegerXor:
  case RK_IntegerAdd:
  case RK_IntegerOr:
    // Adding, Xoring, Oring zero to a number does not change it.
    return ConstantInt::get(Tp, 0);
  case RK_IntegerMult:
    // Multiplying a number by 1 does not change it.
    return ConstantInt::get(Tp, 1);
  case RK_IntegerAnd:
    // AND-ing a number with an all-ones value does not change it.
    return ConstantInt::get(Tp, -1, true);
  case RK_FloatMult:
    // Multiplying a number by 1 does not change it.
    return ConstantFP::get(Tp, 1.0L);
  case RK_FloatAdd:
    // Adding zero to a number does not change it.
    return ConstantFP::get(Tp, 0.0L);
  default:
    llvm_unreachable("Unknown recurrence kind");
  }
}

/// This function translates the recurrence kind to an LLVM binary operator.
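/// Min/max kinds are mapped to their compare opcode (ICmp or FCmp); the
/// accompanying select is produced separately by createMinMaxOp.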
unsigned RecurrenceDescriptor::getRecurrenceBinOp(RecurrenceKind Kind) {
  switch (Kind) {
  case RK_IntegerAdd:
    return Instruction::Add;
  case RK_IntegerMult:
    return Instruction::Mul;
  case RK_IntegerOr:
    return Instruction::Or;
  case RK_IntegerAnd:
    return Instruction::And;
  case RK_IntegerXor:
    return Instruction::Xor;
  case RK_FloatMult:
    return Instruction::FMul;
  case RK_FloatAdd:
    return Instruction::FAdd;
  case RK_IntegerMinMax:
    return Instruction::ICmp;
  case RK_FloatMinMax:
    return Instruction::FCmp;
  default:
    llvm_unreachable("Unknown recurrence operation");
  }
}

Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder,
                                            MinMaxRecurrenceKind RK,
                                            Value *Left, Value *Right) {
  CmpInst::Predicate P = CmpInst::ICMP_NE;
  switch (RK) {
  default:
    llvm_unreachable("Unknown min/max recurrence kind");
  case MRK_UIntMin:
    P = CmpInst::ICMP_ULT;
    break;
  case MRK_UIntMax:
    P = CmpInst::ICMP_UGT;
    break;
  case MRK_SIntMin:
    P = CmpInst::ICMP_SLT;
    break;
  case MRK_SIntMax:
    P = CmpInst::ICMP_SGT;
    break;
  case MRK_FloatMin:
    P = CmpInst::FCMP_OLT;
    break;
  case MRK_FloatMax:
    P = CmpInst::FCMP_OGT;
    break;
  }

  // We only match FP sequences with unsafe algebra, so we can unconditionally
  // set it on any generated instructions.
  IRBuilder<>::FastMathFlagGuard FMFG(Builder);
  FastMathFlags FMF;
  FMF.setUnsafeAlgebra();
  Builder.setFastMathFlags(FMF);

  Value *Cmp;
  if (RK == MRK_FloatMin || RK == MRK_FloatMax)
    Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp");
  else
    Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp");

  Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
  return Select;
}

InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
                                         const SCEV *Step, BinaryOperator *BOp)
    : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) {
  assert(IK != IK_NoInduction && "Not an induction");

  // Start value type should match the induction kind and the value
  // itself should not be null.
  assert(StartValue && "StartValue is null");
  assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
         "StartValue is not a pointer for pointer induction");
  assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
         "StartValue is not an integer for integer induction");

  // Check the Step value. It should be a non-zero integer value.
  assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) &&
         "Step value is zero");

  assert((IK != IK_PtrInduction || getConstIntStepValue()) &&
         "Step value should be constant for pointer induction");
  assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) &&
         "StepValue is not an integer");

  assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) &&
         "StepValue is not FP for FpInduction");
  assert((IK != IK_FpInduction ||
          (InductionBinOp &&
           (InductionBinOp->getOpcode() == Instruction::FAdd ||
            InductionBinOp->getOpcode() == Instruction::FSub))) &&
         "Binary opcode should be specified for FP induction");
}

int InductionDescriptor::getConsecutiveDirection() const {
  ConstantInt *ConstStep = getConstIntStepValue();
  if (ConstStep && (ConstStep->isOne() || ConstStep->isMinusOne()))
    return ConstStep->getSExtValue();
  return 0;
}

ConstantInt *InductionDescriptor::getConstIntStepValue() const {
  if (isa<SCEVConstant>(Step))
    return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue());
  return nullptr;
}

Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index,
                                      ScalarEvolution *SE,
                                      const DataLayout &DL) const {

  SCEVExpander Exp(*SE, DL, "induction");
  assert(Index->getType() == Step->getType() &&
         "Index type does not match StepValue type");
  switch (IK) {
  case IK_IntInduction: {
    assert(Index->getType() == StartValue->getType() &&
           "Index type does not match StartValue type");

    // FIXME: Theoretically, we can call getAddExpr() of ScalarEvolution
    // and calculate (Start + Index * Step) for all cases, without
    // special handling for "isOne" and "isMinusOne".
    // But in practice the resulting code is worse: we mix SCEV expressions
    // with ADD/SUB operations and end up with redundant intermediate values
    // computed in different ways, which InstCombine is unable to reduce.

    if (getConstIntStepValue() && getConstIntStepValue()->isMinusOne())
      return B.CreateSub(StartValue, Index);
    if (getConstIntStepValue() && getConstIntStepValue()->isOne())
      return B.CreateAdd(StartValue, Index);
    const SCEV *S = SE->getAddExpr(SE->getSCEV(StartValue),
                                   SE->getMulExpr(Step, SE->getSCEV(Index)));
    return Exp.expandCodeFor(S, StartValue->getType(), &*B.GetInsertPoint());
  }
  case IK_PtrInduction: {
    assert(isa<SCEVConstant>(Step) &&
           "Expected constant step for pointer induction");
    const SCEV *S = SE->getMulExpr(SE->getSCEV(Index), Step);
    Index = Exp.expandCodeFor(S, Index->getType(), &*B.GetInsertPoint());
    return B.CreateGEP(nullptr, StartValue, Index);
  }
  case IK_FpInduction: {
    assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value");
    assert(InductionBinOp &&
           (InductionBinOp->getOpcode() == Instruction::FAdd ||
            InductionBinOp->getOpcode() == Instruction::FSub) &&
           "Original bin op should be defined for FP induction");

    Value *StepValue = cast<SCEVUnknown>(Step)->getValue();

    // Floating point operations had to be 'fast' to enable the induction.
    FastMathFlags Flags;
    Flags.setUnsafeAlgebra();

    Value *MulExp = B.CreateFMul(StepValue, Index);
    if (isa<Instruction>(MulExp))
      // We have to check because MulExp may be a constant.
      cast<Instruction>(MulExp)->setFastMathFlags(Flags);

    Value *BOp = B.CreateBinOp(InductionBinOp->getOpcode(), StartValue,
                               MulExp, "induction");
    if (isa<Instruction>(BOp))
      cast<Instruction>(BOp)->setFastMathFlags(Flags);

    return BOp;
  }
  case IK_NoInduction:
    return nullptr;
  }
  llvm_unreachable("invalid enum");
}

bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop,
                                           ScalarEvolution *SE,
                                           InductionDescriptor &D) {

  // Here we only handle FP induction variables.
  assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type");

  if (TheLoop->getHeader() != Phi->getParent())
    return false;

  // The loop may have multiple entrances or multiple exits; we can analyze
  // this phi if it has a unique entry value and a unique backedge value.
  if (Phi->getNumIncomingValues() != 2)
    return false;
  Value *BEValue = nullptr, *StartValue = nullptr;
  if (TheLoop->contains(Phi->getIncomingBlock(0))) {
    BEValue = Phi->getIncomingValue(0);
    StartValue = Phi->getIncomingValue(1);
  } else {
    assert(TheLoop->contains(Phi->getIncomingBlock(1)) &&
           "Unexpected Phi node in the loop");
    BEValue = Phi->getIncomingValue(1);
    StartValue = Phi->getIncomingValue(0);
  }

  BinaryOperator *BOp = dyn_cast<BinaryOperator>(BEValue);
  if (!BOp)
    return false;

  Value *Addend = nullptr;
  if (BOp->getOpcode() == Instruction::FAdd) {
    if (BOp->getOperand(0) == Phi)
      Addend = BOp->getOperand(1);
    else if (BOp->getOperand(1) == Phi)
      Addend = BOp->getOperand(0);
  } else if (BOp->getOpcode() == Instruction::FSub)
    if (BOp->getOperand(0) == Phi)
      Addend = BOp->getOperand(1);

  if (!Addend)
    return false;

  // The addend should be loop invariant.
  if (auto *I = dyn_cast<Instruction>(Addend))
    if (TheLoop->contains(I))
      return false;

  // The FP step has an unknown SCEV.
  const SCEV *Step = SE->getUnknown(Addend);
  D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp);
  return true;
}

bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
                                         PredicatedScalarEvolution &PSE,
                                         InductionDescriptor &D, bool Assume) {
  Type *PhiTy = Phi->getType();

  // Handle integer and pointer induction variables. We also handle FP
  // induction here, but we do not try to build a recurrence expression from
  // the PHI node in place.
  if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() && !PhiTy->isFloatTy() &&
      !PhiTy->isDoubleTy() && !PhiTy->isHalfTy())
    return false;

  if (PhiTy->isFloatingPointTy())
    return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D);

  const SCEV *PhiScev = PSE.getSCEV(Phi);
  const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);

  // We need this expression to be an AddRecExpr.
  if (Assume && !AR)
    AR = PSE.getAsAddRec(Phi);

  if (!AR) {
    DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
    return false;
  }

  return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR);
}

bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
                                         ScalarEvolution *SE,
                                         InductionDescriptor &D,
                                         const SCEV *Expr) {
  Type *PhiTy = Phi->getType();
  // We only handle integer and pointer induction variables.
  if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
    return false;

  // Check that the PHI is consecutive.
  const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi);
  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);

  if (!AR) {
    DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
    return false;
  }

  if (AR->getLoop() != TheLoop) {
    // FIXME: We should treat this as a uniform. Unfortunately, we
    // don't currently know how to handle uniform PHIs.
    DEBUG(dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n");
    return false;
  }

  Value *StartValue =
      Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader());
  const SCEV *Step = AR->getStepRecurrence(*SE);
  // Calculate the pointer stride and check if it is consecutive.
  // The stride may be a constant or a loop invariant integer value.
  const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step);
  if (!ConstStep && !SE->isLoopInvariant(Step, TheLoop))
    return false;

  if (PhiTy->isIntegerTy()) {
    D = InductionDescriptor(StartValue, IK_IntInduction, Step);
    return true;
  }

  assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
  // Pointer induction must have a constant step.
  if (!ConstStep)
    return false;

  ConstantInt *CV = ConstStep->getValue();
  Type *PointerElementType = PhiTy->getPointerElementType();
  // The pointer stride cannot be determined if the pointer element type is not
  // sized.
  if (!PointerElementType->isSized())
    return false;

  const DataLayout &DL = Phi->getModule()->getDataLayout();
  int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType));
  if (!Size)
    return false;

  int64_t CVSize = CV->getSExtValue();
  if (CVSize % Size)
    return false;
  auto *StepValue =
      SE->getConstant(CV->getType(), CVSize / Size, true /* signed */);
  D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue);
  return true;
}

bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
                                   bool PreserveLCSSA) {
  bool Changed = false;

  // We re-use a vector for the in-loop predecessors.
  SmallVector<BasicBlock *, 4> InLoopPredecessors;

  auto RewriteExit = [&](BasicBlock *BB) {
    assert(InLoopPredecessors.empty() &&
           "Must start with an empty predecessors list!");
    auto Cleanup = make_scope_exit([&] { InLoopPredecessors.clear(); });

    // See if there are any non-loop predecessors of this exit block and
    // keep track of the in-loop predecessors.
    bool IsDedicatedExit = true;
    for (auto *PredBB : predecessors(BB))
      if (L->contains(PredBB)) {
        if (isa<IndirectBrInst>(PredBB->getTerminator()))
          // We cannot rewrite exiting edges from an indirectbr.
          return false;

        InLoopPredecessors.push_back(PredBB);
      } else {
        IsDedicatedExit = false;
      }

    assert(!InLoopPredecessors.empty() && "Must have *some* loop predecessor!");

    // Nothing to do if this is already a dedicated exit.
    if (IsDedicatedExit)
      return false;

    auto *NewExitBB = SplitBlockPredecessors(
        BB, InLoopPredecessors, ".loopexit", DT, LI, PreserveLCSSA);

    if (!NewExitBB)
      DEBUG(dbgs() << "WARNING: Can't create a dedicated exit block for loop: "
                   << *L << "\n");
    else
      DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
                   << NewExitBB->getName() << "\n");
    return true;
  };

  // Walk the exit blocks directly rather than building up a data structure for
  // them, but only visit each one once.
  SmallPtrSet<BasicBlock *, 4> Visited;
  for (auto *BB : L->blocks())
    for (auto *SuccBB : successors(BB)) {
      // We're looking for exit blocks so skip in-loop successors.
      if (L->contains(SuccBB))
        continue;

      // Visit each exit block exactly once.
      if (!Visited.insert(SuccBB).second)
        continue;

      Changed |= RewriteExit(SuccBB);
    }

  return Changed;
}

/// \brief Returns the instructions that use values defined in the loop.
SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) {
  SmallVector<Instruction *, 8> UsedOutside;

  for (auto *Block : L->getBlocks())
    // FIXME: I believe that this could use copy_if if the Inst reference could
    // be adapted into a pointer.
    for (auto &Inst : *Block) {
      auto Users = Inst.users();
      if (any_of(Users, [&](User *U) {
            auto *Use = cast<Instruction>(U);
            return !L->contains(Use->getParent());
          }))
        UsedOutside.push_back(&Inst);
    }

  return UsedOutside;
}

void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) {
  // By definition, all loop passes need the LoopInfo analysis and the
  // Dominator tree it depends on. Because they all participate in the loop
  // pass manager, they must also preserve these.
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addPreserved<DominatorTreeWrapperPass>();
  AU.addRequired<LoopInfoWrapperPass>();
  AU.addPreserved<LoopInfoWrapperPass>();

  // We must also preserve LoopSimplify and LCSSA. We locally access their IDs
  // here because users shouldn't directly get them from this header.
  extern char &LoopSimplifyID;
  extern char &LCSSAID;
  AU.addRequiredID(LoopSimplifyID);
  AU.addPreservedID(LoopSimplifyID);
  AU.addRequiredID(LCSSAID);
  AU.addPreservedID(LCSSAID);
  // This is used in the LPPassManager to perform LCSSA verification on passes
  // which preserve LCSSA form.
  AU.addRequired<LCSSAVerificationPass>();
  AU.addPreserved<LCSSAVerificationPass>();

  // Loop passes are designed to run inside of a loop pass manager which means
  // that any function analyses they require must be required by the first loop
  // pass in the manager (so that it is computed before the loop pass manager
  // runs) and preserved by all loop passes in the manager. To make this
  // reasonably robust, the set needed for most loop passes is maintained here.
  // If your loop pass requires an analysis not listed here, you will need to
  // carefully audit the loop pass manager nesting structure that results.
  AU.addRequired<AAResultsWrapperPass>();
  AU.addPreserved<AAResultsWrapperPass>();
  AU.addPreserved<BasicAAWrapperPass>();
  AU.addPreserved<GlobalsAAWrapperPass>();
  AU.addPreserved<SCEVAAWrapperPass>();
  AU.addRequired<ScalarEvolutionWrapperPass>();
  AU.addPreserved<ScalarEvolutionWrapperPass>();
}

/// Manually defined generic "LoopPass" dependency initialization. This is used
/// to initialize the exact set of passes from above in \c
/// getLoopAnalysisUsage. It can be used within a loop pass's initialization
/// with:
///
///   INITIALIZE_PASS_DEPENDENCY(LoopPass)
///
/// As-if "LoopPass" were a pass.
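///
/// For example, a hypothetical loop pass MyLoopPass could declare the
/// dependency as:
///
///   INITIALIZE_PASS_BEGIN(MyLoopPass, "my-loop-pass", "My Loop Pass",
///                         false, false)
///   INITIALIZE_PASS_DEPENDENCY(LoopPass)
///   INITIALIZE_PASS_END(MyLoopPass, "my-loop-pass", "My Loop Pass",
///                       false, false)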
void llvm::initializeLoopPassPass(PassRegistry &Registry) {
  INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
  INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
}

/// \brief Find string metadata for loop
///
/// If it has a value (e.g. {"llvm.distribute", 1}), return the value as an
/// operand or null otherwise. If the string metadata is not found return
/// Optional's not-a-value.
Optional<const MDOperand *> llvm::findStringMetadataForLoop(Loop *TheLoop,
                                                            StringRef Name) {
  MDNode *LoopID = TheLoop->getLoopID();
  // Return none if LoopID is not set.
  if (!LoopID)
    return None;

  // First operand should refer to the loop id itself.
  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
  assert(LoopID->getOperand(0) == LoopID && "invalid loop id");

  // Iterate over LoopID operands and look for MDString metadata.
  for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
    MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
    if (!MD)
      continue;
    MDString *S = dyn_cast<MDString>(MD->getOperand(0));
    if (!S)
      continue;
    // Return the operand if the MDString holds the expected metadata.
    if (Name.equals(S->getString()))
      switch (MD->getNumOperands()) {
      case 1:
        return nullptr;
      case 2:
        return &MD->getOperand(1);
      default:
        llvm_unreachable("loop metadata has 0 or 1 operand");
      }
  }
  return None;
}

/// Returns true if the instruction in a loop is guaranteed to execute at least
/// once.
bool llvm::isGuaranteedToExecute(const Instruction &Inst,
                                 const DominatorTree *DT, const Loop *CurLoop,
                                 const LoopSafetyInfo *SafetyInfo) {
  // We have to check to make sure that the instruction dominates all
  // of the exit blocks. If it doesn't, then there is a path out of the loop
  // which does not execute this instruction, so we can't hoist it.

  // If the instruction is in the header block for the loop (which is very
  // common), it is always guaranteed to dominate the exit blocks. Since this
  // is a common case, and can save some work, check it now.
  if (Inst.getParent() == CurLoop->getHeader())
    // If there's a throw in the header block, we can't guarantee we'll reach
    // Inst.
    return !SafetyInfo->HeaderMayThrow;

  // Somewhere in this loop there is an instruction which may throw and make us
  // exit the loop.
  if (SafetyInfo->MayThrow)
    return false;

  // Get the exit blocks for the current loop.
  SmallVector<BasicBlock *, 8> ExitBlocks;
  CurLoop->getExitBlocks(ExitBlocks);

  // Verify that the block dominates each of the exit blocks of the loop.
  for (BasicBlock *ExitBlock : ExitBlocks)
    if (!DT->dominates(Inst.getParent(), ExitBlock))
      return false;

  // As a degenerate case, if the loop is statically infinite then we haven't
  // proven anything since there are no exit blocks.
  if (ExitBlocks.empty())
    return false;

  // FIXME: In general, we have to prove that the loop isn't an infinite loop.
  // See http://llvm.org/PR24078 . (The "ExitBlocks.empty()" check above is
  // just a special case of this.)
  return true;
}

Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) {
  // Only support loops with a unique exiting block, and a latch.
  if (!L->getExitingBlock())
    return None;

  // Get the branch weights for the loop's backedge.
  BranchInst *LatchBR =
      dyn_cast<BranchInst>(L->getLoopLatch()->getTerminator());
  if (!LatchBR || LatchBR->getNumSuccessors() != 2)
    return None;

  assert((LatchBR->getSuccessor(0) == L->getHeader() ||
          LatchBR->getSuccessor(1) == L->getHeader()) &&
         "At least one edge out of the latch must go to the header");

  // To estimate the number of times the loop body was executed, we want to
  // know the number of times the backedge was taken, vs. the number of times
  // we exited the loop.
  uint64_t TrueVal, FalseVal;
  if (!LatchBR->extractProfMetadata(TrueVal, FalseVal))
    return None;

  if (!TrueVal || !FalseVal)
    return 0;

  // Divide the count of the backedge by the count of the edge exiting the
  // loop, rounding to nearest.
  if (LatchBR->getSuccessor(0) == L->getHeader())
    return (TrueVal + (FalseVal / 2)) / FalseVal;
  else
    return (FalseVal + (TrueVal / 2)) / TrueVal;
}

/// \brief Adds a 'fast' flag to floating point operations.
static Value *addFastMathFlag(Value *V) {
  if (isa<FPMathOperator>(V)) {
    FastMathFlags Flags;
    Flags.setUnsafeAlgebra();
    cast<Instruction>(V)->setFastMathFlags(Flags);
  }
  return V;
}

// Helper to generate a log2 shuffle reduction.
Value *
llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
                          RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind,
                          ArrayRef<Value *> RedOps) {
  unsigned VF = Src->getType()->getVectorNumElements();
  // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
  // and vector ops, reducing the set of values being computed by half each
  // round.
  assert(isPowerOf2_32(VF) &&
         "Reduction emission only supported for pow2 vectors!");
  Value *TmpVec = Src;
  SmallVector<Constant *, 32> ShuffleMask(VF, nullptr);
  for (unsigned i = VF; i != 1; i >>= 1) {
    // Move the upper half of the vector to the lower half.
    for (unsigned j = 0; j != i / 2; ++j)
      ShuffleMask[j] = Builder.getInt32(i / 2 + j);

    // Fill the rest of the mask with undef.
    std::fill(&ShuffleMask[i / 2], ShuffleMask.end(),
              UndefValue::get(Builder.getInt32Ty()));

    Value *Shuf = Builder.CreateShuffleVector(
        TmpVec, UndefValue::get(TmpVec->getType()),
        ConstantVector::get(ShuffleMask), "rdx.shuf");

    if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
      // Floating point operations had to be 'fast' to enable the reduction.
      TmpVec = addFastMathFlag(Builder.CreateBinOp((Instruction::BinaryOps)Op,
                                                   TmpVec, Shuf, "bin.rdx"));
    } else {
      assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid &&
             "Invalid min/max");
      TmpVec = RecurrenceDescriptor::createMinMaxOp(Builder, MinMaxKind,
                                                    TmpVec, Shuf);
    }
    if (!RedOps.empty())
      propagateIRFlags(TmpVec, RedOps);
  }
  // The result is in the first element of the vector.
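  // (For example, with VF = 4 and an add reduction, <a,b,c,d> is first
  // combined with its upper half <c,d,u,u> to give <a+c,b+d,..>, and that is
  // then combined with <b+d,..>, leaving a+b+c+d in lane 0.)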
  return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
}

/// Create a simple vector reduction specified by an opcode and some
/// flags (if generating min/max reductions).
Value *llvm::createSimpleTargetReduction(
    IRBuilder<> &Builder, const TargetTransformInfo *TTI, unsigned Opcode,
    Value *Src, TargetTransformInfo::ReductionFlags Flags,
    ArrayRef<Value *> RedOps) {
  assert(isa<VectorType>(Src->getType()) && "Type must be a vector");

  Value *ScalarUdf = UndefValue::get(Src->getType()->getVectorElementType());
  std::function<Value *()> BuildFunc;
  using RD = RecurrenceDescriptor;
  RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid;
  // TODO: Support creating ordered reductions.
  FastMathFlags FMFUnsafe;
  FMFUnsafe.setUnsafeAlgebra();

  switch (Opcode) {
  case Instruction::Add:
    BuildFunc = [&]() { return Builder.CreateAddReduce(Src); };
    break;
  case Instruction::Mul:
    BuildFunc = [&]() { return Builder.CreateMulReduce(Src); };
    break;
  case Instruction::And:
    BuildFunc = [&]() { return Builder.CreateAndReduce(Src); };
    break;
  case Instruction::Or:
    BuildFunc = [&]() { return Builder.CreateOrReduce(Src); };
    break;
  case Instruction::Xor:
    BuildFunc = [&]() { return Builder.CreateXorReduce(Src); };
    break;
  case Instruction::FAdd:
    BuildFunc = [&]() {
      auto Rdx = Builder.CreateFAddReduce(ScalarUdf, Src);
      cast<CallInst>(Rdx)->setFastMathFlags(FMFUnsafe);
      return Rdx;
    };
    break;
  case Instruction::FMul:
    BuildFunc = [&]() {
      auto Rdx = Builder.CreateFMulReduce(ScalarUdf, Src);
      cast<CallInst>(Rdx)->setFastMathFlags(FMFUnsafe);
      return Rdx;
    };
    break;
  case Instruction::ICmp:
    if (Flags.IsMaxOp) {
      MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMax : RD::MRK_UIntMax;
      BuildFunc = [&]() {
        return Builder.CreateIntMaxReduce(Src, Flags.IsSigned);
      };
    } else {
      MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMin : RD::MRK_UIntMin;
      BuildFunc = [&]() {
        return Builder.CreateIntMinReduce(Src, Flags.IsSigned);
      };
    }
    break;
  case Instruction::FCmp:
    if (Flags.IsMaxOp) {
      MinMaxKind = RD::MRK_FloatMax;
      BuildFunc = [&]() { return Builder.CreateFPMaxReduce(Src, Flags.NoNaN); };
    } else {
      MinMaxKind = RD::MRK_FloatMin;
      BuildFunc = [&]() { return Builder.CreateFPMinReduce(Src, Flags.NoNaN); };
    }
    break;
  default:
    llvm_unreachable("Unhandled opcode");
    break;
  }
  if (TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags))
    return BuildFunc();
  return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, RedOps);
}

/// Create a vector reduction using a given recurrence descriptor.
Value *llvm::createTargetReduction(IRBuilder<> &Builder,
                                   const TargetTransformInfo *TTI,
                                   RecurrenceDescriptor &Desc, Value *Src,
                                   bool NoNaN) {
  // TODO: Support in-order reductions based on the recurrence descriptor.
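  // Translate the recurrence kind recorded in the descriptor into an opcode
  // and ReductionFlags, then defer to createSimpleTargetReduction above.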
  RecurrenceDescriptor::RecurrenceKind RecKind = Desc.getRecurrenceKind();
  TargetTransformInfo::ReductionFlags Flags;
  Flags.NoNaN = NoNaN;
  auto getSimpleRdx = [&](unsigned Opc) {
    return createSimpleTargetReduction(Builder, TTI, Opc, Src, Flags);
  };
  switch (RecKind) {
  case RecurrenceDescriptor::RK_FloatAdd:
    return getSimpleRdx(Instruction::FAdd);
  case RecurrenceDescriptor::RK_FloatMult:
    return getSimpleRdx(Instruction::FMul);
  case RecurrenceDescriptor::RK_IntegerAdd:
    return getSimpleRdx(Instruction::Add);
  case RecurrenceDescriptor::RK_IntegerMult:
    return getSimpleRdx(Instruction::Mul);
  case RecurrenceDescriptor::RK_IntegerAnd:
    return getSimpleRdx(Instruction::And);
  case RecurrenceDescriptor::RK_IntegerOr:
    return getSimpleRdx(Instruction::Or);
  case RecurrenceDescriptor::RK_IntegerXor:
    return getSimpleRdx(Instruction::Xor);
  case RecurrenceDescriptor::RK_IntegerMinMax: {
    switch (Desc.getMinMaxRecurrenceKind()) {
    case RecurrenceDescriptor::MRK_SIntMax:
      Flags.IsSigned = true;
      Flags.IsMaxOp = true;
      break;
    case RecurrenceDescriptor::MRK_UIntMax:
      Flags.IsMaxOp = true;
      break;
    case RecurrenceDescriptor::MRK_SIntMin:
      Flags.IsSigned = true;
      break;
    case RecurrenceDescriptor::MRK_UIntMin:
      break;
    default:
      llvm_unreachable("Unhandled MRK");
    }
    return getSimpleRdx(Instruction::ICmp);
  }
  case RecurrenceDescriptor::RK_FloatMinMax: {
    Flags.IsMaxOp =
        Desc.getMinMaxRecurrenceKind() == RecurrenceDescriptor::MRK_FloatMax;
    return getSimpleRdx(Instruction::FCmp);
  }
  default:
    llvm_unreachable("Unhandled RecKind");
  }
}

void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL) {
  if (auto *VecOp = dyn_cast<Instruction>(I)) {
    if (auto *I0 = dyn_cast<Instruction>(VL[0])) {
      // VecOp is initialized to the 0th scalar, so start counting from index
      // '1'.
      VecOp->copyIRFlags(I0);
      for (int i = 1, e = VL.size(); i < e; ++i) {
        if (auto *Scalar = dyn_cast<Instruction>(VL[i]))
          VecOp->andIRFlags(Scalar);
      }
    }
  }
}