1 //===-- LoopUtils.cpp - Loop Utility functions -------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines common loop utility functions. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Transforms/Utils/LoopUtils.h" 15 #include "llvm/ADT/ScopeExit.h" 16 #include "llvm/Analysis/AliasAnalysis.h" 17 #include "llvm/Analysis/BasicAliasAnalysis.h" 18 #include "llvm/Analysis/GlobalsModRef.h" 19 #include "llvm/Analysis/InstructionSimplify.h" 20 #include "llvm/Analysis/LoopInfo.h" 21 #include "llvm/Analysis/LoopPass.h" 22 #include "llvm/Analysis/ScalarEvolution.h" 23 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" 24 #include "llvm/Analysis/ScalarEvolutionExpander.h" 25 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 26 #include "llvm/Analysis/TargetTransformInfo.h" 27 #include "llvm/Analysis/ValueTracking.h" 28 #include "llvm/IR/Dominators.h" 29 #include "llvm/IR/Instructions.h" 30 #include "llvm/IR/Module.h" 31 #include "llvm/IR/PatternMatch.h" 32 #include "llvm/IR/ValueHandle.h" 33 #include "llvm/Pass.h" 34 #include "llvm/Support/Debug.h" 35 #include "llvm/Support/KnownBits.h" 36 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 37 38 using namespace llvm; 39 using namespace llvm::PatternMatch; 40 41 #define DEBUG_TYPE "loop-utils" 42 43 bool RecurrenceDescriptor::areAllUsesIn(Instruction *I, 44 SmallPtrSetImpl<Instruction *> &Set) { 45 for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use) 46 if (!Set.count(dyn_cast<Instruction>(*Use))) 47 return false; 48 return true; 49 } 50 51 bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurrenceKind Kind) { 52 switch (Kind) { 53 default: 54 break; 55 case RK_IntegerAdd: 56 case RK_IntegerMult: 57 case RK_IntegerOr: 58 case RK_IntegerAnd: 59 case RK_IntegerXor: 60 case RK_IntegerMinMax: 61 return true; 62 } 63 return false; 64 } 65 66 bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurrenceKind Kind) { 67 return (Kind != RK_NoRecurrence) && !isIntegerRecurrenceKind(Kind); 68 } 69 70 bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurrenceKind Kind) { 71 switch (Kind) { 72 default: 73 break; 74 case RK_IntegerAdd: 75 case RK_IntegerMult: 76 case RK_FloatAdd: 77 case RK_FloatMult: 78 return true; 79 } 80 return false; 81 } 82 83 /// Determines if Phi may have been type-promoted. If Phi has a single user 84 /// that ANDs the Phi with a type mask, return the user. RT is updated to 85 /// account for the narrower bit width represented by the mask, and the AND 86 /// instruction is added to CI. 87 static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT, 88 SmallPtrSetImpl<Instruction *> &Visited, 89 SmallPtrSetImpl<Instruction *> &CI) { 90 if (!Phi->hasOneUse()) 91 return Phi; 92 93 const APInt *M = nullptr; 94 Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser()); 95 96 // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT 97 // with a new integer type of the corresponding bit width. 98 if (match(J, m_c_And(m_Instruction(I), m_APInt(M)))) { 99 int32_t Bits = (*M + 1).exactLogBase2(); 100 if (Bits > 0) { 101 RT = IntegerType::get(Phi->getContext(), Bits); 102 Visited.insert(Phi); 103 CI.insert(J); 104 return J; 105 } 106 } 107 return Phi; 108 } 109 110 /// Compute the minimal bit width needed to represent a reduction whose exit 111 /// instruction is given by Exit. 112 static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit, 113 DemandedBits *DB, 114 AssumptionCache *AC, 115 DominatorTree *DT) { 116 bool IsSigned = false; 117 const DataLayout &DL = Exit->getModule()->getDataLayout(); 118 uint64_t MaxBitWidth = DL.getTypeSizeInBits(Exit->getType()); 119 120 if (DB) { 121 // Use the demanded bits analysis to determine the bits that are live out 122 // of the exit instruction, rounding up to the nearest power of two. If the 123 // use of demanded bits results in a smaller bit width, we know the value 124 // must be positive (i.e., IsSigned = false), because if this were not the 125 // case, the sign bit would have been demanded. 126 auto Mask = DB->getDemandedBits(Exit); 127 MaxBitWidth = Mask.getBitWidth() - Mask.countLeadingZeros(); 128 } 129 130 if (MaxBitWidth == DL.getTypeSizeInBits(Exit->getType()) && AC && DT) { 131 // If demanded bits wasn't able to limit the bit width, we can try to use 132 // value tracking instead. This can be the case, for example, if the value 133 // may be negative. 134 auto NumSignBits = ComputeNumSignBits(Exit, DL, 0, AC, nullptr, DT); 135 auto NumTypeBits = DL.getTypeSizeInBits(Exit->getType()); 136 MaxBitWidth = NumTypeBits - NumSignBits; 137 KnownBits Bits = computeKnownBits(Exit, DL); 138 if (!Bits.isNonNegative()) { 139 // If the value is not known to be non-negative, we set IsSigned to true, 140 // meaning that we will use sext instructions instead of zext 141 // instructions to restore the original type. 142 IsSigned = true; 143 if (!Bits.isNegative()) 144 // If the value is not known to be negative, we don't known what the 145 // upper bit is, and therefore, we don't know what kind of extend we 146 // will need. In this case, just increase the bit width by one bit and 147 // use sext. 148 ++MaxBitWidth; 149 } 150 } 151 if (!isPowerOf2_64(MaxBitWidth)) 152 MaxBitWidth = NextPowerOf2(MaxBitWidth); 153 154 return std::make_pair(Type::getIntNTy(Exit->getContext(), MaxBitWidth), 155 IsSigned); 156 } 157 158 /// Collect cast instructions that can be ignored in the vectorizer's cost 159 /// model, given a reduction exit value and the minimal type in which the 160 /// reduction can be represented. 161 static void collectCastsToIgnore(Loop *TheLoop, Instruction *Exit, 162 Type *RecurrenceType, 163 SmallPtrSetImpl<Instruction *> &Casts) { 164 165 SmallVector<Instruction *, 8> Worklist; 166 SmallPtrSet<Instruction *, 8> Visited; 167 Worklist.push_back(Exit); 168 169 while (!Worklist.empty()) { 170 Instruction *Val = Worklist.pop_back_val(); 171 Visited.insert(Val); 172 if (auto *Cast = dyn_cast<CastInst>(Val)) 173 if (Cast->getSrcTy() == RecurrenceType) { 174 // If the source type of a cast instruction is equal to the recurrence 175 // type, it will be eliminated, and should be ignored in the vectorizer 176 // cost model. 177 Casts.insert(Cast); 178 continue; 179 } 180 181 // Add all operands to the work list if they are loop-varying values that 182 // we haven't yet visited. 183 for (Value *O : cast<User>(Val)->operands()) 184 if (auto *I = dyn_cast<Instruction>(O)) 185 if (TheLoop->contains(I) && !Visited.count(I)) 186 Worklist.push_back(I); 187 } 188 } 189 190 bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, 191 Loop *TheLoop, bool HasFunNoNaNAttr, 192 RecurrenceDescriptor &RedDes, 193 DemandedBits *DB, 194 AssumptionCache *AC, 195 DominatorTree *DT) { 196 if (Phi->getNumIncomingValues() != 2) 197 return false; 198 199 // Reduction variables are only found in the loop header block. 200 if (Phi->getParent() != TheLoop->getHeader()) 201 return false; 202 203 // Obtain the reduction start value from the value that comes from the loop 204 // preheader. 205 Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader()); 206 207 // ExitInstruction is the single value which is used outside the loop. 208 // We only allow for a single reduction value to be used outside the loop. 209 // This includes users of the reduction, variables (which form a cycle 210 // which ends in the phi node). 211 Instruction *ExitInstruction = nullptr; 212 // Indicates that we found a reduction operation in our scan. 213 bool FoundReduxOp = false; 214 215 // We start with the PHI node and scan for all of the users of this 216 // instruction. All users must be instructions that can be used as reduction 217 // variables (such as ADD). We must have a single out-of-block user. The cycle 218 // must include the original PHI. 219 bool FoundStartPHI = false; 220 221 // To recognize min/max patterns formed by a icmp select sequence, we store 222 // the number of instruction we saw from the recognized min/max pattern, 223 // to make sure we only see exactly the two instructions. 224 unsigned NumCmpSelectPatternInst = 0; 225 InstDesc ReduxDesc(false, nullptr); 226 227 // Data used for determining if the recurrence has been type-promoted. 228 Type *RecurrenceType = Phi->getType(); 229 SmallPtrSet<Instruction *, 4> CastInsts; 230 Instruction *Start = Phi; 231 bool IsSigned = false; 232 233 SmallPtrSet<Instruction *, 8> VisitedInsts; 234 SmallVector<Instruction *, 8> Worklist; 235 236 // Return early if the recurrence kind does not match the type of Phi. If the 237 // recurrence kind is arithmetic, we attempt to look through AND operations 238 // resulting from the type promotion performed by InstCombine. Vector 239 // operations are not limited to the legal integer widths, so we may be able 240 // to evaluate the reduction in the narrower width. 241 if (RecurrenceType->isFloatingPointTy()) { 242 if (!isFloatingPointRecurrenceKind(Kind)) 243 return false; 244 } else { 245 if (!isIntegerRecurrenceKind(Kind)) 246 return false; 247 if (isArithmeticRecurrenceKind(Kind)) 248 Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts); 249 } 250 251 Worklist.push_back(Start); 252 VisitedInsts.insert(Start); 253 254 // A value in the reduction can be used: 255 // - By the reduction: 256 // - Reduction operation: 257 // - One use of reduction value (safe). 258 // - Multiple use of reduction value (not safe). 259 // - PHI: 260 // - All uses of the PHI must be the reduction (safe). 261 // - Otherwise, not safe. 262 // - By instructions outside of the loop (safe). 263 // * One value may have several outside users, but all outside 264 // uses must be of the same value. 265 // - By an instruction that is not part of the reduction (not safe). 266 // This is either: 267 // * An instruction type other than PHI or the reduction operation. 268 // * A PHI in the header other than the initial PHI. 269 while (!Worklist.empty()) { 270 Instruction *Cur = Worklist.back(); 271 Worklist.pop_back(); 272 273 // No Users. 274 // If the instruction has no users then this is a broken chain and can't be 275 // a reduction variable. 276 if (Cur->use_empty()) 277 return false; 278 279 bool IsAPhi = isa<PHINode>(Cur); 280 281 // A header PHI use other than the original PHI. 282 if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent()) 283 return false; 284 285 // Reductions of instructions such as Div, and Sub is only possible if the 286 // LHS is the reduction variable. 287 if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) && 288 !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) && 289 !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0)))) 290 return false; 291 292 // Any reduction instruction must be of one of the allowed kinds. We ignore 293 // the starting value (the Phi or an AND instruction if the Phi has been 294 // type-promoted). 295 if (Cur != Start) { 296 ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr); 297 if (!ReduxDesc.isRecurrence()) 298 return false; 299 } 300 301 // A reduction operation must only have one use of the reduction value. 302 if (!IsAPhi && Kind != RK_IntegerMinMax && Kind != RK_FloatMinMax && 303 hasMultipleUsesOf(Cur, VisitedInsts)) 304 return false; 305 306 // All inputs to a PHI node must be a reduction value. 307 if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts)) 308 return false; 309 310 if (Kind == RK_IntegerMinMax && 311 (isa<ICmpInst>(Cur) || isa<SelectInst>(Cur))) 312 ++NumCmpSelectPatternInst; 313 if (Kind == RK_FloatMinMax && (isa<FCmpInst>(Cur) || isa<SelectInst>(Cur))) 314 ++NumCmpSelectPatternInst; 315 316 // Check whether we found a reduction operator. 317 FoundReduxOp |= !IsAPhi && Cur != Start; 318 319 // Process users of current instruction. Push non-PHI nodes after PHI nodes 320 // onto the stack. This way we are going to have seen all inputs to PHI 321 // nodes once we get to them. 322 SmallVector<Instruction *, 8> NonPHIs; 323 SmallVector<Instruction *, 8> PHIs; 324 for (User *U : Cur->users()) { 325 Instruction *UI = cast<Instruction>(U); 326 327 // Check if we found the exit user. 328 BasicBlock *Parent = UI->getParent(); 329 if (!TheLoop->contains(Parent)) { 330 // If we already know this instruction is used externally, move on to 331 // the next user. 332 if (ExitInstruction == Cur) 333 continue; 334 335 // Exit if you find multiple values used outside or if the header phi 336 // node is being used. In this case the user uses the value of the 337 // previous iteration, in which case we would loose "VF-1" iterations of 338 // the reduction operation if we vectorize. 339 if (ExitInstruction != nullptr || Cur == Phi) 340 return false; 341 342 // The instruction used by an outside user must be the last instruction 343 // before we feed back to the reduction phi. Otherwise, we loose VF-1 344 // operations on the value. 345 if (!is_contained(Phi->operands(), Cur)) 346 return false; 347 348 ExitInstruction = Cur; 349 continue; 350 } 351 352 // Process instructions only once (termination). Each reduction cycle 353 // value must only be used once, except by phi nodes and min/max 354 // reductions which are represented as a cmp followed by a select. 355 InstDesc IgnoredVal(false, nullptr); 356 if (VisitedInsts.insert(UI).second) { 357 if (isa<PHINode>(UI)) 358 PHIs.push_back(UI); 359 else 360 NonPHIs.push_back(UI); 361 } else if (!isa<PHINode>(UI) && 362 ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) && 363 !isa<SelectInst>(UI)) || 364 !isMinMaxSelectCmpPattern(UI, IgnoredVal).isRecurrence())) 365 return false; 366 367 // Remember that we completed the cycle. 368 if (UI == Phi) 369 FoundStartPHI = true; 370 } 371 Worklist.append(PHIs.begin(), PHIs.end()); 372 Worklist.append(NonPHIs.begin(), NonPHIs.end()); 373 } 374 375 // This means we have seen one but not the other instruction of the 376 // pattern or more than just a select and cmp. 377 if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) && 378 NumCmpSelectPatternInst != 2) 379 return false; 380 381 if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction) 382 return false; 383 384 if (Start != Phi) { 385 // If the starting value is not the same as the phi node, we speculatively 386 // looked through an 'and' instruction when evaluating a potential 387 // arithmetic reduction to determine if it may have been type-promoted. 388 // 389 // We now compute the minimal bit width that is required to represent the 390 // reduction. If this is the same width that was indicated by the 'and', we 391 // can represent the reduction in the smaller type. The 'and' instruction 392 // will be eliminated since it will essentially be a cast instruction that 393 // can be ignore in the cost model. If we compute a different type than we 394 // did when evaluating the 'and', the 'and' will not be eliminated, and we 395 // will end up with different kinds of operations in the recurrence 396 // expression (e.g., RK_IntegerAND, RK_IntegerADD). We give up if this is 397 // the case. 398 // 399 // The vectorizer relies on InstCombine to perform the actual 400 // type-shrinking. It does this by inserting instructions to truncate the 401 // exit value of the reduction to the width indicated by RecurrenceType and 402 // then extend this value back to the original width. If IsSigned is false, 403 // a 'zext' instruction will be generated; otherwise, a 'sext' will be 404 // used. 405 // 406 // TODO: We should not rely on InstCombine to rewrite the reduction in the 407 // smaller type. We should just generate a correctly typed expression 408 // to begin with. 409 Type *ComputedType; 410 std::tie(ComputedType, IsSigned) = 411 computeRecurrenceType(ExitInstruction, DB, AC, DT); 412 if (ComputedType != RecurrenceType) 413 return false; 414 415 // The recurrence expression will be represented in a narrower type. If 416 // there are any cast instructions that will be unnecessary, collect them 417 // in CastInsts. Note that the 'and' instruction was already included in 418 // this list. 419 // 420 // TODO: A better way to represent this may be to tag in some way all the 421 // instructions that are a part of the reduction. The vectorizer cost 422 // model could then apply the recurrence type to these instructions, 423 // without needing a white list of instructions to ignore. 424 collectCastsToIgnore(TheLoop, ExitInstruction, RecurrenceType, CastInsts); 425 } 426 427 // We found a reduction var if we have reached the original phi node and we 428 // only have a single instruction with out-of-loop users. 429 430 // The ExitInstruction(Instruction which is allowed to have out-of-loop users) 431 // is saved as part of the RecurrenceDescriptor. 432 433 // Save the description of this reduction variable. 434 RecurrenceDescriptor RD( 435 RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(), 436 ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts); 437 RedDes = RD; 438 439 return true; 440 } 441 442 /// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction 443 /// pattern corresponding to a min(X, Y) or max(X, Y). 444 RecurrenceDescriptor::InstDesc 445 RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev) { 446 447 assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) && 448 "Expect a select instruction"); 449 Instruction *Cmp = nullptr; 450 SelectInst *Select = nullptr; 451 452 // We must handle the select(cmp()) as a single instruction. Advance to the 453 // select. 454 if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) { 455 if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->user_begin()))) 456 return InstDesc(false, I); 457 return InstDesc(Select, Prev.getMinMaxKind()); 458 } 459 460 // Only handle single use cases for now. 461 if (!(Select = dyn_cast<SelectInst>(I))) 462 return InstDesc(false, I); 463 if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) && 464 !(Cmp = dyn_cast<FCmpInst>(I->getOperand(0)))) 465 return InstDesc(false, I); 466 if (!Cmp->hasOneUse()) 467 return InstDesc(false, I); 468 469 Value *CmpLeft; 470 Value *CmpRight; 471 472 // Look for a min/max pattern. 473 if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 474 return InstDesc(Select, MRK_UIntMin); 475 else if (m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 476 return InstDesc(Select, MRK_UIntMax); 477 else if (m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 478 return InstDesc(Select, MRK_SIntMax); 479 else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 480 return InstDesc(Select, MRK_SIntMin); 481 else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 482 return InstDesc(Select, MRK_FloatMin); 483 else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 484 return InstDesc(Select, MRK_FloatMax); 485 else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 486 return InstDesc(Select, MRK_FloatMin); 487 else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 488 return InstDesc(Select, MRK_FloatMax); 489 490 return InstDesc(false, I); 491 } 492 493 RecurrenceDescriptor::InstDesc 494 RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind, 495 InstDesc &Prev, bool HasFunNoNaNAttr) { 496 bool FP = I->getType()->isFloatingPointTy(); 497 Instruction *UAI = Prev.getUnsafeAlgebraInst(); 498 if (!UAI && FP && !I->isFast()) 499 UAI = I; // Found an unsafe (unvectorizable) algebra instruction. 500 501 switch (I->getOpcode()) { 502 default: 503 return InstDesc(false, I); 504 case Instruction::PHI: 505 return InstDesc(I, Prev.getMinMaxKind(), Prev.getUnsafeAlgebraInst()); 506 case Instruction::Sub: 507 case Instruction::Add: 508 return InstDesc(Kind == RK_IntegerAdd, I); 509 case Instruction::Mul: 510 return InstDesc(Kind == RK_IntegerMult, I); 511 case Instruction::And: 512 return InstDesc(Kind == RK_IntegerAnd, I); 513 case Instruction::Or: 514 return InstDesc(Kind == RK_IntegerOr, I); 515 case Instruction::Xor: 516 return InstDesc(Kind == RK_IntegerXor, I); 517 case Instruction::FMul: 518 return InstDesc(Kind == RK_FloatMult, I, UAI); 519 case Instruction::FSub: 520 case Instruction::FAdd: 521 return InstDesc(Kind == RK_FloatAdd, I, UAI); 522 case Instruction::FCmp: 523 case Instruction::ICmp: 524 case Instruction::Select: 525 if (Kind != RK_IntegerMinMax && 526 (!HasFunNoNaNAttr || Kind != RK_FloatMinMax)) 527 return InstDesc(false, I); 528 return isMinMaxSelectCmpPattern(I, Prev); 529 } 530 } 531 532 bool RecurrenceDescriptor::hasMultipleUsesOf( 533 Instruction *I, SmallPtrSetImpl<Instruction *> &Insts) { 534 unsigned NumUses = 0; 535 for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; 536 ++Use) { 537 if (Insts.count(dyn_cast<Instruction>(*Use))) 538 ++NumUses; 539 if (NumUses > 1) 540 return true; 541 } 542 543 return false; 544 } 545 bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop, 546 RecurrenceDescriptor &RedDes, 547 DemandedBits *DB, AssumptionCache *AC, 548 DominatorTree *DT) { 549 550 BasicBlock *Header = TheLoop->getHeader(); 551 Function &F = *Header->getParent(); 552 bool HasFunNoNaNAttr = 553 F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true"; 554 555 if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB, 556 AC, DT)) { 557 DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n"); 558 return true; 559 } 560 if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes, DB, 561 AC, DT)) { 562 DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n"); 563 return true; 564 } 565 if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes, DB, 566 AC, DT)) { 567 DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n"); 568 return true; 569 } 570 if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes, DB, 571 AC, DT)) { 572 DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n"); 573 return true; 574 } 575 if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes, DB, 576 AC, DT)) { 577 DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n"); 578 return true; 579 } 580 if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr, RedDes, 581 DB, AC, DT)) { 582 DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n"); 583 return true; 584 } 585 if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes, DB, 586 AC, DT)) { 587 DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n"); 588 return true; 589 } 590 if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB, 591 AC, DT)) { 592 DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n"); 593 return true; 594 } 595 if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes, DB, 596 AC, DT)) { 597 DEBUG(dbgs() << "Found an float MINMAX reduction PHI." << *Phi << "\n"); 598 return true; 599 } 600 // Not a reduction of known type. 601 return false; 602 } 603 604 bool RecurrenceDescriptor::isFirstOrderRecurrence( 605 PHINode *Phi, Loop *TheLoop, 606 DenseMap<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT) { 607 608 // Ensure the phi node is in the loop header and has two incoming values. 609 if (Phi->getParent() != TheLoop->getHeader() || 610 Phi->getNumIncomingValues() != 2) 611 return false; 612 613 // Ensure the loop has a preheader and a single latch block. The loop 614 // vectorizer will need the latch to set up the next iteration of the loop. 615 auto *Preheader = TheLoop->getLoopPreheader(); 616 auto *Latch = TheLoop->getLoopLatch(); 617 if (!Preheader || !Latch) 618 return false; 619 620 // Ensure the phi node's incoming blocks are the loop preheader and latch. 621 if (Phi->getBasicBlockIndex(Preheader) < 0 || 622 Phi->getBasicBlockIndex(Latch) < 0) 623 return false; 624 625 // Get the previous value. The previous value comes from the latch edge while 626 // the initial value comes form the preheader edge. 627 auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch)); 628 if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous) || 629 SinkAfter.count(Previous)) // Cannot rely on dominance due to motion. 630 return false; 631 632 // Ensure every user of the phi node is dominated by the previous value. 633 // The dominance requirement ensures the loop vectorizer will not need to 634 // vectorize the initial value prior to the first iteration of the loop. 635 // TODO: Consider extending this sinking to handle other kinds of instructions 636 // and expressions, beyond sinking a single cast past Previous. 637 if (Phi->hasOneUse()) { 638 auto *I = Phi->user_back(); 639 if (I->isCast() && (I->getParent() == Phi->getParent()) && I->hasOneUse() && 640 DT->dominates(Previous, I->user_back())) { 641 if (!DT->dominates(Previous, I)) // Otherwise we're good w/o sinking. 642 SinkAfter[I] = Previous; 643 return true; 644 } 645 } 646 647 for (User *U : Phi->users()) 648 if (auto *I = dyn_cast<Instruction>(U)) { 649 if (!DT->dominates(Previous, I)) 650 return false; 651 } 652 653 return true; 654 } 655 656 /// This function returns the identity element (or neutral element) for 657 /// the operation K. 658 Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurrenceKind K, 659 Type *Tp) { 660 switch (K) { 661 case RK_IntegerXor: 662 case RK_IntegerAdd: 663 case RK_IntegerOr: 664 // Adding, Xoring, Oring zero to a number does not change it. 665 return ConstantInt::get(Tp, 0); 666 case RK_IntegerMult: 667 // Multiplying a number by 1 does not change it. 668 return ConstantInt::get(Tp, 1); 669 case RK_IntegerAnd: 670 // AND-ing a number with an all-1 value does not change it. 671 return ConstantInt::get(Tp, -1, true); 672 case RK_FloatMult: 673 // Multiplying a number by 1 does not change it. 674 return ConstantFP::get(Tp, 1.0L); 675 case RK_FloatAdd: 676 // Adding zero to a number does not change it. 677 return ConstantFP::get(Tp, 0.0L); 678 default: 679 llvm_unreachable("Unknown recurrence kind"); 680 } 681 } 682 683 /// This function translates the recurrence kind to an LLVM binary operator. 684 unsigned RecurrenceDescriptor::getRecurrenceBinOp(RecurrenceKind Kind) { 685 switch (Kind) { 686 case RK_IntegerAdd: 687 return Instruction::Add; 688 case RK_IntegerMult: 689 return Instruction::Mul; 690 case RK_IntegerOr: 691 return Instruction::Or; 692 case RK_IntegerAnd: 693 return Instruction::And; 694 case RK_IntegerXor: 695 return Instruction::Xor; 696 case RK_FloatMult: 697 return Instruction::FMul; 698 case RK_FloatAdd: 699 return Instruction::FAdd; 700 case RK_IntegerMinMax: 701 return Instruction::ICmp; 702 case RK_FloatMinMax: 703 return Instruction::FCmp; 704 default: 705 llvm_unreachable("Unknown recurrence operation"); 706 } 707 } 708 709 Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder, 710 MinMaxRecurrenceKind RK, 711 Value *Left, Value *Right) { 712 CmpInst::Predicate P = CmpInst::ICMP_NE; 713 switch (RK) { 714 default: 715 llvm_unreachable("Unknown min/max recurrence kind"); 716 case MRK_UIntMin: 717 P = CmpInst::ICMP_ULT; 718 break; 719 case MRK_UIntMax: 720 P = CmpInst::ICMP_UGT; 721 break; 722 case MRK_SIntMin: 723 P = CmpInst::ICMP_SLT; 724 break; 725 case MRK_SIntMax: 726 P = CmpInst::ICMP_SGT; 727 break; 728 case MRK_FloatMin: 729 P = CmpInst::FCMP_OLT; 730 break; 731 case MRK_FloatMax: 732 P = CmpInst::FCMP_OGT; 733 break; 734 } 735 736 // We only match FP sequences that are 'fast', so we can unconditionally 737 // set it on any generated instructions. 738 IRBuilder<>::FastMathFlagGuard FMFG(Builder); 739 FastMathFlags FMF; 740 FMF.setFast(); 741 Builder.setFastMathFlags(FMF); 742 743 Value *Cmp; 744 if (RK == MRK_FloatMin || RK == MRK_FloatMax) 745 Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp"); 746 else 747 Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp"); 748 749 Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select"); 750 return Select; 751 } 752 753 InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K, 754 const SCEV *Step, BinaryOperator *BOp, 755 SmallVectorImpl<Instruction *> *Casts) 756 : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) { 757 assert(IK != IK_NoInduction && "Not an induction"); 758 759 // Start value type should match the induction kind and the value 760 // itself should not be null. 761 assert(StartValue && "StartValue is null"); 762 assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) && 763 "StartValue is not a pointer for pointer induction"); 764 assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) && 765 "StartValue is not an integer for integer induction"); 766 767 // Check the Step Value. It should be non-zero integer value. 768 assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) && 769 "Step value is zero"); 770 771 assert((IK != IK_PtrInduction || getConstIntStepValue()) && 772 "Step value should be constant for pointer induction"); 773 assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) && 774 "StepValue is not an integer"); 775 776 assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) && 777 "StepValue is not FP for FpInduction"); 778 assert((IK != IK_FpInduction || (InductionBinOp && 779 (InductionBinOp->getOpcode() == Instruction::FAdd || 780 InductionBinOp->getOpcode() == Instruction::FSub))) && 781 "Binary opcode should be specified for FP induction"); 782 783 if (Casts) { 784 for (auto &Inst : *Casts) { 785 RedundantCasts.push_back(Inst); 786 } 787 } 788 } 789 790 int InductionDescriptor::getConsecutiveDirection() const { 791 ConstantInt *ConstStep = getConstIntStepValue(); 792 if (ConstStep && (ConstStep->isOne() || ConstStep->isMinusOne())) 793 return ConstStep->getSExtValue(); 794 return 0; 795 } 796 797 ConstantInt *InductionDescriptor::getConstIntStepValue() const { 798 if (isa<SCEVConstant>(Step)) 799 return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue()); 800 return nullptr; 801 } 802 803 Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index, 804 ScalarEvolution *SE, 805 const DataLayout& DL) const { 806 807 SCEVExpander Exp(*SE, DL, "induction"); 808 assert(Index->getType() == Step->getType() && 809 "Index type does not match StepValue type"); 810 switch (IK) { 811 case IK_IntInduction: { 812 assert(Index->getType() == StartValue->getType() && 813 "Index type does not match StartValue type"); 814 815 // FIXME: Theoretically, we can call getAddExpr() of ScalarEvolution 816 // and calculate (Start + Index * Step) for all cases, without 817 // special handling for "isOne" and "isMinusOne". 818 // But in the real life the result code getting worse. We mix SCEV 819 // expressions and ADD/SUB operations and receive redundant 820 // intermediate values being calculated in different ways and 821 // Instcombine is unable to reduce them all. 822 823 if (getConstIntStepValue() && 824 getConstIntStepValue()->isMinusOne()) 825 return B.CreateSub(StartValue, Index); 826 if (getConstIntStepValue() && 827 getConstIntStepValue()->isOne()) 828 return B.CreateAdd(StartValue, Index); 829 const SCEV *S = SE->getAddExpr(SE->getSCEV(StartValue), 830 SE->getMulExpr(Step, SE->getSCEV(Index))); 831 return Exp.expandCodeFor(S, StartValue->getType(), &*B.GetInsertPoint()); 832 } 833 case IK_PtrInduction: { 834 assert(isa<SCEVConstant>(Step) && 835 "Expected constant step for pointer induction"); 836 const SCEV *S = SE->getMulExpr(SE->getSCEV(Index), Step); 837 Index = Exp.expandCodeFor(S, Index->getType(), &*B.GetInsertPoint()); 838 return B.CreateGEP(nullptr, StartValue, Index); 839 } 840 case IK_FpInduction: { 841 assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value"); 842 assert(InductionBinOp && 843 (InductionBinOp->getOpcode() == Instruction::FAdd || 844 InductionBinOp->getOpcode() == Instruction::FSub) && 845 "Original bin op should be defined for FP induction"); 846 847 Value *StepValue = cast<SCEVUnknown>(Step)->getValue(); 848 849 // Floating point operations had to be 'fast' to enable the induction. 850 FastMathFlags Flags; 851 Flags.setFast(); 852 853 Value *MulExp = B.CreateFMul(StepValue, Index); 854 if (isa<Instruction>(MulExp)) 855 // We have to check, the MulExp may be a constant. 856 cast<Instruction>(MulExp)->setFastMathFlags(Flags); 857 858 Value *BOp = B.CreateBinOp(InductionBinOp->getOpcode() , StartValue, 859 MulExp, "induction"); 860 if (isa<Instruction>(BOp)) 861 cast<Instruction>(BOp)->setFastMathFlags(Flags); 862 863 return BOp; 864 } 865 case IK_NoInduction: 866 return nullptr; 867 } 868 llvm_unreachable("invalid enum"); 869 } 870 871 bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop, 872 ScalarEvolution *SE, 873 InductionDescriptor &D) { 874 875 // Here we only handle FP induction variables. 876 assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type"); 877 878 if (TheLoop->getHeader() != Phi->getParent()) 879 return false; 880 881 // The loop may have multiple entrances or multiple exits; we can analyze 882 // this phi if it has a unique entry value and a unique backedge value. 883 if (Phi->getNumIncomingValues() != 2) 884 return false; 885 Value *BEValue = nullptr, *StartValue = nullptr; 886 if (TheLoop->contains(Phi->getIncomingBlock(0))) { 887 BEValue = Phi->getIncomingValue(0); 888 StartValue = Phi->getIncomingValue(1); 889 } else { 890 assert(TheLoop->contains(Phi->getIncomingBlock(1)) && 891 "Unexpected Phi node in the loop"); 892 BEValue = Phi->getIncomingValue(1); 893 StartValue = Phi->getIncomingValue(0); 894 } 895 896 BinaryOperator *BOp = dyn_cast<BinaryOperator>(BEValue); 897 if (!BOp) 898 return false; 899 900 Value *Addend = nullptr; 901 if (BOp->getOpcode() == Instruction::FAdd) { 902 if (BOp->getOperand(0) == Phi) 903 Addend = BOp->getOperand(1); 904 else if (BOp->getOperand(1) == Phi) 905 Addend = BOp->getOperand(0); 906 } else if (BOp->getOpcode() == Instruction::FSub) 907 if (BOp->getOperand(0) == Phi) 908 Addend = BOp->getOperand(1); 909 910 if (!Addend) 911 return false; 912 913 // The addend should be loop invariant 914 if (auto *I = dyn_cast<Instruction>(Addend)) 915 if (TheLoop->contains(I)) 916 return false; 917 918 // FP Step has unknown SCEV 919 const SCEV *Step = SE->getUnknown(Addend); 920 D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp); 921 return true; 922 } 923 924 /// This function is called when we suspect that the update-chain of a phi node 925 /// (whose symbolic SCEV expression sin \p PhiScev) contains redundant casts, 926 /// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime 927 /// predicate P under which the SCEV expression for the phi can be the 928 /// AddRecurrence \p AR; See createAddRecFromPHIWithCast). We want to find the 929 /// cast instructions that are involved in the update-chain of this induction. 930 /// A caller that adds the required runtime predicate can be free to drop these 931 /// cast instructions, and compute the phi using \p AR (instead of some scev 932 /// expression with casts). 933 /// 934 /// For example, without a predicate the scev expression can take the following 935 /// form: 936 /// (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy) 937 /// 938 /// It corresponds to the following IR sequence: 939 /// %for.body: 940 /// %x = phi i64 [ 0, %ph ], [ %add, %for.body ] 941 /// %casted_phi = "ExtTrunc i64 %x" 942 /// %add = add i64 %casted_phi, %step 943 /// 944 /// where %x is given in \p PN, 945 /// PSE.getSCEV(%x) is equal to PSE.getSCEV(%casted_phi) under a predicate, 946 /// and the IR sequence that "ExtTrunc i64 %x" represents can take one of 947 /// several forms, for example, such as: 948 /// ExtTrunc1: %casted_phi = and %x, 2^n-1 949 /// or: 950 /// ExtTrunc2: %t = shl %x, m 951 /// %casted_phi = ashr %t, m 952 /// 953 /// If we are able to find such sequence, we return the instructions 954 /// we found, namely %casted_phi and the instructions on its use-def chain up 955 /// to the phi (not including the phi). 956 static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE, 957 const SCEVUnknown *PhiScev, 958 const SCEVAddRecExpr *AR, 959 SmallVectorImpl<Instruction *> &CastInsts) { 960 961 assert(CastInsts.empty() && "CastInsts is expected to be empty."); 962 auto *PN = cast<PHINode>(PhiScev->getValue()); 963 assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression"); 964 const Loop *L = AR->getLoop(); 965 966 // Find any cast instructions that participate in the def-use chain of 967 // PhiScev in the loop. 968 // FORNOW/TODO: We currently expect the def-use chain to include only 969 // two-operand instructions, where one of the operands is an invariant. 970 // createAddRecFromPHIWithCasts() currently does not support anything more 971 // involved than that, so we keep the search simple. This can be 972 // extended/generalized as needed. 973 974 auto getDef = [&](const Value *Val) -> Value * { 975 const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val); 976 if (!BinOp) 977 return nullptr; 978 Value *Op0 = BinOp->getOperand(0); 979 Value *Op1 = BinOp->getOperand(1); 980 Value *Def = nullptr; 981 if (L->isLoopInvariant(Op0)) 982 Def = Op1; 983 else if (L->isLoopInvariant(Op1)) 984 Def = Op0; 985 return Def; 986 }; 987 988 // Look for the instruction that defines the induction via the 989 // loop backedge. 990 BasicBlock *Latch = L->getLoopLatch(); 991 if (!Latch) 992 return false; 993 Value *Val = PN->getIncomingValueForBlock(Latch); 994 if (!Val) 995 return false; 996 997 // Follow the def-use chain until the induction phi is reached. 998 // If on the way we encounter a Value that has the same SCEV Expr as the 999 // phi node, we can consider the instructions we visit from that point 1000 // as part of the cast-sequence that can be ignored. 1001 bool InCastSequence = false; 1002 auto *Inst = dyn_cast<Instruction>(Val); 1003 while (Val != PN) { 1004 // If we encountered a phi node other than PN, or if we left the loop, 1005 // we bail out. 1006 if (!Inst || !L->contains(Inst)) { 1007 return false; 1008 } 1009 auto *AddRec = dyn_cast<SCEVAddRecExpr>(PSE.getSCEV(Val)); 1010 if (AddRec && PSE.areAddRecsEqualWithPreds(AddRec, AR)) 1011 InCastSequence = true; 1012 if (InCastSequence) { 1013 // Only the last instruction in the cast sequence is expected to have 1014 // uses outside the induction def-use chain. 1015 if (!CastInsts.empty()) 1016 if (!Inst->hasOneUse()) 1017 return false; 1018 CastInsts.push_back(Inst); 1019 } 1020 Val = getDef(Val); 1021 if (!Val) 1022 return false; 1023 Inst = dyn_cast<Instruction>(Val); 1024 } 1025 1026 return InCastSequence; 1027 } 1028 1029 bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop, 1030 PredicatedScalarEvolution &PSE, 1031 InductionDescriptor &D, 1032 bool Assume) { 1033 Type *PhiTy = Phi->getType(); 1034 1035 // Handle integer and pointer inductions variables. 1036 // Now we handle also FP induction but not trying to make a 1037 // recurrent expression from the PHI node in-place. 1038 1039 if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() && 1040 !PhiTy->isFloatTy() && !PhiTy->isDoubleTy() && !PhiTy->isHalfTy()) 1041 return false; 1042 1043 if (PhiTy->isFloatingPointTy()) 1044 return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D); 1045 1046 const SCEV *PhiScev = PSE.getSCEV(Phi); 1047 const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev); 1048 1049 // We need this expression to be an AddRecExpr. 1050 if (Assume && !AR) 1051 AR = PSE.getAsAddRec(Phi); 1052 1053 if (!AR) { 1054 DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); 1055 return false; 1056 } 1057 1058 // Record any Cast instructions that participate in the induction update 1059 const auto *SymbolicPhi = dyn_cast<SCEVUnknown>(PhiScev); 1060 // If we started from an UnknownSCEV, and managed to build an addRecurrence 1061 // only after enabling Assume with PSCEV, this means we may have encountered 1062 // cast instructions that required adding a runtime check in order to 1063 // guarantee the correctness of the AddRecurence respresentation of the 1064 // induction. 1065 if (PhiScev != AR && SymbolicPhi) { 1066 SmallVector<Instruction *, 2> Casts; 1067 if (getCastsForInductionPHI(PSE, SymbolicPhi, AR, Casts)) 1068 return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR, &Casts); 1069 } 1070 1071 return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR); 1072 } 1073 1074 bool InductionDescriptor::isInductionPHI( 1075 PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE, 1076 InductionDescriptor &D, const SCEV *Expr, 1077 SmallVectorImpl<Instruction *> *CastsToIgnore) { 1078 Type *PhiTy = Phi->getType(); 1079 // We only handle integer and pointer inductions variables. 1080 if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy()) 1081 return false; 1082 1083 // Check that the PHI is consecutive. 1084 const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi); 1085 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev); 1086 1087 if (!AR) { 1088 DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); 1089 return false; 1090 } 1091 1092 if (AR->getLoop() != TheLoop) { 1093 // FIXME: We should treat this as a uniform. Unfortunately, we 1094 // don't currently know how to handled uniform PHIs. 1095 DEBUG(dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n"); 1096 return false; 1097 } 1098 1099 Value *StartValue = 1100 Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader()); 1101 const SCEV *Step = AR->getStepRecurrence(*SE); 1102 // Calculate the pointer stride and check if it is consecutive. 1103 // The stride may be a constant or a loop invariant integer value. 1104 const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step); 1105 if (!ConstStep && !SE->isLoopInvariant(Step, TheLoop)) 1106 return false; 1107 1108 if (PhiTy->isIntegerTy()) { 1109 D = InductionDescriptor(StartValue, IK_IntInduction, Step, /*BOp=*/ nullptr, 1110 CastsToIgnore); 1111 return true; 1112 } 1113 1114 assert(PhiTy->isPointerTy() && "The PHI must be a pointer"); 1115 // Pointer induction should be a constant. 1116 if (!ConstStep) 1117 return false; 1118 1119 ConstantInt *CV = ConstStep->getValue(); 1120 Type *PointerElementType = PhiTy->getPointerElementType(); 1121 // The pointer stride cannot be determined if the pointer element type is not 1122 // sized. 1123 if (!PointerElementType->isSized()) 1124 return false; 1125 1126 const DataLayout &DL = Phi->getModule()->getDataLayout(); 1127 int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType)); 1128 if (!Size) 1129 return false; 1130 1131 int64_t CVSize = CV->getSExtValue(); 1132 if (CVSize % Size) 1133 return false; 1134 auto *StepValue = SE->getConstant(CV->getType(), CVSize / Size, 1135 true /* signed */); 1136 D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue); 1137 return true; 1138 } 1139 1140 bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, 1141 bool PreserveLCSSA) { 1142 bool Changed = false; 1143 1144 // We re-use a vector for the in-loop predecesosrs. 1145 SmallVector<BasicBlock *, 4> InLoopPredecessors; 1146 1147 auto RewriteExit = [&](BasicBlock *BB) { 1148 assert(InLoopPredecessors.empty() && 1149 "Must start with an empty predecessors list!"); 1150 auto Cleanup = make_scope_exit([&] { InLoopPredecessors.clear(); }); 1151 1152 // See if there are any non-loop predecessors of this exit block and 1153 // keep track of the in-loop predecessors. 1154 bool IsDedicatedExit = true; 1155 for (auto *PredBB : predecessors(BB)) 1156 if (L->contains(PredBB)) { 1157 if (isa<IndirectBrInst>(PredBB->getTerminator())) 1158 // We cannot rewrite exiting edges from an indirectbr. 1159 return false; 1160 1161 InLoopPredecessors.push_back(PredBB); 1162 } else { 1163 IsDedicatedExit = false; 1164 } 1165 1166 assert(!InLoopPredecessors.empty() && "Must have *some* loop predecessor!"); 1167 1168 // Nothing to do if this is already a dedicated exit. 1169 if (IsDedicatedExit) 1170 return false; 1171 1172 auto *NewExitBB = SplitBlockPredecessors( 1173 BB, InLoopPredecessors, ".loopexit", DT, LI, PreserveLCSSA); 1174 1175 if (!NewExitBB) 1176 DEBUG(dbgs() << "WARNING: Can't create a dedicated exit block for loop: " 1177 << *L << "\n"); 1178 else 1179 DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block " 1180 << NewExitBB->getName() << "\n"); 1181 return true; 1182 }; 1183 1184 // Walk the exit blocks directly rather than building up a data structure for 1185 // them, but only visit each one once. 1186 SmallPtrSet<BasicBlock *, 4> Visited; 1187 for (auto *BB : L->blocks()) 1188 for (auto *SuccBB : successors(BB)) { 1189 // We're looking for exit blocks so skip in-loop successors. 1190 if (L->contains(SuccBB)) 1191 continue; 1192 1193 // Visit each exit block exactly once. 1194 if (!Visited.insert(SuccBB).second) 1195 continue; 1196 1197 Changed |= RewriteExit(SuccBB); 1198 } 1199 1200 return Changed; 1201 } 1202 1203 /// \brief Returns the instructions that use values defined in the loop. 1204 SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) { 1205 SmallVector<Instruction *, 8> UsedOutside; 1206 1207 for (auto *Block : L->getBlocks()) 1208 // FIXME: I believe that this could use copy_if if the Inst reference could 1209 // be adapted into a pointer. 1210 for (auto &Inst : *Block) { 1211 auto Users = Inst.users(); 1212 if (any_of(Users, [&](User *U) { 1213 auto *Use = cast<Instruction>(U); 1214 return !L->contains(Use->getParent()); 1215 })) 1216 UsedOutside.push_back(&Inst); 1217 } 1218 1219 return UsedOutside; 1220 } 1221 1222 void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) { 1223 // By definition, all loop passes need the LoopInfo analysis and the 1224 // Dominator tree it depends on. Because they all participate in the loop 1225 // pass manager, they must also preserve these. 1226 AU.addRequired<DominatorTreeWrapperPass>(); 1227 AU.addPreserved<DominatorTreeWrapperPass>(); 1228 AU.addRequired<LoopInfoWrapperPass>(); 1229 AU.addPreserved<LoopInfoWrapperPass>(); 1230 1231 // We must also preserve LoopSimplify and LCSSA. We locally access their IDs 1232 // here because users shouldn't directly get them from this header. 1233 extern char &LoopSimplifyID; 1234 extern char &LCSSAID; 1235 AU.addRequiredID(LoopSimplifyID); 1236 AU.addPreservedID(LoopSimplifyID); 1237 AU.addRequiredID(LCSSAID); 1238 AU.addPreservedID(LCSSAID); 1239 // This is used in the LPPassManager to perform LCSSA verification on passes 1240 // which preserve lcssa form 1241 AU.addRequired<LCSSAVerificationPass>(); 1242 AU.addPreserved<LCSSAVerificationPass>(); 1243 1244 // Loop passes are designed to run inside of a loop pass manager which means 1245 // that any function analyses they require must be required by the first loop 1246 // pass in the manager (so that it is computed before the loop pass manager 1247 // runs) and preserved by all loop pasess in the manager. To make this 1248 // reasonably robust, the set needed for most loop passes is maintained here. 1249 // If your loop pass requires an analysis not listed here, you will need to 1250 // carefully audit the loop pass manager nesting structure that results. 1251 AU.addRequired<AAResultsWrapperPass>(); 1252 AU.addPreserved<AAResultsWrapperPass>(); 1253 AU.addPreserved<BasicAAWrapperPass>(); 1254 AU.addPreserved<GlobalsAAWrapperPass>(); 1255 AU.addPreserved<SCEVAAWrapperPass>(); 1256 AU.addRequired<ScalarEvolutionWrapperPass>(); 1257 AU.addPreserved<ScalarEvolutionWrapperPass>(); 1258 } 1259 1260 /// Manually defined generic "LoopPass" dependency initialization. This is used 1261 /// to initialize the exact set of passes from above in \c 1262 /// getLoopAnalysisUsage. It can be used within a loop pass's initialization 1263 /// with: 1264 /// 1265 /// INITIALIZE_PASS_DEPENDENCY(LoopPass) 1266 /// 1267 /// As-if "LoopPass" were a pass. 1268 void llvm::initializeLoopPassPass(PassRegistry &Registry) { 1269 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 1270 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 1271 INITIALIZE_PASS_DEPENDENCY(LoopSimplify) 1272 INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass) 1273 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 1274 INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass) 1275 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) 1276 INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass) 1277 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 1278 } 1279 1280 /// \brief Find string metadata for loop 1281 /// 1282 /// If it has a value (e.g. {"llvm.distribute", 1} return the value as an 1283 /// operand or null otherwise. If the string metadata is not found return 1284 /// Optional's not-a-value. 1285 Optional<const MDOperand *> llvm::findStringMetadataForLoop(Loop *TheLoop, 1286 StringRef Name) { 1287 MDNode *LoopID = TheLoop->getLoopID(); 1288 // Return none if LoopID is false. 1289 if (!LoopID) 1290 return None; 1291 1292 // First operand should refer to the loop id itself. 1293 assert(LoopID->getNumOperands() > 0 && "requires at least one operand"); 1294 assert(LoopID->getOperand(0) == LoopID && "invalid loop id"); 1295 1296 // Iterate over LoopID operands and look for MDString Metadata 1297 for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) { 1298 MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i)); 1299 if (!MD) 1300 continue; 1301 MDString *S = dyn_cast<MDString>(MD->getOperand(0)); 1302 if (!S) 1303 continue; 1304 // Return true if MDString holds expected MetaData. 1305 if (Name.equals(S->getString())) 1306 switch (MD->getNumOperands()) { 1307 case 1: 1308 return nullptr; 1309 case 2: 1310 return &MD->getOperand(1); 1311 default: 1312 llvm_unreachable("loop metadata has 0 or 1 operand"); 1313 } 1314 } 1315 return None; 1316 } 1317 1318 /// Does a BFS from a given node to all of its children inside a given loop. 1319 /// The returned vector of nodes includes the starting point. 1320 SmallVector<DomTreeNode *, 16> 1321 llvm::collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop) { 1322 SmallVector<DomTreeNode *, 16> Worklist; 1323 auto AddRegionToWorklist = [&](DomTreeNode *DTN) { 1324 // Only include subregions in the top level loop. 1325 BasicBlock *BB = DTN->getBlock(); 1326 if (CurLoop->contains(BB)) 1327 Worklist.push_back(DTN); 1328 }; 1329 1330 AddRegionToWorklist(N); 1331 1332 for (size_t I = 0; I < Worklist.size(); I++) 1333 for (DomTreeNode *Child : Worklist[I]->getChildren()) 1334 AddRegionToWorklist(Child); 1335 1336 return Worklist; 1337 } 1338 1339 void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr, 1340 ScalarEvolution *SE = nullptr, 1341 LoopInfo *LI = nullptr) { 1342 assert((!DT || L->isLCSSAForm(*DT)) && "Expected LCSSA!"); 1343 auto *Preheader = L->getLoopPreheader(); 1344 assert(Preheader && "Preheader should exist!"); 1345 1346 // Now that we know the removal is safe, remove the loop by changing the 1347 // branch from the preheader to go to the single exit block. 1348 // 1349 // Because we're deleting a large chunk of code at once, the sequence in which 1350 // we remove things is very important to avoid invalidation issues. 1351 1352 // Tell ScalarEvolution that the loop is deleted. Do this before 1353 // deleting the loop so that ScalarEvolution can look at the loop 1354 // to determine what it needs to clean up. 1355 if (SE) 1356 SE->forgetLoop(L); 1357 1358 auto *ExitBlock = L->getUniqueExitBlock(); 1359 assert(ExitBlock && "Should have a unique exit block!"); 1360 assert(L->hasDedicatedExits() && "Loop should have dedicated exits!"); 1361 1362 auto *OldBr = dyn_cast<BranchInst>(Preheader->getTerminator()); 1363 assert(OldBr && "Preheader must end with a branch"); 1364 assert(OldBr->isUnconditional() && "Preheader must have a single successor"); 1365 // Connect the preheader to the exit block. Keep the old edge to the header 1366 // around to perform the dominator tree update in two separate steps 1367 // -- #1 insertion of the edge preheader -> exit and #2 deletion of the edge 1368 // preheader -> header. 1369 // 1370 // 1371 // 0. Preheader 1. Preheader 2. Preheader 1372 // | | | | 1373 // V | V | 1374 // Header <--\ | Header <--\ | Header <--\ 1375 // | | | | | | | | | | | 1376 // | V | | | V | | | V | 1377 // | Body --/ | | Body --/ | | Body --/ 1378 // V V V V V 1379 // Exit Exit Exit 1380 // 1381 // By doing this is two separate steps we can perform the dominator tree 1382 // update without using the batch update API. 1383 // 1384 // Even when the loop is never executed, we cannot remove the edge from the 1385 // source block to the exit block. Consider the case where the unexecuted loop 1386 // branches back to an outer loop. If we deleted the loop and removed the edge 1387 // coming to this inner loop, this will break the outer loop structure (by 1388 // deleting the backedge of the outer loop). If the outer loop is indeed a 1389 // non-loop, it will be deleted in a future iteration of loop deletion pass. 1390 IRBuilder<> Builder(OldBr); 1391 Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock); 1392 // Remove the old branch. The conditional branch becomes a new terminator. 1393 OldBr->eraseFromParent(); 1394 1395 // Rewrite phis in the exit block to get their inputs from the Preheader 1396 // instead of the exiting block. 1397 for (PHINode &P : ExitBlock->phis()) { 1398 // Set the zero'th element of Phi to be from the preheader and remove all 1399 // other incoming values. Given the loop has dedicated exits, all other 1400 // incoming values must be from the exiting blocks. 1401 int PredIndex = 0; 1402 P.setIncomingBlock(PredIndex, Preheader); 1403 // Removes all incoming values from all other exiting blocks (including 1404 // duplicate values from an exiting block). 1405 // Nuke all entries except the zero'th entry which is the preheader entry. 1406 // NOTE! We need to remove Incoming Values in the reverse order as done 1407 // below, to keep the indices valid for deletion (removeIncomingValues 1408 // updates getNumIncomingValues and shifts all values down into the operand 1409 // being deleted). 1410 for (unsigned i = 0, e = P.getNumIncomingValues() - 1; i != e; ++i) 1411 P.removeIncomingValue(e - i, false); 1412 1413 assert((P.getNumIncomingValues() == 1 && 1414 P.getIncomingBlock(PredIndex) == Preheader) && 1415 "Should have exactly one value and that's from the preheader!"); 1416 } 1417 1418 // Disconnect the loop body by branching directly to its exit. 1419 Builder.SetInsertPoint(Preheader->getTerminator()); 1420 Builder.CreateBr(ExitBlock); 1421 // Remove the old branch. 1422 Preheader->getTerminator()->eraseFromParent(); 1423 1424 if (DT) { 1425 // Update the dominator tree by informing it about the new edge from the 1426 // preheader to the exit. 1427 DT->insertEdge(Preheader, ExitBlock); 1428 // Inform the dominator tree about the removed edge. 1429 DT->deleteEdge(Preheader, L->getHeader()); 1430 } 1431 1432 // Given LCSSA form is satisfied, we should not have users of instructions 1433 // within the dead loop outside of the loop. However, LCSSA doesn't take 1434 // unreachable uses into account. We handle them here. 1435 // We could do it after drop all references (in this case all users in the 1436 // loop will be already eliminated and we have less work to do but according 1437 // to API doc of User::dropAllReferences only valid operation after dropping 1438 // references, is deletion. So let's substitute all usages of 1439 // instruction from the loop with undef value of corresponding type first. 1440 for (auto *Block : L->blocks()) 1441 for (Instruction &I : *Block) { 1442 auto *Undef = UndefValue::get(I.getType()); 1443 for (Value::use_iterator UI = I.use_begin(), E = I.use_end(); UI != E;) { 1444 Use &U = *UI; 1445 ++UI; 1446 if (auto *Usr = dyn_cast<Instruction>(U.getUser())) 1447 if (L->contains(Usr->getParent())) 1448 continue; 1449 // If we have a DT then we can check that uses outside a loop only in 1450 // unreachable block. 1451 if (DT) 1452 assert(!DT->isReachableFromEntry(U) && 1453 "Unexpected user in reachable block"); 1454 U.set(Undef); 1455 } 1456 } 1457 1458 // Remove the block from the reference counting scheme, so that we can 1459 // delete it freely later. 1460 for (auto *Block : L->blocks()) 1461 Block->dropAllReferences(); 1462 1463 if (LI) { 1464 // Erase the instructions and the blocks without having to worry 1465 // about ordering because we already dropped the references. 1466 // NOTE: This iteration is safe because erasing the block does not remove 1467 // its entry from the loop's block list. We do that in the next section. 1468 for (Loop::block_iterator LpI = L->block_begin(), LpE = L->block_end(); 1469 LpI != LpE; ++LpI) 1470 (*LpI)->eraseFromParent(); 1471 1472 // Finally, the blocks from loopinfo. This has to happen late because 1473 // otherwise our loop iterators won't work. 1474 1475 SmallPtrSet<BasicBlock *, 8> blocks; 1476 blocks.insert(L->block_begin(), L->block_end()); 1477 for (BasicBlock *BB : blocks) 1478 LI->removeBlock(BB); 1479 1480 // The last step is to update LoopInfo now that we've eliminated this loop. 1481 LI->erase(L); 1482 } 1483 } 1484 1485 /// Computes loop safety information, checks loop body & header 1486 /// for the possibility of may throw exception. 1487 /// 1488 void llvm::computeLoopSafetyInfo(LoopSafetyInfo *SafetyInfo, Loop *CurLoop) { 1489 assert(CurLoop != nullptr && "CurLoop cant be null"); 1490 BasicBlock *Header = CurLoop->getHeader(); 1491 // Setting default safety values. 1492 SafetyInfo->MayThrow = false; 1493 SafetyInfo->HeaderMayThrow = false; 1494 // Iterate over header and compute safety info. 1495 SafetyInfo->HeaderMayThrow = 1496 !isGuaranteedToTransferExecutionToSuccessor(Header); 1497 1498 SafetyInfo->MayThrow = SafetyInfo->HeaderMayThrow; 1499 // Iterate over loop instructions and compute safety info. 1500 // Skip header as it has been computed and stored in HeaderMayThrow. 1501 // The first block in loopinfo.Blocks is guaranteed to be the header. 1502 assert(Header == *CurLoop->getBlocks().begin() && 1503 "First block must be header"); 1504 for (Loop::block_iterator BB = std::next(CurLoop->block_begin()), 1505 BBE = CurLoop->block_end(); 1506 (BB != BBE) && !SafetyInfo->MayThrow; ++BB) 1507 SafetyInfo->MayThrow |= 1508 !isGuaranteedToTransferExecutionToSuccessor(*BB); 1509 1510 // Compute funclet colors if we might sink/hoist in a function with a funclet 1511 // personality routine. 1512 Function *Fn = CurLoop->getHeader()->getParent(); 1513 if (Fn->hasPersonalityFn()) 1514 if (Constant *PersonalityFn = Fn->getPersonalityFn()) 1515 if (isFuncletEHPersonality(classifyEHPersonality(PersonalityFn))) 1516 SafetyInfo->BlockColors = colorEHFunclets(*Fn); 1517 } 1518 1519 /// Return true if we can prove that the given ExitBlock is not reached on the 1520 /// first iteration of the given loop. That is, the backedge of the loop must 1521 /// be executed before the ExitBlock is executed in any dynamic execution trace. 1522 static bool CanProveNotTakenFirstIteration(BasicBlock *ExitBlock, 1523 const DominatorTree *DT, 1524 const Loop *CurLoop) { 1525 auto *CondExitBlock = ExitBlock->getSinglePredecessor(); 1526 if (!CondExitBlock) 1527 // expect unique exits 1528 return false; 1529 assert(CurLoop->contains(CondExitBlock) && "meaning of exit block"); 1530 auto *BI = dyn_cast<BranchInst>(CondExitBlock->getTerminator()); 1531 if (!BI || !BI->isConditional()) 1532 return false; 1533 auto *Cond = dyn_cast<CmpInst>(BI->getCondition()); 1534 if (!Cond) 1535 return false; 1536 // todo: this would be a lot more powerful if we used scev, but all the 1537 // plumbing is currently missing to pass a pointer in from the pass 1538 // Check for cmp (phi [x, preheader] ...), y where (pred x, y is known 1539 auto *LHS = dyn_cast<PHINode>(Cond->getOperand(0)); 1540 auto *RHS = Cond->getOperand(1); 1541 if (!LHS || LHS->getParent() != CurLoop->getHeader()) 1542 return false; 1543 auto DL = ExitBlock->getModule()->getDataLayout(); 1544 auto *IVStart = LHS->getIncomingValueForBlock(CurLoop->getLoopPreheader()); 1545 auto *SimpleValOrNull = SimplifyCmpInst(Cond->getPredicate(), 1546 IVStart, RHS, 1547 {DL, /*TLI*/ nullptr, 1548 DT, /*AC*/ nullptr, BI}); 1549 auto *SimpleCst = dyn_cast_or_null<Constant>(SimpleValOrNull); 1550 if (!SimpleCst) 1551 return false; 1552 if (ExitBlock == BI->getSuccessor(0)) 1553 return SimpleCst->isZeroValue(); 1554 assert(ExitBlock == BI->getSuccessor(1) && "implied by above"); 1555 return SimpleCst->isAllOnesValue(); 1556 } 1557 1558 /// Returns true if the instruction in a loop is guaranteed to execute at least 1559 /// once. 1560 bool llvm::isGuaranteedToExecute(const Instruction &Inst, 1561 const DominatorTree *DT, const Loop *CurLoop, 1562 const LoopSafetyInfo *SafetyInfo) { 1563 // We have to check to make sure that the instruction dominates all 1564 // of the exit blocks. If it doesn't, then there is a path out of the loop 1565 // which does not execute this instruction, so we can't hoist it. 1566 1567 // If the instruction is in the header block for the loop (which is very 1568 // common), it is always guaranteed to dominate the exit blocks. Since this 1569 // is a common case, and can save some work, check it now. 1570 if (Inst.getParent() == CurLoop->getHeader()) 1571 // If there's a throw in the header block, we can't guarantee we'll reach 1572 // Inst. 1573 return !SafetyInfo->HeaderMayThrow; 1574 1575 // Somewhere in this loop there is an instruction which may throw and make us 1576 // exit the loop. 1577 if (SafetyInfo->MayThrow) 1578 return false; 1579 1580 // Note: There are two styles of reasoning intermixed below for 1581 // implementation efficiency reasons. They are: 1582 // 1) If we can prove that the instruction dominates all exit blocks, then we 1583 // know the instruction must have executed on *some* iteration before we 1584 // exit. We do not prove *which* iteration the instruction must execute on. 1585 // 2) If we can prove that the instruction dominates the latch and all exits 1586 // which might be taken on the first iteration, we know the instruction must 1587 // execute on the first iteration. This second style allows a conditional 1588 // exit before the instruction of interest which is provably not taken on the 1589 // first iteration. This is a quite common case for range check like 1590 // patterns. TODO: support loops with multiple latches. 1591 1592 const bool InstDominatesLatch = 1593 CurLoop->getLoopLatch() != nullptr && 1594 DT->dominates(Inst.getParent(), CurLoop->getLoopLatch()); 1595 1596 // Get the exit blocks for the current loop. 1597 SmallVector<BasicBlock *, 8> ExitBlocks; 1598 CurLoop->getExitBlocks(ExitBlocks); 1599 1600 // Verify that the block dominates each of the exit blocks of the loop. 1601 for (BasicBlock *ExitBlock : ExitBlocks) 1602 if (!DT->dominates(Inst.getParent(), ExitBlock)) 1603 if (!InstDominatesLatch || 1604 !CanProveNotTakenFirstIteration(ExitBlock, DT, CurLoop)) 1605 return false; 1606 1607 // As a degenerate case, if the loop is statically infinite then we haven't 1608 // proven anything since there are no exit blocks. 1609 if (ExitBlocks.empty()) 1610 return false; 1611 1612 // FIXME: In general, we have to prove that the loop isn't an infinite loop. 1613 // See http::llvm.org/PR24078 . (The "ExitBlocks.empty()" check above is 1614 // just a special case of this.) 1615 return true; 1616 } 1617 1618 Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) { 1619 // Only support loops with a unique exiting block, and a latch. 1620 if (!L->getExitingBlock()) 1621 return None; 1622 1623 // Get the branch weights for the loop's backedge. 1624 BranchInst *LatchBR = 1625 dyn_cast<BranchInst>(L->getLoopLatch()->getTerminator()); 1626 if (!LatchBR || LatchBR->getNumSuccessors() != 2) 1627 return None; 1628 1629 assert((LatchBR->getSuccessor(0) == L->getHeader() || 1630 LatchBR->getSuccessor(1) == L->getHeader()) && 1631 "At least one edge out of the latch must go to the header"); 1632 1633 // To estimate the number of times the loop body was executed, we want to 1634 // know the number of times the backedge was taken, vs. the number of times 1635 // we exited the loop. 1636 uint64_t TrueVal, FalseVal; 1637 if (!LatchBR->extractProfMetadata(TrueVal, FalseVal)) 1638 return None; 1639 1640 if (!TrueVal || !FalseVal) 1641 return 0; 1642 1643 // Divide the count of the backedge by the count of the edge exiting the loop, 1644 // rounding to nearest. 1645 if (LatchBR->getSuccessor(0) == L->getHeader()) 1646 return (TrueVal + (FalseVal / 2)) / FalseVal; 1647 else 1648 return (FalseVal + (TrueVal / 2)) / TrueVal; 1649 } 1650 1651 /// \brief Adds a 'fast' flag to floating point operations. 1652 static Value *addFastMathFlag(Value *V) { 1653 if (isa<FPMathOperator>(V)) { 1654 FastMathFlags Flags; 1655 Flags.setFast(); 1656 cast<Instruction>(V)->setFastMathFlags(Flags); 1657 } 1658 return V; 1659 } 1660 1661 // Helper to generate a log2 shuffle reduction. 1662 Value * 1663 llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op, 1664 RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind, 1665 ArrayRef<Value *> RedOps) { 1666 unsigned VF = Src->getType()->getVectorNumElements(); 1667 // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles 1668 // and vector ops, reducing the set of values being computed by half each 1669 // round. 1670 assert(isPowerOf2_32(VF) && 1671 "Reduction emission only supported for pow2 vectors!"); 1672 Value *TmpVec = Src; 1673 SmallVector<Constant *, 32> ShuffleMask(VF, nullptr); 1674 for (unsigned i = VF; i != 1; i >>= 1) { 1675 // Move the upper half of the vector to the lower half. 1676 for (unsigned j = 0; j != i / 2; ++j) 1677 ShuffleMask[j] = Builder.getInt32(i / 2 + j); 1678 1679 // Fill the rest of the mask with undef. 1680 std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), 1681 UndefValue::get(Builder.getInt32Ty())); 1682 1683 Value *Shuf = Builder.CreateShuffleVector( 1684 TmpVec, UndefValue::get(TmpVec->getType()), 1685 ConstantVector::get(ShuffleMask), "rdx.shuf"); 1686 1687 if (Op != Instruction::ICmp && Op != Instruction::FCmp) { 1688 // Floating point operations had to be 'fast' to enable the reduction. 1689 TmpVec = addFastMathFlag(Builder.CreateBinOp((Instruction::BinaryOps)Op, 1690 TmpVec, Shuf, "bin.rdx")); 1691 } else { 1692 assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid && 1693 "Invalid min/max"); 1694 TmpVec = RecurrenceDescriptor::createMinMaxOp(Builder, MinMaxKind, TmpVec, 1695 Shuf); 1696 } 1697 if (!RedOps.empty()) 1698 propagateIRFlags(TmpVec, RedOps); 1699 } 1700 // The result is in the first element of the vector. 1701 return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0)); 1702 } 1703 1704 /// Create a simple vector reduction specified by an opcode and some 1705 /// flags (if generating min/max reductions). 1706 Value *llvm::createSimpleTargetReduction( 1707 IRBuilder<> &Builder, const TargetTransformInfo *TTI, unsigned Opcode, 1708 Value *Src, TargetTransformInfo::ReductionFlags Flags, 1709 ArrayRef<Value *> RedOps) { 1710 assert(isa<VectorType>(Src->getType()) && "Type must be a vector"); 1711 1712 Value *ScalarUdf = UndefValue::get(Src->getType()->getVectorElementType()); 1713 std::function<Value*()> BuildFunc; 1714 using RD = RecurrenceDescriptor; 1715 RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid; 1716 // TODO: Support creating ordered reductions. 1717 FastMathFlags FMFFast; 1718 FMFFast.setFast(); 1719 1720 switch (Opcode) { 1721 case Instruction::Add: 1722 BuildFunc = [&]() { return Builder.CreateAddReduce(Src); }; 1723 break; 1724 case Instruction::Mul: 1725 BuildFunc = [&]() { return Builder.CreateMulReduce(Src); }; 1726 break; 1727 case Instruction::And: 1728 BuildFunc = [&]() { return Builder.CreateAndReduce(Src); }; 1729 break; 1730 case Instruction::Or: 1731 BuildFunc = [&]() { return Builder.CreateOrReduce(Src); }; 1732 break; 1733 case Instruction::Xor: 1734 BuildFunc = [&]() { return Builder.CreateXorReduce(Src); }; 1735 break; 1736 case Instruction::FAdd: 1737 BuildFunc = [&]() { 1738 auto Rdx = Builder.CreateFAddReduce(ScalarUdf, Src); 1739 cast<CallInst>(Rdx)->setFastMathFlags(FMFFast); 1740 return Rdx; 1741 }; 1742 break; 1743 case Instruction::FMul: 1744 BuildFunc = [&]() { 1745 auto Rdx = Builder.CreateFMulReduce(ScalarUdf, Src); 1746 cast<CallInst>(Rdx)->setFastMathFlags(FMFFast); 1747 return Rdx; 1748 }; 1749 break; 1750 case Instruction::ICmp: 1751 if (Flags.IsMaxOp) { 1752 MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMax : RD::MRK_UIntMax; 1753 BuildFunc = [&]() { 1754 return Builder.CreateIntMaxReduce(Src, Flags.IsSigned); 1755 }; 1756 } else { 1757 MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMin : RD::MRK_UIntMin; 1758 BuildFunc = [&]() { 1759 return Builder.CreateIntMinReduce(Src, Flags.IsSigned); 1760 }; 1761 } 1762 break; 1763 case Instruction::FCmp: 1764 if (Flags.IsMaxOp) { 1765 MinMaxKind = RD::MRK_FloatMax; 1766 BuildFunc = [&]() { return Builder.CreateFPMaxReduce(Src, Flags.NoNaN); }; 1767 } else { 1768 MinMaxKind = RD::MRK_FloatMin; 1769 BuildFunc = [&]() { return Builder.CreateFPMinReduce(Src, Flags.NoNaN); }; 1770 } 1771 break; 1772 default: 1773 llvm_unreachable("Unhandled opcode"); 1774 break; 1775 } 1776 if (TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags)) 1777 return BuildFunc(); 1778 return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, RedOps); 1779 } 1780 1781 /// Create a vector reduction using a given recurrence descriptor. 1782 Value *llvm::createTargetReduction(IRBuilder<> &B, 1783 const TargetTransformInfo *TTI, 1784 RecurrenceDescriptor &Desc, Value *Src, 1785 bool NoNaN) { 1786 // TODO: Support in-order reductions based on the recurrence descriptor. 1787 using RD = RecurrenceDescriptor; 1788 RD::RecurrenceKind RecKind = Desc.getRecurrenceKind(); 1789 TargetTransformInfo::ReductionFlags Flags; 1790 Flags.NoNaN = NoNaN; 1791 switch (RecKind) { 1792 case RD::RK_FloatAdd: 1793 return createSimpleTargetReduction(B, TTI, Instruction::FAdd, Src, Flags); 1794 case RD::RK_FloatMult: 1795 return createSimpleTargetReduction(B, TTI, Instruction::FMul, Src, Flags); 1796 case RD::RK_IntegerAdd: 1797 return createSimpleTargetReduction(B, TTI, Instruction::Add, Src, Flags); 1798 case RD::RK_IntegerMult: 1799 return createSimpleTargetReduction(B, TTI, Instruction::Mul, Src, Flags); 1800 case RD::RK_IntegerAnd: 1801 return createSimpleTargetReduction(B, TTI, Instruction::And, Src, Flags); 1802 case RD::RK_IntegerOr: 1803 return createSimpleTargetReduction(B, TTI, Instruction::Or, Src, Flags); 1804 case RD::RK_IntegerXor: 1805 return createSimpleTargetReduction(B, TTI, Instruction::Xor, Src, Flags); 1806 case RD::RK_IntegerMinMax: { 1807 RD::MinMaxRecurrenceKind MMKind = Desc.getMinMaxRecurrenceKind(); 1808 Flags.IsMaxOp = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_UIntMax); 1809 Flags.IsSigned = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_SIntMin); 1810 return createSimpleTargetReduction(B, TTI, Instruction::ICmp, Src, Flags); 1811 } 1812 case RD::RK_FloatMinMax: { 1813 Flags.IsMaxOp = Desc.getMinMaxRecurrenceKind() == RD::MRK_FloatMax; 1814 return createSimpleTargetReduction(B, TTI, Instruction::FCmp, Src, Flags); 1815 } 1816 default: 1817 llvm_unreachable("Unhandled RecKind"); 1818 } 1819 } 1820 1821 void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) { 1822 auto *VecOp = dyn_cast<Instruction>(I); 1823 if (!VecOp) 1824 return; 1825 auto *Intersection = (OpValue == nullptr) ? dyn_cast<Instruction>(VL[0]) 1826 : dyn_cast<Instruction>(OpValue); 1827 if (!Intersection) 1828 return; 1829 const unsigned Opcode = Intersection->getOpcode(); 1830 VecOp->copyIRFlags(Intersection); 1831 for (auto *V : VL) { 1832 auto *Instr = dyn_cast<Instruction>(V); 1833 if (!Instr) 1834 continue; 1835 if (OpValue == nullptr || Opcode == Instr->getOpcode()) 1836 VecOp->andIRFlags(V); 1837 } 1838 } 1839