//===-- LoopUtils.cpp - Loop Utility functions -------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines common loop utility functions.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "loop-utils"

bool RecurrenceDescriptor::areAllUsesIn(Instruction *I,
                                        SmallPtrSetImpl<Instruction *> &Set) {
  for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use)
    if (!Set.count(dyn_cast<Instruction>(*Use)))
      return false;
  return true;
}

bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurrenceKind Kind) {
  switch (Kind) {
  default:
    break;
  case RK_IntegerAdd:
  case RK_IntegerMult:
  case RK_IntegerOr:
  case RK_IntegerAnd:
  case RK_IntegerXor:
  case RK_IntegerMinMax:
    return true;
  }
  return false;
}

bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurrenceKind Kind) {
  return (Kind != RK_NoRecurrence) && !isIntegerRecurrenceKind(Kind);
}

bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurrenceKind Kind) {
  switch (Kind) {
  default:
    break;
  case RK_IntegerAdd:
  case RK_IntegerMult:
  case RK_FloatAdd:
  case RK_FloatMult:
    return true;
  }
  return false;
}

/// Determines if Phi may have been type-promoted. If Phi has a single user
/// that ANDs the Phi with a type mask, return the user. RT is updated to
/// account for the narrower bit width represented by the mask, and the AND
/// instruction is added to CI.
static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT,
                                   SmallPtrSetImpl<Instruction *> &Visited,
                                   SmallPtrSetImpl<Instruction *> &CI) {
  if (!Phi->hasOneUse())
    return Phi;

  const APInt *M = nullptr;
  Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser());

  // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT
  // with a new integer type of the corresponding bit width.
  if (match(J, m_c_And(m_Instruction(I), m_APInt(M)))) {
    int32_t Bits = (*M + 1).exactLogBase2();
    if (Bits > 0) {
      RT = IntegerType::get(Phi->getContext(), Bits);
      Visited.insert(Phi);
      CI.insert(J);
      return J;
    }
  }
  return Phi;
}
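
// Illustrative example (hypothetical IR, not from a test case): for a
// recurrence that InstCombine has promoted from i8 to i32, such as
//
//   %sum = phi i32 [ 0, %preheader ], [ %sum.next, %body ]
//   %sum.masked = and i32 %sum, 255
//   %sum.next = add i32 %sum.masked, %val
//
// lookThroughAnd matches the phi's single 'and' user with mask 2^8-1,
// narrows RT to i8, records the 'and' in CI, and returns it as the new
// starting point of the reduction scan.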

/// Compute the minimal bit width needed to represent a reduction whose exit
/// instruction is given by Exit.
static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit,
                                                     DemandedBits *DB,
                                                     AssumptionCache *AC,
                                                     DominatorTree *DT) {
  bool IsSigned = false;
  const DataLayout &DL = Exit->getModule()->getDataLayout();
  uint64_t MaxBitWidth = DL.getTypeSizeInBits(Exit->getType());

  if (DB) {
    // Use the demanded bits analysis to determine the bits that are live out
    // of the exit instruction, rounding up to the nearest power of two. If the
    // use of demanded bits results in a smaller bit width, we know the value
    // must be positive (i.e., IsSigned = false), because if this were not the
    // case, the sign bit would have been demanded.
    auto Mask = DB->getDemandedBits(Exit);
    MaxBitWidth = Mask.getBitWidth() - Mask.countLeadingZeros();
  }

  if (MaxBitWidth == DL.getTypeSizeInBits(Exit->getType()) && AC && DT) {
    // If demanded bits wasn't able to limit the bit width, we can try to use
    // value tracking instead. This can be the case, for example, if the value
    // may be negative.
    auto NumSignBits = ComputeNumSignBits(Exit, DL, 0, AC, nullptr, DT);
    auto NumTypeBits = DL.getTypeSizeInBits(Exit->getType());
    MaxBitWidth = NumTypeBits - NumSignBits;
    KnownBits Bits = computeKnownBits(Exit, DL);
    if (!Bits.isNonNegative()) {
      // If the value is not known to be non-negative, we set IsSigned to true,
      // meaning that we will use sext instructions instead of zext
      // instructions to restore the original type.
      IsSigned = true;
      if (!Bits.isNegative())
        // If the value is not known to be negative, we don't know what the
        // upper bit is, and therefore, we don't know what kind of extend we
        // will need. In this case, just increase the bit width by one bit and
        // use sext.
        ++MaxBitWidth;
    }
  }
  if (!isPowerOf2_64(MaxBitWidth))
    MaxBitWidth = NextPowerOf2(MaxBitWidth);

  return std::make_pair(Type::getIntNTy(Exit->getContext(), MaxBitWidth),
                        IsSigned);
}
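
// Illustrative numbers (hypothetical): if only the low 8 bits of an i32 exit
// value are demanded, MaxBitWidth becomes 8 with IsSigned false, yielding i8.
// If instead value tracking proves 24 sign bits, MaxBitWidth is 32 - 24 = 8;
// should the value not be known non-negative, IsSigned is set and the width
// is bumped to 9, which then rounds up to the next power of two, 16.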

/// Collect cast instructions that can be ignored in the vectorizer's cost
/// model, given a reduction exit value and the minimal type in which the
/// reduction can be represented.
static void collectCastsToIgnore(Loop *TheLoop, Instruction *Exit,
                                 Type *RecurrenceType,
                                 SmallPtrSetImpl<Instruction *> &Casts) {

  SmallVector<Instruction *, 8> Worklist;
  SmallPtrSet<Instruction *, 8> Visited;
  Worklist.push_back(Exit);

  while (!Worklist.empty()) {
    Instruction *Val = Worklist.pop_back_val();
    Visited.insert(Val);
    if (auto *Cast = dyn_cast<CastInst>(Val))
      if (Cast->getSrcTy() == RecurrenceType) {
        // If the source type of a cast instruction is equal to the recurrence
        // type, it will be eliminated, and should be ignored in the vectorizer
        // cost model.
        Casts.insert(Cast);
        continue;
      }

    // Add all operands to the work list if they are loop-varying values that
    // we haven't yet visited.
    for (Value *O : cast<User>(Val)->operands())
      if (auto *I = dyn_cast<Instruction>(O))
        if (TheLoop->contains(I) && !Visited.count(I))
          Worklist.push_back(I);
  }
}

bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
                                           Loop *TheLoop, bool HasFunNoNaNAttr,
                                           RecurrenceDescriptor &RedDes,
                                           DemandedBits *DB,
                                           AssumptionCache *AC,
                                           DominatorTree *DT) {
  if (Phi->getNumIncomingValues() != 2)
    return false;

  // Reduction variables are only found in the loop header block.
  if (Phi->getParent() != TheLoop->getHeader())
    return false;

  // Obtain the reduction start value from the value that comes from the loop
  // preheader.
  Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());

  // ExitInstruction is the single value which is used outside the loop.
  // We only allow for a single reduction value to be used outside the loop.
  // This includes users of the reduction and the variables that form the
  // cycle ending in the phi node.
  Instruction *ExitInstruction = nullptr;
  // Indicates that we found a reduction operation in our scan.
  bool FoundReduxOp = false;

  // We start with the PHI node and scan for all of the users of this
  // instruction. All users must be instructions that can be used as reduction
  // variables (such as ADD). We must have a single out-of-block user. The
  // cycle must include the original PHI.
  bool FoundStartPHI = false;

  // To recognize min/max patterns formed by an icmp/select sequence, we store
  // the number of instructions we saw from the recognized min/max pattern,
  // to make sure we only see exactly the two instructions.
  unsigned NumCmpSelectPatternInst = 0;
  InstDesc ReduxDesc(false, nullptr);

  // Data used for determining if the recurrence has been type-promoted.
  Type *RecurrenceType = Phi->getType();
  SmallPtrSet<Instruction *, 4> CastInsts;
  Instruction *Start = Phi;
  bool IsSigned = false;

  SmallPtrSet<Instruction *, 8> VisitedInsts;
  SmallVector<Instruction *, 8> Worklist;

  // Return early if the recurrence kind does not match the type of Phi. If the
  // recurrence kind is arithmetic, we attempt to look through AND operations
  // resulting from the type promotion performed by InstCombine. Vector
  // operations are not limited to the legal integer widths, so we may be able
  // to evaluate the reduction in the narrower width.
  if (RecurrenceType->isFloatingPointTy()) {
    if (!isFloatingPointRecurrenceKind(Kind))
      return false;
  } else {
    if (!isIntegerRecurrenceKind(Kind))
      return false;
    if (isArithmeticRecurrenceKind(Kind))
      Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);
  }

  Worklist.push_back(Start);
  VisitedInsts.insert(Start);

  // A value in the reduction can be used:
  //  - By the reduction:
  //      - Reduction operation:
  //        - One use of the reduction value (safe).
  //        - Multiple uses of the reduction value (not safe).
  //      - PHI:
  //        - All uses of the PHI must be the reduction (safe).
  //        - Otherwise, not safe.
  //  - By instructions outside of the loop (safe).
  //      * One value may have several outside users, but all outside
  //        uses must be of the same value.
  //  - By an instruction that is not part of the reduction (not safe).
  //    This is either:
  //      * An instruction type other than PHI or the reduction operation.
  //      * A PHI in the header other than the initial PHI.
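
  // For example (hypothetical IR), in
  //
  //   %sum = phi i32 [ 0, %preheader ], [ %sum.next, %body ]
  //   %sum.next = add i32 %sum, %x
  //
  // the cycle %sum -> %sum.next -> %sum is safe: the add has one in-loop use
  // (the phi), and %sum.next may additionally be used outside the loop, in
  // which case it becomes ExitInstruction below.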
  while (!Worklist.empty()) {
    Instruction *Cur = Worklist.back();
    Worklist.pop_back();

    // No Users.
    // If the instruction has no users then this is a broken chain and can't be
    // a reduction variable.
    if (Cur->use_empty())
      return false;

    bool IsAPhi = isa<PHINode>(Cur);

    // A header PHI use other than the original PHI.
    if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent())
      return false;

    // Reductions of instructions such as Div and Sub are only possible if the
    // LHS is the reduction variable.
    if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) &&
        !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) &&
        !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))
      return false;

    // Any reduction instruction must be of one of the allowed kinds. We ignore
    // the starting value (the Phi or an AND instruction if the Phi has been
    // type-promoted).
    if (Cur != Start) {
      ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr);
      if (!ReduxDesc.isRecurrence())
        return false;
    }

    // A reduction operation must only have one use of the reduction value.
    if (!IsAPhi && Kind != RK_IntegerMinMax && Kind != RK_FloatMinMax &&
        hasMultipleUsesOf(Cur, VisitedInsts))
      return false;

    // All inputs to a PHI node must be a reduction value.
    if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts))
      return false;

    if (Kind == RK_IntegerMinMax &&
        (isa<ICmpInst>(Cur) || isa<SelectInst>(Cur)))
      ++NumCmpSelectPatternInst;
    if (Kind == RK_FloatMinMax && (isa<FCmpInst>(Cur) || isa<SelectInst>(Cur)))
      ++NumCmpSelectPatternInst;

    // Check whether we found a reduction operator.
    FoundReduxOp |= !IsAPhi && Cur != Start;

    // Process users of current instruction. Push non-PHI nodes after PHI nodes
    // onto the stack. This way we are going to have seen all inputs to PHI
    // nodes once we get to them.
    SmallVector<Instruction *, 8> NonPHIs;
    SmallVector<Instruction *, 8> PHIs;
    for (User *U : Cur->users()) {
      Instruction *UI = cast<Instruction>(U);

      // Check if we found the exit user.
      BasicBlock *Parent = UI->getParent();
      if (!TheLoop->contains(Parent)) {
        // If we already know this instruction is used externally, move on to
        // the next user.
        if (ExitInstruction == Cur)
          continue;

        // Exit if we find multiple values used outside or if the header phi
        // node is being used. In this case the user uses the value of the
        // previous iteration, and we would lose "VF-1" iterations of the
        // reduction operation if we vectorize.
        if (ExitInstruction != nullptr || Cur == Phi)
          return false;

        // The instruction used by an outside user must be the last instruction
        // before we feed back to the reduction phi. Otherwise, we lose VF-1
        // operations on the value.
        if (!is_contained(Phi->operands(), Cur))
          return false;

        ExitInstruction = Cur;
        continue;
      }

      // Process instructions only once (termination). Each reduction cycle
      // value must only be used once, except by phi nodes and min/max
      // reductions which are represented as a cmp followed by a select.
      InstDesc IgnoredVal(false, nullptr);
      if (VisitedInsts.insert(UI).second) {
        if (isa<PHINode>(UI))
          PHIs.push_back(UI);
        else
          NonPHIs.push_back(UI);
      } else if (!isa<PHINode>(UI) &&
                 ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) &&
                   !isa<SelectInst>(UI)) ||
                  !isMinMaxSelectCmpPattern(UI, IgnoredVal).isRecurrence()))
        return false;

      // Remember that we completed the cycle.
      if (UI == Phi)
        FoundStartPHI = true;
    }
    Worklist.append(PHIs.begin(), PHIs.end());
    Worklist.append(NonPHIs.begin(), NonPHIs.end());
  }

  // This means we have seen one but not the other instruction of the
  // pattern or more than just a select and cmp.
  if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) &&
      NumCmpSelectPatternInst != 2)
    return false;

  if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
    return false;

  if (Start != Phi) {
    // If the starting value is not the same as the phi node, we speculatively
    // looked through an 'and' instruction when evaluating a potential
    // arithmetic reduction to determine if it may have been type-promoted.
    //
    // We now compute the minimal bit width that is required to represent the
    // reduction. If this is the same width that was indicated by the 'and', we
    // can represent the reduction in the smaller type. The 'and' instruction
    // will be eliminated since it will essentially be a cast instruction that
    // can be ignored in the cost model. If we compute a different type than we
    // did when evaluating the 'and', the 'and' will not be eliminated, and we
    // will end up with different kinds of operations in the recurrence
    // expression (e.g., RK_IntegerAND, RK_IntegerADD). We give up if this is
    // the case.
    //
    // The vectorizer relies on InstCombine to perform the actual
    // type-shrinking. It does this by inserting instructions to truncate the
    // exit value of the reduction to the width indicated by RecurrenceType and
    // then extend this value back to the original width. If IsSigned is false,
    // a 'zext' instruction will be generated; otherwise, a 'sext' will be
    // used.
    //
    // TODO: We should not rely on InstCombine to rewrite the reduction in the
    //       smaller type. We should just generate a correctly typed expression
    //       to begin with.
    Type *ComputedType;
    std::tie(ComputedType, IsSigned) =
        computeRecurrenceType(ExitInstruction, DB, AC, DT);
    if (ComputedType != RecurrenceType)
      return false;

    // The recurrence expression will be represented in a narrower type. If
    // there are any cast instructions that will be unnecessary, collect them
    // in CastInsts. Note that the 'and' instruction was already included in
    // this list.
    //
    // TODO: A better way to represent this may be to tag in some way all the
    //       instructions that are a part of the reduction. The vectorizer cost
    //       model could then apply the recurrence type to these instructions,
    //       without needing a white list of instructions to ignore.
    collectCastsToIgnore(TheLoop, ExitInstruction, RecurrenceType, CastInsts);
  }

  // We found a reduction var if we have reached the original phi node and we
  // only have a single instruction with out-of-loop users.

  // The ExitInstruction (the instruction which is allowed to have out-of-loop
  // users) is saved as part of the RecurrenceDescriptor.

  // Save the description of this reduction variable.
  RecurrenceDescriptor RD(
      RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(),
      ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts);
  RedDes = RD;

  return true;
}

/// Returns a recurrence descriptor if the instruction is a
/// Select(ICmp(X, Y), X, Y) pattern corresponding to a min(X, Y) or
/// max(X, Y).
RecurrenceDescriptor::InstDesc
RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev) {

  assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) &&
         "Expect a select instruction");
  Instruction *Cmp = nullptr;
  SelectInst *Select = nullptr;

  // We must handle the select(cmp()) as a single instruction. Advance to the
  // select.
  if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) {
    if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->user_begin())))
      return InstDesc(false, I);
    return InstDesc(Select, Prev.getMinMaxKind());
  }

  // Only handle single use cases for now.
  if (!(Select = dyn_cast<SelectInst>(I)))
    return InstDesc(false, I);
  if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) &&
      !(Cmp = dyn_cast<FCmpInst>(I->getOperand(0))))
    return InstDesc(false, I);
  if (!Cmp->hasOneUse())
    return InstDesc(false, I);

  Value *CmpLeft;
  Value *CmpRight;

  // Look for a min/max pattern.
  if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_UIntMin);
  else if (m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_UIntMax);
  else if (m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_SIntMax);
  else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_SIntMin);
  else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_FloatMin);
  else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_FloatMax);
  else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_FloatMin);
  else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_FloatMax);

  return InstDesc(false, I);
}
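
// For illustration (hypothetical IR), the pair
//
//   %cmp = icmp slt i32 %a, %b
//   %min = select i1 %cmp, i32 %a, i32 %b
//
// is recognized as MRK_SIntMin: visiting %cmp advances to its single select
// user, and visiting the select matches the m_SMin pattern above.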

RecurrenceDescriptor::InstDesc
RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
                                        InstDesc &Prev, bool HasFunNoNaNAttr) {
  bool FP = I->getType()->isFloatingPointTy();
  Instruction *UAI = Prev.getUnsafeAlgebraInst();
  if (!UAI && FP && !I->isFast())
    UAI = I; // Found an unsafe (unvectorizable) algebra instruction.

  switch (I->getOpcode()) {
  default:
    return InstDesc(false, I);
  case Instruction::PHI:
    return InstDesc(I, Prev.getMinMaxKind(), Prev.getUnsafeAlgebraInst());
  case Instruction::Sub:
  case Instruction::Add:
    return InstDesc(Kind == RK_IntegerAdd, I);
  case Instruction::Mul:
    return InstDesc(Kind == RK_IntegerMult, I);
  case Instruction::And:
    return InstDesc(Kind == RK_IntegerAnd, I);
  case Instruction::Or:
    return InstDesc(Kind == RK_IntegerOr, I);
  case Instruction::Xor:
    return InstDesc(Kind == RK_IntegerXor, I);
  case Instruction::FMul:
    return InstDesc(Kind == RK_FloatMult, I, UAI);
  case Instruction::FSub:
  case Instruction::FAdd:
    return InstDesc(Kind == RK_FloatAdd, I, UAI);
  case Instruction::FCmp:
  case Instruction::ICmp:
  case Instruction::Select:
    if (Kind != RK_IntegerMinMax &&
        (!HasFunNoNaNAttr || Kind != RK_FloatMinMax))
      return InstDesc(false, I);
    return isMinMaxSelectCmpPattern(I, Prev);
  }
}

bool RecurrenceDescriptor::hasMultipleUsesOf(
    Instruction *I, SmallPtrSetImpl<Instruction *> &Insts) {
  unsigned NumUses = 0;
  for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E;
       ++Use) {
    if (Insts.count(dyn_cast<Instruction>(*Use)))
      ++NumUses;
    if (NumUses > 1)
      return true;
  }

  return false;
}

bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
                                          RecurrenceDescriptor &RedDes,
                                          DemandedBits *DB, AssumptionCache *AC,
                                          DominatorTree *DT) {

  BasicBlock *Header = TheLoop->getHeader();
  Function &F = *Header->getParent();
  bool HasFunNoNaNAttr =
      F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";

  if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
                      AC, DT)) {
    LLVM_DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes, DB,
                      AC, DT)) {
    LLVM_DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes, DB,
                      AC, DT)) {
    LLVM_DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
                      AC, DT)) {
    LLVM_DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes, DB,
                      AC, DT)) {
    LLVM_DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr, RedDes,
                      DB, AC, DT)) {
    LLVM_DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes, DB,
                      AC, DT)) {
    LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
                      AC, DT)) {
    LLVM_DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes, DB,
                      AC, DT)) {
    LLVM_DEBUG(dbgs() << "Found a float MINMAX reduction PHI." << *Phi
                      << "\n");
    return true;
  }
  // Not a reduction of known type.
  return false;
}
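
// A typical use, sketched (the caller provides the analyses; variable names
// here are hypothetical):
//
//   RecurrenceDescriptor RedDes;
//   if (RecurrenceDescriptor::isReductionPHI(Phi, L, RedDes, DB, AC, DT))
//     ; // RedDes now describes the kind, start value, and exit instruction.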

bool RecurrenceDescriptor::isFirstOrderRecurrence(
    PHINode *Phi, Loop *TheLoop,
    DenseMap<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT) {

  // Ensure the phi node is in the loop header and has two incoming values.
  if (Phi->getParent() != TheLoop->getHeader() ||
      Phi->getNumIncomingValues() != 2)
    return false;

  // Ensure the loop has a preheader and a single latch block. The loop
  // vectorizer will need the latch to set up the next iteration of the loop.
  auto *Preheader = TheLoop->getLoopPreheader();
  auto *Latch = TheLoop->getLoopLatch();
  if (!Preheader || !Latch)
    return false;

  // Ensure the phi node's incoming blocks are the loop preheader and latch.
  if (Phi->getBasicBlockIndex(Preheader) < 0 ||
      Phi->getBasicBlockIndex(Latch) < 0)
    return false;

  // Get the previous value. The previous value comes from the latch edge while
  // the initial value comes from the preheader edge.
  auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch));
  if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous) ||
      SinkAfter.count(Previous)) // Cannot rely on dominance due to motion.
    return false;

  // Ensure every user of the phi node is dominated by the previous value.
  // The dominance requirement ensures the loop vectorizer will not need to
  // vectorize the initial value prior to the first iteration of the loop.
  // TODO: Consider extending this sinking to handle other kinds of
  // instructions and expressions, beyond sinking a single cast past Previous.
  if (Phi->hasOneUse()) {
    auto *I = Phi->user_back();
    if (I->isCast() && (I->getParent() == Phi->getParent()) && I->hasOneUse() &&
        DT->dominates(Previous, I->user_back())) {
      if (!DT->dominates(Previous, I)) // Otherwise we're good w/o sinking.
        SinkAfter[I] = Previous;
      return true;
    }
  }

  for (User *U : Phi->users())
    if (auto *I = dyn_cast<Instruction>(U)) {
      if (!DT->dominates(Previous, I))
        return false;
    }

  return true;
}

/// This function returns the identity element (or neutral element) for
/// the operation K.
Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurrenceKind K,
                                                      Type *Tp) {
  switch (K) {
  case RK_IntegerXor:
  case RK_IntegerAdd:
  case RK_IntegerOr:
    // Adding, Xoring, Oring zero to a number does not change it.
    return ConstantInt::get(Tp, 0);
  case RK_IntegerMult:
    // Multiplying a number by 1 does not change it.
    return ConstantInt::get(Tp, 1);
  case RK_IntegerAnd:
    // AND-ing a number with an all-1 value does not change it.
    return ConstantInt::get(Tp, -1, true);
  case RK_FloatMult:
    // Multiplying a number by 1 does not change it.
    return ConstantFP::get(Tp, 1.0L);
  case RK_FloatAdd:
    // Adding zero to a number does not change it.
    return ConstantFP::get(Tp, 0.0L);
  default:
    llvm_unreachable("Unknown recurrence kind");
  }
}
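
// For example, getRecurrenceIdentity(RK_IntegerAdd, Int32Ty) is i32 0 and
// getRecurrenceIdentity(RK_IntegerAnd, Int32Ty) is i32 -1 (all ones); these
// values can seed the unused lanes of a vector accumulator without changing
// the reduction's result.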

/// This function translates the recurrence kind to an LLVM binary operator.
unsigned RecurrenceDescriptor::getRecurrenceBinOp(RecurrenceKind Kind) {
  switch (Kind) {
  case RK_IntegerAdd:
    return Instruction::Add;
  case RK_IntegerMult:
    return Instruction::Mul;
  case RK_IntegerOr:
    return Instruction::Or;
  case RK_IntegerAnd:
    return Instruction::And;
  case RK_IntegerXor:
    return Instruction::Xor;
  case RK_FloatMult:
    return Instruction::FMul;
  case RK_FloatAdd:
    return Instruction::FAdd;
  case RK_IntegerMinMax:
    return Instruction::ICmp;
  case RK_FloatMinMax:
    return Instruction::FCmp;
  default:
    llvm_unreachable("Unknown recurrence operation");
  }
}

Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder,
                                            MinMaxRecurrenceKind RK,
                                            Value *Left, Value *Right) {
  CmpInst::Predicate P = CmpInst::ICMP_NE;
  switch (RK) {
  default:
    llvm_unreachable("Unknown min/max recurrence kind");
  case MRK_UIntMin:
    P = CmpInst::ICMP_ULT;
    break;
  case MRK_UIntMax:
    P = CmpInst::ICMP_UGT;
    break;
  case MRK_SIntMin:
    P = CmpInst::ICMP_SLT;
    break;
  case MRK_SIntMax:
    P = CmpInst::ICMP_SGT;
    break;
  case MRK_FloatMin:
    P = CmpInst::FCMP_OLT;
    break;
  case MRK_FloatMax:
    P = CmpInst::FCMP_OGT;
    break;
  }

  // We only match FP sequences that are 'fast', so we can unconditionally
  // set it on any generated instructions.
  IRBuilder<>::FastMathFlagGuard FMFG(Builder);
  FastMathFlags FMF;
  FMF.setFast();
  Builder.setFastMathFlags(FMF);

  Value *Cmp;
  if (RK == MRK_FloatMin || RK == MRK_FloatMax)
    Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp");
  else
    Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp");

  Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
  return Select;
}
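
// For MRK_SIntMax and scalar i32 operands %a and %b, this emits:
//
//   %rdx.minmax.cmp = icmp sgt i32 %a, %b
//   %rdx.minmax.select = select i1 %rdx.minmax.cmp, i32 %a, i32 %b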

InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
                                         const SCEV *Step, BinaryOperator *BOp,
                                         SmallVectorImpl<Instruction *> *Casts)
    : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) {
  assert(IK != IK_NoInduction && "Not an induction");

  // Start value type should match the induction kind and the value
  // itself should not be null.
  assert(StartValue && "StartValue is null");
  assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
         "StartValue is not a pointer for pointer induction");
  assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
         "StartValue is not an integer for integer induction");

  // Check the Step value. It should be a non-zero integer value.
  assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) &&
         "Step value is zero");

  assert((IK != IK_PtrInduction || getConstIntStepValue()) &&
         "Step value should be constant for pointer induction");
  assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) &&
         "StepValue is not an integer");

  assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) &&
         "StepValue is not FP for FpInduction");
  assert((IK != IK_FpInduction ||
          (InductionBinOp &&
           (InductionBinOp->getOpcode() == Instruction::FAdd ||
            InductionBinOp->getOpcode() == Instruction::FSub))) &&
         "Binary opcode should be specified for FP induction");

  if (Casts) {
    for (auto &Inst : *Casts) {
      RedundantCasts.push_back(Inst);
    }
  }
}

int InductionDescriptor::getConsecutiveDirection() const {
  ConstantInt *ConstStep = getConstIntStepValue();
  if (ConstStep && (ConstStep->isOne() || ConstStep->isMinusOne()))
    return ConstStep->getSExtValue();
  return 0;
}

ConstantInt *InductionDescriptor::getConstIntStepValue() const {
  if (isa<SCEVConstant>(Step))
    return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue());
  return nullptr;
}

Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index,
                                      ScalarEvolution *SE,
                                      const DataLayout &DL) const {

  SCEVExpander Exp(*SE, DL, "induction");
  assert(Index->getType() == Step->getType() &&
         "Index type does not match StepValue type");
  switch (IK) {
  case IK_IntInduction: {
    assert(Index->getType() == StartValue->getType() &&
           "Index type does not match StartValue type");

    // FIXME: Theoretically, we can call getAddExpr() of ScalarEvolution
    // and calculate (Start + Index * Step) for all cases, without
    // special handling for "isOne" and "isMinusOne". But in practice the
    // resulting code is worse: we mix SCEV expressions and ADD/SUB
    // operations and end up with redundant intermediate values being
    // calculated in different ways, which InstCombine is unable to fully
    // reduce.
    if (getConstIntStepValue() && getConstIntStepValue()->isMinusOne())
      return B.CreateSub(StartValue, Index);
    if (getConstIntStepValue() && getConstIntStepValue()->isOne())
      return B.CreateAdd(StartValue, Index);
    const SCEV *S = SE->getAddExpr(SE->getSCEV(StartValue),
                                   SE->getMulExpr(Step, SE->getSCEV(Index)));
    return Exp.expandCodeFor(S, StartValue->getType(), &*B.GetInsertPoint());
  }
  case IK_PtrInduction: {
    assert(isa<SCEVConstant>(Step) &&
           "Expected constant step for pointer induction");
    const SCEV *S = SE->getMulExpr(SE->getSCEV(Index), Step);
    Index = Exp.expandCodeFor(S, Index->getType(), &*B.GetInsertPoint());
    return B.CreateGEP(nullptr, StartValue, Index);
  }
  case IK_FpInduction: {
    assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value");
    assert(InductionBinOp &&
           (InductionBinOp->getOpcode() == Instruction::FAdd ||
            InductionBinOp->getOpcode() == Instruction::FSub) &&
           "Original bin op should be defined for FP induction");

    Value *StepValue = cast<SCEVUnknown>(Step)->getValue();

    // Floating point operations had to be 'fast' to enable the induction.
    FastMathFlags Flags;
    Flags.setFast();

    Value *MulExp = B.CreateFMul(StepValue, Index);
    if (isa<Instruction>(MulExp))
      // We have to check: MulExp may be a constant.
      cast<Instruction>(MulExp)->setFastMathFlags(Flags);

    Value *BOp = B.CreateBinOp(InductionBinOp->getOpcode(), StartValue, MulExp,
                               "induction");
    if (isa<Instruction>(BOp))
      cast<Instruction>(BOp)->setFastMathFlags(Flags);

    return BOp;
  }
  case IK_NoInduction:
    return nullptr;
  }
  llvm_unreachable("invalid enum");
}
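
// Example: for an integer induction with start %start and a constant step of
// 1, transform emits 'add %start, %index'; for step -1 it emits
// 'sub %start, %index'; any other step falls through to SCEVExpander, which
// materializes Start + Index * Step.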

bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop,
                                           ScalarEvolution *SE,
                                           InductionDescriptor &D) {

  // Here we only handle FP induction variables.
  assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type");

  if (TheLoop->getHeader() != Phi->getParent())
    return false;

  // The loop may have multiple entrances or multiple exits; we can analyze
  // this phi if it has a unique entry value and a unique backedge value.
  if (Phi->getNumIncomingValues() != 2)
    return false;
  Value *BEValue = nullptr, *StartValue = nullptr;
  if (TheLoop->contains(Phi->getIncomingBlock(0))) {
    BEValue = Phi->getIncomingValue(0);
    StartValue = Phi->getIncomingValue(1);
  } else {
    assert(TheLoop->contains(Phi->getIncomingBlock(1)) &&
           "Unexpected Phi node in the loop");
    BEValue = Phi->getIncomingValue(1);
    StartValue = Phi->getIncomingValue(0);
  }

  BinaryOperator *BOp = dyn_cast<BinaryOperator>(BEValue);
  if (!BOp)
    return false;

  Value *Addend = nullptr;
  if (BOp->getOpcode() == Instruction::FAdd) {
    if (BOp->getOperand(0) == Phi)
      Addend = BOp->getOperand(1);
    else if (BOp->getOperand(1) == Phi)
      Addend = BOp->getOperand(0);
  } else if (BOp->getOpcode() == Instruction::FSub)
    if (BOp->getOperand(0) == Phi)
      Addend = BOp->getOperand(1);

  if (!Addend)
    return false;

  // The addend should be loop invariant.
  if (auto *I = dyn_cast<Instruction>(Addend))
    if (TheLoop->contains(I))
      return false;

  // FP Step has unknown SCEV.
  const SCEV *Step = SE->getUnknown(Addend);
  D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp);
  return true;
}

/// This function is called when we suspect that the update-chain of a phi
/// node (whose symbolic SCEV expression is in \p PhiScev) contains redundant
/// casts that can be ignored. (This can happen when the PSCEV rewriter adds a
/// runtime predicate P under which the SCEV expression for the phi can be the
/// AddRecurrence \p AR; see createAddRecFromPHIWithCast). We want to find the
/// cast instructions that are involved in the update-chain of this induction.
/// A caller that adds the required runtime predicate is then free to drop
/// these cast instructions, and compute the phi using \p AR (instead of some
/// scev expression with casts).
///
/// For example, without a predicate the scev expression can take the
/// following form:
///      (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy)
///
/// It corresponds to the following IR sequence:
/// %for.body:
///   %x = phi i64 [ 0, %ph ], [ %add, %for.body ]
///   %casted_phi = "ExtTrunc i64 %x"
///   %add = add i64 %casted_phi, %step
///
/// where %x is given in \p PN,
/// PSE.getSCEV(%x) is equal to PSE.getSCEV(%casted_phi) under a predicate,
/// and the IR sequence that "ExtTrunc i64 %x" represents can take one of
/// several forms, for example:
///   ExtTrunc1:    %casted_phi = and %x, 2^n-1
/// or:
///   ExtTrunc2:    %t = shl %x, m
///                 %casted_phi = ashr %t, m
///
/// If we are able to find such a sequence, we return the instructions
/// we found, namely %casted_phi and the instructions on its use-def chain up
/// to the phi (not including the phi).
static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE,
                                    const SCEVUnknown *PhiScev,
                                    const SCEVAddRecExpr *AR,
                                    SmallVectorImpl<Instruction *> &CastInsts) {

  assert(CastInsts.empty() && "CastInsts is expected to be empty.");
  auto *PN = cast<PHINode>(PhiScev->getValue());
  assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression");
  const Loop *L = AR->getLoop();

  // Find any cast instructions that participate in the def-use chain of
  // PhiScev in the loop.
  // FORNOW/TODO: We currently expect the def-use chain to include only
  // two-operand instructions, where one of the operands is an invariant.
  // createAddRecFromPHIWithCasts() currently does not support anything more
  // involved than that, so we keep the search simple. This can be
  // extended/generalized as needed.

  auto getDef = [&](const Value *Val) -> Value * {
    const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val);
    if (!BinOp)
      return nullptr;
    Value *Op0 = BinOp->getOperand(0);
    Value *Op1 = BinOp->getOperand(1);
    Value *Def = nullptr;
    if (L->isLoopInvariant(Op0))
      Def = Op1;
    else if (L->isLoopInvariant(Op1))
      Def = Op0;
    return Def;
  };

  // Look for the instruction that defines the induction via the
  // loop backedge.
  BasicBlock *Latch = L->getLoopLatch();
  if (!Latch)
    return false;
  Value *Val = PN->getIncomingValueForBlock(Latch);
  if (!Val)
    return false;

  // Follow the def-use chain until the induction phi is reached.
  // If on the way we encounter a Value that has the same SCEV Expr as the
  // phi node, we can consider the instructions we visit from that point
  // as part of the cast-sequence that can be ignored.
  bool InCastSequence = false;
  auto *Inst = dyn_cast<Instruction>(Val);
  while (Val != PN) {
    // If we encountered a phi node other than PN, or if we left the loop,
    // we bail out.
    if (!Inst || !L->contains(Inst)) {
      return false;
    }
    auto *AddRec = dyn_cast<SCEVAddRecExpr>(PSE.getSCEV(Val));
    if (AddRec && PSE.areAddRecsEqualWithPreds(AddRec, AR))
      InCastSequence = true;
    if (InCastSequence) {
      // Only the last instruction in the cast sequence is expected to have
      // uses outside the induction def-use chain.
      if (!CastInsts.empty())
        if (!Inst->hasOneUse())
          return false;
      CastInsts.push_back(Inst);
    }
    Val = getDef(Val);
    if (!Val)
      return false;
    Inst = dyn_cast<Instruction>(Val);
  }

  return InCastSequence;
}

bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
                                         PredicatedScalarEvolution &PSE,
                                         InductionDescriptor &D, bool Assume) {
  Type *PhiTy = Phi->getType();

  // We handle integer and pointer induction variables. FP induction is also
  // handled here, but we do not try to build a recurrence expression from the
  // PHI node in place.
  if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() && !PhiTy->isFloatTy() &&
      !PhiTy->isDoubleTy() && !PhiTy->isHalfTy())
    return false;

  if (PhiTy->isFloatingPointTy())
    return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D);

  const SCEV *PhiScev = PSE.getSCEV(Phi);
  const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);

  // We need this expression to be an AddRecExpr.
  if (Assume && !AR)
    AR = PSE.getAsAddRec(Phi);

  if (!AR) {
    LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
    return false;
  }

  // Record any cast instructions that participate in the induction update.
  const auto *SymbolicPhi = dyn_cast<SCEVUnknown>(PhiScev);
  // If we started from an UnknownSCEV, and managed to build an AddRecurrence
  // only after enabling Assume with PSCEV, this means we may have encountered
  // cast instructions that required adding a runtime check in order to
  // guarantee the correctness of the AddRecurrence representation of the
  // induction.
  if (PhiScev != AR && SymbolicPhi) {
    SmallVector<Instruction *, 2> Casts;
    if (getCastsForInductionPHI(PSE, SymbolicPhi, AR, Casts))
      return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR, &Casts);
  }

  return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR);
}

bool InductionDescriptor::isInductionPHI(
    PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE,
    InductionDescriptor &D, const SCEV *Expr,
    SmallVectorImpl<Instruction *> *CastsToIgnore) {
  Type *PhiTy = Phi->getType();
  // We only handle integer and pointer induction variables.
  if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
    return false;

  // Check that the PHI is consecutive.
  const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi);
  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);

  if (!AR) {
    LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
    return false;
  }

  if (AR->getLoop() != TheLoop) {
    // FIXME: We should treat this as a uniform. Unfortunately, we
    // don't currently know how to handle uniform PHIs.
    LLVM_DEBUG(
        dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n");
    return false;
  }

  Value *StartValue =
      Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader());
  const SCEV *Step = AR->getStepRecurrence(*SE);
  // Calculate the pointer stride and check if it is consecutive.
  // The stride may be a constant or a loop invariant integer value.
  const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step);
  if (!ConstStep && !SE->isLoopInvariant(Step, TheLoop))
    return false;

  if (PhiTy->isIntegerTy()) {
    D = InductionDescriptor(StartValue, IK_IntInduction, Step, /*BOp=*/nullptr,
                            CastsToIgnore);
    return true;
  }

  assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
  // Pointer induction should be a constant.
  if (!ConstStep)
    return false;

  ConstantInt *CV = ConstStep->getValue();
  Type *PointerElementType = PhiTy->getPointerElementType();
  // The pointer stride cannot be determined if the pointer element type is
  // not sized.
  if (!PointerElementType->isSized())
    return false;

  const DataLayout &DL = Phi->getModule()->getDataLayout();
  int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType));
  if (!Size)
    return false;

  int64_t CVSize = CV->getSExtValue();
  if (CVSize % Size)
    return false;
  auto *StepValue =
      SE->getConstant(CV->getType(), CVSize / Size, true /* signed */);
  D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue);
  return true;
}
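
// Example of the pointer case: an i32* induction advanced by 8 bytes per
// iteration has CVSize = 8 and Size = 4, so the descriptor records a stride
// of 8 / 4 = 2 elements per iteration.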

bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
                                   bool PreserveLCSSA) {
  bool Changed = false;

  // We re-use a vector for the in-loop predecessors.
  SmallVector<BasicBlock *, 4> InLoopPredecessors;

  auto RewriteExit = [&](BasicBlock *BB) {
    assert(InLoopPredecessors.empty() &&
           "Must start with an empty predecessors list!");
    auto Cleanup = make_scope_exit([&] { InLoopPredecessors.clear(); });

    // See if there are any non-loop predecessors of this exit block and
    // keep track of the in-loop predecessors.
    bool IsDedicatedExit = true;
    for (auto *PredBB : predecessors(BB))
      if (L->contains(PredBB)) {
        if (isa<IndirectBrInst>(PredBB->getTerminator()))
          // We cannot rewrite exiting edges from an indirectbr.
          return false;

        InLoopPredecessors.push_back(PredBB);
      } else {
        IsDedicatedExit = false;
      }

    assert(!InLoopPredecessors.empty() && "Must have *some* loop predecessor!");

    // Nothing to do if this is already a dedicated exit.
    if (IsDedicatedExit)
      return false;

    auto *NewExitBB = SplitBlockPredecessors(
        BB, InLoopPredecessors, ".loopexit", DT, LI, nullptr, PreserveLCSSA);

    if (!NewExitBB)
      LLVM_DEBUG(
          dbgs() << "WARNING: Can't create a dedicated exit block for loop: "
                 << *L << "\n");
    else
      LLVM_DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
                        << NewExitBB->getName() << "\n");
    return true;
  };

  // Walk the exit blocks directly rather than building up a data structure for
  // them, but only visit each one once.
  SmallPtrSet<BasicBlock *, 4> Visited;
  for (auto *BB : L->blocks())
    for (auto *SuccBB : successors(BB)) {
      // We're looking for exit blocks so skip in-loop successors.
      if (L->contains(SuccBB))
        continue;

      // Visit each exit block exactly once.
      if (!Visited.insert(SuccBB).second)
        continue;

      Changed |= RewriteExit(SuccBB);
    }

  return Changed;
}

/// Returns the instructions that use values defined in the loop.
SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) {
  SmallVector<Instruction *, 8> UsedOutside;

  for (auto *Block : L->getBlocks())
    // FIXME: I believe that this could use copy_if if the Inst reference could
    // be adapted into a pointer.
    for (auto &Inst : *Block) {
      auto Users = Inst.users();
      if (any_of(Users, [&](User *U) {
            auto *Use = cast<Instruction>(U);
            return !L->contains(Use->getParent());
          }))
        UsedOutside.push_back(&Inst);
    }

  return UsedOutside;
}

void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) {
  // By definition, all loop passes need the LoopInfo analysis and the
  // Dominator tree it depends on. Because they all participate in the loop
  // pass manager, they must also preserve these.
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addPreserved<DominatorTreeWrapperPass>();
  AU.addRequired<LoopInfoWrapperPass>();
  AU.addPreserved<LoopInfoWrapperPass>();

  // We must also preserve LoopSimplify and LCSSA. We locally access their IDs
  // here because users shouldn't directly get them from this header.
  extern char &LoopSimplifyID;
  extern char &LCSSAID;
  AU.addRequiredID(LoopSimplifyID);
  AU.addPreservedID(LoopSimplifyID);
  AU.addRequiredID(LCSSAID);
  AU.addPreservedID(LCSSAID);
  // This is used in the LPPassManager to perform LCSSA verification on passes
  // which preserve LCSSA form.
  AU.addRequired<LCSSAVerificationPass>();
  AU.addPreserved<LCSSAVerificationPass>();

  // Loop passes are designed to run inside of a loop pass manager which means
  // that any function analyses they require must be required by the first loop
  // pass in the manager (so that it is computed before the loop pass manager
  // runs) and preserved by all loop passes in the manager. To make this
  // reasonably robust, the set needed for most loop passes is maintained here.
  // If your loop pass requires an analysis not listed here, you will need to
  // carefully audit the loop pass manager nesting structure that results.
  AU.addRequired<AAResultsWrapperPass>();
  AU.addPreserved<AAResultsWrapperPass>();
  AU.addPreserved<BasicAAWrapperPass>();
  AU.addPreserved<GlobalsAAWrapperPass>();
  AU.addPreserved<SCEVAAWrapperPass>();
  AU.addRequired<ScalarEvolutionWrapperPass>();
  AU.addPreserved<ScalarEvolutionWrapperPass>();
}

/// Manually defined generic "LoopPass" dependency initialization. This is used
/// to initialize the exact set of passes from above in \c
/// getLoopAnalysisUsage. It can be used within a loop pass's initialization
/// with:
///
///   INITIALIZE_PASS_DEPENDENCY(LoopPass)
///
/// As-if "LoopPass" were a pass.
void llvm::initializeLoopPassPass(PassRegistry &Registry) {
  INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
  INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
}
{"llvm.distribute", 1} return the value as an 1288 /// operand or null otherwise. If the string metadata is not found return 1289 /// Optional's not-a-value. 1290 Optional<const MDOperand *> llvm::findStringMetadataForLoop(Loop *TheLoop, 1291 StringRef Name) { 1292 MDNode *LoopID = TheLoop->getLoopID(); 1293 // Return none if LoopID is false. 1294 if (!LoopID) 1295 return None; 1296 1297 // First operand should refer to the loop id itself. 1298 assert(LoopID->getNumOperands() > 0 && "requires at least one operand"); 1299 assert(LoopID->getOperand(0) == LoopID && "invalid loop id"); 1300 1301 // Iterate over LoopID operands and look for MDString Metadata 1302 for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) { 1303 MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i)); 1304 if (!MD) 1305 continue; 1306 MDString *S = dyn_cast<MDString>(MD->getOperand(0)); 1307 if (!S) 1308 continue; 1309 // Return true if MDString holds expected MetaData. 1310 if (Name.equals(S->getString())) 1311 switch (MD->getNumOperands()) { 1312 case 1: 1313 return nullptr; 1314 case 2: 1315 return &MD->getOperand(1); 1316 default: 1317 llvm_unreachable("loop metadata has 0 or 1 operand"); 1318 } 1319 } 1320 return None; 1321 } 1322 1323 /// Does a BFS from a given node to all of its children inside a given loop. 1324 /// The returned vector of nodes includes the starting point. 1325 SmallVector<DomTreeNode *, 16> 1326 llvm::collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop) { 1327 SmallVector<DomTreeNode *, 16> Worklist; 1328 auto AddRegionToWorklist = [&](DomTreeNode *DTN) { 1329 // Only include subregions in the top level loop. 1330 BasicBlock *BB = DTN->getBlock(); 1331 if (CurLoop->contains(BB)) 1332 Worklist.push_back(DTN); 1333 }; 1334 1335 AddRegionToWorklist(N); 1336 1337 for (size_t I = 0; I < Worklist.size(); I++) 1338 for (DomTreeNode *Child : Worklist[I]->getChildren()) 1339 AddRegionToWorklist(Child); 1340 1341 return Worklist; 1342 } 1343 1344 void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr, 1345 ScalarEvolution *SE = nullptr, 1346 LoopInfo *LI = nullptr) { 1347 assert((!DT || L->isLCSSAForm(*DT)) && "Expected LCSSA!"); 1348 auto *Preheader = L->getLoopPreheader(); 1349 assert(Preheader && "Preheader should exist!"); 1350 1351 // Now that we know the removal is safe, remove the loop by changing the 1352 // branch from the preheader to go to the single exit block. 1353 // 1354 // Because we're deleting a large chunk of code at once, the sequence in which 1355 // we remove things is very important to avoid invalidation issues. 1356 1357 // Tell ScalarEvolution that the loop is deleted. Do this before 1358 // deleting the loop so that ScalarEvolution can look at the loop 1359 // to determine what it needs to clean up. 1360 if (SE) 1361 SE->forgetLoop(L); 1362 1363 auto *ExitBlock = L->getUniqueExitBlock(); 1364 assert(ExitBlock && "Should have a unique exit block!"); 1365 assert(L->hasDedicatedExits() && "Loop should have dedicated exits!"); 1366 1367 auto *OldBr = dyn_cast<BranchInst>(Preheader->getTerminator()); 1368 assert(OldBr && "Preheader must end with a branch"); 1369 assert(OldBr->isUnconditional() && "Preheader must have a single successor"); 1370 // Connect the preheader to the exit block. Keep the old edge to the header 1371 // around to perform the dominator tree update in two separate steps 1372 // -- #1 insertion of the edge preheader -> exit and #2 deletion of the edge 1373 // preheader -> header. 1374 // 1375 // 1376 // 0. Preheader 1. Preheader 2. 

/// Does a BFS from a given node to all of its children inside a given loop.
/// The returned vector of nodes includes the starting point.
SmallVector<DomTreeNode *, 16>
llvm::collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop) {
  SmallVector<DomTreeNode *, 16> Worklist;
  auto AddRegionToWorklist = [&](DomTreeNode *DTN) {
    // Only include subregions in the top level loop.
    BasicBlock *BB = DTN->getBlock();
    if (CurLoop->contains(BB))
      Worklist.push_back(DTN);
  };

  AddRegionToWorklist(N);

  for (size_t I = 0; I < Worklist.size(); I++)
    for (DomTreeNode *Child : Worklist[I]->getChildren())
      AddRegionToWorklist(Child);

  return Worklist;
}

void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,
                          ScalarEvolution *SE = nullptr,
                          LoopInfo *LI = nullptr) {
  assert((!DT || L->isLCSSAForm(*DT)) && "Expected LCSSA!");
  auto *Preheader = L->getLoopPreheader();
  assert(Preheader && "Preheader should exist!");

  // Now that we know the removal is safe, remove the loop by changing the
  // branch from the preheader to go to the single exit block.
  //
  // Because we're deleting a large chunk of code at once, the sequence in
  // which we remove things is very important to avoid invalidation issues.

  // Tell ScalarEvolution that the loop is deleted. Do this before
  // deleting the loop so that ScalarEvolution can look at the loop
  // to determine what it needs to clean up.
  if (SE)
    SE->forgetLoop(L);

  auto *ExitBlock = L->getUniqueExitBlock();
  assert(ExitBlock && "Should have a unique exit block!");
  assert(L->hasDedicatedExits() && "Loop should have dedicated exits!");

  auto *OldBr = dyn_cast<BranchInst>(Preheader->getTerminator());
  assert(OldBr && "Preheader must end with a branch");
  assert(OldBr->isUnconditional() && "Preheader must have a single successor");
  // Connect the preheader to the exit block. Keep the old edge to the header
  // around to perform the dominator tree update in two separate steps
  // -- #1 insertion of the edge preheader -> exit and #2 deletion of the edge
  // preheader -> header.
  //
  //
  // 0. Preheader        1. Preheader        2. Preheader
  //        |                  |  |                  |
  //        V                  |  V                  |
  //   Header <--\          |  Header <--\        |  Header <--\
  //   |    |     |         |  |    |     |       |  |    |     |
  //   |    V     |         |  |    V     |       |  |    V     |
  //   |   Body --/         |  |   Body --/       |  |   Body --/
  //   V                    V  V                  V  V
  //  Exit                    Exit                  Exit
  //
  // By doing this in two separate steps we can perform the dominator tree
  // update without using the batch update API.
  //
  // Even when the loop is never executed, we cannot remove the edge from the
  // source block to the exit block. Consider the case where the unexecuted
  // loop branches back to an outer loop. If we deleted the loop and removed
  // the edge coming to this inner loop, this will break the outer loop
  // structure (by deleting the backedge of the outer loop). If the outer loop
  // is indeed a non-loop, it will be deleted in a future iteration of the
  // loop deletion pass.
  IRBuilder<> Builder(OldBr);
  Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock);
  // Remove the old branch. The conditional branch becomes a new terminator.
  OldBr->eraseFromParent();

  // Rewrite phis in the exit block to get their inputs from the Preheader
  // instead of the exiting block.
  for (PHINode &P : ExitBlock->phis()) {
    // Set the zero'th element of Phi to be from the preheader and remove all
    // other incoming values. Given the loop has dedicated exits, all other
    // incoming values must be from the exiting blocks.
    int PredIndex = 0;
    P.setIncomingBlock(PredIndex, Preheader);
    // Removes all incoming values from all other exiting blocks (including
    // duplicate values from an exiting block).
    // Nuke all entries except the zero'th entry which is the preheader entry.
    // NOTE! We need to remove Incoming Values in the reverse order as done
    // below, to keep the indices valid for deletion (removeIncomingValues
    // updates getNumIncomingValues and shifts all values down into the operand
    // being deleted).
    for (unsigned i = 0, e = P.getNumIncomingValues() - 1; i != e; ++i)
      P.removeIncomingValue(e - i, false);

    assert((P.getNumIncomingValues() == 1 &&
            P.getIncomingBlock(PredIndex) == Preheader) &&
           "Should have exactly one value and that's from the preheader!");
  }

  // Disconnect the loop body by branching directly to its exit.
  Builder.SetInsertPoint(Preheader->getTerminator());
  Builder.CreateBr(ExitBlock);
  // Remove the old branch.
  Preheader->getTerminator()->eraseFromParent();

  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
  if (DT) {
    // Update the dominator tree by informing it about the new edge from the
    // preheader to the exit.
    DTU.insertEdge(Preheader, ExitBlock);
    // Inform the dominator tree about the removed edge.
    DTU.deleteEdge(Preheader, L->getHeader());
  }

  // Given LCSSA form is satisfied, we should not have users of instructions
  // within the dead loop outside of the loop. However, LCSSA doesn't take
  // unreachable uses into account. We handle them here.
  // We could do this after dropping all references (in that case all users in
  // the loop would already be eliminated, leaving us less work to do), but
  // according to the API documentation of User::dropAllReferences, the only
  // valid operation after dropping references is deletion. So let's first
  // substitute all uses of instructions from the loop with undef values of
  // the corresponding type.
  for (auto *Block : L->blocks())
    for (Instruction &I : *Block) {
      auto *Undef = UndefValue::get(I.getType());
      for (Value::use_iterator UI = I.use_begin(), E = I.use_end(); UI != E;) {
        Use &U = *UI;
        ++UI;
        if (auto *Usr = dyn_cast<Instruction>(U.getUser()))
          if (L->contains(Usr->getParent()))
            continue;
        // If we have a DT, we can check that uses outside the loop occur only
        // in unreachable blocks.
        if (DT)
          assert(!DT->isReachableFromEntry(U) &&
                 "Unexpected user in reachable block");
        U.set(Undef);
      }
    }

  // Remove the block from the reference counting scheme, so that we can
  // delete it freely later.
  for (auto *Block : L->blocks())
    Block->dropAllReferences();

  if (LI) {
    // Erase the instructions and the blocks without having to worry
    // about ordering because we already dropped the references.
    // NOTE: This iteration is safe because erasing the block does not remove
    // its entry from the loop's block list. We do that in the next section.
    for (Loop::block_iterator LpI = L->block_begin(), LpE = L->block_end();
         LpI != LpE; ++LpI)
      (*LpI)->eraseFromParent();

    // Finally, the blocks from loopinfo. This has to happen late because
    // otherwise our loop iterators won't work.

    SmallPtrSet<BasicBlock *, 8> blocks;
    blocks.insert(L->block_begin(), L->block_end());
    for (BasicBlock *BB : blocks)
      LI->removeBlock(BB);

    // The last step is to update LoopInfo now that we've eliminated this loop.
    LI->erase(L);
  }
}

Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) {
  // Only support loops with a unique exiting block, and a latch.
  if (!L->getExitingBlock())
    return None;

  // Get the branch weights for the loop's backedge.
  BranchInst *LatchBR =
      dyn_cast<BranchInst>(L->getLoopLatch()->getTerminator());
  if (!LatchBR || LatchBR->getNumSuccessors() != 2)
    return None;

  assert((LatchBR->getSuccessor(0) == L->getHeader() ||
          LatchBR->getSuccessor(1) == L->getHeader()) &&
         "At least one edge out of the latch must go to the header");

  // To estimate the number of times the loop body was executed, we want to
  // know the number of times the backedge was taken, vs. the number of times
  // we exited the loop.
  uint64_t TrueVal, FalseVal;
  if (!LatchBR->extractProfMetadata(TrueVal, FalseVal))
    return None;

  if (!TrueVal || !FalseVal)
    return 0;

  // Divide the count of the backedge by the count of the edge exiting the
  // loop, rounding to nearest.
  if (LatchBR->getSuccessor(0) == L->getHeader())
    return (TrueVal + (FalseVal / 2)) / FalseVal;
  else
    return (FalseVal + (TrueVal / 2)) / TrueVal;
}
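
// For example (hypothetical branch weights): a latch branch with weight 300
// on the backedge and 100 on the exit edge estimates
// (300 + 100 / 2) / 100 = 3 executions of the loop body per entry.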
  if (LatchBR->getSuccessor(0) == L->getHeader())
    return (TrueVal + (FalseVal / 2)) / FalseVal;
  else
    return (FalseVal + (TrueVal / 2)) / TrueVal;
}

bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop,
                                              ScalarEvolution &SE) {
  Loop *OuterL = InnerLoop->getParentLoop();
  if (!OuterL)
    return true;

  // Get the backedge taken count for the inner loop.
  BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
  const SCEV *InnerLoopBECountSC = SE.getExitCount(InnerLoop, InnerLoopLatch);
  if (isa<SCEVCouldNotCompute>(InnerLoopBECountSC) ||
      !InnerLoopBECountSC->getType()->isIntegerTy())
    return false;

  // Check whether the count is invariant with respect to the outer loop.
  ScalarEvolution::LoopDisposition LD =
      SE.getLoopDisposition(InnerLoopBECountSC, OuterL);
  if (LD != ScalarEvolution::LoopInvariant)
    return false;

  return true;
}

/// Adds a 'fast' flag to floating point operations.
static Value *addFastMathFlag(Value *V) {
  if (isa<FPMathOperator>(V)) {
    FastMathFlags Flags;
    Flags.setFast();
    cast<Instruction>(V)->setFastMathFlags(Flags);
  }
  return V;
}

// Helper to generate an ordered reduction.
Value *
llvm::getOrderedReduction(IRBuilder<> &Builder, Value *Acc, Value *Src,
                          unsigned Op,
                          RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind,
                          ArrayRef<Value *> RedOps) {
  unsigned VF = Src->getType()->getVectorNumElements();

  // Extract and apply reduction ops in ascending order:
  // e.g. ((((Acc + Scl[0]) + Scl[1]) + Scl[2]) + ...) + Scl[VF-1]
  Value *Result = Acc;
  for (unsigned ExtractIdx = 0; ExtractIdx != VF; ++ExtractIdx) {
    Value *Ext =
        Builder.CreateExtractElement(Src, Builder.getInt32(ExtractIdx));

    if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
      Result = Builder.CreateBinOp((Instruction::BinaryOps)Op, Result, Ext,
                                   "bin.rdx");
    } else {
      assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid &&
             "Invalid min/max");
      Result = RecurrenceDescriptor::createMinMaxOp(Builder, MinMaxKind,
                                                    Result, Ext);
    }

    if (!RedOps.empty())
      propagateIRFlags(Result, RedOps);
  }

  return Result;
}

// Helper to generate a log2 shuffle reduction.
Value *
llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
                          RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind,
                          ArrayRef<Value *> RedOps) {
  unsigned VF = Src->getType()->getVectorNumElements();
  // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
  // and vector ops, reducing the set of values being computed by half each
  // round.
  assert(isPowerOf2_32(VF) &&
         "Reduction emission only supported for pow2 vectors!");
  Value *TmpVec = Src;
  SmallVector<Constant *, 32> ShuffleMask(VF, nullptr);
  for (unsigned i = VF; i != 1; i >>= 1) {
    // Move the upper half of the vector to the lower half.
    for (unsigned j = 0; j != i / 2; ++j)
      ShuffleMask[j] = Builder.getInt32(i / 2 + j);

    // Fill the rest of the mask with undef.
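    // For example, with VF = 8 the three rounds use the masks (u = undef):
    //   <4, 5, 6, 7, u, u, u, u>, then <2, 3, u, u, u, u, u, u>,
    //   then <1, u, u, u, u, u, u, u>,
    // combining element j with element i/2 + j on each round.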
    std::fill(&ShuffleMask[i / 2], ShuffleMask.end(),
              UndefValue::get(Builder.getInt32Ty()));

    Value *Shuf = Builder.CreateShuffleVector(
        TmpVec, UndefValue::get(TmpVec->getType()),
        ConstantVector::get(ShuffleMask), "rdx.shuf");

    if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
      // Floating point operations had to be 'fast' to enable the reduction.
      TmpVec = addFastMathFlag(Builder.CreateBinOp((Instruction::BinaryOps)Op,
                                                   TmpVec, Shuf, "bin.rdx"));
    } else {
      assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid &&
             "Invalid min/max");
      TmpVec = RecurrenceDescriptor::createMinMaxOp(Builder, MinMaxKind,
                                                    TmpVec, Shuf);
    }
    if (!RedOps.empty())
      propagateIRFlags(TmpVec, RedOps);
  }
  // The result is in the first element of the vector.
  return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
}

/// Create a simple vector reduction specified by an opcode and some
/// flags (if generating min/max reductions).
Value *llvm::createSimpleTargetReduction(
    IRBuilder<> &Builder, const TargetTransformInfo *TTI, unsigned Opcode,
    Value *Src, TargetTransformInfo::ReductionFlags Flags,
    ArrayRef<Value *> RedOps) {
  assert(isa<VectorType>(Src->getType()) && "Type must be a vector");

  Value *ScalarUdf = UndefValue::get(Src->getType()->getVectorElementType());
  std::function<Value *()> BuildFunc;
  using RD = RecurrenceDescriptor;
  RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid;
  // TODO: Support creating ordered reductions.
  FastMathFlags FMFFast;
  FMFFast.setFast();

  switch (Opcode) {
  case Instruction::Add:
    BuildFunc = [&]() { return Builder.CreateAddReduce(Src); };
    break;
  case Instruction::Mul:
    BuildFunc = [&]() { return Builder.CreateMulReduce(Src); };
    break;
  case Instruction::And:
    BuildFunc = [&]() { return Builder.CreateAndReduce(Src); };
    break;
  case Instruction::Or:
    BuildFunc = [&]() { return Builder.CreateOrReduce(Src); };
    break;
  case Instruction::Xor:
    BuildFunc = [&]() { return Builder.CreateXorReduce(Src); };
    break;
  case Instruction::FAdd:
    BuildFunc = [&]() {
      auto Rdx = Builder.CreateFAddReduce(ScalarUdf, Src);
      cast<CallInst>(Rdx)->setFastMathFlags(FMFFast);
      return Rdx;
    };
    break;
  case Instruction::FMul:
    BuildFunc = [&]() {
      auto Rdx = Builder.CreateFMulReduce(ScalarUdf, Src);
      cast<CallInst>(Rdx)->setFastMathFlags(FMFFast);
      return Rdx;
    };
    break;
  case Instruction::ICmp:
    if (Flags.IsMaxOp) {
      MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMax : RD::MRK_UIntMax;
      BuildFunc = [&]() {
        return Builder.CreateIntMaxReduce(Src, Flags.IsSigned);
      };
    } else {
      MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMin : RD::MRK_UIntMin;
      BuildFunc = [&]() {
        return Builder.CreateIntMinReduce(Src, Flags.IsSigned);
      };
    }
    break;
  case Instruction::FCmp:
    if (Flags.IsMaxOp) {
      MinMaxKind = RD::MRK_FloatMax;
      BuildFunc = [&]() { return Builder.CreateFPMaxReduce(Src, Flags.NoNaN); };
    } else {
      MinMaxKind = RD::MRK_FloatMin;
      BuildFunc = [&]() { return Builder.CreateFPMinReduce(Src, Flags.NoNaN); };
    }
    break;
  default:
    llvm_unreachable("Unhandled opcode");
    break;
  }
  if (TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags))
    return BuildFunc();
  return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, RedOps);
}

/// Create a vector reduction using a given recurrence descriptor.
Value *llvm::createTargetReduction(IRBuilder<> &B,
                                   const TargetTransformInfo *TTI,
                                   RecurrenceDescriptor &Desc, Value *Src,
                                   bool NoNaN) {
  // TODO: Support in-order reductions based on the recurrence descriptor.
  using RD = RecurrenceDescriptor;
  RD::RecurrenceKind RecKind = Desc.getRecurrenceKind();
  TargetTransformInfo::ReductionFlags Flags;
  Flags.NoNaN = NoNaN;
  switch (RecKind) {
  case RD::RK_FloatAdd:
    return createSimpleTargetReduction(B, TTI, Instruction::FAdd, Src, Flags);
  case RD::RK_FloatMult:
    return createSimpleTargetReduction(B, TTI, Instruction::FMul, Src, Flags);
  case RD::RK_IntegerAdd:
    return createSimpleTargetReduction(B, TTI, Instruction::Add, Src, Flags);
  case RD::RK_IntegerMult:
    return createSimpleTargetReduction(B, TTI, Instruction::Mul, Src, Flags);
  case RD::RK_IntegerAnd:
    return createSimpleTargetReduction(B, TTI, Instruction::And, Src, Flags);
  case RD::RK_IntegerOr:
    return createSimpleTargetReduction(B, TTI, Instruction::Or, Src, Flags);
  case RD::RK_IntegerXor:
    return createSimpleTargetReduction(B, TTI, Instruction::Xor, Src, Flags);
  case RD::RK_IntegerMinMax: {
    RD::MinMaxRecurrenceKind MMKind = Desc.getMinMaxRecurrenceKind();
    Flags.IsMaxOp = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_UIntMax);
    Flags.IsSigned = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_SIntMin);
    return createSimpleTargetReduction(B, TTI, Instruction::ICmp, Src, Flags);
  }
  case RD::RK_FloatMinMax: {
    Flags.IsMaxOp = Desc.getMinMaxRecurrenceKind() == RD::MRK_FloatMax;
    return createSimpleTargetReduction(B, TTI, Instruction::FCmp, Src, Flags);
  }
  default:
    llvm_unreachable("Unhandled RecKind");
  }
}

void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) {
  auto *VecOp = dyn_cast<Instruction>(I);
  if (!VecOp)
    return;
  auto *Intersection = (OpValue == nullptr) ? dyn_cast<Instruction>(VL[0])
                                            : dyn_cast<Instruction>(OpValue);
  if (!Intersection)
    return;
  const unsigned Opcode = Intersection->getOpcode();
  VecOp->copyIRFlags(Intersection);
  for (auto *V : VL) {
    auto *Instr = dyn_cast<Instruction>(V);
    if (!Instr)
      continue;
    if (OpValue == nullptr || Opcode == Instr->getOpcode())
      VecOp->andIRFlags(V);
  }
}
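
// Usage sketch (hypothetical caller): a vectorizer that combined the scalar
// instructions in SclOps into the vector instruction VecI could call
//   propagateIRFlags(VecI, SclOps);
// so that VecI keeps only the IR flags (nsw/nuw, fast-math, etc.) common to
// all of the scalars it replaces; passing a non-null OpValue restricts the
// intersection to instructions that share OpValue's opcode.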