//===-- LoopUtils.cpp - Loop Utility functions -------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines common loop utility functions.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "loop-utils"

bool RecurrenceDescriptor::areAllUsesIn(Instruction *I,
                                        SmallPtrSetImpl<Instruction *> &Set) {
  for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use)
    if (!Set.count(dyn_cast<Instruction>(*Use)))
      return false;
  return true;
}

bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurrenceKind Kind) {
  switch (Kind) {
  default:
    break;
  case RK_IntegerAdd:
  case RK_IntegerMult:
  case RK_IntegerOr:
  case RK_IntegerAnd:
  case RK_IntegerXor:
  case RK_IntegerMinMax:
    return true;
  }
  return false;
}

bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurrenceKind Kind) {
  return (Kind != RK_NoRecurrence) && !isIntegerRecurrenceKind(Kind);
}

bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurrenceKind Kind) {
  switch (Kind) {
  default:
    break;
  case RK_IntegerAdd:
  case RK_IntegerMult:
  case RK_FloatAdd:
  case RK_FloatMult:
    return true;
  }
  return false;
}

/// Determines if Phi may have been type-promoted. If Phi has a single user
/// that ANDs the Phi with a type mask, return the user. RT is updated to
/// account for the narrower bit width represented by the mask, and the AND
/// instruction is added to CI.
static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT,
                                   SmallPtrSetImpl<Instruction *> &Visited,
                                   SmallPtrSetImpl<Instruction *> &CI) {
  if (!Phi->hasOneUse())
    return Phi;

  const APInt *M = nullptr;
  Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser());

  // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT
  // with a new integer type of the corresponding bit width.
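  // For example (illustrative IR, not taken from a test), a reduction that
  // InstCombine promoted from i8 to i32 leaves a pattern like:
  //   %rdx = phi i32 [ 0, %preheader ], [ %rdx.next, %loop ]
  //   %rdx.masked = and i32 %rdx, 255
  // Since 255 + 1 is 2^8, RT is narrowed to i8 and the 'and' is returned as
  // the new starting point of the reduction search.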
  if (match(J, m_c_And(m_Instruction(I), m_APInt(M)))) {
    int32_t Bits = (*M + 1).exactLogBase2();
    if (Bits > 0) {
      RT = IntegerType::get(Phi->getContext(), Bits);
      Visited.insert(Phi);
      CI.insert(J);
      return J;
    }
  }
  return Phi;
}

/// Compute the minimal bit width needed to represent a reduction whose exit
/// instruction is given by Exit.
static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit,
                                                     DemandedBits *DB,
                                                     AssumptionCache *AC,
                                                     DominatorTree *DT) {
  bool IsSigned = false;
  const DataLayout &DL = Exit->getModule()->getDataLayout();
  uint64_t MaxBitWidth = DL.getTypeSizeInBits(Exit->getType());

  if (DB) {
    // Use the demanded bits analysis to determine the bits that are live out
    // of the exit instruction, rounding up to the nearest power of two. If the
    // use of demanded bits results in a smaller bit width, we know the value
    // must be positive (i.e., IsSigned = false), because if this were not the
    // case, the sign bit would have been demanded.
    auto Mask = DB->getDemandedBits(Exit);
    MaxBitWidth = Mask.getBitWidth() - Mask.countLeadingZeros();
  }

  if (MaxBitWidth == DL.getTypeSizeInBits(Exit->getType()) && AC && DT) {
    // If demanded bits wasn't able to limit the bit width, we can try to use
    // value tracking instead. This can be the case, for example, if the value
    // may be negative.
    auto NumSignBits = ComputeNumSignBits(Exit, DL, 0, AC, nullptr, DT);
    auto NumTypeBits = DL.getTypeSizeInBits(Exit->getType());
    MaxBitWidth = NumTypeBits - NumSignBits;
    KnownBits Bits = computeKnownBits(Exit, DL);
    if (!Bits.isNonNegative()) {
      // If the value is not known to be non-negative, we set IsSigned to true,
      // meaning that we will use sext instructions instead of zext
      // instructions to restore the original type.
      IsSigned = true;
      if (!Bits.isNegative())
        // If the value is not known to be negative, we don't know what the
        // upper bit is, and therefore, we don't know what kind of extend we
        // will need. In this case, just increase the bit width by one bit and
        // use sext.
        ++MaxBitWidth;
    }
  }
  if (!isPowerOf2_64(MaxBitWidth))
    MaxBitWidth = NextPowerOf2(MaxBitWidth);

  return std::make_pair(Type::getIntNTy(Exit->getContext(), MaxBitWidth),
                        IsSigned);
}

/// Collect cast instructions that can be ignored in the vectorizer's cost
/// model, given a reduction exit value and the minimal type in which the
/// reduction can be represented.
static void collectCastsToIgnore(Loop *TheLoop, Instruction *Exit,
                                 Type *RecurrenceType,
                                 SmallPtrSetImpl<Instruction *> &Casts) {

  SmallVector<Instruction *, 8> Worklist;
  SmallPtrSet<Instruction *, 8> Visited;
  Worklist.push_back(Exit);

  while (!Worklist.empty()) {
    Instruction *Val = Worklist.pop_back_val();
    Visited.insert(Val);
    if (auto *Cast = dyn_cast<CastInst>(Val))
      if (Cast->getSrcTy() == RecurrenceType) {
        // If the source type of a cast instruction is equal to the recurrence
        // type, it will be eliminated, and should be ignored in the vectorizer
        // cost model.
        Casts.insert(Cast);
        continue;
      }

    // Add all operands to the work list if they are loop-varying values that
    // we haven't yet visited.
    for (Value *O : cast<User>(Val)->operands())
      if (auto *I = dyn_cast<Instruction>(O))
        if (TheLoop->contains(I) && !Visited.count(I))
          Worklist.push_back(I);
  }
}

bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
                                           Loop *TheLoop, bool HasFunNoNaNAttr,
                                           RecurrenceDescriptor &RedDes,
                                           DemandedBits *DB,
                                           AssumptionCache *AC,
                                           DominatorTree *DT) {
  if (Phi->getNumIncomingValues() != 2)
    return false;

  // Reduction variables are only found in the loop header block.
  if (Phi->getParent() != TheLoop->getHeader())
    return false;

  // Obtain the reduction start value from the value that comes from the loop
  // preheader.
  Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());

  // ExitInstruction is the single value which is used outside the loop.
  // We only allow for a single reduction value to be used outside the loop.
  // This includes users of the reduction and the recurrence variables
  // themselves, which form a cycle that ends in the phi node.
  Instruction *ExitInstruction = nullptr;
  // Indicates that we found a reduction operation in our scan.
  bool FoundReduxOp = false;

  // We start with the PHI node and scan for all of the users of this
  // instruction. All users must be instructions that can be used as reduction
  // variables (such as ADD). We must have a single out-of-block user. The cycle
  // must include the original PHI.
  bool FoundStartPHI = false;

  // To recognize min/max patterns formed by an icmp/select sequence, we store
  // the number of instructions we saw from the recognized min/max pattern,
  // to make sure we only see exactly the two instructions.
  unsigned NumCmpSelectPatternInst = 0;
  InstDesc ReduxDesc(false, nullptr);

  // Data used for determining if the recurrence has been type-promoted.
  Type *RecurrenceType = Phi->getType();
  SmallPtrSet<Instruction *, 4> CastInsts;
  Instruction *Start = Phi;
  bool IsSigned = false;

  SmallPtrSet<Instruction *, 8> VisitedInsts;
  SmallVector<Instruction *, 8> Worklist;

  // Return early if the recurrence kind does not match the type of Phi. If the
  // recurrence kind is arithmetic, we attempt to look through AND operations
  // resulting from the type promotion performed by InstCombine. Vector
  // operations are not limited to the legal integer widths, so we may be able
  // to evaluate the reduction in the narrower width.
  if (RecurrenceType->isFloatingPointTy()) {
    if (!isFloatingPointRecurrenceKind(Kind))
      return false;
  } else {
    if (!isIntegerRecurrenceKind(Kind))
      return false;
    if (isArithmeticRecurrenceKind(Kind))
      Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);
  }

  Worklist.push_back(Start);
  VisitedInsts.insert(Start);

  // A value in the reduction can be used:
  //  - By the reduction:
  //      - Reduction operation:
  //        - One use of reduction value (safe).
  //        - Multiple use of reduction value (not safe).
  //      - PHI:
  //        - All uses of the PHI must be the reduction (safe).
  //        - Otherwise, not safe.
  //  - By instructions outside of the loop (safe).
  //      * One value may have several outside users, but all outside
  //        uses must be of the same value.
  //  - By an instruction that is not part of the reduction (not safe).
  //    This is either:
  //      * An instruction type other than PHI or the reduction operation.
  //      * A PHI in the header other than the initial PHI.
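  // Illustrative example (hypothetical IR) of a chain this scan accepts:
  //   loop:
  //     %sum = phi i32 [ 0, %preheader ], [ %sum.next, %loop ]
  //     %sum.next = add i32 %sum, %x        ; the only in-loop user of %sum
  //   exit:
  //     %lcssa = phi i32 [ %sum.next, %loop ]  ; single out-of-loop use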
  while (!Worklist.empty()) {
    Instruction *Cur = Worklist.back();
    Worklist.pop_back();

    // No Users.
    // If the instruction has no users then this is a broken chain and can't be
    // a reduction variable.
    if (Cur->use_empty())
      return false;

    bool IsAPhi = isa<PHINode>(Cur);

    // A header PHI use other than the original PHI.
    if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent())
      return false;

    // Reductions of instructions such as Div and Sub are only possible if the
    // LHS is the reduction variable.
    if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) &&
        !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) &&
        !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))
      return false;

    // Any reduction instruction must be of one of the allowed kinds. We ignore
    // the starting value (the Phi or an AND instruction if the Phi has been
    // type-promoted).
    if (Cur != Start) {
      ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr);
      if (!ReduxDesc.isRecurrence())
        return false;
    }

    // A reduction operation must only have one use of the reduction value.
    if (!IsAPhi && Kind != RK_IntegerMinMax && Kind != RK_FloatMinMax &&
        hasMultipleUsesOf(Cur, VisitedInsts))
      return false;

    // All inputs to a PHI node must be a reduction value.
    if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts))
      return false;

    if (Kind == RK_IntegerMinMax &&
        (isa<ICmpInst>(Cur) || isa<SelectInst>(Cur)))
      ++NumCmpSelectPatternInst;
    if (Kind == RK_FloatMinMax && (isa<FCmpInst>(Cur) || isa<SelectInst>(Cur)))
      ++NumCmpSelectPatternInst;

    // Check whether we found a reduction operator.
    FoundReduxOp |= !IsAPhi && Cur != Start;

    // Process users of current instruction. Push non-PHI nodes after PHI nodes
    // onto the stack. This way we are going to have seen all inputs to PHI
    // nodes once we get to them.
    SmallVector<Instruction *, 8> NonPHIs;
    SmallVector<Instruction *, 8> PHIs;
    for (User *U : Cur->users()) {
      Instruction *UI = cast<Instruction>(U);

      // Check if we found the exit user.
      BasicBlock *Parent = UI->getParent();
      if (!TheLoop->contains(Parent)) {
        // If we already know this instruction is used externally, move on to
        // the next user.
        if (ExitInstruction == Cur)
          continue;

        // Exit if you find multiple values used outside or if the header phi
        // node is being used. In this case the user uses the value of the
        // previous iteration, in which case we would lose "VF-1" iterations of
        // the reduction operation if we vectorize.
        if (ExitInstruction != nullptr || Cur == Phi)
          return false;

        // The instruction used by an outside user must be the last instruction
        // before we feed back to the reduction phi. Otherwise, we lose VF-1
        // operations on the value.
        if (!is_contained(Phi->operands(), Cur))
          return false;

        ExitInstruction = Cur;
        continue;
      }
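
      // A min/max reduction is the one case in which the recurrence value
      // legitimately has two in-loop users; e.g. (illustrative IR):
      //   %cmp = icmp slt i32 %min, %x
      //   %min.next = select i1 %cmp, i32 %min, i32 %x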

      // Process instructions only once (termination). Each reduction cycle
      // value must only be used once, except by phi nodes and min/max
      // reductions which are represented as a cmp followed by a select.
      InstDesc IgnoredVal(false, nullptr);
      if (VisitedInsts.insert(UI).second) {
        if (isa<PHINode>(UI))
          PHIs.push_back(UI);
        else
          NonPHIs.push_back(UI);
      } else if (!isa<PHINode>(UI) &&
                 ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) &&
                   !isa<SelectInst>(UI)) ||
                  !isMinMaxSelectCmpPattern(UI, IgnoredVal).isRecurrence()))
        return false;

      // Remember that we completed the cycle.
      if (UI == Phi)
        FoundStartPHI = true;
    }
    Worklist.append(PHIs.begin(), PHIs.end());
    Worklist.append(NonPHIs.begin(), NonPHIs.end());
  }

  // This means we have seen one but not the other instruction of the
  // pattern or more than just a select and cmp.
  if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) &&
      NumCmpSelectPatternInst != 2)
    return false;

  if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
    return false;

  if (Start != Phi) {
    // If the starting value is not the same as the phi node, we speculatively
    // looked through an 'and' instruction when evaluating a potential
    // arithmetic reduction to determine if it may have been type-promoted.
    //
    // We now compute the minimal bit width that is required to represent the
    // reduction. If this is the same width that was indicated by the 'and', we
    // can represent the reduction in the smaller type. The 'and' instruction
    // will be eliminated since it will essentially be a cast instruction that
    // can be ignored in the cost model. If we compute a different type than we
    // did when evaluating the 'and', the 'and' will not be eliminated, and we
    // will end up with different kinds of operations in the recurrence
    // expression (e.g., RK_IntegerAND, RK_IntegerADD). We give up if this is
    // the case.
    //
    // The vectorizer relies on InstCombine to perform the actual
    // type-shrinking. It does this by inserting instructions to truncate the
    // exit value of the reduction to the width indicated by RecurrenceType and
    // then extend this value back to the original width. If IsSigned is false,
    // a 'zext' instruction will be generated; otherwise, a 'sext' will be
    // used.
    //
    // TODO: We should not rely on InstCombine to rewrite the reduction in the
    //       smaller type. We should just generate a correctly typed expression
    //       to begin with.
    Type *ComputedType;
    std::tie(ComputedType, IsSigned) =
        computeRecurrenceType(ExitInstruction, DB, AC, DT);
    if (ComputedType != RecurrenceType)
      return false;

    // The recurrence expression will be represented in a narrower type. If
    // there are any cast instructions that will be unnecessary, collect them
    // in CastInsts. Note that the 'and' instruction was already included in
    // this list.
    //
    // TODO: A better way to represent this may be to tag in some way all the
    //       instructions that are a part of the reduction. The vectorizer cost
    //       model could then apply the recurrence type to these instructions,
    //       without needing a white list of instructions to ignore.
    collectCastsToIgnore(TheLoop, ExitInstruction, RecurrenceType, CastInsts);
  }
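
  // Worked example (hypothetical): for a reduction masked with 'and ..., 255',
  // RecurrenceType is i8. If demanded bits shows only 8 bits of the exit
  // value are live, computeRecurrenceType returns {i8, /*IsSigned=*/false},
  // the types match, and the 'and' plus the surrounding casts are ignored by
  // the cost model.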

  // We found a reduction var if we have reached the original phi node and we
  // only have a single instruction with out-of-loop users.

  // The ExitInstruction (the instruction which is allowed to have out-of-loop
  // users) is saved as part of the RecurrenceDescriptor.

  // Save the description of this reduction variable.
  RecurrenceDescriptor RD(
      RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(),
      ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts);
  RedDes = RD;

  return true;
}

/// Recognizes a Select(ICmp(X, Y), X, Y) instruction pattern corresponding to
/// a min(X, Y) or max(X, Y).
RecurrenceDescriptor::InstDesc
RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev) {

  assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) &&
         "Expected a cmp or select instruction");
  Instruction *Cmp = nullptr;
  SelectInst *Select = nullptr;

  // We must handle the select(cmp()) as a single instruction. Advance to the
  // select.
  if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) {
    if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->user_begin())))
      return InstDesc(false, I);
    return InstDesc(Select, Prev.getMinMaxKind());
  }

  // Only handle single use cases for now.
  if (!(Select = dyn_cast<SelectInst>(I)))
    return InstDesc(false, I);
  if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) &&
      !(Cmp = dyn_cast<FCmpInst>(I->getOperand(0))))
    return InstDesc(false, I);
  if (!Cmp->hasOneUse())
    return InstDesc(false, I);

  Value *CmpLeft;
  Value *CmpRight;

  // Look for a min/max pattern.
  if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_UIntMin);
  else if (m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_UIntMax);
  else if (m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_SIntMax);
  else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_SIntMin);
  else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_FloatMin);
  else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_FloatMax);
  else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_FloatMin);
  else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_FloatMax);

  return InstDesc(false, I);
}

RecurrenceDescriptor::InstDesc
RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
                                        InstDesc &Prev, bool HasFunNoNaNAttr) {
  bool FP = I->getType()->isFloatingPointTy();
  Instruction *UAI = Prev.getUnsafeAlgebraInst();
  if (!UAI && FP && !I->isFast())
    UAI = I; // Found an unsafe (unvectorizable) algebra instruction.

  switch (I->getOpcode()) {
  default:
    return InstDesc(false, I);
  case Instruction::PHI:
    return InstDesc(I, Prev.getMinMaxKind(), Prev.getUnsafeAlgebraInst());
  case Instruction::Sub:
  case Instruction::Add:
    return InstDesc(Kind == RK_IntegerAdd, I);
  case Instruction::Mul:
    return InstDesc(Kind == RK_IntegerMult, I);
  case Instruction::And:
    return InstDesc(Kind == RK_IntegerAnd, I);
  case Instruction::Or:
    return InstDesc(Kind == RK_IntegerOr, I);
  case Instruction::Xor:
    return InstDesc(Kind == RK_IntegerXor, I);
  case Instruction::FMul:
    return InstDesc(Kind == RK_FloatMult, I, UAI);
  case Instruction::FSub:
  case Instruction::FAdd:
    return InstDesc(Kind == RK_FloatAdd, I, UAI);
  case Instruction::FCmp:
  case Instruction::ICmp:
  case Instruction::Select:
    if (Kind != RK_IntegerMinMax &&
        (!HasFunNoNaNAttr || Kind != RK_FloatMinMax))
      return InstDesc(false, I);
    return isMinMaxSelectCmpPattern(I, Prev);
  }
}

bool RecurrenceDescriptor::hasMultipleUsesOf(
    Instruction *I, SmallPtrSetImpl<Instruction *> &Insts) {
  unsigned NumUses = 0;
  for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E;
       ++Use) {
    if (Insts.count(dyn_cast<Instruction>(*Use)))
      ++NumUses;
    if (NumUses > 1)
      return true;
  }

  return false;
}

bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
                                          RecurrenceDescriptor &RedDes,
                                          DemandedBits *DB, AssumptionCache *AC,
                                          DominatorTree *DT) {

  BasicBlock *Header = TheLoop->getHeader();
  Function &F = *Header->getParent();
  bool HasFunNoNaNAttr =
      F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";

  if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
                      AC, DT)) {
    DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes, DB,
                      AC, DT)) {
    DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes, DB,
                      AC, DT)) {
    DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
                      AC, DT)) {
    DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes, DB,
                      AC, DT)) {
    DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr, RedDes,
                      DB, AC, DT)) {
    DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes, DB,
                      AC, DT)) {
    DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
                      AC, DT)) {
    DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes, DB,
                      AC, DT)) {
    DEBUG(dbgs() << "Found a float MINMAX reduction PHI." << *Phi << "\n");
    return true;
  }
  // Not a reduction of known type.
  return false;
}
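
// A first-order recurrence uses the value of the previous iteration rather
// than accumulating into it; e.g. (illustrative IR):
//   loop:
//     %prev = phi i32 [ %init, %preheader ], [ %cur, %loop ]
//     %cur = load i32, i32* %p
//     %use = add i32 %prev, %cur   ; consumes the last iteration's value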

bool RecurrenceDescriptor::isFirstOrderRecurrence(
    PHINode *Phi, Loop *TheLoop,
    DenseMap<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT) {

  // Ensure the phi node is in the loop header and has two incoming values.
  if (Phi->getParent() != TheLoop->getHeader() ||
      Phi->getNumIncomingValues() != 2)
    return false;

  // Ensure the loop has a preheader and a single latch block. The loop
  // vectorizer will need the latch to set up the next iteration of the loop.
  auto *Preheader = TheLoop->getLoopPreheader();
  auto *Latch = TheLoop->getLoopLatch();
  if (!Preheader || !Latch)
    return false;

  // Ensure the phi node's incoming blocks are the loop preheader and latch.
  if (Phi->getBasicBlockIndex(Preheader) < 0 ||
      Phi->getBasicBlockIndex(Latch) < 0)
    return false;

  // Get the previous value. The previous value comes from the latch edge while
  // the initial value comes from the preheader edge.
  auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch));
  if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous) ||
      SinkAfter.count(Previous)) // Cannot rely on dominance due to motion.
    return false;

  // Ensure every user of the phi node is dominated by the previous value.
  // The dominance requirement ensures the loop vectorizer will not need to
  // vectorize the initial value prior to the first iteration of the loop.
  // TODO: Consider extending this sinking to handle other kinds of
  // instructions and expressions, beyond sinking a single cast past Previous.
  if (Phi->hasOneUse()) {
    auto *I = Phi->user_back();
    if (I->isCast() && (I->getParent() == Phi->getParent()) && I->hasOneUse() &&
        DT->dominates(Previous, I->user_back())) {
      if (!DT->dominates(Previous, I)) // Otherwise we're good w/o sinking.
        SinkAfter[I] = Previous;
      return true;
    }
  }

  for (User *U : Phi->users())
    if (auto *I = dyn_cast<Instruction>(U)) {
      if (!DT->dominates(Previous, I))
        return false;
    }

  return true;
}

/// This function returns the identity element (or neutral element) for
/// the operation K.
Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurrenceKind K,
                                                      Type *Tp) {
  switch (K) {
  case RK_IntegerXor:
  case RK_IntegerAdd:
  case RK_IntegerOr:
    // Adding, Xoring, Oring zero to a number does not change it.
    return ConstantInt::get(Tp, 0);
  case RK_IntegerMult:
    // Multiplying a number by 1 does not change it.
    return ConstantInt::get(Tp, 1);
  case RK_IntegerAnd:
    // AND-ing a number with an all-1 value does not change it.
    return ConstantInt::get(Tp, -1, true);
  case RK_FloatMult:
    // Multiplying a number by 1 does not change it.
    return ConstantFP::get(Tp, 1.0L);
  case RK_FloatAdd:
    // Adding zero to a number does not change it.
    return ConstantFP::get(Tp, 0.0L);
  default:
    llvm_unreachable("Unknown recurrence kind");
  }
}
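
// For instance, a vectorized integer-add reduction seeds its vector phi with
// the identity above: zeroinitializer for RK_IntegerAdd, a splat of 1 for
// RK_IntegerMult, and an all-ones splat for RK_IntegerAnd.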

/// This function translates the recurrence kind to an LLVM binary operator.
unsigned RecurrenceDescriptor::getRecurrenceBinOp(RecurrenceKind Kind) {
  switch (Kind) {
  case RK_IntegerAdd:
    return Instruction::Add;
  case RK_IntegerMult:
    return Instruction::Mul;
  case RK_IntegerOr:
    return Instruction::Or;
  case RK_IntegerAnd:
    return Instruction::And;
  case RK_IntegerXor:
    return Instruction::Xor;
  case RK_FloatMult:
    return Instruction::FMul;
  case RK_FloatAdd:
    return Instruction::FAdd;
  case RK_IntegerMinMax:
    return Instruction::ICmp;
  case RK_FloatMinMax:
    return Instruction::FCmp;
  default:
    llvm_unreachable("Unknown recurrence operation");
  }
}

Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder,
                                            MinMaxRecurrenceKind RK,
                                            Value *Left, Value *Right) {
  CmpInst::Predicate P = CmpInst::ICMP_NE;
  switch (RK) {
  default:
    llvm_unreachable("Unknown min/max recurrence kind");
  case MRK_UIntMin:
    P = CmpInst::ICMP_ULT;
    break;
  case MRK_UIntMax:
    P = CmpInst::ICMP_UGT;
    break;
  case MRK_SIntMin:
    P = CmpInst::ICMP_SLT;
    break;
  case MRK_SIntMax:
    P = CmpInst::ICMP_SGT;
    break;
  case MRK_FloatMin:
    P = CmpInst::FCMP_OLT;
    break;
  case MRK_FloatMax:
    P = CmpInst::FCMP_OGT;
    break;
  }

  // We only match FP sequences that are 'fast', so we can unconditionally
  // set it on any generated instructions.
  IRBuilder<>::FastMathFlagGuard FMFG(Builder);
  FastMathFlags FMF;
  FMF.setFast();
  Builder.setFastMathFlags(FMF);

  Value *Cmp;
  if (RK == MRK_FloatMin || RK == MRK_FloatMax)
    Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp");
  else
    Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp");

  Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
  return Select;
}
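
// Usage sketch (hypothetical values): combining two partial signed-min
// reductions emits a cmp/select pair:
//   Value *Min = RecurrenceDescriptor::createMinMaxOp(
//       Builder, RecurrenceDescriptor::MRK_SIntMin, LHS, RHS);
// which produces IR of the form:
//   %rdx.minmax.cmp = icmp slt <4 x i32> %LHS, %RHS
//   %rdx.minmax.select = select <4 x i1> %rdx.minmax.cmp, <4 x i32> %LHS,
//                               <4 x i32> %RHS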

InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
                                         const SCEV *Step, BinaryOperator *BOp,
                                         SmallVectorImpl<Instruction *> *Casts)
    : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) {
  assert(IK != IK_NoInduction && "Not an induction");

  // Start value type should match the induction kind and the value
  // itself should not be null.
  assert(StartValue && "StartValue is null");
  assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
         "StartValue is not a pointer for pointer induction");
  assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
         "StartValue is not an integer for integer induction");

  // Check the Step Value. It should be a non-zero integer value.
  assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) &&
         "Step value is zero");

  assert((IK != IK_PtrInduction || getConstIntStepValue()) &&
         "Step value should be constant for pointer induction");
  assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) &&
         "StepValue is not an integer");

  assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) &&
         "StepValue is not FP for FpInduction");
  assert((IK != IK_FpInduction ||
          (InductionBinOp &&
           (InductionBinOp->getOpcode() == Instruction::FAdd ||
            InductionBinOp->getOpcode() == Instruction::FSub))) &&
         "Binary opcode should be specified for FP induction");

  if (Casts) {
    for (auto &Inst : *Casts) {
      RedundantCasts.push_back(Inst);
    }
  }
}

int InductionDescriptor::getConsecutiveDirection() const {
  ConstantInt *ConstStep = getConstIntStepValue();
  if (ConstStep && (ConstStep->isOne() || ConstStep->isMinusOne()))
    return ConstStep->getSExtValue();
  return 0;
}

ConstantInt *InductionDescriptor::getConstIntStepValue() const {
  if (isa<SCEVConstant>(Step))
    return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue());
  return nullptr;
}

Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index,
                                      ScalarEvolution *SE,
                                      const DataLayout &DL) const {

  SCEVExpander Exp(*SE, DL, "induction");
  assert(Index->getType() == Step->getType() &&
         "Index type does not match StepValue type");
  switch (IK) {
  case IK_IntInduction: {
    assert(Index->getType() == StartValue->getType() &&
           "Index type does not match StartValue type");

    // FIXME: Theoretically, we can call getAddExpr() of ScalarEvolution
    // and calculate (Start + Index * Step) for all cases, without
    // special handling for "isOne" and "isMinusOne".
    // But in practice the resulting code is worse: we mix SCEV expressions
    // with explicit ADD/SUB operations and end up with redundant intermediate
    // values computed in different ways, which InstCombine is unable to
    // reduce.

    if (getConstIntStepValue() && getConstIntStepValue()->isMinusOne())
      return B.CreateSub(StartValue, Index);
    if (getConstIntStepValue() && getConstIntStepValue()->isOne())
      return B.CreateAdd(StartValue, Index);
    const SCEV *S = SE->getAddExpr(SE->getSCEV(StartValue),
                                   SE->getMulExpr(Step, SE->getSCEV(Index)));
    return Exp.expandCodeFor(S, StartValue->getType(), &*B.GetInsertPoint());
  }
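  // Worked example (hypothetical): with StartValue = %base, Step = 4, and
  // Index = %i, the expansion above is equivalent to
  //   %0 = mul i64 %i, 4
  //   %1 = add i64 %base, %0
  // while Step == 1 or Step == -1 folds to the single add/sub fast paths.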
  case IK_PtrInduction: {
    assert(isa<SCEVConstant>(Step) &&
           "Expected constant step for pointer induction");
    const SCEV *S = SE->getMulExpr(SE->getSCEV(Index), Step);
    Index = Exp.expandCodeFor(S, Index->getType(), &*B.GetInsertPoint());
    return B.CreateGEP(nullptr, StartValue, Index);
  }
  case IK_FpInduction: {
    assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value");
    assert(InductionBinOp &&
           (InductionBinOp->getOpcode() == Instruction::FAdd ||
            InductionBinOp->getOpcode() == Instruction::FSub) &&
           "Original bin op should be defined for FP induction");

    Value *StepValue = cast<SCEVUnknown>(Step)->getValue();

    // Floating point operations had to be 'fast' to enable the induction.
    FastMathFlags Flags;
    Flags.setFast();

    Value *MulExp = B.CreateFMul(StepValue, Index);
    if (isa<Instruction>(MulExp))
      // We have to check because MulExp may fold to a constant.
      cast<Instruction>(MulExp)->setFastMathFlags(Flags);

    Value *BOp = B.CreateBinOp(InductionBinOp->getOpcode(), StartValue, MulExp,
                               "induction");
    if (isa<Instruction>(BOp))
      cast<Instruction>(BOp)->setFastMathFlags(Flags);

    return BOp;
  }
  case IK_NoInduction:
    return nullptr;
  }
  llvm_unreachable("invalid enum");
}

bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop,
                                           ScalarEvolution *SE,
                                           InductionDescriptor &D) {

  // Here we only handle FP induction variables.
  assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type");

  if (TheLoop->getHeader() != Phi->getParent())
    return false;

  // The loop may have multiple entrances or multiple exits; we can analyze
  // this phi if it has a unique entry value and a unique backedge value.
  if (Phi->getNumIncomingValues() != 2)
    return false;
  Value *BEValue = nullptr, *StartValue = nullptr;
  if (TheLoop->contains(Phi->getIncomingBlock(0))) {
    BEValue = Phi->getIncomingValue(0);
    StartValue = Phi->getIncomingValue(1);
  } else {
    assert(TheLoop->contains(Phi->getIncomingBlock(1)) &&
           "Unexpected Phi node in the loop");
    BEValue = Phi->getIncomingValue(1);
    StartValue = Phi->getIncomingValue(0);
  }

  BinaryOperator *BOp = dyn_cast<BinaryOperator>(BEValue);
  if (!BOp)
    return false;

  Value *Addend = nullptr;
  if (BOp->getOpcode() == Instruction::FAdd) {
    if (BOp->getOperand(0) == Phi)
      Addend = BOp->getOperand(1);
    else if (BOp->getOperand(1) == Phi)
      Addend = BOp->getOperand(0);
  } else if (BOp->getOpcode() == Instruction::FSub)
    if (BOp->getOperand(0) == Phi)
      Addend = BOp->getOperand(1);

  if (!Addend)
    return false;

  // The addend should be loop invariant.
  if (auto *I = dyn_cast<Instruction>(Addend))
    if (TheLoop->contains(I))
      return false;

  // FP Step has unknown SCEV.
  const SCEV *Step = SE->getUnknown(Addend);
  D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp);
  return true;
}
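
// An FP induction recognized above looks like (illustrative IR):
//   loop:
//     %fp.iv = phi float [ 1.0e+00, %preheader ], [ %fp.iv.next, %loop ]
//     %fp.iv.next = fadd fast float %fp.iv, %step   ; %step loop-invariant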

/// This function is called when we suspect that the update-chain of a phi node
/// (whose symbolic SCEV expression is in \p PhiScev) contains redundant casts
/// that can be ignored. (This can happen when the PSCEV rewriter adds a
/// runtime predicate P under which the SCEV expression for the phi can be the
/// AddRecurrence \p AR; See createAddRecFromPHIWithCast). We want to find the
/// cast instructions that are involved in the update-chain of this induction.
/// A caller that adds the required runtime predicate is free to drop these
/// cast instructions, and compute the phi using \p AR (instead of some scev
/// expression with casts).
///
/// For example, without a predicate the scev expression can take the following
/// form:
///       (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy)
///
/// It corresponds to the following IR sequence:
/// %for.body:
///   %x = phi i64 [ 0, %ph ], [ %add, %for.body ]
///   %casted_phi = "ExtTrunc i64 %x"
///   %add = add i64 %casted_phi, %step
///
/// where %x is given in \p PN,
/// PSE.getSCEV(%x) is equal to PSE.getSCEV(%casted_phi) under a predicate,
/// and the IR sequence that "ExtTrunc i64 %x" represents can take one of
/// several forms, for example, such as:
///
///   ExtTrunc1:    %casted_phi = and  %x, 2^n-1
/// or:
///   ExtTrunc2:    %t = shl %x, m
///                 %casted_phi = ashr %t, m
///
/// If we are able to find such a sequence, we return the instructions
/// we found, namely %casted_phi and the instructions on its use-def chain up
/// to the phi (not including the phi).
static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE,
                                    const SCEVUnknown *PhiScev,
                                    const SCEVAddRecExpr *AR,
                                    SmallVectorImpl<Instruction *> &CastInsts) {

  assert(CastInsts.empty() && "CastInsts is expected to be empty.");
  auto *PN = cast<PHINode>(PhiScev->getValue());
  assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression");
  const Loop *L = AR->getLoop();

  // Find any cast instructions that participate in the def-use chain of
  // PhiScev in the loop.
  // FORNOW/TODO: We currently expect the def-use chain to include only
  // two-operand instructions, where one of the operands is an invariant.
  // createAddRecFromPHIWithCasts() currently does not support anything more
  // involved than that, so we keep the search simple. This can be
  // extended/generalized as needed.

  auto getDef = [&](const Value *Val) -> Value * {
    const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val);
    if (!BinOp)
      return nullptr;
    Value *Op0 = BinOp->getOperand(0);
    Value *Op1 = BinOp->getOperand(1);
    Value *Def = nullptr;
    if (L->isLoopInvariant(Op0))
      Def = Op1;
    else if (L->isLoopInvariant(Op1))
      Def = Op0;
    return Def;
  };

  // Look for the instruction that defines the induction via the
  // loop backedge.
  BasicBlock *Latch = L->getLoopLatch();
  if (!Latch)
    return false;
  Value *Val = PN->getIncomingValueForBlock(Latch);
  if (!Val)
    return false;

  // Follow the def-use chain until the induction phi is reached.
  // If on the way we encounter a Value that has the same SCEV Expr as the
  // phi node, we can consider the instructions we visit from that point
  // as part of the cast-sequence that can be ignored.
  bool InCastSequence = false;
  auto *Inst = dyn_cast<Instruction>(Val);
  while (Val != PN) {
    // If we encountered a phi node other than PN, or if we left the loop,
    // we bail out.
    if (!Inst || !L->contains(Inst)) {
      return false;
    }
    auto *AddRec = dyn_cast<SCEVAddRecExpr>(PSE.getSCEV(Val));
    if (AddRec && PSE.areAddRecsEqualWithPreds(AddRec, AR))
      InCastSequence = true;
    if (InCastSequence) {
      // Only the last instruction in the cast sequence is expected to have
      // uses outside the induction def-use chain.
      if (!CastInsts.empty())
        if (!Inst->hasOneUse())
          return false;
      CastInsts.push_back(Inst);
    }
    Val = getDef(Val);
    if (!Val)
      return false;
    Inst = dyn_cast<Instruction>(Val);
  }

  return InCastSequence;
}

bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
                                         PredicatedScalarEvolution &PSE,
                                         InductionDescriptor &D, bool Assume) {
  Type *PhiTy = Phi->getType();

  // We handle integer and pointer induction variables. FP induction is also
  // handled, but without trying to form a recurrent expression from the PHI
  // node in place.
  if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() && !PhiTy->isFloatTy() &&
      !PhiTy->isDoubleTy() && !PhiTy->isHalfTy())
    return false;

  if (PhiTy->isFloatingPointTy())
    return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D);

  const SCEV *PhiScev = PSE.getSCEV(Phi);
  const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);

  // We need this expression to be an AddRecExpr.
  if (Assume && !AR)
    AR = PSE.getAsAddRec(Phi);

  if (!AR) {
    DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
    return false;
  }

  // Record any Cast instructions that participate in the induction update.
  const auto *SymbolicPhi = dyn_cast<SCEVUnknown>(PhiScev);
  // If we started from an UnknownSCEV, and managed to build an addRecurrence
  // only after enabling Assume with PSCEV, this means we may have encountered
  // cast instructions that required adding a runtime check in order to
  // guarantee the correctness of the AddRecurrence representation of the
  // induction.
  if (PhiScev != AR && SymbolicPhi) {
    SmallVector<Instruction *, 2> Casts;
    if (getCastsForInductionPHI(PSE, SymbolicPhi, AR, Casts))
      return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR, &Casts);
  }

  return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR);
}

bool InductionDescriptor::isInductionPHI(
    PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE,
    InductionDescriptor &D, const SCEV *Expr,
    SmallVectorImpl<Instruction *> *CastsToIgnore) {
  Type *PhiTy = Phi->getType();
  // We only handle integer and pointer induction variables.
  if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
    return false;

  // Check that the PHI is consecutive.
  const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi);
  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);

  if (!AR) {
    DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
    return false;
  }

  if (AR->getLoop() != TheLoop) {
    // FIXME: We should treat this as a uniform. Unfortunately, we
    // don't currently know how to handle uniform PHIs.
    DEBUG(dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n");
    return false;
  }

  Value *StartValue =
      Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader());
  const SCEV *Step = AR->getStepRecurrence(*SE);
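  // Worked example (hypothetical): for "for (i32 *p = a; ...; p += 2)" the
  // SCEV step is 8 bytes; with i32 elements (Size == 4 below), the pointer
  // induction is recorded with a StepValue of 8 / 4 == 2 elements.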
  // Calculate the pointer stride and check if it is consecutive.
  // The stride may be a constant or a loop invariant integer value.
  const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step);
  if (!ConstStep && !SE->isLoopInvariant(Step, TheLoop))
    return false;

  if (PhiTy->isIntegerTy()) {
    D = InductionDescriptor(StartValue, IK_IntInduction, Step, /*BOp=*/nullptr,
                            CastsToIgnore);
    return true;
  }

  assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
  // Pointer induction should be a constant.
  if (!ConstStep)
    return false;

  ConstantInt *CV = ConstStep->getValue();
  Type *PointerElementType = PhiTy->getPointerElementType();
  // The pointer stride cannot be determined if the pointer element type is not
  // sized.
  if (!PointerElementType->isSized())
    return false;

  const DataLayout &DL = Phi->getModule()->getDataLayout();
  int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType));
  if (!Size)
    return false;

  int64_t CVSize = CV->getSExtValue();
  if (CVSize % Size)
    return false;
  auto *StepValue =
      SE->getConstant(CV->getType(), CVSize / Size, true /* signed */);
  D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue);
  return true;
}

bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
                                   bool PreserveLCSSA) {
  bool Changed = false;

  // We re-use a vector for the in-loop predecessors.
  SmallVector<BasicBlock *, 4> InLoopPredecessors;

  auto RewriteExit = [&](BasicBlock *BB) {
    assert(InLoopPredecessors.empty() &&
           "Must start with an empty predecessors list!");
    auto Cleanup = make_scope_exit([&] { InLoopPredecessors.clear(); });

    // See if there are any non-loop predecessors of this exit block and
    // keep track of the in-loop predecessors.
    bool IsDedicatedExit = true;
    for (auto *PredBB : predecessors(BB))
      if (L->contains(PredBB)) {
        if (isa<IndirectBrInst>(PredBB->getTerminator()))
          // We cannot rewrite exiting edges from an indirectbr.
          return false;

        InLoopPredecessors.push_back(PredBB);
      } else {
        IsDedicatedExit = false;
      }

    assert(!InLoopPredecessors.empty() && "Must have *some* loop predecessor!");

    // Nothing to do if this is already a dedicated exit.
    if (IsDedicatedExit)
      return false;

    auto *NewExitBB = SplitBlockPredecessors(
        BB, InLoopPredecessors, ".loopexit", DT, LI, PreserveLCSSA);

    if (!NewExitBB)
      DEBUG(dbgs() << "WARNING: Can't create a dedicated exit block for loop: "
                   << *L << "\n");
    else
      DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
                   << NewExitBB->getName() << "\n");
    return true;
  };

  // Walk the exit blocks directly rather than building up a data structure for
  // them, but only visit each one once.
  SmallPtrSet<BasicBlock *, 4> Visited;
  for (auto *BB : L->blocks())
    for (auto *SuccBB : successors(BB)) {
      // We're looking for exit blocks so skip in-loop successors.
      if (L->contains(SuccBB))
        continue;

      // Visit each exit block exactly once.
      if (!Visited.insert(SuccBB).second)
        continue;

      Changed |= RewriteExit(SuccBB);
    }

  return Changed;
}
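
// Sketch of the rewrite above on a hypothetical CFG: an exit block reached
// from both an in-loop block and a non-loop block,
//   loop.body -> exit <- other.bb
// becomes
//   loop.body -> exit.loopexit -> exit <- other.bb
// so that every exit of the loop has only in-loop predecessors.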

/// \brief Returns the instructions that use values defined in the loop.
SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) {
  SmallVector<Instruction *, 8> UsedOutside;

  for (auto *Block : L->getBlocks())
    // FIXME: I believe that this could use copy_if if the Inst reference could
    // be adapted into a pointer.
    for (auto &Inst : *Block) {
      auto Users = Inst.users();
      if (any_of(Users, [&](User *U) {
            auto *Use = cast<Instruction>(U);
            return !L->contains(Use->getParent());
          }))
        UsedOutside.push_back(&Inst);
    }

  return UsedOutside;
}

void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) {
  // By definition, all loop passes need the LoopInfo analysis and the
  // Dominator tree it depends on. Because they all participate in the loop
  // pass manager, they must also preserve these.
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addPreserved<DominatorTreeWrapperPass>();
  AU.addRequired<LoopInfoWrapperPass>();
  AU.addPreserved<LoopInfoWrapperPass>();

  // We must also preserve LoopSimplify and LCSSA. We locally access their IDs
  // here because users shouldn't directly get them from this header.
  extern char &LoopSimplifyID;
  extern char &LCSSAID;
  AU.addRequiredID(LoopSimplifyID);
  AU.addPreservedID(LoopSimplifyID);
  AU.addRequiredID(LCSSAID);
  AU.addPreservedID(LCSSAID);
  // This is used in the LPPassManager to perform LCSSA verification on passes
  // which preserve LCSSA form.
  AU.addRequired<LCSSAVerificationPass>();
  AU.addPreserved<LCSSAVerificationPass>();

  // Loop passes are designed to run inside of a loop pass manager which means
  // that any function analyses they require must be required by the first loop
  // pass in the manager (so that it is computed before the loop pass manager
  // runs) and preserved by all loop passes in the manager. To make this
  // reasonably robust, the set needed for most loop passes is maintained here.
  // If your loop pass requires an analysis not listed here, you will need to
  // carefully audit the loop pass manager nesting structure that results.
  AU.addRequired<AAResultsWrapperPass>();
  AU.addPreserved<AAResultsWrapperPass>();
  AU.addPreserved<BasicAAWrapperPass>();
  AU.addPreserved<GlobalsAAWrapperPass>();
  AU.addPreserved<SCEVAAWrapperPass>();
  AU.addRequired<ScalarEvolutionWrapperPass>();
  AU.addPreserved<ScalarEvolutionWrapperPass>();
}

/// Manually defined generic "LoopPass" dependency initialization. This is used
/// to initialize the exact set of passes from above in \c
/// getLoopAnalysisUsage. It can be used within a loop pass's initialization
/// with:
///
///   INITIALIZE_PASS_DEPENDENCY(LoopPass)
///
/// As-if "LoopPass" were a pass.
void llvm::initializeLoopPassPass(PassRegistry &Registry) {
  INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
  INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
}
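
// Typical use (hypothetical legacy pass): forward the requirements above from
// the pass's own getAnalysisUsage:
//   void MyLoopPass::getAnalysisUsage(AnalysisUsage &AU) const override {
//     getLoopAnalysisUsage(AU);
//   }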

/// \brief Find string metadata for loop
///
/// If it has a value (e.g. {"llvm.distribute", 1}) return the value as an
/// operand or null otherwise. If the string metadata is not found return
/// Optional's not-a-value.
Optional<const MDOperand *> llvm::findStringMetadataForLoop(Loop *TheLoop,
                                                            StringRef Name) {
  MDNode *LoopID = TheLoop->getLoopID();
  // Return none if LoopID is null.
  if (!LoopID)
    return None;

  // First operand should refer to the loop id itself.
  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
  assert(LoopID->getOperand(0) == LoopID && "invalid loop id");

  // Iterate over LoopID operands and look for MDString metadata.
  for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
    MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
    if (!MD)
      continue;
    MDString *S = dyn_cast<MDString>(MD->getOperand(0));
    if (!S)
      continue;
    // If the MDString matches the name we are looking for, return the
    // requested operand.
    if (Name.equals(S->getString()))
      switch (MD->getNumOperands()) {
      case 1:
        return nullptr;
      case 2:
        return &MD->getOperand(1);
      default:
        llvm_unreachable("loop metadata has 0 or 1 operand");
      }
  }
  return None;
}

/// Does a BFS from a given node to all of its children inside a given loop.
/// The returned vector of nodes includes the starting point.
SmallVector<DomTreeNode *, 16>
llvm::collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop) {
  SmallVector<DomTreeNode *, 16> Worklist;
  auto AddRegionToWorklist = [&](DomTreeNode *DTN) {
    // Only include subregions in the top level loop.
    BasicBlock *BB = DTN->getBlock();
    if (CurLoop->contains(BB))
      Worklist.push_back(DTN);
  };

  AddRegionToWorklist(N);

  for (size_t I = 0; I < Worklist.size(); I++)
    for (DomTreeNode *Child : Worklist[I]->getChildren())
      AddRegionToWorklist(Child);

  return Worklist;
}

void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,
                          ScalarEvolution *SE = nullptr,
                          LoopInfo *LI = nullptr) {
  assert((!DT || L->isLCSSAForm(*DT)) && "Expected LCSSA!");
  auto *Preheader = L->getLoopPreheader();
  assert(Preheader && "Preheader should exist!");

  // Now that we know the removal is safe, remove the loop by changing the
  // branch from the preheader to go to the single exit block.
  //
  // Because we're deleting a large chunk of code at once, the sequence in
  // which we remove things is very important to avoid invalidation issues.

  // Tell ScalarEvolution that the loop is deleted. Do this before
  // deleting the loop so that ScalarEvolution can look at the loop
  // to determine what it needs to clean up.
  if (SE)
    SE->forgetLoop(L);

  auto *ExitBlock = L->getUniqueExitBlock();
  assert(ExitBlock && "Should have a unique exit block!");
  assert(L->hasDedicatedExits() && "Loop should have dedicated exits!");

  auto *OldBr = dyn_cast<BranchInst>(Preheader->getTerminator());
  assert(OldBr && "Preheader must end with a branch");
  assert(OldBr->isUnconditional() && "Preheader must have a single successor");
  // Connect the preheader to the exit block. Keep the old edge to the header
  // around to perform the dominator tree update in two separate steps
  // -- #1 insertion of the edge preheader -> exit and #2 deletion of the edge
  // preheader -> header.
  //
  //
  // 0. Preheader          1. Preheader       2. Preheader
  //        |                    |      |           |
  //        V                    |      V           |
  //      Header <--\            | Header <--\      | Header <--\
  //       |  |     |            |  |  |     |      |  |  |     |
  //       |  V     |            |  |  V     |      |  |  V     |
  //       | Body --/            |  | Body --/      |  | Body --/
  //       V                     V  V               V  V
  //      Exit                  Exit               Exit
  //
  // By doing this in two separate steps we can perform the dominator tree
  // update without using the batch update API.
  //
  // Even when the loop is never executed, we cannot remove the edge from the
  // source block to the exit block. Consider the case where the unexecuted
  // loop branches back to an outer loop. If we deleted the loop and removed
  // the edge coming to this inner loop, this will break the outer loop
  // structure (by deleting the backedge of the outer loop). If the outer loop
  // is indeed a non-loop, it will be deleted in a future iteration of the
  // loop deletion pass.
  IRBuilder<> Builder(OldBr);
  Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock);
  // Remove the old branch. The conditional branch becomes a new terminator.
  OldBr->eraseFromParent();

  // Rewrite phis in the exit block to get their inputs from the Preheader
  // instead of the exiting block.
  for (PHINode &P : ExitBlock->phis()) {
    // Set the zero'th element of Phi to be from the preheader and remove all
    // other incoming values. Given the loop has dedicated exits, all other
    // incoming values must be from the exiting blocks.
    int PredIndex = 0;
    P.setIncomingBlock(PredIndex, Preheader);
    // Removes all incoming values from all other exiting blocks (including
    // duplicate values from an exiting block).
    // Nuke all entries except the zero'th entry which is the preheader entry.
    // NOTE! We need to remove Incoming Values in the reverse order as done
    // below, to keep the indices valid for deletion (removeIncomingValues
    // updates getNumIncomingValues and shifts all values down into the operand
    // being deleted).
    for (unsigned i = 0, e = P.getNumIncomingValues() - 1; i != e; ++i)
      P.removeIncomingValue(e - i, false);

    assert((P.getNumIncomingValues() == 1 &&
            P.getIncomingBlock(PredIndex) == Preheader) &&
           "Should have exactly one value and that's from the preheader!");
  }

  // Disconnect the loop body by branching directly to its exit.
  Builder.SetInsertPoint(Preheader->getTerminator());
  Builder.CreateBr(ExitBlock);
  // Remove the old branch.
  Preheader->getTerminator()->eraseFromParent();

  if (DT) {
    // Update the dominator tree by informing it about the new edge from the
    // preheader to the exit.
    DT->insertEdge(Preheader, ExitBlock);
    // Inform the dominator tree about the removed edge.
    DT->deleteEdge(Preheader, L->getHeader());
  }

  // Given LCSSA form is satisfied, we should not have users of instructions
  // within the dead loop outside of the loop. However, LCSSA doesn't take
  // unreachable uses into account. We handle them here.
  // We could do this after dropping all references (in which case all users
  // inside the loop would already be eliminated and there would be less work
  // to do), but according to the API documentation of
  // User::dropAllReferences, the only valid operation after dropping
  // references is deletion. So let's substitute all uses of instructions from
  // the loop with undef values of the corresponding type first.
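  //
  // For example (hypothetical IR), a block that is unreachable from entry may
  // still reference a value defined in the dead loop:
  //   %v = add i32 %x, 1          ; defined inside the deleted loop
  //   ...
  // unreachable.bb:               ; no predecessors
  //   %u = mul i32 %v, 2          ; rewritten below to: mul i32 undef, 2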
  for (auto *Block : L->blocks())
    for (Instruction &I : *Block) {
      auto *Undef = UndefValue::get(I.getType());
      for (Value::use_iterator UI = I.use_begin(), E = I.use_end(); UI != E;) {
        Use &U = *UI;
        ++UI;
        if (auto *Usr = dyn_cast<Instruction>(U.getUser()))
          if (L->contains(Usr->getParent()))
            continue;
        // If we have a DT then we can check that uses outside the loop occur
        // only in unreachable blocks.
        if (DT)
          assert(!DT->isReachableFromEntry(U) &&
                 "Unexpected user in reachable block");
        U.set(Undef);
      }
    }

  // Remove the block from the reference counting scheme, so that we can
  // delete it freely later.
  for (auto *Block : L->blocks())
    Block->dropAllReferences();

  if (LI) {
    // Erase the instructions and the blocks without having to worry
    // about ordering because we already dropped the references.
    // NOTE: This iteration is safe because erasing the block does not remove
    // its entry from the loop's block list. We do that in the next section.
    for (Loop::block_iterator LpI = L->block_begin(), LpE = L->block_end();
         LpI != LpE; ++LpI)
      (*LpI)->eraseFromParent();

    // Finally, the blocks from loopinfo. This has to happen late because
    // otherwise our loop iterators won't work.
    SmallPtrSet<BasicBlock *, 8> blocks;
    blocks.insert(L->block_begin(), L->block_end());
    for (BasicBlock *BB : blocks)
      LI->removeBlock(BB);

    // The last step is to update LoopInfo now that we've eliminated this loop.
    LI->erase(L);
  }
}

Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) {
  // Only support loops with a unique exiting block, and a latch.
  if (!L->getExitingBlock())
    return None;

  // Get the branch weights for the loop's backedge.
  BranchInst *LatchBR =
      dyn_cast<BranchInst>(L->getLoopLatch()->getTerminator());
  if (!LatchBR || LatchBR->getNumSuccessors() != 2)
    return None;

  assert((LatchBR->getSuccessor(0) == L->getHeader() ||
          LatchBR->getSuccessor(1) == L->getHeader()) &&
         "At least one edge out of the latch must go to the header");

  // To estimate the number of times the loop body was executed, we want to
  // know the number of times the backedge was taken, vs. the number of times
  // we exited the loop.
  uint64_t TrueVal, FalseVal;
  if (!LatchBR->extractProfMetadata(TrueVal, FalseVal))
    return None;

  if (!TrueVal || !FalseVal)
    return 0;

  // Divide the count of the backedge by the count of the edge exiting the
  // loop, rounding to nearest. For example (hypothetical weights), a backedge
  // weight of 1000 and an exit weight of 10 estimate 100 iterations per entry.
  if (LatchBR->getSuccessor(0) == L->getHeader())
    return (TrueVal + (FalseVal / 2)) / FalseVal;
  else
    return (FalseVal + (TrueVal / 2)) / TrueVal;
}

/// \brief Adds a 'fast' flag to floating point operations.
static Value *addFastMathFlag(Value *V) {
  if (isa<FPMathOperator>(V)) {
    FastMathFlags Flags;
    Flags.setFast();
    cast<Instruction>(V)->setFastMathFlags(Flags);
  }
  return V;
}
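
// Illustrative expansion (hypothetical) of the ordered reduction below for
// VF == 4 and Op == FAdd:
//   %e0 = extractelement <4 x float> %src, i32 0
//   %r0 = fadd float %acc, %e0
//   %e1 = extractelement <4 x float> %src, i32 1
//   %r1 = fadd float %r0, %e1
//   ... and so on through %r3, which is the returned result.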
  // Extract and apply reduction ops in ascending order:
  // e.g. ((((Acc + Scl[0]) + Scl[1]) + Scl[2]) + ...) + Scl[VF-1]
  Value *Result = Acc;
  for (unsigned ExtractIdx = 0; ExtractIdx != VF; ++ExtractIdx) {
    Value *Ext =
        Builder.CreateExtractElement(Src, Builder.getInt32(ExtractIdx));

    if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
      Result = Builder.CreateBinOp((Instruction::BinaryOps)Op, Result, Ext,
                                   "bin.rdx");
    } else {
      assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid &&
             "Invalid min/max");
      Result = RecurrenceDescriptor::createMinMaxOp(Builder, MinMaxKind,
                                                    Result, Ext);
    }

    if (!RedOps.empty())
      propagateIRFlags(Result, RedOps);
  }

  return Result;
}

// Helper to generate a log2 shuffle reduction.
Value *
llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
                          RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind,
                          ArrayRef<Value *> RedOps) {
  unsigned VF = Src->getType()->getVectorNumElements();
  // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
  // and vector ops, reducing the set of values being computed by half each
  // round.
  assert(isPowerOf2_32(VF) &&
         "Reduction emission only supported for pow2 vectors!");
  Value *TmpVec = Src;
  SmallVector<Constant *, 32> ShuffleMask(VF, nullptr);
  for (unsigned i = VF; i != 1; i >>= 1) {
    // Move the upper half of the vector to the lower half, e.g. for VF = 4
    // the masks are <2, 3, undef, undef> and then <1, undef, undef, undef>.
    for (unsigned j = 0; j != i / 2; ++j)
      ShuffleMask[j] = Builder.getInt32(i / 2 + j);

    // Fill the rest of the mask with undef.
    std::fill(&ShuffleMask[i / 2], ShuffleMask.end(),
              UndefValue::get(Builder.getInt32Ty()));

    Value *Shuf = Builder.CreateShuffleVector(
        TmpVec, UndefValue::get(TmpVec->getType()),
        ConstantVector::get(ShuffleMask), "rdx.shuf");

    if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
      // Floating point operations had to be 'fast' to enable the reduction.
      TmpVec = addFastMathFlag(Builder.CreateBinOp((Instruction::BinaryOps)Op,
                                                   TmpVec, Shuf, "bin.rdx"));
    } else {
      assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid &&
             "Invalid min/max");
      TmpVec = RecurrenceDescriptor::createMinMaxOp(Builder, MinMaxKind,
                                                    TmpVec, Shuf);
    }
    if (!RedOps.empty())
      propagateIRFlags(TmpVec, RedOps);
  }
  // The result is in the first element of the vector.
  return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
}

/// Create a simple vector reduction specified by an opcode and some
/// flags (if generating min/max reductions).
Value *llvm::createSimpleTargetReduction(
    IRBuilder<> &Builder, const TargetTransformInfo *TTI, unsigned Opcode,
    Value *Src, TargetTransformInfo::ReductionFlags Flags,
    ArrayRef<Value *> RedOps) {
  assert(isa<VectorType>(Src->getType()) && "Type must be a vector");

  Value *ScalarUdf = UndefValue::get(Src->getType()->getVectorElementType());
  std::function<Value *()> BuildFunc;
  using RD = RecurrenceDescriptor;
  RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid;
  // TODO: Support creating ordered reductions.
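  // Each BuildFunc lambda below emits the matching vector-reduce intrinsic
  // through IRBuilder (e.g. CreateAddReduce for Instruction::Add). It is only
  // invoked when TTI->useReductionIntrinsic reports that the target prefers
  // an intrinsic; otherwise we fall back to the shuffle expansion at the
  // bottom of this function.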
  FastMathFlags FMFFast;
  FMFFast.setFast();

  switch (Opcode) {
  case Instruction::Add:
    BuildFunc = [&]() { return Builder.CreateAddReduce(Src); };
    break;
  case Instruction::Mul:
    BuildFunc = [&]() { return Builder.CreateMulReduce(Src); };
    break;
  case Instruction::And:
    BuildFunc = [&]() { return Builder.CreateAndReduce(Src); };
    break;
  case Instruction::Or:
    BuildFunc = [&]() { return Builder.CreateOrReduce(Src); };
    break;
  case Instruction::Xor:
    BuildFunc = [&]() { return Builder.CreateXorReduce(Src); };
    break;
  case Instruction::FAdd:
    BuildFunc = [&]() {
      auto Rdx = Builder.CreateFAddReduce(ScalarUdf, Src);
      cast<CallInst>(Rdx)->setFastMathFlags(FMFFast);
      return Rdx;
    };
    break;
  case Instruction::FMul:
    BuildFunc = [&]() {
      auto Rdx = Builder.CreateFMulReduce(ScalarUdf, Src);
      cast<CallInst>(Rdx)->setFastMathFlags(FMFFast);
      return Rdx;
    };
    break;
  case Instruction::ICmp:
    if (Flags.IsMaxOp) {
      MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMax : RD::MRK_UIntMax;
      BuildFunc = [&]() {
        return Builder.CreateIntMaxReduce(Src, Flags.IsSigned);
      };
    } else {
      MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMin : RD::MRK_UIntMin;
      BuildFunc = [&]() {
        return Builder.CreateIntMinReduce(Src, Flags.IsSigned);
      };
    }
    break;
  case Instruction::FCmp:
    if (Flags.IsMaxOp) {
      MinMaxKind = RD::MRK_FloatMax;
      BuildFunc = [&]() { return Builder.CreateFPMaxReduce(Src, Flags.NoNaN); };
    } else {
      MinMaxKind = RD::MRK_FloatMin;
      BuildFunc = [&]() { return Builder.CreateFPMinReduce(Src, Flags.NoNaN); };
    }
    break;
  default:
    llvm_unreachable("Unhandled opcode");
    break;
  }
  if (TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags))
    return BuildFunc();
  return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, RedOps);
}

/// Create a vector reduction using a given recurrence descriptor.
Value *llvm::createTargetReduction(IRBuilder<> &B,
                                   const TargetTransformInfo *TTI,
                                   RecurrenceDescriptor &Desc, Value *Src,
                                   bool NoNaN) {
  // TODO: Support in-order reductions based on the recurrence descriptor.
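  // Translate the recurrence kind recorded by the vectorizer into the
  // corresponding opcode and flags, then defer to createSimpleTargetReduction.
  // For example, a signed-max recurrence (MRK_SIntMax) becomes an ICmp
  // reduction with both IsMaxOp and IsSigned set.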
  using RD = RecurrenceDescriptor;
  RD::RecurrenceKind RecKind = Desc.getRecurrenceKind();
  TargetTransformInfo::ReductionFlags Flags;
  Flags.NoNaN = NoNaN;
  switch (RecKind) {
  case RD::RK_FloatAdd:
    return createSimpleTargetReduction(B, TTI, Instruction::FAdd, Src, Flags);
  case RD::RK_FloatMult:
    return createSimpleTargetReduction(B, TTI, Instruction::FMul, Src, Flags);
  case RD::RK_IntegerAdd:
    return createSimpleTargetReduction(B, TTI, Instruction::Add, Src, Flags);
  case RD::RK_IntegerMult:
    return createSimpleTargetReduction(B, TTI, Instruction::Mul, Src, Flags);
  case RD::RK_IntegerAnd:
    return createSimpleTargetReduction(B, TTI, Instruction::And, Src, Flags);
  case RD::RK_IntegerOr:
    return createSimpleTargetReduction(B, TTI, Instruction::Or, Src, Flags);
  case RD::RK_IntegerXor:
    return createSimpleTargetReduction(B, TTI, Instruction::Xor, Src, Flags);
  case RD::RK_IntegerMinMax: {
    RD::MinMaxRecurrenceKind MMKind = Desc.getMinMaxRecurrenceKind();
    Flags.IsMaxOp = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_UIntMax);
    Flags.IsSigned = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_SIntMin);
    return createSimpleTargetReduction(B, TTI, Instruction::ICmp, Src, Flags);
  }
  case RD::RK_FloatMinMax: {
    Flags.IsMaxOp = Desc.getMinMaxRecurrenceKind() == RD::MRK_FloatMax;
    return createSimpleTargetReduction(B, TTI, Instruction::FCmp, Src, Flags);
  }
  default:
    llvm_unreachable("Unhandled RecKind");
  }
}

/// Conservatively propagate IR flags: copy the flags of OpValue (or VL[0] if
/// OpValue is null) onto I, then intersect them with the flags of each
/// instruction in VL (all of them when OpValue is null, otherwise only those
/// whose opcode matches OpValue's).
void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) {
  auto *VecOp = dyn_cast<Instruction>(I);
  if (!VecOp)
    return;
  auto *Intersection = (OpValue == nullptr) ? dyn_cast<Instruction>(VL[0])
                                            : dyn_cast<Instruction>(OpValue);
  if (!Intersection)
    return;
  const unsigned Opcode = Intersection->getOpcode();
  VecOp->copyIRFlags(Intersection);
  for (auto *V : VL) {
    auto *Instr = dyn_cast<Instruction>(V);
    if (!Instr)
      continue;
    if (OpValue == nullptr || Opcode == Instr->getOpcode())
      VecOp->andIRFlags(V);
  }
}
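// Usage sketch (hypothetical values): if VL holds four scalar 'add nsw'
// instructions being vectorized into I, the vector add keeps 'nsw' only if
// all four scalars carry it, because andIRFlags intersects the flag sets.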