1 //===-- LoopUtils.cpp - Loop Utility functions -------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines common loop utility functions. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Transforms/Utils/LoopUtils.h" 15 #include "llvm/ADT/ScopeExit.h" 16 #include "llvm/Analysis/AliasAnalysis.h" 17 #include "llvm/Analysis/BasicAliasAnalysis.h" 18 #include "llvm/Analysis/GlobalsModRef.h" 19 #include "llvm/Analysis/LoopInfo.h" 20 #include "llvm/Analysis/LoopPass.h" 21 #include "llvm/Analysis/ScalarEvolution.h" 22 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" 23 #include "llvm/Analysis/ScalarEvolutionExpander.h" 24 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 25 #include "llvm/Analysis/TargetTransformInfo.h" 26 #include "llvm/Analysis/ValueTracking.h" 27 #include "llvm/IR/Dominators.h" 28 #include "llvm/IR/Instructions.h" 29 #include "llvm/IR/Module.h" 30 #include "llvm/IR/PatternMatch.h" 31 #include "llvm/IR/ValueHandle.h" 32 #include "llvm/Pass.h" 33 #include "llvm/Support/Debug.h" 34 #include "llvm/Support/KnownBits.h" 35 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 36 37 using namespace llvm; 38 using namespace llvm::PatternMatch; 39 40 #define DEBUG_TYPE "loop-utils" 41 42 bool RecurrenceDescriptor::areAllUsesIn(Instruction *I, 43 SmallPtrSetImpl<Instruction *> &Set) { 44 for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use) 45 if (!Set.count(dyn_cast<Instruction>(*Use))) 46 return false; 47 return true; 48 } 49 50 bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurrenceKind Kind) { 51 switch (Kind) { 52 default: 53 break; 54 case RK_IntegerAdd: 55 case RK_IntegerMult: 56 case RK_IntegerOr: 57 case 
RK_IntegerAnd: 58 case RK_IntegerXor: 59 case RK_IntegerMinMax: 60 return true; 61 } 62 return false; 63 } 64 65 bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurrenceKind Kind) { 66 return (Kind != RK_NoRecurrence) && !isIntegerRecurrenceKind(Kind); 67 } 68 69 bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurrenceKind Kind) { 70 switch (Kind) { 71 default: 72 break; 73 case RK_IntegerAdd: 74 case RK_IntegerMult: 75 case RK_FloatAdd: 76 case RK_FloatMult: 77 return true; 78 } 79 return false; 80 } 81 82 /// Determines if Phi may have been type-promoted. If Phi has a single user 83 /// that ANDs the Phi with a type mask, return the user. RT is updated to 84 /// account for the narrower bit width represented by the mask, and the AND 85 /// instruction is added to CI. 86 static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT, 87 SmallPtrSetImpl<Instruction *> &Visited, 88 SmallPtrSetImpl<Instruction *> &CI) { 89 if (!Phi->hasOneUse()) 90 return Phi; 91 92 const APInt *M = nullptr; 93 Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser()); 94 95 // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT 96 // with a new integer type of the corresponding bit width. 97 if (match(J, m_c_And(m_Instruction(I), m_APInt(M)))) { 98 int32_t Bits = (*M + 1).exactLogBase2(); 99 if (Bits > 0) { 100 RT = IntegerType::get(Phi->getContext(), Bits); 101 Visited.insert(Phi); 102 CI.insert(J); 103 return J; 104 } 105 } 106 return Phi; 107 } 108 109 /// Compute the minimal bit width needed to represent a reduction whose exit 110 /// instruction is given by Exit. 
static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit,
                                                     DemandedBits *DB,
                                                     AssumptionCache *AC,
                                                     DominatorTree *DT) {
  bool IsSigned = false;
  const DataLayout &DL = Exit->getModule()->getDataLayout();
  uint64_t MaxBitWidth = DL.getTypeSizeInBits(Exit->getType());

  if (DB) {
    // Use the demanded bits analysis to determine the bits that are live out
    // of the exit instruction, rounding up to the nearest power of two. If the
    // use of demanded bits results in a smaller bit width, we know the value
    // must be positive (i.e., IsSigned = false), because if this were not the
    // case, the sign bit would have been demanded.
    auto Mask = DB->getDemandedBits(Exit);
    MaxBitWidth = Mask.getBitWidth() - Mask.countLeadingZeros();
  }

  if (MaxBitWidth == DL.getTypeSizeInBits(Exit->getType()) && AC && DT) {
    // If demanded bits wasn't able to limit the bit width, we can try to use
    // value tracking instead. This can be the case, for example, if the value
    // may be negative.
    auto NumSignBits = ComputeNumSignBits(Exit, DL, 0, AC, nullptr, DT);
    auto NumTypeBits = DL.getTypeSizeInBits(Exit->getType());
    MaxBitWidth = NumTypeBits - NumSignBits;
    KnownBits Bits = computeKnownBits(Exit, DL);
    if (!Bits.isNonNegative()) {
      // If the value is not known to be non-negative, we set IsSigned to true,
      // meaning that we will use sext instructions instead of zext
      // instructions to restore the original type.
      IsSigned = true;
      if (!Bits.isNegative())
        // If the value is not known to be negative, we don't know what the
        // upper bit is, and therefore, we don't know what kind of extend we
        // will need. In this case, just increase the bit width by one bit and
        // use sext.
        ++MaxBitWidth;
    }
  }
  // Vector element types are rounded up to the next power-of-two width.
  if (!isPowerOf2_64(MaxBitWidth))
    MaxBitWidth = NextPowerOf2(MaxBitWidth);

  return std::make_pair(Type::getIntNTy(Exit->getContext(), MaxBitWidth),
                        IsSigned);
}

/// Collect cast instructions that can be ignored in the vectorizer's cost
/// model, given a reduction exit value and the minimal type in which the
/// reduction can be represented.
static void collectCastsToIgnore(Loop *TheLoop, Instruction *Exit,
                                 Type *RecurrenceType,
                                 SmallPtrSetImpl<Instruction *> &Casts) {

  // Depth-first walk backwards from the exit value through loop-varying
  // operands, recording casts whose source type already equals the narrow
  // recurrence type.
  SmallVector<Instruction *, 8> Worklist;
  SmallPtrSet<Instruction *, 8> Visited;
  Worklist.push_back(Exit);

  while (!Worklist.empty()) {
    Instruction *Val = Worklist.pop_back_val();
    Visited.insert(Val);
    if (auto *Cast = dyn_cast<CastInst>(Val))
      if (Cast->getSrcTy() == RecurrenceType) {
        // If the source type of a cast instruction is equal to the recurrence
        // type, it will be eliminated, and should be ignored in the vectorizer
        // cost model.
        Casts.insert(Cast);
        continue;
      }

    // Add all operands to the work list if they are loop-varying values that
    // we haven't yet visited.
    for (Value *O : cast<User>(Val)->operands())
      if (auto *I = dyn_cast<Instruction>(O))
        if (TheLoop->contains(I) && !Visited.count(I))
          Worklist.push_back(I);
  }
}

/// Checks whether \p Phi forms a reduction of kind \p Kind over \p TheLoop.
/// On success, fills in \p RedDes with a description of the reduction and
/// returns true. DB/AC/DT, when provided, are used to compute a possibly
/// narrower recurrence type for type-promoted reductions.
bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
                                           Loop *TheLoop, bool HasFunNoNaNAttr,
                                           RecurrenceDescriptor &RedDes,
                                           DemandedBits *DB,
                                           AssumptionCache *AC,
                                           DominatorTree *DT) {
  if (Phi->getNumIncomingValues() != 2)
    return false;

  // Reduction variables are only found in the loop header block.
  if (Phi->getParent() != TheLoop->getHeader())
    return false;

  // Obtain the reduction start value from the value that comes from the loop
  // preheader.
  Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());

  // ExitInstruction is the single value which is used outside the loop.
  // We only allow for a single reduction value to be used outside the loop.
  // This includes users of the reduction, variables (which form a cycle
  // which ends in the phi node).
  Instruction *ExitInstruction = nullptr;
  // Indicates that we found a reduction operation in our scan.
  bool FoundReduxOp = false;

  // We start with the PHI node and scan for all of the users of this
  // instruction. All users must be instructions that can be used as reduction
  // variables (such as ADD). We must have a single out-of-block user. The cycle
  // must include the original PHI.
  bool FoundStartPHI = false;

  // To recognize min/max patterns formed by a icmp select sequence, we store
  // the number of instruction we saw from the recognized min/max pattern,
  // to make sure we only see exactly the two instructions.
  unsigned NumCmpSelectPatternInst = 0;
  InstDesc ReduxDesc(false, nullptr);

  // Data used for determining if the recurrence has been type-promoted.
  Type *RecurrenceType = Phi->getType();
  SmallPtrSet<Instruction *, 4> CastInsts;
  Instruction *Start = Phi;
  bool IsSigned = false;

  SmallPtrSet<Instruction *, 8> VisitedInsts;
  SmallVector<Instruction *, 8> Worklist;

  // Return early if the recurrence kind does not match the type of Phi. If the
  // recurrence kind is arithmetic, we attempt to look through AND operations
  // resulting from the type promotion performed by InstCombine. Vector
  // operations are not limited to the legal integer widths, so we may be able
  // to evaluate the reduction in the narrower width.
  if (RecurrenceType->isFloatingPointTy()) {
    if (!isFloatingPointRecurrenceKind(Kind))
      return false;
  } else {
    if (!isIntegerRecurrenceKind(Kind))
      return false;
    if (isArithmeticRecurrenceKind(Kind))
      Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);
  }

  Worklist.push_back(Start);
  VisitedInsts.insert(Start);

  // A value in the reduction can be used:
  //  - By the reduction:
  //      - Reduction operation:
  //        - One use of reduction value (safe).
  //        - Multiple use of reduction value (not safe).
  //      - PHI:
  //        - All uses of the PHI must be the reduction (safe).
  //        - Otherwise, not safe.
  //  - By instructions outside of the loop (safe).
  //      * One value may have several outside users, but all outside
  //        uses must be of the same value.
  //  - By an instruction that is not part of the reduction (not safe).
  //    This is either:
  //      * An instruction type other than PHI or the reduction operation.
  //      * A PHI in the header other than the initial PHI.
  while (!Worklist.empty()) {
    Instruction *Cur = Worklist.back();
    Worklist.pop_back();

    // No Users.
    // If the instruction has no users then this is a broken chain and can't be
    // a reduction variable.
    if (Cur->use_empty())
      return false;

    bool IsAPhi = isa<PHINode>(Cur);

    // A header PHI use other than the original PHI.
    if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent())
      return false;

    // Reductions of instructions such as Div, and Sub is only possible if the
    // LHS is the reduction variable.
    if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) &&
        !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) &&
        !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))
      return false;

    // Any reduction instruction must be of one of the allowed kinds. We ignore
    // the starting value (the Phi or an AND instruction if the Phi has been
    // type-promoted).
    if (Cur != Start) {
      ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr);
      if (!ReduxDesc.isRecurrence())
        return false;
    }

    // A reduction operation must only have one use of the reduction value.
    if (!IsAPhi && Kind != RK_IntegerMinMax && Kind != RK_FloatMinMax &&
        hasMultipleUsesOf(Cur, VisitedInsts))
      return false;

    // All inputs to a PHI node must be a reduction value.
    if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts))
      return false;

    if (Kind == RK_IntegerMinMax &&
        (isa<ICmpInst>(Cur) || isa<SelectInst>(Cur)))
      ++NumCmpSelectPatternInst;
    if (Kind == RK_FloatMinMax && (isa<FCmpInst>(Cur) || isa<SelectInst>(Cur)))
      ++NumCmpSelectPatternInst;

    // Check whether we found a reduction operator.
    FoundReduxOp |= !IsAPhi && Cur != Start;

    // Process users of current instruction. Push non-PHI nodes after PHI nodes
    // onto the stack. This way we are going to have seen all inputs to PHI
    // nodes once we get to them.
    SmallVector<Instruction *, 8> NonPHIs;
    SmallVector<Instruction *, 8> PHIs;
    for (User *U : Cur->users()) {
      Instruction *UI = cast<Instruction>(U);

      // Check if we found the exit user.
      BasicBlock *Parent = UI->getParent();
      if (!TheLoop->contains(Parent)) {
        // If we already know this instruction is used externally, move on to
        // the next user.
        if (ExitInstruction == Cur)
          continue;

        // Exit if you find multiple values used outside or if the header phi
        // node is being used. In this case the user uses the value of the
        // previous iteration, in which case we would lose "VF-1" iterations of
        // the reduction operation if we vectorize.
        if (ExitInstruction != nullptr || Cur == Phi)
          return false;

        // The instruction used by an outside user must be the last instruction
        // before we feed back to the reduction phi. Otherwise, we lose VF-1
        // operations on the value.
        if (!is_contained(Phi->operands(), Cur))
          return false;

        ExitInstruction = Cur;
        continue;
      }

      // Process instructions only once (termination). Each reduction cycle
      // value must only be used once, except by phi nodes and min/max
      // reductions which are represented as a cmp followed by a select.
      InstDesc IgnoredVal(false, nullptr);
      if (VisitedInsts.insert(UI).second) {
        if (isa<PHINode>(UI))
          PHIs.push_back(UI);
        else
          NonPHIs.push_back(UI);
      } else if (!isa<PHINode>(UI) &&
                 ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) &&
                   !isa<SelectInst>(UI)) ||
                  !isMinMaxSelectCmpPattern(UI, IgnoredVal).isRecurrence()))
        return false;

      // Remember that we completed the cycle.
      if (UI == Phi)
        FoundStartPHI = true;
    }
    Worklist.append(PHIs.begin(), PHIs.end());
    Worklist.append(NonPHIs.begin(), NonPHIs.end());
  }

  // This means we have seen one but not the other instruction of the
  // pattern or more than just a select and cmp.
  if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) &&
      NumCmpSelectPatternInst != 2)
    return false;

  if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
    return false;

  if (Start != Phi) {
    // If the starting value is not the same as the phi node, we speculatively
    // looked through an 'and' instruction when evaluating a potential
    // arithmetic reduction to determine if it may have been type-promoted.
    //
    // We now compute the minimal bit width that is required to represent the
    // reduction. If this is the same width that was indicated by the 'and', we
    // can represent the reduction in the smaller type. The 'and' instruction
    // will be eliminated since it will essentially be a cast instruction that
    // can be ignored in the cost model. If we compute a different type than we
    // did when evaluating the 'and', the 'and' will not be eliminated, and we
    // will end up with different kinds of operations in the recurrence
    // expression (e.g., RK_IntegerAND, RK_IntegerADD). We give up if this is
    // the case.
    //
    // The vectorizer relies on InstCombine to perform the actual
    // type-shrinking. It does this by inserting instructions to truncate the
    // exit value of the reduction to the width indicated by RecurrenceType and
    // then extend this value back to the original width. If IsSigned is false,
    // a 'zext' instruction will be generated; otherwise, a 'sext' will be
    // used.
    //
    // TODO: We should not rely on InstCombine to rewrite the reduction in the
    //       smaller type. We should just generate a correctly typed expression
    //       to begin with.
    Type *ComputedType;
    std::tie(ComputedType, IsSigned) =
        computeRecurrenceType(ExitInstruction, DB, AC, DT);
    if (ComputedType != RecurrenceType)
      return false;

    // The recurrence expression will be represented in a narrower type. If
    // there are any cast instructions that will be unnecessary, collect them
    // in CastInsts. Note that the 'and' instruction was already included in
    // this list.
    //
    // TODO: A better way to represent this may be to tag in some way all the
    //       instructions that are a part of the reduction. The vectorizer cost
    //       model could then apply the recurrence type to these instructions,
    //       without needing a white list of instructions to ignore.
    collectCastsToIgnore(TheLoop, ExitInstruction, RecurrenceType, CastInsts);
  }

  // We found a reduction var if we have reached the original phi node and we
  // only have a single instruction with out-of-loop users.

  // The ExitInstruction(Instruction which is allowed to have out-of-loop users)
  // is saved as part of the RecurrenceDescriptor.

  // Save the description of this reduction variable.
  RecurrenceDescriptor RD(
      RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(),
      ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts);
  RedDes = RD;

  return true;
}

/// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction
/// pattern corresponding to a min(X, Y) or max(X, Y).
RecurrenceDescriptor::InstDesc
RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev) {

  assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) &&
         "Expect a select instruction");
  Instruction *Cmp = nullptr;
  SelectInst *Select = nullptr;

  // We must handle the select(cmp()) as a single instruction. Advance to the
  // select.
  if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) {
    if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->user_begin())))
      return InstDesc(false, I);
    return InstDesc(Select, Prev.getMinMaxKind());
  }

  // Only handle single use cases for now.
  if (!(Select = dyn_cast<SelectInst>(I)))
    return InstDesc(false, I);
  if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) &&
      !(Cmp = dyn_cast<FCmpInst>(I->getOperand(0))))
    return InstDesc(false, I);
  if (!Cmp->hasOneUse())
    return InstDesc(false, I);

  Value *CmpLeft;
  Value *CmpRight;

  // Look for a min/max pattern.
  if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_UIntMin);
  else if (m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_UIntMax);
  else if (m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_SIntMax);
  else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_SIntMin);
  else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_FloatMin);
  else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_FloatMax);
  else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_FloatMin);
  else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_FloatMax);

  return InstDesc(false, I);
}

/// Returns a descriptor saying whether \p I is an instruction of recurrence
/// kind \p Kind, threading min/max state through \p Prev. Sub/FSub are
/// accepted as forms of Add/FAdd respectively.
RecurrenceDescriptor::InstDesc
RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
                                        InstDesc &Prev, bool HasFunNoNaNAttr) {
  bool FP = I->getType()->isFloatingPointTy();
  Instruction *UAI = Prev.getUnsafeAlgebraInst();
  if (!UAI && FP && !I->isFast())
    UAI = I; // Found an unsafe (unvectorizable) algebra instruction.

  switch (I->getOpcode()) {
  default:
    return InstDesc(false, I);
  case Instruction::PHI:
    return InstDesc(I, Prev.getMinMaxKind(), Prev.getUnsafeAlgebraInst());
  case Instruction::Sub:
  case Instruction::Add:
    return InstDesc(Kind == RK_IntegerAdd, I);
  case Instruction::Mul:
    return InstDesc(Kind == RK_IntegerMult, I);
  case Instruction::And:
    return InstDesc(Kind == RK_IntegerAnd, I);
  case Instruction::Or:
    return InstDesc(Kind == RK_IntegerOr, I);
  case Instruction::Xor:
    return InstDesc(Kind == RK_IntegerXor, I);
  case Instruction::FMul:
    return InstDesc(Kind == RK_FloatMult, I, UAI);
  case Instruction::FSub:
  case Instruction::FAdd:
    return InstDesc(Kind == RK_FloatAdd, I, UAI);
  case Instruction::FCmp:
  case Instruction::ICmp:
  case Instruction::Select:
    // Float min/max is only allowed when NaNs are known absent via the
    // function attribute.
    if (Kind != RK_IntegerMinMax &&
        (!HasFunNoNaNAttr || Kind != RK_FloatMinMax))
      return InstDesc(false, I);
    return isMinMaxSelectCmpPattern(I, Prev);
  }
}

/// Returns true if \p I has more than one operand contained in \p Insts.
bool RecurrenceDescriptor::hasMultipleUsesOf(
    Instruction *I, SmallPtrSetImpl<Instruction *> &Insts) {
  unsigned NumUses = 0;
  for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E;
       ++Use) {
    if (Insts.count(dyn_cast<Instruction>(*Use)))
      ++NumUses;
    if (NumUses > 1)
      return true;
  }

  return false;
}

/// Tries each known recurrence kind in turn; on the first match, \p RedDes
/// describes the reduction and true is returned.
bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
                                          RecurrenceDescriptor &RedDes,
                                          DemandedBits *DB, AssumptionCache *AC,
                                          DominatorTree *DT) {

  BasicBlock *Header = TheLoop->getHeader();
  Function &F = *Header->getParent();
  bool HasFunNoNaNAttr =
      F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";

  if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
                      AC, DT)) {
    DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes, DB,
                      AC, DT)) {
    DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes, DB,
                      AC, DT)) {
    DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
                      AC, DT)) {
    DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes, DB,
                      AC, DT)) {
    DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr, RedDes,
                      DB, AC, DT)) {
    DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes, DB,
                      AC, DT)) {
    DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
                      AC, DT)) {
    DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes, DB,
                      AC, DT)) {
    DEBUG(dbgs() << "Found an float MINMAX reduction PHI." << *Phi << "\n");
    return true;
  }
  // Not a reduction of known type.
  return false;
}

/// Returns true if \p Phi is a first-order recurrence: a phi whose latch
/// value is defined inside the loop (and is not itself a phi), with every
/// user dominated by that previous value. A single cast user may instead be
/// sunk after the previous value via \p SinkAfter.
bool RecurrenceDescriptor::isFirstOrderRecurrence(
    PHINode *Phi, Loop *TheLoop,
    DenseMap<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT) {

  // Ensure the phi node is in the loop header and has two incoming values.
  if (Phi->getParent() != TheLoop->getHeader() ||
      Phi->getNumIncomingValues() != 2)
    return false;

  // Ensure the loop has a preheader and a single latch block. The loop
  // vectorizer will need the latch to set up the next iteration of the loop.
  auto *Preheader = TheLoop->getLoopPreheader();
  auto *Latch = TheLoop->getLoopLatch();
  if (!Preheader || !Latch)
    return false;

  // Ensure the phi node's incoming blocks are the loop preheader and latch.
  if (Phi->getBasicBlockIndex(Preheader) < 0 ||
      Phi->getBasicBlockIndex(Latch) < 0)
    return false;

  // Get the previous value. The previous value comes from the latch edge while
  // the initial value comes from the preheader edge.
  auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch));
  if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous) ||
      SinkAfter.count(Previous)) // Cannot rely on dominance due to motion.
    return false;

  // Ensure every user of the phi node is dominated by the previous value.
  // The dominance requirement ensures the loop vectorizer will not need to
  // vectorize the initial value prior to the first iteration of the loop.
  // TODO: Consider extending this sinking to handle other kinds of instructions
  // and expressions, beyond sinking a single cast past Previous.
  if (Phi->hasOneUse()) {
    auto *I = Phi->user_back();
    if (I->isCast() && (I->getParent() == Phi->getParent()) && I->hasOneUse() &&
        DT->dominates(Previous, I->user_back())) {
      if (!DT->dominates(Previous, I)) // Otherwise we're good w/o sinking.
        SinkAfter[I] = Previous;
      return true;
    }
  }

  for (User *U : Phi->users())
    if (auto *I = dyn_cast<Instruction>(U)) {
      if (!DT->dominates(Previous, I))
        return false;
    }

  return true;
}

/// This function returns the identity element (or neutral element) for
/// the operation K.
Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurrenceKind K,
                                                      Type *Tp) {
  switch (K) {
  case RK_IntegerXor:
  case RK_IntegerAdd:
  case RK_IntegerOr:
    // Adding, Xoring, Oring zero to a number does not change it.
    return ConstantInt::get(Tp, 0);
  case RK_IntegerMult:
    // Multiplying a number by 1 does not change it.
    return ConstantInt::get(Tp, 1);
  case RK_IntegerAnd:
    // AND-ing a number with an all-1 value does not change it.
    return ConstantInt::get(Tp, -1, true);
  case RK_FloatMult:
    // Multiplying a number by 1 does not change it.
    return ConstantFP::get(Tp, 1.0L);
  case RK_FloatAdd:
    // Adding zero to a number does not change it.
    return ConstantFP::get(Tp, 0.0L);
  default:
    llvm_unreachable("Unknown recurrence kind");
  }
}

/// This function translates the recurrence kind to an LLVM binary operator.
/// Min/max kinds map to their compare opcode (ICmp/FCmp).
unsigned RecurrenceDescriptor::getRecurrenceBinOp(RecurrenceKind Kind) {
  switch (Kind) {
  case RK_IntegerAdd:
    return Instruction::Add;
  case RK_IntegerMult:
    return Instruction::Mul;
  case RK_IntegerOr:
    return Instruction::Or;
  case RK_IntegerAnd:
    return Instruction::And;
  case RK_IntegerXor:
    return Instruction::Xor;
  case RK_FloatMult:
    return Instruction::FMul;
  case RK_FloatAdd:
    return Instruction::FAdd;
  case RK_IntegerMinMax:
    return Instruction::ICmp;
  case RK_FloatMinMax:
    return Instruction::FCmp;
  default:
    llvm_unreachable("Unknown recurrence operation");
  }
}

/// Emits a cmp+select pair computing min/max(Left, Right) for the given
/// min/max recurrence kind, using \p Builder's insertion point.
Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder,
                                            MinMaxRecurrenceKind RK,
                                            Value *Left, Value *Right) {
  CmpInst::Predicate P = CmpInst::ICMP_NE;
  switch (RK) {
  default:
    llvm_unreachable("Unknown min/max recurrence kind");
  case MRK_UIntMin:
    P = CmpInst::ICMP_ULT;
    break;
  case MRK_UIntMax:
    P = CmpInst::ICMP_UGT;
    break;
  case MRK_SIntMin:
    P = CmpInst::ICMP_SLT;
    break;
  case MRK_SIntMax:
    P = CmpInst::ICMP_SGT;
    break;
  case MRK_FloatMin:
    P = CmpInst::FCMP_OLT;
    break;
  case MRK_FloatMax:
    P = CmpInst::FCMP_OGT;
    break;
  }

  // We only match FP sequences that are 'fast', so we can unconditionally
  // set it on any generated instructions.
  IRBuilder<>::FastMathFlagGuard FMFG(Builder);
  FastMathFlags FMF;
  FMF.setFast();
  Builder.setFastMathFlags(FMF);

  Value *Cmp;
  if (RK == MRK_FloatMin || RK == MRK_FloatMax)
    Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp");
  else
    Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp");

  Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
  return Select;
}

/// Constructs an induction descriptor and validates (via asserts) that the
/// start value, step, and optional binary operator are consistent with the
/// induction kind. \p Casts, if provided, lists casts made redundant by the
/// induction.
InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
                                         const SCEV *Step, BinaryOperator *BOp,
                                         SmallVectorImpl<Instruction *> *Casts)
    : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) {
  assert(IK != IK_NoInduction && "Not an induction");

  // Start value type should match the induction kind and the value
  // itself should not be null.
  assert(StartValue && "StartValue is null");
  assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
         "StartValue is not a pointer for pointer induction");
  assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
         "StartValue is not an integer for integer induction");

  // Check the Step Value. It should be a non-zero integer value.
  assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) &&
         "Step value is zero");

  assert((IK != IK_PtrInduction || getConstIntStepValue()) &&
         "Step value should be constant for pointer induction");
  assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) &&
         "StepValue is not an integer");

  assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) &&
         "StepValue is not FP for FpInduction");
  assert((IK != IK_FpInduction || (InductionBinOp &&
          (InductionBinOp->getOpcode() == Instruction::FAdd ||
           InductionBinOp->getOpcode() == Instruction::FSub))) &&
         "Binary opcode should be specified for FP induction");

  if (Casts) {
    for (auto &Inst : *Casts) {
      RedundantCasts.push_back(Inst);
    }
  }
}

/// Returns +1 for a unit step, -1 for a minus-one step, and 0 for any other
/// (or non-constant) step.
int InductionDescriptor::getConsecutiveDirection() const {
  ConstantInt *ConstStep = getConstIntStepValue();
  if (ConstStep && (ConstStep->isOne() || ConstStep->isMinusOne()))
    return ConstStep->getSExtValue();
  return 0;
}

/// Returns the step as a ConstantInt if the SCEV step is a constant integer,
/// otherwise nullptr.
ConstantInt *InductionDescriptor::getConstIntStepValue() const {
  if (isa<SCEVConstant>(Step))
    return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue());
  return nullptr;
}

/// Materializes the value of this induction at iteration \p Index, i.e.
/// Start + Index * Step (or the FP/pointer equivalent), emitting code at
/// \p B's insertion point.
Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index,
                                      ScalarEvolution *SE,
                                      const DataLayout& DL) const {

  SCEVExpander Exp(*SE, DL, "induction");
  assert(Index->getType() == Step->getType() &&
         "Index type does not match StepValue type");
  switch (IK) {
  case IK_IntInduction: {
    assert(Index->getType() == StartValue->getType() &&
           "Index type does not match StartValue type");

    // FIXME: Theoretically, we can call getAddExpr() of ScalarEvolution
    // and calculate (Start + Index * Step) for all cases, without
    // special handling for "isOne" and "isMinusOne".
    // But in practice the resulting code gets worse. We mix SCEV
    // expressions and ADD/SUB operations and receive redundant
    // intermediate values being calculated in different ways and
    // Instcombine is unable to reduce them all.

    if (getConstIntStepValue() &&
        getConstIntStepValue()->isMinusOne())
      return B.CreateSub(StartValue, Index);
    if (getConstIntStepValue() &&
        getConstIntStepValue()->isOne())
      return B.CreateAdd(StartValue, Index);
    const SCEV *S = SE->getAddExpr(SE->getSCEV(StartValue),
                                   SE->getMulExpr(Step, SE->getSCEV(Index)));
    return Exp.expandCodeFor(S, StartValue->getType(), &*B.GetInsertPoint());
  }
  case IK_PtrInduction: {
    assert(isa<SCEVConstant>(Step) &&
           "Expected constant step for pointer induction");
    const SCEV *S = SE->getMulExpr(SE->getSCEV(Index), Step);
    Index = Exp.expandCodeFor(S, Index->getType(), &*B.GetInsertPoint());
    return B.CreateGEP(nullptr, StartValue, Index);
  }
  case IK_FpInduction: {
    assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value");
    assert(InductionBinOp &&
           (InductionBinOp->getOpcode() == Instruction::FAdd ||
            InductionBinOp->getOpcode() == Instruction::FSub) &&
           "Original bin op should be defined for FP induction");

    Value *StepValue = cast<SCEVUnknown>(Step)->getValue();

    // Floating point operations had to be 'fast' to enable the induction.
    FastMathFlags Flags;
    Flags.setFast();

    Value *MulExp = B.CreateFMul(StepValue, Index);
    if (isa<Instruction>(MulExp))
      // We have to check, the MulExp may be a constant.
      cast<Instruction>(MulExp)->setFastMathFlags(Flags);

    Value *BOp = B.CreateBinOp(InductionBinOp->getOpcode(), StartValue,
                               MulExp, "induction");
    if (isa<Instruction>(BOp))
      cast<Instruction>(BOp)->setFastMathFlags(Flags);

    return BOp;
  }
  case IK_NoInduction:
    return nullptr;
  }
  llvm_unreachable("invalid enum");
}

bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop,
                                           ScalarEvolution *SE,
                                           InductionDescriptor &D) {

  // Here we only handle FP induction variables.
  assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type");

  if (TheLoop->getHeader() != Phi->getParent())
    return false;

  // The loop may have multiple entrances or multiple exits; we can analyze
  // this phi if it has a unique entry value and a unique backedge value.
  if (Phi->getNumIncomingValues() != 2)
    return false;
  Value *BEValue = nullptr, *StartValue = nullptr;
  if (TheLoop->contains(Phi->getIncomingBlock(0))) {
    BEValue = Phi->getIncomingValue(0);
    StartValue = Phi->getIncomingValue(1);
  } else {
    assert(TheLoop->contains(Phi->getIncomingBlock(1)) &&
           "Unexpected Phi node in the loop");
    BEValue = Phi->getIncomingValue(1);
    StartValue = Phi->getIncomingValue(0);
  }

  BinaryOperator *BOp = dyn_cast<BinaryOperator>(BEValue);
  if (!BOp)
    return false;

  Value *Addend = nullptr;
  if (BOp->getOpcode() == Instruction::FAdd) {
    if (BOp->getOperand(0) == Phi)
      Addend = BOp->getOperand(1);
    else if (BOp->getOperand(1) == Phi)
      Addend = BOp->getOperand(0);
  } else if (BOp->getOpcode() == Instruction::FSub)
    if (BOp->getOperand(0) == Phi)
      Addend = BOp->getOperand(1);

  if (!Addend)
    return false;

  // The addend should be loop invariant
  if (auto *I = dyn_cast<Instruction>(Addend))
    if (TheLoop->contains(I))
      return false;

  // FP Step has unknown SCEV
  const SCEV *Step =
SE->getUnknown(Addend); 919 D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp); 920 return true; 921 } 922 923 /// This function is called when we suspect that the update-chain of a phi node 924 /// (whose symbolic SCEV expression sin \p PhiScev) contains redundant casts, 925 /// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime 926 /// predicate P under which the SCEV expression for the phi can be the 927 /// AddRecurrence \p AR; See createAddRecFromPHIWithCast). We want to find the 928 /// cast instructions that are involved in the update-chain of this induction. 929 /// A caller that adds the required runtime predicate can be free to drop these 930 /// cast instructions, and compute the phi using \p AR (instead of some scev 931 /// expression with casts). 932 /// 933 /// For example, without a predicate the scev expression can take the following 934 /// form: 935 /// (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy) 936 /// 937 /// It corresponds to the following IR sequence: 938 /// %for.body: 939 /// %x = phi i64 [ 0, %ph ], [ %add, %for.body ] 940 /// %casted_phi = "ExtTrunc i64 %x" 941 /// %add = add i64 %casted_phi, %step 942 /// 943 /// where %x is given in \p PN, 944 /// PSE.getSCEV(%x) is equal to PSE.getSCEV(%casted_phi) under a predicate, 945 /// and the IR sequence that "ExtTrunc i64 %x" represents can take one of 946 /// several forms, for example, such as: 947 /// ExtTrunc1: %casted_phi = and %x, 2^n-1 948 /// or: 949 /// ExtTrunc2: %t = shl %x, m 950 /// %casted_phi = ashr %t, m 951 /// 952 /// If we are able to find such sequence, we return the instructions 953 /// we found, namely %casted_phi and the instructions on its use-def chain up 954 /// to the phi (not including the phi). 
static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE,
                                    const SCEVUnknown *PhiScev,
                                    const SCEVAddRecExpr *AR,
                                    SmallVectorImpl<Instruction *> &CastInsts) {

  assert(CastInsts.empty() && "CastInsts is expected to be empty.");
  auto *PN = cast<PHINode>(PhiScev->getValue());
  assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression");
  const Loop *L = AR->getLoop();

  // Find any cast instructions that participate in the def-use chain of
  // PhiScev in the loop.
  // FORNOW/TODO: We currently expect the def-use chain to include only
  // two-operand instructions, where one of the operands is an invariant.
  // createAddRecFromPHIWithCasts() currently does not support anything more
  // involved than that, so we keep the search simple. This can be
  // extended/generalized as needed.

  // Returns the operand of \p Val that feeds the recurrence: the
  // non-loop-invariant operand of a two-operand instruction, or nullptr if
  // \p Val does not match that shape.
  auto getDef = [&](const Value *Val) -> Value * {
    const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val);
    if (!BinOp)
      return nullptr;
    Value *Op0 = BinOp->getOperand(0);
    Value *Op1 = BinOp->getOperand(1);
    Value *Def = nullptr;
    if (L->isLoopInvariant(Op0))
      Def = Op1;
    else if (L->isLoopInvariant(Op1))
      Def = Op0;
    return Def;
  };

  // Look for the instruction that defines the induction via the
  // loop backedge.
  BasicBlock *Latch = L->getLoopLatch();
  if (!Latch)
    return false;
  Value *Val = PN->getIncomingValueForBlock(Latch);
  if (!Val)
    return false;

  // Follow the def-use chain until the induction phi is reached.
  // If on the way we encounter a Value that has the same SCEV Expr as the
  // phi node, we can consider the instructions we visit from that point
  // as part of the cast-sequence that can be ignored.
  bool InCastSequence = false;
  auto *Inst = dyn_cast<Instruction>(Val);
  while (Val != PN) {
    // If we encountered a phi node other than PN, or if we left the loop,
    // we bail out.
    if (!Inst || !L->contains(Inst)) {
      return false;
    }
    auto *AddRec = dyn_cast<SCEVAddRecExpr>(PSE.getSCEV(Val));
    if (AddRec && PSE.areAddRecsEqualWithPreds(AddRec, AR))
      InCastSequence = true;
    if (InCastSequence) {
      // Only the last instruction in the cast sequence is expected to have
      // uses outside the induction def-use chain.
      if (!CastInsts.empty())
        if (!Inst->hasOneUse())
          return false;
      CastInsts.push_back(Inst);
    }
    Val = getDef(Val);
    if (!Val)
      return false;
    Inst = dyn_cast<Instruction>(Val);
  }

  return InCastSequence;
}

/// Recognize an integer/pointer/FP induction PHI using predicated SCEV,
/// optionally allowing new runtime predicates to be added (\p Assume).
/// Populates \p D on success.
bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
                                         PredicatedScalarEvolution &PSE,
                                         InductionDescriptor &D,
                                         bool Assume) {
  Type *PhiTy = Phi->getType();

  // Handle integer and pointer inductions variables.
  // Now we handle also FP induction but not trying to make a
  // recurrent expression from the PHI node in-place.

  if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() &&
      !PhiTy->isFloatTy() && !PhiTy->isDoubleTy() && !PhiTy->isHalfTy())
    return false;

  if (PhiTy->isFloatingPointTy())
    return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D);

  const SCEV *PhiScev = PSE.getSCEV(Phi);
  const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);

  // We need this expression to be an AddRecExpr.
  if (Assume && !AR)
    AR = PSE.getAsAddRec(Phi);

  if (!AR) {
    DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
    return false;
  }

  // Record any Cast instructions that participate in the induction update
  const auto *SymbolicPhi = dyn_cast<SCEVUnknown>(PhiScev);
  // If we started from an UnknownSCEV, and managed to build an addRecurrence
  // only after enabling Assume with PSCEV, this means we may have encountered
  // cast instructions that required adding a runtime check in order to
  // guarantee the correctness of the AddRecurrence representation of the
  // induction.
  if (PhiScev != AR && SymbolicPhi) {
    SmallVector<Instruction *, 2> Casts;
    if (getCastsForInductionPHI(PSE, SymbolicPhi, AR, Casts))
      return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR, &Casts);
  }

  return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR);
}

/// Recognize an integer or pointer induction PHI from a (possibly
/// pre-computed) SCEV expression \p Expr. For pointer inductions the step is
/// expressed in units of the pointee type's allocation size. Populates \p D
/// on success.
bool InductionDescriptor::isInductionPHI(
    PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE,
    InductionDescriptor &D, const SCEV *Expr,
    SmallVectorImpl<Instruction *> *CastsToIgnore) {
  Type *PhiTy = Phi->getType();
  // We only handle integer and pointer inductions variables.
  if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
    return false;

  // Check that the PHI is consecutive.
  const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi);
  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);

  if (!AR) {
    DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
    return false;
  }

  if (AR->getLoop() != TheLoop) {
    // FIXME: We should treat this as a uniform. Unfortunately, we
    // don't currently know how to handled uniform PHIs.
    DEBUG(dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n");
    return false;
  }

  Value *StartValue =
    Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader());
  const SCEV *Step = AR->getStepRecurrence(*SE);
  // Calculate the pointer stride and check if it is consecutive.
  // The stride may be a constant or a loop invariant integer value.
  const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step);
  if (!ConstStep && !SE->isLoopInvariant(Step, TheLoop))
    return false;

  if (PhiTy->isIntegerTy()) {
    D = InductionDescriptor(StartValue, IK_IntInduction, Step, /*BOp=*/ nullptr,
                            CastsToIgnore);
    return true;
  }

  assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
  // Pointer induction should be a constant.
  if (!ConstStep)
    return false;

  ConstantInt *CV = ConstStep->getValue();
  Type *PointerElementType = PhiTy->getPointerElementType();
  // The pointer stride cannot be determined if the pointer element type is not
  // sized.
  if (!PointerElementType->isSized())
    return false;

  const DataLayout &DL = Phi->getModule()->getDataLayout();
  int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType));
  if (!Size)
    return false;

  // The byte stride must be a multiple of the element size; the recorded step
  // is the stride in elements.
  int64_t CVSize = CV->getSExtValue();
  if (CVSize % Size)
    return false;
  auto *StepValue = SE->getConstant(CV->getType(), CVSize / Size,
                                    true /* signed */);
  D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue);
  return true;
}

/// Ensure every exit block of \p L is "dedicated", i.e. has no predecessors
/// outside the loop, splitting exit blocks where necessary. Returns true if
/// any block was split.
bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
                                   bool PreserveLCSSA) {
  bool Changed = false;

  // We re-use a vector for the in-loop predecessors.
  SmallVector<BasicBlock *, 4> InLoopPredecessors;

  auto RewriteExit = [&](BasicBlock *BB) {
    assert(InLoopPredecessors.empty() &&
           "Must start with an empty predecessors list!");
    auto Cleanup = make_scope_exit([&] { InLoopPredecessors.clear(); });

    // See if there are any non-loop predecessors of this exit block and
    // keep track of the in-loop predecessors.
    bool IsDedicatedExit = true;
    for (auto *PredBB : predecessors(BB))
      if (L->contains(PredBB)) {
        if (isa<IndirectBrInst>(PredBB->getTerminator()))
          // We cannot rewrite exiting edges from an indirectbr.
          return false;

        InLoopPredecessors.push_back(PredBB);
      } else {
        IsDedicatedExit = false;
      }

    assert(!InLoopPredecessors.empty() && "Must have *some* loop predecessor!");

    // Nothing to do if this is already a dedicated exit.
    if (IsDedicatedExit)
      return false;

    auto *NewExitBB = SplitBlockPredecessors(
        BB, InLoopPredecessors, ".loopexit", DT, LI, PreserveLCSSA);

    if (!NewExitBB)
      DEBUG(dbgs() << "WARNING: Can't create a dedicated exit block for loop: "
                   << *L << "\n");
    else
      DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
                   << NewExitBB->getName() << "\n");
    return true;
  };

  // Walk the exit blocks directly rather than building up a data structure for
  // them, but only visit each one once.
  SmallPtrSet<BasicBlock *, 4> Visited;
  for (auto *BB : L->blocks())
    for (auto *SuccBB : successors(BB)) {
      // We're looking for exit blocks so skip in-loop successors.
      if (L->contains(SuccBB))
        continue;

      // Visit each exit block exactly once.
      if (!Visited.insert(SuccBB).second)
        continue;

      Changed |= RewriteExit(SuccBB);
    }

  return Changed;
}

/// \brief Returns the instructions that use values defined in the loop.
SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) {
  SmallVector<Instruction *, 8> UsedOutside;

  for (auto *Block : L->getBlocks())
    // FIXME: I believe that this could use copy_if if the Inst reference could
    // be adapted into a pointer.
    for (auto &Inst : *Block) {
      auto Users = Inst.users();
      if (any_of(Users, [&](User *U) {
            auto *Use = cast<Instruction>(U);
            return !L->contains(Use->getParent());
          }))
        UsedOutside.push_back(&Inst);
    }

  return UsedOutside;
}

/// Declare the standard set of analyses required and preserved by all loop
/// passes, so they can run inside the loop pass manager.
void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) {
  // By definition, all loop passes need the LoopInfo analysis and the
  // Dominator tree it depends on. Because they all participate in the loop
  // pass manager, they must also preserve these.
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addPreserved<DominatorTreeWrapperPass>();
  AU.addRequired<LoopInfoWrapperPass>();
  AU.addPreserved<LoopInfoWrapperPass>();

  // We must also preserve LoopSimplify and LCSSA. We locally access their IDs
  // here because users shouldn't directly get them from this header.
  extern char &LoopSimplifyID;
  extern char &LCSSAID;
  AU.addRequiredID(LoopSimplifyID);
  AU.addPreservedID(LoopSimplifyID);
  AU.addRequiredID(LCSSAID);
  AU.addPreservedID(LCSSAID);
  // This is used in the LPPassManager to perform LCSSA verification on passes
  // which preserve lcssa form
  AU.addRequired<LCSSAVerificationPass>();
  AU.addPreserved<LCSSAVerificationPass>();

  // Loop passes are designed to run inside of a loop pass manager which means
  // that any function analyses they require must be required by the first loop
  // pass in the manager (so that it is computed before the loop pass manager
  // runs) and preserved by all loop passes in the manager. To make this
  // reasonably robust, the set needed for most loop passes is maintained here.
  // If your loop pass requires an analysis not listed here, you will need to
  // carefully audit the loop pass manager nesting structure that results.
  AU.addRequired<AAResultsWrapperPass>();
  AU.addPreserved<AAResultsWrapperPass>();
  AU.addPreserved<BasicAAWrapperPass>();
  AU.addPreserved<GlobalsAAWrapperPass>();
  AU.addPreserved<SCEVAAWrapperPass>();
  AU.addRequired<ScalarEvolutionWrapperPass>();
  AU.addPreserved<ScalarEvolutionWrapperPass>();
}

/// Manually defined generic "LoopPass" dependency initialization. This is used
/// to initialize the exact set of passes from above in \c
/// getLoopAnalysisUsage. It can be used within a loop pass's initialization
/// with:
///
///   INITIALIZE_PASS_DEPENDENCY(LoopPass)
///
/// As-if "LoopPass" were a pass.
void llvm::initializeLoopPassPass(PassRegistry &Registry) {
  INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
  INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
}

/// \brief Find string metadata for loop
///
/// If it has a value (e.g. {"llvm.distribute", 1} return the value as an
/// operand or null otherwise.  If the string metadata is not found return
/// Optional's not-a-value.
Optional<const MDOperand *> llvm::findStringMetadataForLoop(Loop *TheLoop,
                                                            StringRef Name) {
  MDNode *LoopID = TheLoop->getLoopID();
  // Return none if LoopID is not set.
  if (!LoopID)
    return None;

  // First operand should refer to the loop id itself.
  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
  assert(LoopID->getOperand(0) == LoopID && "invalid loop id");

  // Iterate over LoopID operands and look for MDString Metadata
  for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
    MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
    if (!MD)
      continue;
    MDString *S = dyn_cast<MDString>(MD->getOperand(0));
    if (!S)
      continue;
    // Return true if MDString holds expected MetaData.
    if (Name.equals(S->getString()))
      switch (MD->getNumOperands()) {
      case 1:
        return nullptr;
      case 2:
        return &MD->getOperand(1);
      default:
        llvm_unreachable("loop metadata has 0 or 1 operand");
      }
  }
  return None;
}

/// Does a BFS from a given node to all of its children inside a given loop.
/// The returned vector of nodes includes the starting point.
SmallVector<DomTreeNode *, 16>
llvm::collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop) {
  SmallVector<DomTreeNode *, 16> Worklist;
  auto AddRegionToWorklist = [&](DomTreeNode *DTN) {
    // Only include subregions in the top level loop.
    BasicBlock *BB = DTN->getBlock();
    if (CurLoop->contains(BB))
      Worklist.push_back(DTN);
  };

  AddRegionToWorklist(N);

  // The worklist doubles as the BFS visitation queue: children appended here
  // are scanned in turn.
  for (size_t I = 0; I < Worklist.size(); I++)
    for (DomTreeNode *Child : Worklist[I]->getChildren())
      AddRegionToWorklist(Child);

  return Worklist;
}

/// Delete loop \p L known to be dead (e.g. never executed or side-effect
/// free), redirecting the preheader straight to the unique exit block and
/// updating DT / SE / LI when provided. The deletion sequence below is
/// order-critical; see the inline commentary.
void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,
                          ScalarEvolution *SE = nullptr,
                          LoopInfo *LI = nullptr) {
  assert((!DT || L->isLCSSAForm(*DT)) && "Expected LCSSA!");
  auto *Preheader = L->getLoopPreheader();
  assert(Preheader && "Preheader should exist!");

  // Now that we know the removal is safe, remove the loop by changing the
  // branch from the preheader to go to the single exit block.
  //
  // Because we're deleting a large chunk of code at once, the sequence in which
  // we remove things is very important to avoid invalidation issues.

  // Tell ScalarEvolution that the loop is deleted. Do this before
  // deleting the loop so that ScalarEvolution can look at the loop
  // to determine what it needs to clean up.
  if (SE)
    SE->forgetLoop(L);

  auto *ExitBlock = L->getUniqueExitBlock();
  assert(ExitBlock && "Should have a unique exit block!");
  assert(L->hasDedicatedExits() && "Loop should have dedicated exits!");

  auto *OldBr = dyn_cast<BranchInst>(Preheader->getTerminator());
  assert(OldBr && "Preheader must end with a branch");
  assert(OldBr->isUnconditional() && "Preheader must have a single successor");
  // Connect the preheader to the exit block. Keep the old edge to the header
  // around to perform the dominator tree update in two separate steps
  // -- #1 insertion of the edge preheader -> exit and #2 deletion of the edge
  // preheader -> header.
  //
  //
  // 0. Preheader          1. Preheader           2. Preheader
  //        |                    |                     |
  //        V                    |                     |
  //      Header <--\            | Header <--\        | Header <--\
  //       |  |     |            |  |  |     |        |  |  |     |
  //       |  V     |            |  |  V     |        |  |  V     |
  //       | Body --/            |  | Body --/        |  | Body --/
  //       V                     V  V                 V  V
  //      Exit                   Exit                 Exit
  //
  // By doing this is two separate steps we can perform the dominator tree
  // update without using the batch update API.
  //
  // Even when the loop is never executed, we cannot remove the edge from the
  // source block to the exit block. Consider the case where the unexecuted loop
  // branches back to an outer loop. If we deleted the loop and removed the edge
  // coming to this inner loop, this will break the outer loop structure (by
  // deleting the backedge of the outer loop). If the outer loop is indeed a
  // non-loop, it will be deleted in a future iteration of loop deletion pass.
  IRBuilder<> Builder(OldBr);
  Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock);
  // Remove the old branch. The conditional branch becomes a new terminator.
  OldBr->eraseFromParent();

  // Rewrite phis in the exit block to get their inputs from the Preheader
  // instead of the exiting block.
  for (PHINode &P : ExitBlock->phis()) {
    // Set the zero'th element of Phi to be from the preheader and remove all
    // other incoming values. Given the loop has dedicated exits, all other
    // incoming values must be from the exiting blocks.
    int PredIndex = 0;
    P.setIncomingBlock(PredIndex, Preheader);
    // Removes all incoming values from all other exiting blocks (including
    // duplicate values from an exiting block).
    // Nuke all entries except the zero'th entry which is the preheader entry.
    // NOTE! We need to remove Incoming Values in the reverse order as done
    // below, to keep the indices valid for deletion (removeIncomingValues
    // updates getNumIncomingValues and shifts all values down into the operand
    // being deleted).
    for (unsigned i = 0, e = P.getNumIncomingValues() - 1; i != e; ++i)
      P.removeIncomingValue(e - i, false);

    assert((P.getNumIncomingValues() == 1 &&
            P.getIncomingBlock(PredIndex) == Preheader) &&
           "Should have exactly one value and that's from the preheader!");
  }

  // Disconnect the loop body by branching directly to its exit.
  Builder.SetInsertPoint(Preheader->getTerminator());
  Builder.CreateBr(ExitBlock);
  // Remove the old branch.
  Preheader->getTerminator()->eraseFromParent();

  if (DT) {
    // Update the dominator tree by informing it about the new edge from the
    // preheader to the exit.
    DT->insertEdge(Preheader, ExitBlock);
    // Inform the dominator tree about the removed edge.
    DT->deleteEdge(Preheader, L->getHeader());
  }

  // Remove the block from the reference counting scheme, so that we can
  // delete it freely later.
  for (auto *Block : L->blocks())
    Block->dropAllReferences();

  if (LI) {
    // Erase the instructions and the blocks without having to worry
    // about ordering because we already dropped the references.
    // NOTE: This iteration is safe because erasing the block does not remove
    // its entry from the loop's block list. We do that in the next section.
    for (Loop::block_iterator LpI = L->block_begin(), LpE = L->block_end();
         LpI != LpE; ++LpI)
      (*LpI)->eraseFromParent();

    // Finally, the blocks from LoopInfo. This has to happen late because
    // otherwise our loop iterators won't work.

    SmallPtrSet<BasicBlock *, 8> blocks;
    blocks.insert(L->block_begin(), L->block_end());
    for (BasicBlock *BB : blocks)
      LI->removeBlock(BB);

    // The last step is to update LoopInfo now that we've eliminated this loop.
    LI->erase(L);
  }
}

/// Returns true if the instruction in a loop is guaranteed to execute at least
/// once.
bool llvm::isGuaranteedToExecute(const Instruction &Inst,
                                 const DominatorTree *DT, const Loop *CurLoop,
                                 const LoopSafetyInfo *SafetyInfo) {
  // We have to check to make sure that the instruction dominates all
  // of the exit blocks.  If it doesn't, then there is a path out of the loop
  // which does not execute this instruction, so we can't hoist it.

  // If the instruction is in the header block for the loop (which is very
  // common), it is always guaranteed to dominate the exit blocks.  Since this
  // is a common case, and can save some work, check it now.
  if (Inst.getParent() == CurLoop->getHeader())
    // If there's a throw in the header block, we can't guarantee we'll reach
    // Inst.
    return !SafetyInfo->HeaderMayThrow;

  // Somewhere in this loop there is an instruction which may throw and make us
  // exit the loop.
  if (SafetyInfo->MayThrow)
    return false;

  // Get the exit blocks for the current loop.
  SmallVector<BasicBlock *, 8> ExitBlocks;
  CurLoop->getExitBlocks(ExitBlocks);

  // Verify that the block dominates each of the exit blocks of the loop.
  for (BasicBlock *ExitBlock : ExitBlocks)
    if (!DT->dominates(Inst.getParent(), ExitBlock))
      return false;

  // As a degenerate case, if the loop is statically infinite then we haven't
  // proven anything since there are no exit blocks.
  if (ExitBlocks.empty())
    return false;

  // FIXME: In general, we have to prove that the loop isn't an infinite loop.
  // See http://llvm.org/PR24078 . (The "ExitBlocks.empty()" check above is
  // just a special case of this.)
  return true;
}

/// Estimate the trip count of \p L from the latch branch's profile metadata,
/// returning None when no unique exiting block, no two-way latch branch, or
/// no profile data is available.
Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) {
  // Only support loops with a unique exiting block, and a latch.
  if (!L->getExitingBlock())
    return None;

  // Get the branch weights for the loop's backedge.
  // NOTE(review): getLoopLatch() can return null for multi-latch loops, but
  // it is dereferenced unconditionally here -- confirm callers only pass
  // single-latch loops, or add a null check.
  BranchInst *LatchBR =
      dyn_cast<BranchInst>(L->getLoopLatch()->getTerminator());
  if (!LatchBR || LatchBR->getNumSuccessors() != 2)
    return None;

  assert((LatchBR->getSuccessor(0) == L->getHeader() ||
          LatchBR->getSuccessor(1) == L->getHeader()) &&
         "At least one edge out of the latch must go to the header");

  // To estimate the number of times the loop body was executed, we want to
  // know the number of times the backedge was taken, vs. the number of times
  // we exited the loop.
  uint64_t TrueVal, FalseVal;
  if (!LatchBR->extractProfMetadata(TrueVal, FalseVal))
    return None;

  if (!TrueVal || !FalseVal)
    return 0;

  // Divide the count of the backedge by the count of the edge exiting the loop,
  // rounding to nearest.
  if (LatchBR->getSuccessor(0) == L->getHeader())
    return (TrueVal + (FalseVal / 2)) / FalseVal;
  else
    return (FalseVal + (TrueVal / 2)) / TrueVal;
}

/// \brief Adds a 'fast' flag to floating point operations.
static Value *addFastMathFlag(Value *V) {
  if (isa<FPMathOperator>(V)) {
    FastMathFlags Flags;
    Flags.setFast();
    cast<Instruction>(V)->setFastMathFlags(Flags);
  }
  return V;
}

// Helper to generate a log2 shuffle reduction.
1544 Value * 1545 llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op, 1546 RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind, 1547 ArrayRef<Value *> RedOps) { 1548 unsigned VF = Src->getType()->getVectorNumElements(); 1549 // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles 1550 // and vector ops, reducing the set of values being computed by half each 1551 // round. 1552 assert(isPowerOf2_32(VF) && 1553 "Reduction emission only supported for pow2 vectors!"); 1554 Value *TmpVec = Src; 1555 SmallVector<Constant *, 32> ShuffleMask(VF, nullptr); 1556 for (unsigned i = VF; i != 1; i >>= 1) { 1557 // Move the upper half of the vector to the lower half. 1558 for (unsigned j = 0; j != i / 2; ++j) 1559 ShuffleMask[j] = Builder.getInt32(i / 2 + j); 1560 1561 // Fill the rest of the mask with undef. 1562 std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), 1563 UndefValue::get(Builder.getInt32Ty())); 1564 1565 Value *Shuf = Builder.CreateShuffleVector( 1566 TmpVec, UndefValue::get(TmpVec->getType()), 1567 ConstantVector::get(ShuffleMask), "rdx.shuf"); 1568 1569 if (Op != Instruction::ICmp && Op != Instruction::FCmp) { 1570 // Floating point operations had to be 'fast' to enable the reduction. 1571 TmpVec = addFastMathFlag(Builder.CreateBinOp((Instruction::BinaryOps)Op, 1572 TmpVec, Shuf, "bin.rdx")); 1573 } else { 1574 assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid && 1575 "Invalid min/max"); 1576 TmpVec = RecurrenceDescriptor::createMinMaxOp(Builder, MinMaxKind, TmpVec, 1577 Shuf); 1578 } 1579 if (!RedOps.empty()) 1580 propagateIRFlags(TmpVec, RedOps); 1581 } 1582 // The result is in the first element of the vector. 1583 return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0)); 1584 } 1585 1586 /// Create a simple vector reduction specified by an opcode and some 1587 /// flags (if generating min/max reductions). 
1588 Value *llvm::createSimpleTargetReduction( 1589 IRBuilder<> &Builder, const TargetTransformInfo *TTI, unsigned Opcode, 1590 Value *Src, TargetTransformInfo::ReductionFlags Flags, 1591 ArrayRef<Value *> RedOps) { 1592 assert(isa<VectorType>(Src->getType()) && "Type must be a vector"); 1593 1594 Value *ScalarUdf = UndefValue::get(Src->getType()->getVectorElementType()); 1595 std::function<Value*()> BuildFunc; 1596 using RD = RecurrenceDescriptor; 1597 RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid; 1598 // TODO: Support creating ordered reductions. 1599 FastMathFlags FMFFast; 1600 FMFFast.setFast(); 1601 1602 switch (Opcode) { 1603 case Instruction::Add: 1604 BuildFunc = [&]() { return Builder.CreateAddReduce(Src); }; 1605 break; 1606 case Instruction::Mul: 1607 BuildFunc = [&]() { return Builder.CreateMulReduce(Src); }; 1608 break; 1609 case Instruction::And: 1610 BuildFunc = [&]() { return Builder.CreateAndReduce(Src); }; 1611 break; 1612 case Instruction::Or: 1613 BuildFunc = [&]() { return Builder.CreateOrReduce(Src); }; 1614 break; 1615 case Instruction::Xor: 1616 BuildFunc = [&]() { return Builder.CreateXorReduce(Src); }; 1617 break; 1618 case Instruction::FAdd: 1619 BuildFunc = [&]() { 1620 auto Rdx = Builder.CreateFAddReduce(ScalarUdf, Src); 1621 cast<CallInst>(Rdx)->setFastMathFlags(FMFFast); 1622 return Rdx; 1623 }; 1624 break; 1625 case Instruction::FMul: 1626 BuildFunc = [&]() { 1627 auto Rdx = Builder.CreateFMulReduce(ScalarUdf, Src); 1628 cast<CallInst>(Rdx)->setFastMathFlags(FMFFast); 1629 return Rdx; 1630 }; 1631 break; 1632 case Instruction::ICmp: 1633 if (Flags.IsMaxOp) { 1634 MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMax : RD::MRK_UIntMax; 1635 BuildFunc = [&]() { 1636 return Builder.CreateIntMaxReduce(Src, Flags.IsSigned); 1637 }; 1638 } else { 1639 MinMaxKind = Flags.IsSigned ? 
RD::MRK_SIntMin : RD::MRK_UIntMin; 1640 BuildFunc = [&]() { 1641 return Builder.CreateIntMinReduce(Src, Flags.IsSigned); 1642 }; 1643 } 1644 break; 1645 case Instruction::FCmp: 1646 if (Flags.IsMaxOp) { 1647 MinMaxKind = RD::MRK_FloatMax; 1648 BuildFunc = [&]() { return Builder.CreateFPMaxReduce(Src, Flags.NoNaN); }; 1649 } else { 1650 MinMaxKind = RD::MRK_FloatMin; 1651 BuildFunc = [&]() { return Builder.CreateFPMinReduce(Src, Flags.NoNaN); }; 1652 } 1653 break; 1654 default: 1655 llvm_unreachable("Unhandled opcode"); 1656 break; 1657 } 1658 if (TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags)) 1659 return BuildFunc(); 1660 return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, RedOps); 1661 } 1662 1663 /// Create a vector reduction using a given recurrence descriptor. 1664 Value *llvm::createTargetReduction(IRBuilder<> &B, 1665 const TargetTransformInfo *TTI, 1666 RecurrenceDescriptor &Desc, Value *Src, 1667 bool NoNaN) { 1668 // TODO: Support in-order reductions based on the recurrence descriptor. 
1669 using RD = RecurrenceDescriptor; 1670 RD::RecurrenceKind RecKind = Desc.getRecurrenceKind(); 1671 TargetTransformInfo::ReductionFlags Flags; 1672 Flags.NoNaN = NoNaN; 1673 switch (RecKind) { 1674 case RD::RK_FloatAdd: 1675 return createSimpleTargetReduction(B, TTI, Instruction::FAdd, Src, Flags); 1676 case RD::RK_FloatMult: 1677 return createSimpleTargetReduction(B, TTI, Instruction::FMul, Src, Flags); 1678 case RD::RK_IntegerAdd: 1679 return createSimpleTargetReduction(B, TTI, Instruction::Add, Src, Flags); 1680 case RD::RK_IntegerMult: 1681 return createSimpleTargetReduction(B, TTI, Instruction::Mul, Src, Flags); 1682 case RD::RK_IntegerAnd: 1683 return createSimpleTargetReduction(B, TTI, Instruction::And, Src, Flags); 1684 case RD::RK_IntegerOr: 1685 return createSimpleTargetReduction(B, TTI, Instruction::Or, Src, Flags); 1686 case RD::RK_IntegerXor: 1687 return createSimpleTargetReduction(B, TTI, Instruction::Xor, Src, Flags); 1688 case RD::RK_IntegerMinMax: { 1689 RD::MinMaxRecurrenceKind MMKind = Desc.getMinMaxRecurrenceKind(); 1690 Flags.IsMaxOp = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_UIntMax); 1691 Flags.IsSigned = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_SIntMin); 1692 return createSimpleTargetReduction(B, TTI, Instruction::ICmp, Src, Flags); 1693 } 1694 case RD::RK_FloatMinMax: { 1695 Flags.IsMaxOp = Desc.getMinMaxRecurrenceKind() == RD::MRK_FloatMax; 1696 return createSimpleTargetReduction(B, TTI, Instruction::FCmp, Src, Flags); 1697 } 1698 default: 1699 llvm_unreachable("Unhandled RecKind"); 1700 } 1701 } 1702 1703 void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) { 1704 auto *VecOp = dyn_cast<Instruction>(I); 1705 if (!VecOp) 1706 return; 1707 auto *Intersection = (OpValue == nullptr) ? 
dyn_cast<Instruction>(VL[0]) 1708 : dyn_cast<Instruction>(OpValue); 1709 if (!Intersection) 1710 return; 1711 const unsigned Opcode = Intersection->getOpcode(); 1712 VecOp->copyIRFlags(Intersection); 1713 for (auto *V : VL) { 1714 auto *Instr = dyn_cast<Instruction>(V); 1715 if (!Instr) 1716 continue; 1717 if (OpValue == nullptr || Opcode == Instr->getOpcode()) 1718 VecOp->andIRFlags(V); 1719 } 1720 } 1721