1 //===- llvm/Analysis/IVDescriptors.cpp - IndVar Descriptors -----*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file "describes" induction and recurrence variables. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Analysis/IVDescriptors.h" 15 #include "llvm/ADT/ScopeExit.h" 16 #include "llvm/Analysis/AliasAnalysis.h" 17 #include "llvm/Analysis/BasicAliasAnalysis.h" 18 #include "llvm/Analysis/GlobalsModRef.h" 19 #include "llvm/Analysis/InstructionSimplify.h" 20 #include "llvm/Analysis/LoopInfo.h" 21 #include "llvm/Analysis/LoopPass.h" 22 #include "llvm/Analysis/MustExecute.h" 23 #include "llvm/Analysis/ScalarEvolution.h" 24 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" 25 #include "llvm/Analysis/ScalarEvolutionExpander.h" 26 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 27 #include "llvm/Analysis/TargetTransformInfo.h" 28 #include "llvm/Analysis/ValueTracking.h" 29 #include "llvm/IR/DomTreeUpdater.h" 30 #include "llvm/IR/Dominators.h" 31 #include "llvm/IR/Instructions.h" 32 #include "llvm/IR/Module.h" 33 #include "llvm/IR/PatternMatch.h" 34 #include "llvm/IR/ValueHandle.h" 35 #include "llvm/Pass.h" 36 #include "llvm/Support/Debug.h" 37 #include "llvm/Support/KnownBits.h" 38 39 using namespace llvm; 40 using namespace llvm::PatternMatch; 41 42 #define DEBUG_TYPE "iv-descriptors" 43 44 bool RecurrenceDescriptor::areAllUsesIn(Instruction *I, 45 SmallPtrSetImpl<Instruction *> &Set) { 46 for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use) 47 if (!Set.count(dyn_cast<Instruction>(*Use))) 48 return false; 49 return true; 50 } 51 52 bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurrenceKind Kind) { 53 switch (Kind) { 54 default: 55 break; 56 case RK_IntegerAdd: 57 case RK_IntegerMult: 58 case RK_IntegerOr: 59 case RK_IntegerAnd: 60 case RK_IntegerXor: 61 case RK_IntegerMinMax: 62 return true; 63 } 64 return false; 65 } 66 67 bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurrenceKind Kind) { 68 return (Kind != RK_NoRecurrence) && !isIntegerRecurrenceKind(Kind); 69 } 70 71 bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurrenceKind Kind) { 72 switch (Kind) { 73 default: 74 break; 75 case RK_IntegerAdd: 76 case RK_IntegerMult: 77 case RK_FloatAdd: 78 case RK_FloatMult: 79 return true; 80 } 81 return false; 82 } 83 84 /// Determines if Phi may have been type-promoted. If Phi has a single user 85 /// that ANDs the Phi with a type mask, return the user. RT is updated to 86 /// account for the narrower bit width represented by the mask, and the AND 87 /// instruction is added to CI. 88 static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT, 89 SmallPtrSetImpl<Instruction *> &Visited, 90 SmallPtrSetImpl<Instruction *> &CI) { 91 if (!Phi->hasOneUse()) 92 return Phi; 93 94 const APInt *M = nullptr; 95 Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser()); 96 97 // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT 98 // with a new integer type of the corresponding bit width. 99 if (match(J, m_c_And(m_Instruction(I), m_APInt(M)))) { 100 int32_t Bits = (*M + 1).exactLogBase2(); 101 if (Bits > 0) { 102 RT = IntegerType::get(Phi->getContext(), Bits); 103 Visited.insert(Phi); 104 CI.insert(J); 105 return J; 106 } 107 } 108 return Phi; 109 } 110 111 /// Compute the minimal bit width needed to represent a reduction whose exit 112 /// instruction is given by Exit. 113 static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit, 114 DemandedBits *DB, 115 AssumptionCache *AC, 116 DominatorTree *DT) { 117 bool IsSigned = false; 118 const DataLayout &DL = Exit->getModule()->getDataLayout(); 119 uint64_t MaxBitWidth = DL.getTypeSizeInBits(Exit->getType()); 120 121 if (DB) { 122 // Use the demanded bits analysis to determine the bits that are live out 123 // of the exit instruction, rounding up to the nearest power of two. If the 124 // use of demanded bits results in a smaller bit width, we know the value 125 // must be positive (i.e., IsSigned = false), because if this were not the 126 // case, the sign bit would have been demanded. 127 auto Mask = DB->getDemandedBits(Exit); 128 MaxBitWidth = Mask.getBitWidth() - Mask.countLeadingZeros(); 129 } 130 131 if (MaxBitWidth == DL.getTypeSizeInBits(Exit->getType()) && AC && DT) { 132 // If demanded bits wasn't able to limit the bit width, we can try to use 133 // value tracking instead. This can be the case, for example, if the value 134 // may be negative. 135 auto NumSignBits = ComputeNumSignBits(Exit, DL, 0, AC, nullptr, DT); 136 auto NumTypeBits = DL.getTypeSizeInBits(Exit->getType()); 137 MaxBitWidth = NumTypeBits - NumSignBits; 138 KnownBits Bits = computeKnownBits(Exit, DL); 139 if (!Bits.isNonNegative()) { 140 // If the value is not known to be non-negative, we set IsSigned to true, 141 // meaning that we will use sext instructions instead of zext 142 // instructions to restore the original type. 143 IsSigned = true; 144 if (!Bits.isNegative()) 145 // If the value is not known to be negative, we don't known what the 146 // upper bit is, and therefore, we don't know what kind of extend we 147 // will need. In this case, just increase the bit width by one bit and 148 // use sext. 149 ++MaxBitWidth; 150 } 151 } 152 if (!isPowerOf2_64(MaxBitWidth)) 153 MaxBitWidth = NextPowerOf2(MaxBitWidth); 154 155 return std::make_pair(Type::getIntNTy(Exit->getContext(), MaxBitWidth), 156 IsSigned); 157 } 158 159 /// Collect cast instructions that can be ignored in the vectorizer's cost 160 /// model, given a reduction exit value and the minimal type in which the 161 /// reduction can be represented. 162 static void collectCastsToIgnore(Loop *TheLoop, Instruction *Exit, 163 Type *RecurrenceType, 164 SmallPtrSetImpl<Instruction *> &Casts) { 165 166 SmallVector<Instruction *, 8> Worklist; 167 SmallPtrSet<Instruction *, 8> Visited; 168 Worklist.push_back(Exit); 169 170 while (!Worklist.empty()) { 171 Instruction *Val = Worklist.pop_back_val(); 172 Visited.insert(Val); 173 if (auto *Cast = dyn_cast<CastInst>(Val)) 174 if (Cast->getSrcTy() == RecurrenceType) { 175 // If the source type of a cast instruction is equal to the recurrence 176 // type, it will be eliminated, and should be ignored in the vectorizer 177 // cost model. 178 Casts.insert(Cast); 179 continue; 180 } 181 182 // Add all operands to the work list if they are loop-varying values that 183 // we haven't yet visited. 184 for (Value *O : cast<User>(Val)->operands()) 185 if (auto *I = dyn_cast<Instruction>(O)) 186 if (TheLoop->contains(I) && !Visited.count(I)) 187 Worklist.push_back(I); 188 } 189 } 190 191 bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, 192 Loop *TheLoop, bool HasFunNoNaNAttr, 193 RecurrenceDescriptor &RedDes, 194 DemandedBits *DB, 195 AssumptionCache *AC, 196 DominatorTree *DT) { 197 if (Phi->getNumIncomingValues() != 2) 198 return false; 199 200 // Reduction variables are only found in the loop header block. 201 if (Phi->getParent() != TheLoop->getHeader()) 202 return false; 203 204 // Obtain the reduction start value from the value that comes from the loop 205 // preheader. 206 Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader()); 207 208 // ExitInstruction is the single value which is used outside the loop. 209 // We only allow for a single reduction value to be used outside the loop. 210 // This includes users of the reduction, variables (which form a cycle 211 // which ends in the phi node). 212 Instruction *ExitInstruction = nullptr; 213 // Indicates that we found a reduction operation in our scan. 214 bool FoundReduxOp = false; 215 216 // We start with the PHI node and scan for all of the users of this 217 // instruction. All users must be instructions that can be used as reduction 218 // variables (such as ADD). We must have a single out-of-block user. The cycle 219 // must include the original PHI. 220 bool FoundStartPHI = false; 221 222 // To recognize min/max patterns formed by a icmp select sequence, we store 223 // the number of instruction we saw from the recognized min/max pattern, 224 // to make sure we only see exactly the two instructions. 225 unsigned NumCmpSelectPatternInst = 0; 226 InstDesc ReduxDesc(false, nullptr); 227 228 // Data used for determining if the recurrence has been type-promoted. 229 Type *RecurrenceType = Phi->getType(); 230 SmallPtrSet<Instruction *, 4> CastInsts; 231 Instruction *Start = Phi; 232 bool IsSigned = false; 233 234 SmallPtrSet<Instruction *, 8> VisitedInsts; 235 SmallVector<Instruction *, 8> Worklist; 236 237 // Return early if the recurrence kind does not match the type of Phi. If the 238 // recurrence kind is arithmetic, we attempt to look through AND operations 239 // resulting from the type promotion performed by InstCombine. Vector 240 // operations are not limited to the legal integer widths, so we may be able 241 // to evaluate the reduction in the narrower width. 242 if (RecurrenceType->isFloatingPointTy()) { 243 if (!isFloatingPointRecurrenceKind(Kind)) 244 return false; 245 } else { 246 if (!isIntegerRecurrenceKind(Kind)) 247 return false; 248 if (isArithmeticRecurrenceKind(Kind)) 249 Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts); 250 } 251 252 Worklist.push_back(Start); 253 VisitedInsts.insert(Start); 254 255 // A value in the reduction can be used: 256 // - By the reduction: 257 // - Reduction operation: 258 // - One use of reduction value (safe). 259 // - Multiple use of reduction value (not safe). 260 // - PHI: 261 // - All uses of the PHI must be the reduction (safe). 262 // - Otherwise, not safe. 263 // - By instructions outside of the loop (safe). 264 // * One value may have several outside users, but all outside 265 // uses must be of the same value. 266 // - By an instruction that is not part of the reduction (not safe). 267 // This is either: 268 // * An instruction type other than PHI or the reduction operation. 269 // * A PHI in the header other than the initial PHI. 270 while (!Worklist.empty()) { 271 Instruction *Cur = Worklist.back(); 272 Worklist.pop_back(); 273 274 // No Users. 275 // If the instruction has no users then this is a broken chain and can't be 276 // a reduction variable. 277 if (Cur->use_empty()) 278 return false; 279 280 bool IsAPhi = isa<PHINode>(Cur); 281 282 // A header PHI use other than the original PHI. 283 if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent()) 284 return false; 285 286 // Reductions of instructions such as Div, and Sub is only possible if the 287 // LHS is the reduction variable. 288 if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) && 289 !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) && 290 !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0)))) 291 return false; 292 293 // Any reduction instruction must be of one of the allowed kinds. We ignore 294 // the starting value (the Phi or an AND instruction if the Phi has been 295 // type-promoted). 296 if (Cur != Start) { 297 ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr); 298 if (!ReduxDesc.isRecurrence()) 299 return false; 300 } 301 302 // A reduction operation must only have one use of the reduction value. 303 if (!IsAPhi && Kind != RK_IntegerMinMax && Kind != RK_FloatMinMax && 304 hasMultipleUsesOf(Cur, VisitedInsts)) 305 return false; 306 307 // All inputs to a PHI node must be a reduction value. 308 if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts)) 309 return false; 310 311 if (Kind == RK_IntegerMinMax && 312 (isa<ICmpInst>(Cur) || isa<SelectInst>(Cur))) 313 ++NumCmpSelectPatternInst; 314 if (Kind == RK_FloatMinMax && (isa<FCmpInst>(Cur) || isa<SelectInst>(Cur))) 315 ++NumCmpSelectPatternInst; 316 317 // Check whether we found a reduction operator. 318 FoundReduxOp |= !IsAPhi && Cur != Start; 319 320 // Process users of current instruction. Push non-PHI nodes after PHI nodes 321 // onto the stack. This way we are going to have seen all inputs to PHI 322 // nodes once we get to them. 323 SmallVector<Instruction *, 8> NonPHIs; 324 SmallVector<Instruction *, 8> PHIs; 325 for (User *U : Cur->users()) { 326 Instruction *UI = cast<Instruction>(U); 327 328 // Check if we found the exit user. 329 BasicBlock *Parent = UI->getParent(); 330 if (!TheLoop->contains(Parent)) { 331 // If we already know this instruction is used externally, move on to 332 // the next user. 333 if (ExitInstruction == Cur) 334 continue; 335 336 // Exit if you find multiple values used outside or if the header phi 337 // node is being used. In this case the user uses the value of the 338 // previous iteration, in which case we would loose "VF-1" iterations of 339 // the reduction operation if we vectorize. 340 if (ExitInstruction != nullptr || Cur == Phi) 341 return false; 342 343 // The instruction used by an outside user must be the last instruction 344 // before we feed back to the reduction phi. Otherwise, we loose VF-1 345 // operations on the value. 346 if (!is_contained(Phi->operands(), Cur)) 347 return false; 348 349 ExitInstruction = Cur; 350 continue; 351 } 352 353 // Process instructions only once (termination). Each reduction cycle 354 // value must only be used once, except by phi nodes and min/max 355 // reductions which are represented as a cmp followed by a select. 356 InstDesc IgnoredVal(false, nullptr); 357 if (VisitedInsts.insert(UI).second) { 358 if (isa<PHINode>(UI)) 359 PHIs.push_back(UI); 360 else 361 NonPHIs.push_back(UI); 362 } else if (!isa<PHINode>(UI) && 363 ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) && 364 !isa<SelectInst>(UI)) || 365 !isMinMaxSelectCmpPattern(UI, IgnoredVal).isRecurrence())) 366 return false; 367 368 // Remember that we completed the cycle. 369 if (UI == Phi) 370 FoundStartPHI = true; 371 } 372 Worklist.append(PHIs.begin(), PHIs.end()); 373 Worklist.append(NonPHIs.begin(), NonPHIs.end()); 374 } 375 376 // This means we have seen one but not the other instruction of the 377 // pattern or more than just a select and cmp. 378 if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) && 379 NumCmpSelectPatternInst != 2) 380 return false; 381 382 if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction) 383 return false; 384 385 if (Start != Phi) { 386 // If the starting value is not the same as the phi node, we speculatively 387 // looked through an 'and' instruction when evaluating a potential 388 // arithmetic reduction to determine if it may have been type-promoted. 389 // 390 // We now compute the minimal bit width that is required to represent the 391 // reduction. If this is the same width that was indicated by the 'and', we 392 // can represent the reduction in the smaller type. The 'and' instruction 393 // will be eliminated since it will essentially be a cast instruction that 394 // can be ignore in the cost model. If we compute a different type than we 395 // did when evaluating the 'and', the 'and' will not be eliminated, and we 396 // will end up with different kinds of operations in the recurrence 397 // expression (e.g., RK_IntegerAND, RK_IntegerADD). We give up if this is 398 // the case. 399 // 400 // The vectorizer relies on InstCombine to perform the actual 401 // type-shrinking. It does this by inserting instructions to truncate the 402 // exit value of the reduction to the width indicated by RecurrenceType and 403 // then extend this value back to the original width. If IsSigned is false, 404 // a 'zext' instruction will be generated; otherwise, a 'sext' will be 405 // used. 406 // 407 // TODO: We should not rely on InstCombine to rewrite the reduction in the 408 // smaller type. We should just generate a correctly typed expression 409 // to begin with. 410 Type *ComputedType; 411 std::tie(ComputedType, IsSigned) = 412 computeRecurrenceType(ExitInstruction, DB, AC, DT); 413 if (ComputedType != RecurrenceType) 414 return false; 415 416 // The recurrence expression will be represented in a narrower type. If 417 // there are any cast instructions that will be unnecessary, collect them 418 // in CastInsts. Note that the 'and' instruction was already included in 419 // this list. 420 // 421 // TODO: A better way to represent this may be to tag in some way all the 422 // instructions that are a part of the reduction. The vectorizer cost 423 // model could then apply the recurrence type to these instructions, 424 // without needing a white list of instructions to ignore. 425 collectCastsToIgnore(TheLoop, ExitInstruction, RecurrenceType, CastInsts); 426 } 427 428 // We found a reduction var if we have reached the original phi node and we 429 // only have a single instruction with out-of-loop users. 430 431 // The ExitInstruction(Instruction which is allowed to have out-of-loop users) 432 // is saved as part of the RecurrenceDescriptor. 433 434 // Save the description of this reduction variable. 435 RecurrenceDescriptor RD( 436 RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(), 437 ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts); 438 RedDes = RD; 439 440 return true; 441 } 442 443 /// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction 444 /// pattern corresponding to a min(X, Y) or max(X, Y). 445 RecurrenceDescriptor::InstDesc 446 RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev) { 447 448 assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) && 449 "Expect a select instruction"); 450 Instruction *Cmp = nullptr; 451 SelectInst *Select = nullptr; 452 453 // We must handle the select(cmp()) as a single instruction. Advance to the 454 // select. 455 if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) { 456 if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->user_begin()))) 457 return InstDesc(false, I); 458 return InstDesc(Select, Prev.getMinMaxKind()); 459 } 460 461 // Only handle single use cases for now. 462 if (!(Select = dyn_cast<SelectInst>(I))) 463 return InstDesc(false, I); 464 if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) && 465 !(Cmp = dyn_cast<FCmpInst>(I->getOperand(0)))) 466 return InstDesc(false, I); 467 if (!Cmp->hasOneUse()) 468 return InstDesc(false, I); 469 470 Value *CmpLeft; 471 Value *CmpRight; 472 473 // Look for a min/max pattern. 474 if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 475 return InstDesc(Select, MRK_UIntMin); 476 else if (m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 477 return InstDesc(Select, MRK_UIntMax); 478 else if (m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 479 return InstDesc(Select, MRK_SIntMax); 480 else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 481 return InstDesc(Select, MRK_SIntMin); 482 else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 483 return InstDesc(Select, MRK_FloatMin); 484 else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 485 return InstDesc(Select, MRK_FloatMax); 486 else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 487 return InstDesc(Select, MRK_FloatMin); 488 else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 489 return InstDesc(Select, MRK_FloatMax); 490 491 return InstDesc(false, I); 492 } 493 494 RecurrenceDescriptor::InstDesc 495 RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind, 496 InstDesc &Prev, bool HasFunNoNaNAttr) { 497 bool FP = I->getType()->isFloatingPointTy(); 498 Instruction *UAI = Prev.getUnsafeAlgebraInst(); 499 if (!UAI && FP && !I->isFast()) 500 UAI = I; // Found an unsafe (unvectorizable) algebra instruction. 501 502 switch (I->getOpcode()) { 503 default: 504 return InstDesc(false, I); 505 case Instruction::PHI: 506 return InstDesc(I, Prev.getMinMaxKind(), Prev.getUnsafeAlgebraInst()); 507 case Instruction::Sub: 508 case Instruction::Add: 509 return InstDesc(Kind == RK_IntegerAdd, I); 510 case Instruction::Mul: 511 return InstDesc(Kind == RK_IntegerMult, I); 512 case Instruction::And: 513 return InstDesc(Kind == RK_IntegerAnd, I); 514 case Instruction::Or: 515 return InstDesc(Kind == RK_IntegerOr, I); 516 case Instruction::Xor: 517 return InstDesc(Kind == RK_IntegerXor, I); 518 case Instruction::FMul: 519 return InstDesc(Kind == RK_FloatMult, I, UAI); 520 case Instruction::FSub: 521 case Instruction::FAdd: 522 return InstDesc(Kind == RK_FloatAdd, I, UAI); 523 case Instruction::FCmp: 524 case Instruction::ICmp: 525 case Instruction::Select: 526 if (Kind != RK_IntegerMinMax && 527 (!HasFunNoNaNAttr || Kind != RK_FloatMinMax)) 528 return InstDesc(false, I); 529 return isMinMaxSelectCmpPattern(I, Prev); 530 } 531 } 532 533 bool RecurrenceDescriptor::hasMultipleUsesOf( 534 Instruction *I, SmallPtrSetImpl<Instruction *> &Insts) { 535 unsigned NumUses = 0; 536 for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; 537 ++Use) { 538 if (Insts.count(dyn_cast<Instruction>(*Use))) 539 ++NumUses; 540 if (NumUses > 1) 541 return true; 542 } 543 544 return false; 545 } 546 bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop, 547 RecurrenceDescriptor &RedDes, 548 DemandedBits *DB, AssumptionCache *AC, 549 DominatorTree *DT) { 550 551 BasicBlock *Header = TheLoop->getHeader(); 552 Function &F = *Header->getParent(); 553 bool HasFunNoNaNAttr = 554 F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true"; 555 556 if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB, 557 AC, DT)) { 558 LLVM_DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n"); 559 return true; 560 } 561 if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes, DB, 562 AC, DT)) { 563 LLVM_DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n"); 564 return true; 565 } 566 if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes, DB, 567 AC, DT)) { 568 LLVM_DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n"); 569 return true; 570 } 571 if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes, DB, 572 AC, DT)) { 573 LLVM_DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n"); 574 return true; 575 } 576 if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes, DB, 577 AC, DT)) { 578 LLVM_DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n"); 579 return true; 580 } 581 if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr, RedDes, 582 DB, AC, DT)) { 583 LLVM_DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n"); 584 return true; 585 } 586 if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes, DB, 587 AC, DT)) { 588 LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n"); 589 return true; 590 } 591 if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB, 592 AC, DT)) { 593 LLVM_DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n"); 594 return true; 595 } 596 if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes, DB, 597 AC, DT)) { 598 LLVM_DEBUG(dbgs() << "Found an float MINMAX reduction PHI." << *Phi 599 << "\n"); 600 return true; 601 } 602 // Not a reduction of known type. 603 return false; 604 } 605 606 bool RecurrenceDescriptor::isFirstOrderRecurrence( 607 PHINode *Phi, Loop *TheLoop, 608 DenseMap<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT) { 609 610 // Ensure the phi node is in the loop header and has two incoming values. 611 if (Phi->getParent() != TheLoop->getHeader() || 612 Phi->getNumIncomingValues() != 2) 613 return false; 614 615 // Ensure the loop has a preheader and a single latch block. The loop 616 // vectorizer will need the latch to set up the next iteration of the loop. 617 auto *Preheader = TheLoop->getLoopPreheader(); 618 auto *Latch = TheLoop->getLoopLatch(); 619 if (!Preheader || !Latch) 620 return false; 621 622 // Ensure the phi node's incoming blocks are the loop preheader and latch. 623 if (Phi->getBasicBlockIndex(Preheader) < 0 || 624 Phi->getBasicBlockIndex(Latch) < 0) 625 return false; 626 627 // Get the previous value. The previous value comes from the latch edge while 628 // the initial value comes form the preheader edge. 629 auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch)); 630 if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous) || 631 SinkAfter.count(Previous)) // Cannot rely on dominance due to motion. 632 return false; 633 634 // Ensure every user of the phi node is dominated by the previous value. 635 // The dominance requirement ensures the loop vectorizer will not need to 636 // vectorize the initial value prior to the first iteration of the loop. 637 // TODO: Consider extending this sinking to handle other kinds of instructions 638 // and expressions, beyond sinking a single cast past Previous. 639 if (Phi->hasOneUse()) { 640 auto *I = Phi->user_back(); 641 if (I->isCast() && (I->getParent() == Phi->getParent()) && I->hasOneUse() && 642 DT->dominates(Previous, I->user_back())) { 643 if (!DT->dominates(Previous, I)) // Otherwise we're good w/o sinking. 644 SinkAfter[I] = Previous; 645 return true; 646 } 647 } 648 649 for (User *U : Phi->users()) 650 if (auto *I = dyn_cast<Instruction>(U)) { 651 if (!DT->dominates(Previous, I)) 652 return false; 653 } 654 655 return true; 656 } 657 658 /// This function returns the identity element (or neutral element) for 659 /// the operation K. 660 Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurrenceKind K, 661 Type *Tp) { 662 switch (K) { 663 case RK_IntegerXor: 664 case RK_IntegerAdd: 665 case RK_IntegerOr: 666 // Adding, Xoring, Oring zero to a number does not change it. 667 return ConstantInt::get(Tp, 0); 668 case RK_IntegerMult: 669 // Multiplying a number by 1 does not change it. 670 return ConstantInt::get(Tp, 1); 671 case RK_IntegerAnd: 672 // AND-ing a number with an all-1 value does not change it. 673 return ConstantInt::get(Tp, -1, true); 674 case RK_FloatMult: 675 // Multiplying a number by 1 does not change it. 676 return ConstantFP::get(Tp, 1.0L); 677 case RK_FloatAdd: 678 // Adding zero to a number does not change it. 679 return ConstantFP::get(Tp, 0.0L); 680 default: 681 llvm_unreachable("Unknown recurrence kind"); 682 } 683 } 684 685 /// This function translates the recurrence kind to an LLVM binary operator. 686 unsigned RecurrenceDescriptor::getRecurrenceBinOp(RecurrenceKind Kind) { 687 switch (Kind) { 688 case RK_IntegerAdd: 689 return Instruction::Add; 690 case RK_IntegerMult: 691 return Instruction::Mul; 692 case RK_IntegerOr: 693 return Instruction::Or; 694 case RK_IntegerAnd: 695 return Instruction::And; 696 case RK_IntegerXor: 697 return Instruction::Xor; 698 case RK_FloatMult: 699 return Instruction::FMul; 700 case RK_FloatAdd: 701 return Instruction::FAdd; 702 case RK_IntegerMinMax: 703 return Instruction::ICmp; 704 case RK_FloatMinMax: 705 return Instruction::FCmp; 706 default: 707 llvm_unreachable("Unknown recurrence operation"); 708 } 709 } 710 711 InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K, 712 const SCEV *Step, BinaryOperator *BOp, 713 SmallVectorImpl<Instruction *> *Casts) 714 : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) { 715 assert(IK != IK_NoInduction && "Not an induction"); 716 717 // Start value type should match the induction kind and the value 718 // itself should not be null. 719 assert(StartValue && "StartValue is null"); 720 assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) && 721 "StartValue is not a pointer for pointer induction"); 722 assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) && 723 "StartValue is not an integer for integer induction"); 724 725 // Check the Step Value. It should be non-zero integer value. 726 assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) && 727 "Step value is zero"); 728 729 assert((IK != IK_PtrInduction || getConstIntStepValue()) && 730 "Step value should be constant for pointer induction"); 731 assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) && 732 "StepValue is not an integer"); 733 734 assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) && 735 "StepValue is not FP for FpInduction"); 736 assert((IK != IK_FpInduction || 737 (InductionBinOp && 738 (InductionBinOp->getOpcode() == Instruction::FAdd || 739 InductionBinOp->getOpcode() == Instruction::FSub))) && 740 "Binary opcode should be specified for FP induction"); 741 742 if (Casts) { 743 for (auto &Inst : *Casts) { 744 RedundantCasts.push_back(Inst); 745 } 746 } 747 } 748 749 int InductionDescriptor::getConsecutiveDirection() const { 750 ConstantInt *ConstStep = getConstIntStepValue(); 751 if (ConstStep && (ConstStep->isOne() || ConstStep->isMinusOne())) 752 return ConstStep->getSExtValue(); 753 return 0; 754 } 755 756 ConstantInt *InductionDescriptor::getConstIntStepValue() const { 757 if (isa<SCEVConstant>(Step)) 758 return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue()); 759 return nullptr; 760 } 761 762 bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop, 763 ScalarEvolution *SE, 764 InductionDescriptor &D) { 765 766 // Here we only handle FP induction variables. 767 assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type"); 768 769 if (TheLoop->getHeader() != Phi->getParent()) 770 return false; 771 772 // The loop may have multiple entrances or multiple exits; we can analyze 773 // this phi if it has a unique entry value and a unique backedge value. 774 if (Phi->getNumIncomingValues() != 2) 775 return false; 776 Value *BEValue = nullptr, *StartValue = nullptr; 777 if (TheLoop->contains(Phi->getIncomingBlock(0))) { 778 BEValue = Phi->getIncomingValue(0); 779 StartValue = Phi->getIncomingValue(1); 780 } else { 781 assert(TheLoop->contains(Phi->getIncomingBlock(1)) && 782 "Unexpected Phi node in the loop"); 783 BEValue = Phi->getIncomingValue(1); 784 StartValue = Phi->getIncomingValue(0); 785 } 786 787 BinaryOperator *BOp = dyn_cast<BinaryOperator>(BEValue); 788 if (!BOp) 789 return false; 790 791 Value *Addend = nullptr; 792 if (BOp->getOpcode() == Instruction::FAdd) { 793 if (BOp->getOperand(0) == Phi) 794 Addend = BOp->getOperand(1); 795 else if (BOp->getOperand(1) == Phi) 796 Addend = BOp->getOperand(0); 797 } else if (BOp->getOpcode() == Instruction::FSub) 798 if (BOp->getOperand(0) == Phi) 799 Addend = BOp->getOperand(1); 800 801 if (!Addend) 802 return false; 803 804 // The addend should be loop invariant 805 if (auto *I = dyn_cast<Instruction>(Addend)) 806 if (TheLoop->contains(I)) 807 return false; 808 809 // FP Step has unknown SCEV 810 const SCEV *Step = SE->getUnknown(Addend); 811 D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp); 812 return true; 813 } 814 815 /// This function is called when we suspect that the update-chain of a phi node 816 /// (whose symbolic SCEV expression sin \p PhiScev) contains redundant casts, 817 /// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime 818 /// predicate P under which the SCEV expression for the phi can be the 819 /// AddRecurrence \p AR; See createAddRecFromPHIWithCast). We want to find the 820 /// cast instructions that are involved in the update-chain of this induction. 821 /// A caller that adds the required runtime predicate can be free to drop these 822 /// cast instructions, and compute the phi using \p AR (instead of some scev 823 /// expression with casts). 824 /// 825 /// For example, without a predicate the scev expression can take the following 826 /// form: 827 /// (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy) 828 /// 829 /// It corresponds to the following IR sequence: 830 /// %for.body: 831 /// %x = phi i64 [ 0, %ph ], [ %add, %for.body ] 832 /// %casted_phi = "ExtTrunc i64 %x" 833 /// %add = add i64 %casted_phi, %step 834 /// 835 /// where %x is given in \p PN, 836 /// PSE.getSCEV(%x) is equal to PSE.getSCEV(%casted_phi) under a predicate, 837 /// and the IR sequence that "ExtTrunc i64 %x" represents can take one of 838 /// several forms, for example, such as: 839 /// ExtTrunc1: %casted_phi = and %x, 2^n-1 840 /// or: 841 /// ExtTrunc2: %t = shl %x, m 842 /// %casted_phi = ashr %t, m 843 /// 844 /// If we are able to find such sequence, we return the instructions 845 /// we found, namely %casted_phi and the instructions on its use-def chain up 846 /// to the phi (not including the phi). 847 static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE, 848 const SCEVUnknown *PhiScev, 849 const SCEVAddRecExpr *AR, 850 SmallVectorImpl<Instruction *> &CastInsts) { 851 852 assert(CastInsts.empty() && "CastInsts is expected to be empty."); 853 auto *PN = cast<PHINode>(PhiScev->getValue()); 854 assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression"); 855 const Loop *L = AR->getLoop(); 856 857 // Find any cast instructions that participate in the def-use chain of 858 // PhiScev in the loop. 859 // FORNOW/TODO: We currently expect the def-use chain to include only 860 // two-operand instructions, where one of the operands is an invariant. 861 // createAddRecFromPHIWithCasts() currently does not support anything more 862 // involved than that, so we keep the search simple. This can be 863 // extended/generalized as needed. 864 865 auto getDef = [&](const Value *Val) -> Value * { 866 const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val); 867 if (!BinOp) 868 return nullptr; 869 Value *Op0 = BinOp->getOperand(0); 870 Value *Op1 = BinOp->getOperand(1); 871 Value *Def = nullptr; 872 if (L->isLoopInvariant(Op0)) 873 Def = Op1; 874 else if (L->isLoopInvariant(Op1)) 875 Def = Op0; 876 return Def; 877 }; 878 879 // Look for the instruction that defines the induction via the 880 // loop backedge. 881 BasicBlock *Latch = L->getLoopLatch(); 882 if (!Latch) 883 return false; 884 Value *Val = PN->getIncomingValueForBlock(Latch); 885 if (!Val) 886 return false; 887 888 // Follow the def-use chain until the induction phi is reached. 889 // If on the way we encounter a Value that has the same SCEV Expr as the 890 // phi node, we can consider the instructions we visit from that point 891 // as part of the cast-sequence that can be ignored. 892 bool InCastSequence = false; 893 auto *Inst = dyn_cast<Instruction>(Val); 894 while (Val != PN) { 895 // If we encountered a phi node other than PN, or if we left the loop, 896 // we bail out. 897 if (!Inst || !L->contains(Inst)) { 898 return false; 899 } 900 auto *AddRec = dyn_cast<SCEVAddRecExpr>(PSE.getSCEV(Val)); 901 if (AddRec && PSE.areAddRecsEqualWithPreds(AddRec, AR)) 902 InCastSequence = true; 903 if (InCastSequence) { 904 // Only the last instruction in the cast sequence is expected to have 905 // uses outside the induction def-use chain. 906 if (!CastInsts.empty()) 907 if (!Inst->hasOneUse()) 908 return false; 909 CastInsts.push_back(Inst); 910 } 911 Val = getDef(Val); 912 if (!Val) 913 return false; 914 Inst = dyn_cast<Instruction>(Val); 915 } 916 917 return InCastSequence; 918 } 919 920 bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop, 921 PredicatedScalarEvolution &PSE, 922 InductionDescriptor &D, bool Assume) { 923 Type *PhiTy = Phi->getType(); 924 925 // Handle integer and pointer inductions variables. 926 // Now we handle also FP induction but not trying to make a 927 // recurrent expression from the PHI node in-place. 928 929 if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() && !PhiTy->isFloatTy() && 930 !PhiTy->isDoubleTy() && !PhiTy->isHalfTy()) 931 return false; 932 933 if (PhiTy->isFloatingPointTy()) 934 return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D); 935 936 const SCEV *PhiScev = PSE.getSCEV(Phi); 937 const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev); 938 939 // We need this expression to be an AddRecExpr. 940 if (Assume && !AR) 941 AR = PSE.getAsAddRec(Phi); 942 943 if (!AR) { 944 LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); 945 return false; 946 } 947 948 // Record any Cast instructions that participate in the induction update 949 const auto *SymbolicPhi = dyn_cast<SCEVUnknown>(PhiScev); 950 // If we started from an UnknownSCEV, and managed to build an addRecurrence 951 // only after enabling Assume with PSCEV, this means we may have encountered 952 // cast instructions that required adding a runtime check in order to 953 // guarantee the correctness of the AddRecurence respresentation of the 954 // induction. 955 if (PhiScev != AR && SymbolicPhi) { 956 SmallVector<Instruction *, 2> Casts; 957 if (getCastsForInductionPHI(PSE, SymbolicPhi, AR, Casts)) 958 return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR, &Casts); 959 } 960 961 return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR); 962 } 963 964 bool InductionDescriptor::isInductionPHI( 965 PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE, 966 InductionDescriptor &D, const SCEV *Expr, 967 SmallVectorImpl<Instruction *> *CastsToIgnore) { 968 Type *PhiTy = Phi->getType(); 969 // We only handle integer and pointer inductions variables. 970 if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy()) 971 return false; 972 973 // Check that the PHI is consecutive. 974 const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi); 975 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev); 976 977 if (!AR) { 978 LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); 979 return false; 980 } 981 982 if (AR->getLoop() != TheLoop) { 983 // FIXME: We should treat this as a uniform. Unfortunately, we 984 // don't currently know how to handled uniform PHIs. 985 LLVM_DEBUG( 986 dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n"); 987 return false; 988 } 989 990 Value *StartValue = 991 Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader()); 992 const SCEV *Step = AR->getStepRecurrence(*SE); 993 // Calculate the pointer stride and check if it is consecutive. 994 // The stride may be a constant or a loop invariant integer value. 995 const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step); 996 if (!ConstStep && !SE->isLoopInvariant(Step, TheLoop)) 997 return false; 998 999 if (PhiTy->isIntegerTy()) { 1000 D = InductionDescriptor(StartValue, IK_IntInduction, Step, /*BOp=*/nullptr, 1001 CastsToIgnore); 1002 return true; 1003 } 1004 1005 assert(PhiTy->isPointerTy() && "The PHI must be a pointer"); 1006 // Pointer induction should be a constant. 1007 if (!ConstStep) 1008 return false; 1009 1010 ConstantInt *CV = ConstStep->getValue(); 1011 Type *PointerElementType = PhiTy->getPointerElementType(); 1012 // The pointer stride cannot be determined if the pointer element type is not 1013 // sized. 1014 if (!PointerElementType->isSized()) 1015 return false; 1016 1017 const DataLayout &DL = Phi->getModule()->getDataLayout(); 1018 int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType)); 1019 if (!Size) 1020 return false; 1021 1022 int64_t CVSize = CV->getSExtValue(); 1023 if (CVSize % Size) 1024 return false; 1025 auto *StepValue = 1026 SE->getConstant(CV->getType(), CVSize / Size, true /* signed */); 1027 D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue); 1028 return true; 1029 } 1030