1 //===- llvm/Analysis/IVDescriptors.cpp - IndVar Descriptors -----*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file "describes" induction and recurrence variables. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Analysis/IVDescriptors.h" 15 #include "llvm/ADT/ScopeExit.h" 16 #include "llvm/Analysis/AliasAnalysis.h" 17 #include "llvm/Analysis/BasicAliasAnalysis.h" 18 #include "llvm/Analysis/GlobalsModRef.h" 19 #include "llvm/Analysis/InstructionSimplify.h" 20 #include "llvm/Analysis/LoopInfo.h" 21 #include "llvm/Analysis/LoopPass.h" 22 #include "llvm/Analysis/MustExecute.h" 23 #include "llvm/Analysis/ScalarEvolution.h" 24 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" 25 #include "llvm/Analysis/ScalarEvolutionExpander.h" 26 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 27 #include "llvm/Analysis/TargetTransformInfo.h" 28 #include "llvm/Analysis/ValueTracking.h" 29 #include "llvm/IR/DomTreeUpdater.h" 30 #include "llvm/IR/Dominators.h" 31 #include "llvm/IR/Instructions.h" 32 #include "llvm/IR/Module.h" 33 #include "llvm/IR/PatternMatch.h" 34 #include "llvm/IR/ValueHandle.h" 35 #include "llvm/Pass.h" 36 #include "llvm/Support/Debug.h" 37 #include "llvm/Support/KnownBits.h" 38 39 using namespace llvm; 40 using namespace llvm::PatternMatch; 41 42 #define DEBUG_TYPE "iv-descriptors" 43 44 bool RecurrenceDescriptor::areAllUsesIn(Instruction *I, 45 SmallPtrSetImpl<Instruction *> &Set) { 46 for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use) 47 if (!Set.count(dyn_cast<Instruction>(*Use))) 48 return false; 49 return true; 50 } 51 52 bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurrenceKind Kind) { 53 switch (Kind) { 54 default: 55 break; 56 case RK_IntegerAdd: 57 case RK_IntegerMult: 58 case RK_IntegerOr: 59 case RK_IntegerAnd: 60 case RK_IntegerXor: 61 case RK_IntegerMinMax: 62 return true; 63 } 64 return false; 65 } 66 67 bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurrenceKind Kind) { 68 return (Kind != RK_NoRecurrence) && !isIntegerRecurrenceKind(Kind); 69 } 70 71 bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurrenceKind Kind) { 72 switch (Kind) { 73 default: 74 break; 75 case RK_IntegerAdd: 76 case RK_IntegerMult: 77 case RK_FloatAdd: 78 case RK_FloatMult: 79 return true; 80 } 81 return false; 82 } 83 84 /// Determines if Phi may have been type-promoted. If Phi has a single user 85 /// that ANDs the Phi with a type mask, return the user. RT is updated to 86 /// account for the narrower bit width represented by the mask, and the AND 87 /// instruction is added to CI. 88 static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT, 89 SmallPtrSetImpl<Instruction *> &Visited, 90 SmallPtrSetImpl<Instruction *> &CI) { 91 if (!Phi->hasOneUse()) 92 return Phi; 93 94 const APInt *M = nullptr; 95 Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser()); 96 97 // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT 98 // with a new integer type of the corresponding bit width. 99 if (match(J, m_c_And(m_Instruction(I), m_APInt(M)))) { 100 int32_t Bits = (*M + 1).exactLogBase2(); 101 if (Bits > 0) { 102 RT = IntegerType::get(Phi->getContext(), Bits); 103 Visited.insert(Phi); 104 CI.insert(J); 105 return J; 106 } 107 } 108 return Phi; 109 } 110 111 /// Compute the minimal bit width needed to represent a reduction whose exit 112 /// instruction is given by Exit. 113 static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit, 114 DemandedBits *DB, 115 AssumptionCache *AC, 116 DominatorTree *DT) { 117 bool IsSigned = false; 118 const DataLayout &DL = Exit->getModule()->getDataLayout(); 119 uint64_t MaxBitWidth = DL.getTypeSizeInBits(Exit->getType()); 120 121 if (DB) { 122 // Use the demanded bits analysis to determine the bits that are live out 123 // of the exit instruction, rounding up to the nearest power of two. If the 124 // use of demanded bits results in a smaller bit width, we know the value 125 // must be positive (i.e., IsSigned = false), because if this were not the 126 // case, the sign bit would have been demanded. 127 auto Mask = DB->getDemandedBits(Exit); 128 MaxBitWidth = Mask.getBitWidth() - Mask.countLeadingZeros(); 129 } 130 131 if (MaxBitWidth == DL.getTypeSizeInBits(Exit->getType()) && AC && DT) { 132 // If demanded bits wasn't able to limit the bit width, we can try to use 133 // value tracking instead. This can be the case, for example, if the value 134 // may be negative. 135 auto NumSignBits = ComputeNumSignBits(Exit, DL, 0, AC, nullptr, DT); 136 auto NumTypeBits = DL.getTypeSizeInBits(Exit->getType()); 137 MaxBitWidth = NumTypeBits - NumSignBits; 138 KnownBits Bits = computeKnownBits(Exit, DL); 139 if (!Bits.isNonNegative()) { 140 // If the value is not known to be non-negative, we set IsSigned to true, 141 // meaning that we will use sext instructions instead of zext 142 // instructions to restore the original type. 143 IsSigned = true; 144 if (!Bits.isNegative()) 145 // If the value is not known to be negative, we don't known what the 146 // upper bit is, and therefore, we don't know what kind of extend we 147 // will need. In this case, just increase the bit width by one bit and 148 // use sext. 149 ++MaxBitWidth; 150 } 151 } 152 if (!isPowerOf2_64(MaxBitWidth)) 153 MaxBitWidth = NextPowerOf2(MaxBitWidth); 154 155 return std::make_pair(Type::getIntNTy(Exit->getContext(), MaxBitWidth), 156 IsSigned); 157 } 158 159 /// Collect cast instructions that can be ignored in the vectorizer's cost 160 /// model, given a reduction exit value and the minimal type in which the 161 /// reduction can be represented. 162 static void collectCastsToIgnore(Loop *TheLoop, Instruction *Exit, 163 Type *RecurrenceType, 164 SmallPtrSetImpl<Instruction *> &Casts) { 165 166 SmallVector<Instruction *, 8> Worklist; 167 SmallPtrSet<Instruction *, 8> Visited; 168 Worklist.push_back(Exit); 169 170 while (!Worklist.empty()) { 171 Instruction *Val = Worklist.pop_back_val(); 172 Visited.insert(Val); 173 if (auto *Cast = dyn_cast<CastInst>(Val)) 174 if (Cast->getSrcTy() == RecurrenceType) { 175 // If the source type of a cast instruction is equal to the recurrence 176 // type, it will be eliminated, and should be ignored in the vectorizer 177 // cost model. 178 Casts.insert(Cast); 179 continue; 180 } 181 182 // Add all operands to the work list if they are loop-varying values that 183 // we haven't yet visited. 184 for (Value *O : cast<User>(Val)->operands()) 185 if (auto *I = dyn_cast<Instruction>(O)) 186 if (TheLoop->contains(I) && !Visited.count(I)) 187 Worklist.push_back(I); 188 } 189 } 190 191 bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, 192 Loop *TheLoop, bool HasFunNoNaNAttr, 193 RecurrenceDescriptor &RedDes, 194 DemandedBits *DB, 195 AssumptionCache *AC, 196 DominatorTree *DT) { 197 if (Phi->getNumIncomingValues() != 2) 198 return false; 199 200 // Reduction variables are only found in the loop header block. 201 if (Phi->getParent() != TheLoop->getHeader()) 202 return false; 203 204 // Obtain the reduction start value from the value that comes from the loop 205 // preheader. 206 Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader()); 207 208 // ExitInstruction is the single value which is used outside the loop. 209 // We only allow for a single reduction value to be used outside the loop. 210 // This includes users of the reduction, variables (which form a cycle 211 // which ends in the phi node). 212 Instruction *ExitInstruction = nullptr; 213 // Indicates that we found a reduction operation in our scan. 214 bool FoundReduxOp = false; 215 216 // We start with the PHI node and scan for all of the users of this 217 // instruction. All users must be instructions that can be used as reduction 218 // variables (such as ADD). We must have a single out-of-block user. The cycle 219 // must include the original PHI. 220 bool FoundStartPHI = false; 221 222 // To recognize min/max patterns formed by a icmp select sequence, we store 223 // the number of instruction we saw from the recognized min/max pattern, 224 // to make sure we only see exactly the two instructions. 225 unsigned NumCmpSelectPatternInst = 0; 226 InstDesc ReduxDesc(false, nullptr); 227 228 // Data used for determining if the recurrence has been type-promoted. 229 Type *RecurrenceType = Phi->getType(); 230 SmallPtrSet<Instruction *, 4> CastInsts; 231 Instruction *Start = Phi; 232 bool IsSigned = false; 233 234 SmallPtrSet<Instruction *, 8> VisitedInsts; 235 SmallVector<Instruction *, 8> Worklist; 236 237 // Return early if the recurrence kind does not match the type of Phi. If the 238 // recurrence kind is arithmetic, we attempt to look through AND operations 239 // resulting from the type promotion performed by InstCombine. Vector 240 // operations are not limited to the legal integer widths, so we may be able 241 // to evaluate the reduction in the narrower width. 242 if (RecurrenceType->isFloatingPointTy()) { 243 if (!isFloatingPointRecurrenceKind(Kind)) 244 return false; 245 } else { 246 if (!isIntegerRecurrenceKind(Kind)) 247 return false; 248 if (isArithmeticRecurrenceKind(Kind)) 249 Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts); 250 } 251 252 Worklist.push_back(Start); 253 VisitedInsts.insert(Start); 254 255 // A value in the reduction can be used: 256 // - By the reduction: 257 // - Reduction operation: 258 // - One use of reduction value (safe). 259 // - Multiple use of reduction value (not safe). 260 // - PHI: 261 // - All uses of the PHI must be the reduction (safe). 262 // - Otherwise, not safe. 263 // - By instructions outside of the loop (safe). 264 // * One value may have several outside users, but all outside 265 // uses must be of the same value. 266 // - By an instruction that is not part of the reduction (not safe). 267 // This is either: 268 // * An instruction type other than PHI or the reduction operation. 269 // * A PHI in the header other than the initial PHI. 270 while (!Worklist.empty()) { 271 Instruction *Cur = Worklist.back(); 272 Worklist.pop_back(); 273 274 // No Users. 275 // If the instruction has no users then this is a broken chain and can't be 276 // a reduction variable. 277 if (Cur->use_empty()) 278 return false; 279 280 bool IsAPhi = isa<PHINode>(Cur); 281 282 // A header PHI use other than the original PHI. 283 if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent()) 284 return false; 285 286 // Reductions of instructions such as Div, and Sub is only possible if the 287 // LHS is the reduction variable. 288 if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) && 289 !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) && 290 !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0)))) 291 return false; 292 293 // Any reduction instruction must be of one of the allowed kinds. We ignore 294 // the starting value (the Phi or an AND instruction if the Phi has been 295 // type-promoted). 296 if (Cur != Start) { 297 ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr); 298 if (!ReduxDesc.isRecurrence()) 299 return false; 300 } 301 302 bool IsASelect = isa<SelectInst>(Cur); 303 304 // A conditional reduction operation must only have 2 or less uses in 305 // VisitedInsts. 306 if (IsASelect && (Kind == RK_FloatAdd || Kind == RK_FloatMult) && 307 hasMultipleUsesOf(Cur, VisitedInsts, 2)) 308 return false; 309 310 // A reduction operation must only have one use of the reduction value. 311 if (!IsAPhi && !IsASelect && Kind != RK_IntegerMinMax && 312 Kind != RK_FloatMinMax && hasMultipleUsesOf(Cur, VisitedInsts, 1)) 313 return false; 314 315 // All inputs to a PHI node must be a reduction value. 316 if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts)) 317 return false; 318 319 if (Kind == RK_IntegerMinMax && 320 (isa<ICmpInst>(Cur) || isa<SelectInst>(Cur))) 321 ++NumCmpSelectPatternInst; 322 if (Kind == RK_FloatMinMax && (isa<FCmpInst>(Cur) || isa<SelectInst>(Cur))) 323 ++NumCmpSelectPatternInst; 324 325 // Check whether we found a reduction operator. 326 FoundReduxOp |= !IsAPhi && Cur != Start; 327 328 // Process users of current instruction. Push non-PHI nodes after PHI nodes 329 // onto the stack. This way we are going to have seen all inputs to PHI 330 // nodes once we get to them. 331 SmallVector<Instruction *, 8> NonPHIs; 332 SmallVector<Instruction *, 8> PHIs; 333 for (User *U : Cur->users()) { 334 Instruction *UI = cast<Instruction>(U); 335 336 // Check if we found the exit user. 337 BasicBlock *Parent = UI->getParent(); 338 if (!TheLoop->contains(Parent)) { 339 // If we already know this instruction is used externally, move on to 340 // the next user. 341 if (ExitInstruction == Cur) 342 continue; 343 344 // Exit if you find multiple values used outside or if the header phi 345 // node is being used. In this case the user uses the value of the 346 // previous iteration, in which case we would loose "VF-1" iterations of 347 // the reduction operation if we vectorize. 348 if (ExitInstruction != nullptr || Cur == Phi) 349 return false; 350 351 // The instruction used by an outside user must be the last instruction 352 // before we feed back to the reduction phi. Otherwise, we loose VF-1 353 // operations on the value. 354 if (!is_contained(Phi->operands(), Cur)) 355 return false; 356 357 ExitInstruction = Cur; 358 continue; 359 } 360 361 // Process instructions only once (termination). Each reduction cycle 362 // value must only be used once, except by phi nodes and min/max 363 // reductions which are represented as a cmp followed by a select. 364 InstDesc IgnoredVal(false, nullptr); 365 if (VisitedInsts.insert(UI).second) { 366 if (isa<PHINode>(UI)) 367 PHIs.push_back(UI); 368 else 369 NonPHIs.push_back(UI); 370 } else if (!isa<PHINode>(UI) && 371 ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) && 372 !isa<SelectInst>(UI)) || 373 (!isConditionalRdxPattern(Kind, UI).isRecurrence() && 374 !isMinMaxSelectCmpPattern(UI, IgnoredVal).isRecurrence()))) 375 return false; 376 377 // Remember that we completed the cycle. 378 if (UI == Phi) 379 FoundStartPHI = true; 380 } 381 Worklist.append(PHIs.begin(), PHIs.end()); 382 Worklist.append(NonPHIs.begin(), NonPHIs.end()); 383 } 384 385 // This means we have seen one but not the other instruction of the 386 // pattern or more than just a select and cmp. 387 if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) && 388 NumCmpSelectPatternInst != 2) 389 return false; 390 391 if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction) 392 return false; 393 394 if (Start != Phi) { 395 // If the starting value is not the same as the phi node, we speculatively 396 // looked through an 'and' instruction when evaluating a potential 397 // arithmetic reduction to determine if it may have been type-promoted. 398 // 399 // We now compute the minimal bit width that is required to represent the 400 // reduction. If this is the same width that was indicated by the 'and', we 401 // can represent the reduction in the smaller type. The 'and' instruction 402 // will be eliminated since it will essentially be a cast instruction that 403 // can be ignore in the cost model. If we compute a different type than we 404 // did when evaluating the 'and', the 'and' will not be eliminated, and we 405 // will end up with different kinds of operations in the recurrence 406 // expression (e.g., RK_IntegerAND, RK_IntegerADD). We give up if this is 407 // the case. 408 // 409 // The vectorizer relies on InstCombine to perform the actual 410 // type-shrinking. It does this by inserting instructions to truncate the 411 // exit value of the reduction to the width indicated by RecurrenceType and 412 // then extend this value back to the original width. If IsSigned is false, 413 // a 'zext' instruction will be generated; otherwise, a 'sext' will be 414 // used. 415 // 416 // TODO: We should not rely on InstCombine to rewrite the reduction in the 417 // smaller type. We should just generate a correctly typed expression 418 // to begin with. 419 Type *ComputedType; 420 std::tie(ComputedType, IsSigned) = 421 computeRecurrenceType(ExitInstruction, DB, AC, DT); 422 if (ComputedType != RecurrenceType) 423 return false; 424 425 // The recurrence expression will be represented in a narrower type. If 426 // there are any cast instructions that will be unnecessary, collect them 427 // in CastInsts. Note that the 'and' instruction was already included in 428 // this list. 429 // 430 // TODO: A better way to represent this may be to tag in some way all the 431 // instructions that are a part of the reduction. The vectorizer cost 432 // model could then apply the recurrence type to these instructions, 433 // without needing a white list of instructions to ignore. 434 collectCastsToIgnore(TheLoop, ExitInstruction, RecurrenceType, CastInsts); 435 } 436 437 // We found a reduction var if we have reached the original phi node and we 438 // only have a single instruction with out-of-loop users. 439 440 // The ExitInstruction(Instruction which is allowed to have out-of-loop users) 441 // is saved as part of the RecurrenceDescriptor. 442 443 // Save the description of this reduction variable. 444 RecurrenceDescriptor RD( 445 RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(), 446 ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts); 447 RedDes = RD; 448 449 return true; 450 } 451 452 /// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction 453 /// pattern corresponding to a min(X, Y) or max(X, Y). 454 RecurrenceDescriptor::InstDesc 455 RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev) { 456 457 assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) && 458 "Expect a select instruction"); 459 Instruction *Cmp = nullptr; 460 SelectInst *Select = nullptr; 461 462 // We must handle the select(cmp()) as a single instruction. Advance to the 463 // select. 464 if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) { 465 if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->user_begin()))) 466 return InstDesc(false, I); 467 return InstDesc(Select, Prev.getMinMaxKind()); 468 } 469 470 // Only handle single use cases for now. 471 if (!(Select = dyn_cast<SelectInst>(I))) 472 return InstDesc(false, I); 473 if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) && 474 !(Cmp = dyn_cast<FCmpInst>(I->getOperand(0)))) 475 return InstDesc(false, I); 476 if (!Cmp->hasOneUse()) 477 return InstDesc(false, I); 478 479 Value *CmpLeft; 480 Value *CmpRight; 481 482 // Look for a min/max pattern. 483 if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 484 return InstDesc(Select, MRK_UIntMin); 485 else if (m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 486 return InstDesc(Select, MRK_UIntMax); 487 else if (m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 488 return InstDesc(Select, MRK_SIntMax); 489 else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 490 return InstDesc(Select, MRK_SIntMin); 491 else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 492 return InstDesc(Select, MRK_FloatMin); 493 else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 494 return InstDesc(Select, MRK_FloatMax); 495 else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 496 return InstDesc(Select, MRK_FloatMin); 497 else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 498 return InstDesc(Select, MRK_FloatMax); 499 500 return InstDesc(false, I); 501 } 502 503 /// Returns true if the select instruction has users in the compare-and-add 504 /// reduction pattern below. The select instruction argument is the last one 505 /// in the sequence. 506 /// 507 /// %sum.1 = phi ... 508 /// ... 509 /// %cmp = fcmp pred %0, %CFP 510 /// %add = fadd %0, %sum.1 511 /// %sum.2 = select %cmp, %add, %sum.1 512 RecurrenceDescriptor::InstDesc 513 RecurrenceDescriptor::isConditionalRdxPattern( 514 RecurrenceKind Kind, Instruction *I) { 515 SelectInst *SI = dyn_cast<SelectInst>(I); 516 if (!SI) 517 return InstDesc(false, I); 518 519 CmpInst *CI = dyn_cast<CmpInst>(SI->getCondition()); 520 // Only handle single use cases for now. 521 if (!CI || !CI->hasOneUse()) 522 return InstDesc(false, I); 523 524 Value *TrueVal = SI->getTrueValue(); 525 Value *FalseVal = SI->getFalseValue(); 526 // Handle only when either of operands of select instruction is a PHI 527 // node for now. 528 if ((isa<PHINode>(*TrueVal) && isa<PHINode>(*FalseVal)) || 529 (!isa<PHINode>(*TrueVal) && !isa<PHINode>(*FalseVal))) 530 return InstDesc(false, I); 531 532 Instruction *I1 = 533 isa<PHINode>(*TrueVal) ? dyn_cast<Instruction>(FalseVal) 534 : dyn_cast<Instruction>(TrueVal); 535 if (!I1 || !I1->isBinaryOp()) 536 return InstDesc(false, I); 537 538 Value *Op1, *Op2; 539 if ((m_FAdd(m_Value(Op1), m_Value(Op2)).match(I1) || 540 m_FSub(m_Value(Op1), m_Value(Op2)).match(I1)) && 541 I1->isFast()) 542 return InstDesc(Kind == RK_FloatAdd, SI); 543 544 if (m_FMul(m_Value(Op1), m_Value(Op2)).match(I1) && (I1->isFast())) 545 return InstDesc(Kind == RK_FloatMult, SI); 546 547 return InstDesc(false, I); 548 } 549 550 RecurrenceDescriptor::InstDesc 551 RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind, 552 InstDesc &Prev, bool HasFunNoNaNAttr) { 553 bool FP = I->getType()->isFloatingPointTy(); 554 Instruction *UAI = Prev.getUnsafeAlgebraInst(); 555 if (!UAI && FP && !I->isFast()) 556 UAI = I; // Found an unsafe (unvectorizable) algebra instruction. 557 558 switch (I->getOpcode()) { 559 default: 560 return InstDesc(false, I); 561 case Instruction::PHI: 562 return InstDesc(I, Prev.getMinMaxKind(), Prev.getUnsafeAlgebraInst()); 563 case Instruction::Sub: 564 case Instruction::Add: 565 return InstDesc(Kind == RK_IntegerAdd, I); 566 case Instruction::Mul: 567 return InstDesc(Kind == RK_IntegerMult, I); 568 case Instruction::And: 569 return InstDesc(Kind == RK_IntegerAnd, I); 570 case Instruction::Or: 571 return InstDesc(Kind == RK_IntegerOr, I); 572 case Instruction::Xor: 573 return InstDesc(Kind == RK_IntegerXor, I); 574 case Instruction::FMul: 575 return InstDesc(Kind == RK_FloatMult, I, UAI); 576 case Instruction::FSub: 577 case Instruction::FAdd: 578 return InstDesc(Kind == RK_FloatAdd, I, UAI); 579 case Instruction::Select: 580 if (Kind == RK_FloatAdd || Kind == RK_FloatMult) 581 return isConditionalRdxPattern(Kind, I); 582 LLVM_FALLTHROUGH; 583 case Instruction::FCmp: 584 case Instruction::ICmp: 585 if (Kind != RK_IntegerMinMax && 586 (!HasFunNoNaNAttr || Kind != RK_FloatMinMax)) 587 return InstDesc(false, I); 588 return isMinMaxSelectCmpPattern(I, Prev); 589 } 590 } 591 592 bool RecurrenceDescriptor::hasMultipleUsesOf( 593 Instruction *I, SmallPtrSetImpl<Instruction *> &Insts, 594 unsigned MaxNumUses) { 595 unsigned NumUses = 0; 596 for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; 597 ++Use) { 598 if (Insts.count(dyn_cast<Instruction>(*Use))) 599 ++NumUses; 600 if (NumUses > MaxNumUses) 601 return true; 602 } 603 604 return false; 605 } 606 bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop, 607 RecurrenceDescriptor &RedDes, 608 DemandedBits *DB, AssumptionCache *AC, 609 DominatorTree *DT) { 610 611 BasicBlock *Header = TheLoop->getHeader(); 612 Function &F = *Header->getParent(); 613 bool HasFunNoNaNAttr = 614 F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true"; 615 616 if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB, 617 AC, DT)) { 618 LLVM_DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n"); 619 return true; 620 } 621 if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes, DB, 622 AC, DT)) { 623 LLVM_DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n"); 624 return true; 625 } 626 if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes, DB, 627 AC, DT)) { 628 LLVM_DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n"); 629 return true; 630 } 631 if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes, DB, 632 AC, DT)) { 633 LLVM_DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n"); 634 return true; 635 } 636 if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes, DB, 637 AC, DT)) { 638 LLVM_DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n"); 639 return true; 640 } 641 if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr, RedDes, 642 DB, AC, DT)) { 643 LLVM_DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n"); 644 return true; 645 } 646 if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes, DB, 647 AC, DT)) { 648 LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n"); 649 return true; 650 } 651 if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB, 652 AC, DT)) { 653 LLVM_DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n"); 654 return true; 655 } 656 if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes, DB, 657 AC, DT)) { 658 LLVM_DEBUG(dbgs() << "Found an float MINMAX reduction PHI." << *Phi 659 << "\n"); 660 return true; 661 } 662 // Not a reduction of known type. 663 return false; 664 } 665 666 bool RecurrenceDescriptor::isFirstOrderRecurrence( 667 PHINode *Phi, Loop *TheLoop, 668 DenseMap<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT) { 669 670 // Ensure the phi node is in the loop header and has two incoming values. 671 if (Phi->getParent() != TheLoop->getHeader() || 672 Phi->getNumIncomingValues() != 2) 673 return false; 674 675 // Ensure the loop has a preheader and a single latch block. The loop 676 // vectorizer will need the latch to set up the next iteration of the loop. 677 auto *Preheader = TheLoop->getLoopPreheader(); 678 auto *Latch = TheLoop->getLoopLatch(); 679 if (!Preheader || !Latch) 680 return false; 681 682 // Ensure the phi node's incoming blocks are the loop preheader and latch. 683 if (Phi->getBasicBlockIndex(Preheader) < 0 || 684 Phi->getBasicBlockIndex(Latch) < 0) 685 return false; 686 687 // Get the previous value. The previous value comes from the latch edge while 688 // the initial value comes form the preheader edge. 689 auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch)); 690 if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous) || 691 SinkAfter.count(Previous)) // Cannot rely on dominance due to motion. 692 return false; 693 694 // Ensure every user of the phi node is dominated by the previous value. 695 // The dominance requirement ensures the loop vectorizer will not need to 696 // vectorize the initial value prior to the first iteration of the loop. 697 // TODO: Consider extending this sinking to handle other kinds of instructions 698 // and expressions, beyond sinking a single cast past Previous. 699 if (Phi->hasOneUse()) { 700 auto *I = Phi->user_back(); 701 if (I->isCast() && (I->getParent() == Phi->getParent()) && I->hasOneUse() && 702 DT->dominates(Previous, I->user_back())) { 703 if (!DT->dominates(Previous, I)) // Otherwise we're good w/o sinking. 704 SinkAfter[I] = Previous; 705 return true; 706 } 707 } 708 709 for (User *U : Phi->users()) 710 if (auto *I = dyn_cast<Instruction>(U)) { 711 if (!DT->dominates(Previous, I)) 712 return false; 713 } 714 715 return true; 716 } 717 718 /// This function returns the identity element (or neutral element) for 719 /// the operation K. 720 Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurrenceKind K, 721 Type *Tp) { 722 switch (K) { 723 case RK_IntegerXor: 724 case RK_IntegerAdd: 725 case RK_IntegerOr: 726 // Adding, Xoring, Oring zero to a number does not change it. 727 return ConstantInt::get(Tp, 0); 728 case RK_IntegerMult: 729 // Multiplying a number by 1 does not change it. 730 return ConstantInt::get(Tp, 1); 731 case RK_IntegerAnd: 732 // AND-ing a number with an all-1 value does not change it. 733 return ConstantInt::get(Tp, -1, true); 734 case RK_FloatMult: 735 // Multiplying a number by 1 does not change it. 736 return ConstantFP::get(Tp, 1.0L); 737 case RK_FloatAdd: 738 // Adding zero to a number does not change it. 739 return ConstantFP::get(Tp, 0.0L); 740 default: 741 llvm_unreachable("Unknown recurrence kind"); 742 } 743 } 744 745 /// This function translates the recurrence kind to an LLVM binary operator. 746 unsigned RecurrenceDescriptor::getRecurrenceBinOp(RecurrenceKind Kind) { 747 switch (Kind) { 748 case RK_IntegerAdd: 749 return Instruction::Add; 750 case RK_IntegerMult: 751 return Instruction::Mul; 752 case RK_IntegerOr: 753 return Instruction::Or; 754 case RK_IntegerAnd: 755 return Instruction::And; 756 case RK_IntegerXor: 757 return Instruction::Xor; 758 case RK_FloatMult: 759 return Instruction::FMul; 760 case RK_FloatAdd: 761 return Instruction::FAdd; 762 case RK_IntegerMinMax: 763 return Instruction::ICmp; 764 case RK_FloatMinMax: 765 return Instruction::FCmp; 766 default: 767 llvm_unreachable("Unknown recurrence operation"); 768 } 769 } 770 771 InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K, 772 const SCEV *Step, BinaryOperator *BOp, 773 SmallVectorImpl<Instruction *> *Casts) 774 : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) { 775 assert(IK != IK_NoInduction && "Not an induction"); 776 777 // Start value type should match the induction kind and the value 778 // itself should not be null. 779 assert(StartValue && "StartValue is null"); 780 assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) && 781 "StartValue is not a pointer for pointer induction"); 782 assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) && 783 "StartValue is not an integer for integer induction"); 784 785 // Check the Step Value. It should be non-zero integer value. 786 assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) && 787 "Step value is zero"); 788 789 assert((IK != IK_PtrInduction || getConstIntStepValue()) && 790 "Step value should be constant for pointer induction"); 791 assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) && 792 "StepValue is not an integer"); 793 794 assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) && 795 "StepValue is not FP for FpInduction"); 796 assert((IK != IK_FpInduction || 797 (InductionBinOp && 798 (InductionBinOp->getOpcode() == Instruction::FAdd || 799 InductionBinOp->getOpcode() == Instruction::FSub))) && 800 "Binary opcode should be specified for FP induction"); 801 802 if (Casts) { 803 for (auto &Inst : *Casts) { 804 RedundantCasts.push_back(Inst); 805 } 806 } 807 } 808 809 int InductionDescriptor::getConsecutiveDirection() const { 810 ConstantInt *ConstStep = getConstIntStepValue(); 811 if (ConstStep && (ConstStep->isOne() || ConstStep->isMinusOne())) 812 return ConstStep->getSExtValue(); 813 return 0; 814 } 815 816 ConstantInt *InductionDescriptor::getConstIntStepValue() const { 817 if (isa<SCEVConstant>(Step)) 818 return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue()); 819 return nullptr; 820 } 821 822 bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop, 823 ScalarEvolution *SE, 824 InductionDescriptor &D) { 825 826 // Here we only handle FP induction variables. 827 assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type"); 828 829 if (TheLoop->getHeader() != Phi->getParent()) 830 return false; 831 832 // The loop may have multiple entrances or multiple exits; we can analyze 833 // this phi if it has a unique entry value and a unique backedge value. 834 if (Phi->getNumIncomingValues() != 2) 835 return false; 836 Value *BEValue = nullptr, *StartValue = nullptr; 837 if (TheLoop->contains(Phi->getIncomingBlock(0))) { 838 BEValue = Phi->getIncomingValue(0); 839 StartValue = Phi->getIncomingValue(1); 840 } else { 841 assert(TheLoop->contains(Phi->getIncomingBlock(1)) && 842 "Unexpected Phi node in the loop"); 843 BEValue = Phi->getIncomingValue(1); 844 StartValue = Phi->getIncomingValue(0); 845 } 846 847 BinaryOperator *BOp = dyn_cast<BinaryOperator>(BEValue); 848 if (!BOp) 849 return false; 850 851 Value *Addend = nullptr; 852 if (BOp->getOpcode() == Instruction::FAdd) { 853 if (BOp->getOperand(0) == Phi) 854 Addend = BOp->getOperand(1); 855 else if (BOp->getOperand(1) == Phi) 856 Addend = BOp->getOperand(0); 857 } else if (BOp->getOpcode() == Instruction::FSub) 858 if (BOp->getOperand(0) == Phi) 859 Addend = BOp->getOperand(1); 860 861 if (!Addend) 862 return false; 863 864 // The addend should be loop invariant 865 if (auto *I = dyn_cast<Instruction>(Addend)) 866 if (TheLoop->contains(I)) 867 return false; 868 869 // FP Step has unknown SCEV 870 const SCEV *Step = SE->getUnknown(Addend); 871 D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp); 872 return true; 873 } 874 875 /// This function is called when we suspect that the update-chain of a phi node 876 /// (whose symbolic SCEV expression sin \p PhiScev) contains redundant casts, 877 /// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime 878 /// predicate P under which the SCEV expression for the phi can be the 879 /// AddRecurrence \p AR; See createAddRecFromPHIWithCast). We want to find the 880 /// cast instructions that are involved in the update-chain of this induction. 881 /// A caller that adds the required runtime predicate can be free to drop these 882 /// cast instructions, and compute the phi using \p AR (instead of some scev 883 /// expression with casts). 884 /// 885 /// For example, without a predicate the scev expression can take the following 886 /// form: 887 /// (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy) 888 /// 889 /// It corresponds to the following IR sequence: 890 /// %for.body: 891 /// %x = phi i64 [ 0, %ph ], [ %add, %for.body ] 892 /// %casted_phi = "ExtTrunc i64 %x" 893 /// %add = add i64 %casted_phi, %step 894 /// 895 /// where %x is given in \p PN, 896 /// PSE.getSCEV(%x) is equal to PSE.getSCEV(%casted_phi) under a predicate, 897 /// and the IR sequence that "ExtTrunc i64 %x" represents can take one of 898 /// several forms, for example, such as: 899 /// ExtTrunc1: %casted_phi = and %x, 2^n-1 900 /// or: 901 /// ExtTrunc2: %t = shl %x, m 902 /// %casted_phi = ashr %t, m 903 /// 904 /// If we are able to find such sequence, we return the instructions 905 /// we found, namely %casted_phi and the instructions on its use-def chain up 906 /// to the phi (not including the phi). 907 static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE, 908 const SCEVUnknown *PhiScev, 909 const SCEVAddRecExpr *AR, 910 SmallVectorImpl<Instruction *> &CastInsts) { 911 912 assert(CastInsts.empty() && "CastInsts is expected to be empty."); 913 auto *PN = cast<PHINode>(PhiScev->getValue()); 914 assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression"); 915 const Loop *L = AR->getLoop(); 916 917 // Find any cast instructions that participate in the def-use chain of 918 // PhiScev in the loop. 919 // FORNOW/TODO: We currently expect the def-use chain to include only 920 // two-operand instructions, where one of the operands is an invariant. 921 // createAddRecFromPHIWithCasts() currently does not support anything more 922 // involved than that, so we keep the search simple. This can be 923 // extended/generalized as needed. 924 925 auto getDef = [&](const Value *Val) -> Value * { 926 const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val); 927 if (!BinOp) 928 return nullptr; 929 Value *Op0 = BinOp->getOperand(0); 930 Value *Op1 = BinOp->getOperand(1); 931 Value *Def = nullptr; 932 if (L->isLoopInvariant(Op0)) 933 Def = Op1; 934 else if (L->isLoopInvariant(Op1)) 935 Def = Op0; 936 return Def; 937 }; 938 939 // Look for the instruction that defines the induction via the 940 // loop backedge. 941 BasicBlock *Latch = L->getLoopLatch(); 942 if (!Latch) 943 return false; 944 Value *Val = PN->getIncomingValueForBlock(Latch); 945 if (!Val) 946 return false; 947 948 // Follow the def-use chain until the induction phi is reached. 949 // If on the way we encounter a Value that has the same SCEV Expr as the 950 // phi node, we can consider the instructions we visit from that point 951 // as part of the cast-sequence that can be ignored. 952 bool InCastSequence = false; 953 auto *Inst = dyn_cast<Instruction>(Val); 954 while (Val != PN) { 955 // If we encountered a phi node other than PN, or if we left the loop, 956 // we bail out. 957 if (!Inst || !L->contains(Inst)) { 958 return false; 959 } 960 auto *AddRec = dyn_cast<SCEVAddRecExpr>(PSE.getSCEV(Val)); 961 if (AddRec && PSE.areAddRecsEqualWithPreds(AddRec, AR)) 962 InCastSequence = true; 963 if (InCastSequence) { 964 // Only the last instruction in the cast sequence is expected to have 965 // uses outside the induction def-use chain. 966 if (!CastInsts.empty()) 967 if (!Inst->hasOneUse()) 968 return false; 969 CastInsts.push_back(Inst); 970 } 971 Val = getDef(Val); 972 if (!Val) 973 return false; 974 Inst = dyn_cast<Instruction>(Val); 975 } 976 977 return InCastSequence; 978 } 979 980 bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop, 981 PredicatedScalarEvolution &PSE, 982 InductionDescriptor &D, bool Assume) { 983 Type *PhiTy = Phi->getType(); 984 985 // Handle integer and pointer inductions variables. 986 // Now we handle also FP induction but not trying to make a 987 // recurrent expression from the PHI node in-place. 988 989 if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() && !PhiTy->isFloatTy() && 990 !PhiTy->isDoubleTy() && !PhiTy->isHalfTy()) 991 return false; 992 993 if (PhiTy->isFloatingPointTy()) 994 return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D); 995 996 const SCEV *PhiScev = PSE.getSCEV(Phi); 997 const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev); 998 999 // We need this expression to be an AddRecExpr. 1000 if (Assume && !AR) 1001 AR = PSE.getAsAddRec(Phi); 1002 1003 if (!AR) { 1004 LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); 1005 return false; 1006 } 1007 1008 // Record any Cast instructions that participate in the induction update 1009 const auto *SymbolicPhi = dyn_cast<SCEVUnknown>(PhiScev); 1010 // If we started from an UnknownSCEV, and managed to build an addRecurrence 1011 // only after enabling Assume with PSCEV, this means we may have encountered 1012 // cast instructions that required adding a runtime check in order to 1013 // guarantee the correctness of the AddRecurence respresentation of the 1014 // induction. 1015 if (PhiScev != AR && SymbolicPhi) { 1016 SmallVector<Instruction *, 2> Casts; 1017 if (getCastsForInductionPHI(PSE, SymbolicPhi, AR, Casts)) 1018 return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR, &Casts); 1019 } 1020 1021 return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR); 1022 } 1023 1024 bool InductionDescriptor::isInductionPHI( 1025 PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE, 1026 InductionDescriptor &D, const SCEV *Expr, 1027 SmallVectorImpl<Instruction *> *CastsToIgnore) { 1028 Type *PhiTy = Phi->getType(); 1029 // We only handle integer and pointer inductions variables. 1030 if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy()) 1031 return false; 1032 1033 // Check that the PHI is consecutive. 1034 const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi); 1035 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev); 1036 1037 if (!AR) { 1038 LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); 1039 return false; 1040 } 1041 1042 if (AR->getLoop() != TheLoop) { 1043 // FIXME: We should treat this as a uniform. Unfortunately, we 1044 // don't currently know how to handled uniform PHIs. 1045 LLVM_DEBUG( 1046 dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n"); 1047 return false; 1048 } 1049 1050 Value *StartValue = 1051 Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader()); 1052 const SCEV *Step = AR->getStepRecurrence(*SE); 1053 // Calculate the pointer stride and check if it is consecutive. 1054 // The stride may be a constant or a loop invariant integer value. 1055 const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step); 1056 if (!ConstStep && !SE->isLoopInvariant(Step, TheLoop)) 1057 return false; 1058 1059 if (PhiTy->isIntegerTy()) { 1060 D = InductionDescriptor(StartValue, IK_IntInduction, Step, /*BOp=*/nullptr, 1061 CastsToIgnore); 1062 return true; 1063 } 1064 1065 assert(PhiTy->isPointerTy() && "The PHI must be a pointer"); 1066 // Pointer induction should be a constant. 1067 if (!ConstStep) 1068 return false; 1069 1070 ConstantInt *CV = ConstStep->getValue(); 1071 Type *PointerElementType = PhiTy->getPointerElementType(); 1072 // The pointer stride cannot be determined if the pointer element type is not 1073 // sized. 1074 if (!PointerElementType->isSized()) 1075 return false; 1076 1077 const DataLayout &DL = Phi->getModule()->getDataLayout(); 1078 int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType)); 1079 if (!Size) 1080 return false; 1081 1082 int64_t CVSize = CV->getSExtValue(); 1083 if (CVSize % Size) 1084 return false; 1085 auto *StepValue = 1086 SE->getConstant(CV->getType(), CVSize / Size, true /* signed */); 1087 D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue); 1088 return true; 1089 } 1090