//===- llvm/Analysis/IVDescriptors.cpp - IndVar Descriptors -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file "describes" induction and recurrence variables.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"

#include <set>

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "iv-descriptors"

bool RecurrenceDescriptor::areAllUsesIn(Instruction *I,
                                        SmallPtrSetImpl<Instruction *> &Set) {
  for (const Use &Use : I->operands())
    if (!Set.count(dyn_cast<Instruction>(Use)))
      return false;
  return true;
}

bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurKind Kind) {
  switch (Kind) {
  default:
    break;
  case RecurKind::Add:
  case RecurKind::Mul:
  case RecurKind::Or:
  case RecurKind::And:
  case RecurKind::Xor:
  case RecurKind::SMax:
  case RecurKind::SMin:
  case RecurKind::UMax:
  case RecurKind::UMin:
  case RecurKind::SelectICmp:
  case RecurKind::SelectFCmp:
    return true;
  }
  return false;
}

bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurKind Kind) {
  return (Kind != RecurKind::None) && !isIntegerRecurrenceKind(Kind);
}

bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurKind Kind) {
  switch (Kind) {
  default:
    break;
  case RecurKind::Add:
  case RecurKind::Mul:
  case RecurKind::FAdd:
  case RecurKind::FMul:
  case RecurKind::FMulAdd:
    return true;
  }
  return false;
}

/// Determines if Phi may have been type-promoted. If Phi has a single user
/// that ANDs the Phi with a type mask, return the user. RT is updated to
/// account for the narrower bit width represented by the mask, and the AND
/// instruction is added to CI.
static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT,
                                   SmallPtrSetImpl<Instruction *> &Visited,
                                   SmallPtrSetImpl<Instruction *> &CI) {
  if (!Phi->hasOneUse())
    return Phi;

  const APInt *M = nullptr;
  Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser());

  // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT
  // with a new integer type of the corresponding bit width.
  if (match(J, m_c_And(m_Instruction(I), m_APInt(M)))) {
    int32_t Bits = (*M + 1).exactLogBase2();
    if (Bits > 0) {
      RT = IntegerType::get(Phi->getContext(), Bits);
      Visited.insert(Phi);
      CI.insert(J);
      return J;
    }
  }
  return Phi;
}

/// Compute the minimal bit width needed to represent a reduction whose exit
/// instruction is given by Exit.
static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit,
                                                     DemandedBits *DB,
                                                     AssumptionCache *AC,
                                                     DominatorTree *DT) {
  bool IsSigned = false;
  const DataLayout &DL = Exit->getModule()->getDataLayout();
  uint64_t MaxBitWidth = DL.getTypeSizeInBits(Exit->getType());

  if (DB) {
    // Use the demanded bits analysis to determine the bits that are live out
    // of the exit instruction, rounding up to the nearest power of two. If the
    // use of demanded bits results in a smaller bit width, we know the value
    // must be positive (i.e., IsSigned = false), because if this were not the
    // case, the sign bit would have been demanded.
    auto Mask = DB->getDemandedBits(Exit);
    MaxBitWidth = Mask.getBitWidth() - Mask.countLeadingZeros();
  }

  if (MaxBitWidth == DL.getTypeSizeInBits(Exit->getType()) && AC && DT) {
    // If demanded bits wasn't able to limit the bit width, we can try to use
    // value tracking instead. This can be the case, for example, if the value
    // may be negative.
    auto NumSignBits = ComputeNumSignBits(Exit, DL, 0, AC, nullptr, DT);
    auto NumTypeBits = DL.getTypeSizeInBits(Exit->getType());
    MaxBitWidth = NumTypeBits - NumSignBits;
    KnownBits Bits = computeKnownBits(Exit, DL);
    if (!Bits.isNonNegative()) {
      // If the value is not known to be non-negative, we set IsSigned to true,
      // meaning that we will use sext instructions instead of zext
      // instructions to restore the original type.
      IsSigned = true;
      // Make sure at least one sign bit is included in the result, so it
      // will get properly sign-extended.
      ++MaxBitWidth;
    }
  }
  if (!isPowerOf2_64(MaxBitWidth))
    MaxBitWidth = NextPowerOf2(MaxBitWidth);

  return std::make_pair(Type::getIntNTy(Exit->getContext(), MaxBitWidth),
                        IsSigned);
}

/// Collect cast instructions that can be ignored in the vectorizer's cost
/// model, given a reduction exit value and the minimal type in which the
/// reduction can be represented. Also search casts to the recurrence type
/// to find the minimum width used by the recurrence.
static void collectCastInstrs(Loop *TheLoop, Instruction *Exit,
                              Type *RecurrenceType,
                              SmallPtrSetImpl<Instruction *> &Casts,
                              unsigned &MinWidthCastToRecurTy) {

  SmallVector<Instruction *, 8> Worklist;
  SmallPtrSet<Instruction *, 8> Visited;
  Worklist.push_back(Exit);
  MinWidthCastToRecurTy = -1U;

  while (!Worklist.empty()) {
    Instruction *Val = Worklist.pop_back_val();
    Visited.insert(Val);
    if (auto *Cast = dyn_cast<CastInst>(Val)) {
      if (Cast->getSrcTy() == RecurrenceType) {
        // If the source type of a cast instruction is equal to the recurrence
        // type, it will be eliminated, and should be ignored in the vectorizer
        // cost model.
        Casts.insert(Cast);
        continue;
      }
      if (Cast->getDestTy() == RecurrenceType) {
        // The minimum width used by the recurrence is found by checking for
        // casts on its operands. The minimum width is used by the vectorizer
        // when finding the widest type for in-loop reductions without any
        // loads/stores.
        MinWidthCastToRecurTy = std::min<unsigned>(
            MinWidthCastToRecurTy, Cast->getSrcTy()->getScalarSizeInBits());
        continue;
      }
    }
    // Add all operands to the work list if they are loop-varying values that
    // we haven't yet visited.
    for (Value *O : cast<User>(Val)->operands())
      if (auto *I = dyn_cast<Instruction>(O))
        if (TheLoop->contains(I) && !Visited.count(I))
          Worklist.push_back(I);
  }
}

// Check if a given Phi node can be recognized as an ordered reduction for
// vectorizing floating point operations without unsafe math.
static bool checkOrderedReduction(RecurKind Kind, Instruction *ExactFPMathInst,
                                  Instruction *Exit, PHINode *Phi) {
  // Currently only FAdd and FMulAdd are supported.
  if (Kind != RecurKind::FAdd && Kind != RecurKind::FMulAdd)
    return false;

  if (Kind == RecurKind::FAdd && Exit->getOpcode() != Instruction::FAdd)
    return false;

  if (Kind == RecurKind::FMulAdd &&
      !RecurrenceDescriptor::isFMulAddIntrinsic(Exit))
    return false;

  // Ensure the exit instruction has only one user other than the reduction
  // PHI.
  if (Exit != ExactFPMathInst || Exit->hasNUsesOrMore(3))
    return false;

  // The only pattern accepted is the one in which the reduction PHI
  // is used as one of the operands of the exit instruction.
  auto *Op0 = Exit->getOperand(0);
  auto *Op1 = Exit->getOperand(1);
  if (Kind == RecurKind::FAdd && Op0 != Phi && Op1 != Phi)
    return false;
  if (Kind == RecurKind::FMulAdd && Exit->getOperand(2) != Phi)
    return false;

  LLVM_DEBUG(dbgs() << "LV: Found an ordered reduction: Phi: " << *Phi
                    << ", ExitInst: " << *Exit << "\n");

  return true;
}

bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
                                           Loop *TheLoop, FastMathFlags FuncFMF,
                                           RecurrenceDescriptor &RedDes,
                                           DemandedBits *DB,
                                           AssumptionCache *AC,
                                           DominatorTree *DT) {
  if (Phi->getNumIncomingValues() != 2)
    return false;

  // Reduction variables are only found in the loop header block.
  if (Phi->getParent() != TheLoop->getHeader())
    return false;

  // Obtain the reduction start value from the value that comes from the loop
  // preheader.
  Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());

  // ExitInstruction is the single value which is used outside the loop.
  // We only allow for a single reduction value to be used outside the loop.
  // This includes users of the reduction and variables that form a cycle
  // which ends in the phi node.
  Instruction *ExitInstruction = nullptr;
  // Indicates that we found a reduction operation in our scan.
  bool FoundReduxOp = false;

  // We start with the PHI node and scan for all of the users of this
  // instruction. All users must be instructions that can be used as reduction
  // variables (such as ADD). We must have a single out-of-block user. The
  // cycle must include the original PHI.
  bool FoundStartPHI = false;

  // To recognize min/max patterns formed by an icmp/select sequence, we store
  // the number of instructions we saw from the recognized min/max pattern,
  // to make sure we only see exactly the two instructions.
  unsigned NumCmpSelectPatternInst = 0;
  InstDesc ReduxDesc(false, nullptr);

  // Data used for determining if the recurrence has been type-promoted.
  Type *RecurrenceType = Phi->getType();
  SmallPtrSet<Instruction *, 4> CastInsts;
  unsigned MinWidthCastToRecurrenceType;
  Instruction *Start = Phi;
  bool IsSigned = false;

  SmallPtrSet<Instruction *, 8> VisitedInsts;
  SmallVector<Instruction *, 8> Worklist;

  // Return early if the recurrence kind does not match the type of Phi. If the
  // recurrence kind is arithmetic, we attempt to look through AND operations
  // resulting from the type promotion performed by InstCombine. Vector
  // operations are not limited to the legal integer widths, so we may be able
  // to evaluate the reduction in the narrower width.
  if (RecurrenceType->isFloatingPointTy()) {
    if (!isFloatingPointRecurrenceKind(Kind))
      return false;
  } else if (RecurrenceType->isIntegerTy()) {
    if (!isIntegerRecurrenceKind(Kind))
      return false;
    if (!isMinMaxRecurrenceKind(Kind))
      Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);
  } else {
    // Pointer min/max may exist, but it is not supported as a reduction op.
    return false;
  }

  Worklist.push_back(Start);
  VisitedInsts.insert(Start);

  // Start with all flags set because we will intersect this with the reduction
  // flags from all the reduction operations.
  FastMathFlags FMF = FastMathFlags::getFast();

  // The first instruction in the use-def chain of the Phi node that requires
  // exact floating point operations.
  Instruction *ExactFPMathInst = nullptr;

  // A value in the reduction can be used:
  //  - By the reduction:
  //      - Reduction operation:
  //        - One use of the reduction value (safe).
  //        - Multiple uses of the reduction value (not safe).
  //      - PHI:
  //        - All uses of the PHI must be the reduction (safe).
  //        - Otherwise, not safe.
  //  - By instructions outside of the loop (safe).
  //      * One value may have several outside users, but all outside
  //        uses must be of the same value.
  //  - By an instruction that is not part of the reduction (not safe).
  //    This is either:
  //      * An instruction type other than PHI or the reduction operation.
  //      * A PHI in the header other than the initial PHI.
  while (!Worklist.empty()) {
    Instruction *Cur = Worklist.pop_back_val();

    // No users.
    // If the instruction has no users then this is a broken chain and can't be
    // a reduction variable.
    if (Cur->use_empty())
      return false;

    bool IsAPhi = isa<PHINode>(Cur);

    // A header PHI use other than the original PHI.
    if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent())
      return false;

    // Reductions of instructions such as Div and Sub are only possible if the
    // LHS is the reduction variable.
    if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) &&
        !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) &&
        !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))
      return false;

    // Any reduction instruction must be of one of the allowed kinds. We ignore
    // the starting value (the Phi or an AND instruction if the Phi has been
    // type-promoted).
    if (Cur != Start) {
      ReduxDesc =
          isRecurrenceInstr(TheLoop, Phi, Cur, Kind, ReduxDesc, FuncFMF);
      ExactFPMathInst = ExactFPMathInst == nullptr
                            ? ReduxDesc.getExactFPMathInst()
                            : ExactFPMathInst;
      if (!ReduxDesc.isRecurrence())
        return false;
      // FIXME: FMF is allowed on phi, but propagation is not handled correctly.
      if (isa<FPMathOperator>(ReduxDesc.getPatternInst()) && !IsAPhi) {
        FastMathFlags CurFMF = ReduxDesc.getPatternInst()->getFastMathFlags();
        if (auto *Sel = dyn_cast<SelectInst>(ReduxDesc.getPatternInst())) {
          // Accept FMF on either the fcmp or the select of a min/max idiom.
          // TODO: This is a hack to work around the fact that FMF may not be
          //       assigned/propagated correctly. If that problem is fixed or we
          //       standardize on fmin/fmax via intrinsics, this can be removed.
          if (auto *FCmp = dyn_cast<FCmpInst>(Sel->getCondition()))
            CurFMF |= FCmp->getFastMathFlags();
        }
        FMF &= CurFMF;
      }
      // Update this reduction kind if we matched a new instruction.
      // TODO: Can we eliminate the need for a 2nd InstDesc by keeping 'Kind'
      //       state accurate while processing the worklist?
      if (ReduxDesc.getRecKind() != RecurKind::None)
        Kind = ReduxDesc.getRecKind();
    }

    bool IsASelect = isa<SelectInst>(Cur);

    // A conditional reduction operation must only have 2 or fewer uses in
    // VisitedInsts.
    if (IsASelect && (Kind == RecurKind::FAdd || Kind == RecurKind::FMul) &&
        hasMultipleUsesOf(Cur, VisitedInsts, 2))
      return false;

    // A reduction operation must only have one use of the reduction value.
    if (!IsAPhi && !IsASelect && !isMinMaxRecurrenceKind(Kind) &&
        !isSelectCmpRecurrenceKind(Kind) &&
        hasMultipleUsesOf(Cur, VisitedInsts, 1))
      return false;

    // All inputs to a PHI node must be a reduction value.
    if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts))
      return false;

    if ((isIntMinMaxRecurrenceKind(Kind) || Kind == RecurKind::SelectICmp) &&
        (isa<ICmpInst>(Cur) || isa<SelectInst>(Cur)))
      ++NumCmpSelectPatternInst;
    if ((isFPMinMaxRecurrenceKind(Kind) || Kind == RecurKind::SelectFCmp) &&
        (isa<FCmpInst>(Cur) || isa<SelectInst>(Cur)))
      ++NumCmpSelectPatternInst;

    // Check whether we found a reduction operator.
    FoundReduxOp |= !IsAPhi && Cur != Start;

    // Process users of the current instruction. Push non-PHI nodes after PHI
    // nodes onto the stack. This way we are going to have seen all inputs to
    // PHI nodes once we get to them.
    SmallVector<Instruction *, 8> NonPHIs;
    SmallVector<Instruction *, 8> PHIs;
    for (User *U : Cur->users()) {
      Instruction *UI = cast<Instruction>(U);

      // If the user is a call to llvm.fmuladd then the instruction can only be
      // the final operand.
      if (isFMulAddIntrinsic(UI))
        if (Cur == UI->getOperand(0) || Cur == UI->getOperand(1))
          return false;

      // Check if we found the exit user.
      BasicBlock *Parent = UI->getParent();
      if (!TheLoop->contains(Parent)) {
        // If we already know this instruction is used externally, move on to
        // the next user.
        if (ExitInstruction == Cur)
          continue;

        // Exit if we find multiple values used outside or if the header phi
        // node is being used. In this case the user uses the value of the
        // previous iteration, in which case we would lose "VF-1" iterations of
        // the reduction operation if we vectorize.
        if (ExitInstruction != nullptr || Cur == Phi)
          return false;

        // The instruction used by an outside user must be the last instruction
        // before we feed back to the reduction phi. Otherwise, we lose VF-1
        // operations on the value.
        if (!is_contained(Phi->operands(), Cur))
          return false;

        ExitInstruction = Cur;
        continue;
      }

      // Process instructions only once (termination). Each reduction cycle
      // value must only be used once, except by phi nodes and min/max
      // reductions which are represented as a cmp followed by a select.
      InstDesc IgnoredVal(false, nullptr);
      if (VisitedInsts.insert(UI).second) {
        if (isa<PHINode>(UI))
          PHIs.push_back(UI);
        else
          NonPHIs.push_back(UI);
      } else if (!isa<PHINode>(UI) &&
                 ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) &&
                   !isa<SelectInst>(UI)) ||
                  (!isConditionalRdxPattern(Kind, UI).isRecurrence() &&
                   !isSelectCmpPattern(TheLoop, Phi, UI, IgnoredVal)
                        .isRecurrence() &&
                   !isMinMaxPattern(UI, Kind, IgnoredVal).isRecurrence())))
        return false;

      // Remember that we completed the cycle.
      if (UI == Phi)
        FoundStartPHI = true;
    }
    Worklist.append(PHIs.begin(), PHIs.end());
    Worklist.append(NonPHIs.begin(), NonPHIs.end());
  }

  // This means we have seen one but not the other instruction of the
  // pattern or more than just a select and cmp. Zero implies that we saw a
  // llvm.min/max intrinsic, which is always OK.
  if (isMinMaxRecurrenceKind(Kind) && NumCmpSelectPatternInst != 2 &&
      NumCmpSelectPatternInst != 0)
    return false;

  if (isSelectCmpRecurrenceKind(Kind) && NumCmpSelectPatternInst != 1)
    return false;

  if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
    return false;

  const bool IsOrdered =
      checkOrderedReduction(Kind, ExactFPMathInst, ExitInstruction, Phi);

  if (Start != Phi) {
    // If the starting value is not the same as the phi node, we speculatively
    // looked through an 'and' instruction when evaluating a potential
    // arithmetic reduction to determine if it may have been type-promoted.
    //
    // We now compute the minimal bit width that is required to represent the
    // reduction. If this is the same width that was indicated by the 'and', we
    // can represent the reduction in the smaller type. The 'and' instruction
    // will be eliminated since it will essentially be a cast instruction that
    // can be ignored in the cost model. If we compute a different type than we
    // did when evaluating the 'and', the 'and' will not be eliminated, and we
    // will end up with different kinds of operations in the recurrence
    // expression (e.g., IntegerAND, IntegerADD). We give up if this is
    // the case.
    //
    // The vectorizer relies on InstCombine to perform the actual
    // type-shrinking. It does this by inserting instructions to truncate the
    // exit value of the reduction to the width indicated by RecurrenceType and
    // then extend this value back to the original width. If IsSigned is false,
    // a 'zext' instruction will be generated; otherwise, a 'sext' will be
    // used.
    //
    // TODO: We should not rely on InstCombine to rewrite the reduction in the
    //       smaller type. We should just generate a correctly typed expression
    //       to begin with.
    Type *ComputedType;
    std::tie(ComputedType, IsSigned) =
        computeRecurrenceType(ExitInstruction, DB, AC, DT);
    if (ComputedType != RecurrenceType)
      return false;
  }

  // Collect cast instructions and the minimum width used by the recurrence.
  // If the starting value is not the same as the phi node and the computed
  // recurrence type is equal to the recurrence type, the recurrence expression
  // will be represented in a narrower or wider type. If there are any cast
  // instructions that will be unnecessary, collect them in CastsFromRecurTy.
  // Note that the 'and' instruction was already included in this list.
  //
  // TODO: A better way to represent this may be to tag in some way all the
  //       instructions that are a part of the reduction. The vectorizer cost
  //       model could then apply the recurrence type to these instructions,
  //       without needing a white list of instructions to ignore.
  //       This may also be useful for the in-loop reductions, if it can be
  //       kept simple enough.
  collectCastInstrs(TheLoop, ExitInstruction, RecurrenceType, CastInsts,
                    MinWidthCastToRecurrenceType);

  // We found a reduction var if we have reached the original phi node and we
  // only have a single instruction with out-of-loop users.

  // The ExitInstruction (the instruction which is allowed to have out-of-loop
  // users) is saved as part of the RecurrenceDescriptor.

  // Save the description of this reduction variable.
  RecurrenceDescriptor RD(RdxStart, ExitInstruction, Kind, FMF, ExactFPMathInst,
                          RecurrenceType, IsSigned, IsOrdered, CastInsts,
                          MinWidthCastToRecurrenceType);
  RedDes = RD;

  return true;
}

// We are looking for loops that do something like this:
//   int r = 0;
//   for (int i = 0; i < n; i++) {
//     if (src[i] > 3)
//       r = 3;
//   }
// where the reduction value (r) only has two states, in this example 0 or 3.
// The generated LLVM IR for this type of loop will be like this:
//   for.body:
//     %r = phi i32 [ %spec.select, %for.body ], [ 0, %entry ]
//     ...
//     %cmp = icmp sgt i32 %5, 3
//     %spec.select = select i1 %cmp, i32 3, i32 %r
//     ...
// In general we can support vectorization of loops where 'r' flips between
// any two non-constants, provided they are loop invariant. The only thing
// we actually care about at the end of the loop is whether or not any lane
// in the selected vector is different from the start value. The final
// across-vector reduction after the loop simply involves choosing the start
// value if nothing changed (0 in the example above) or the other selected
// value (3 in the example above).
RecurrenceDescriptor::InstDesc
RecurrenceDescriptor::isSelectCmpPattern(Loop *Loop, PHINode *OrigPhi,
                                         Instruction *I, InstDesc &Prev) {
  // We must handle the select(cmp(),x,y) as a single instruction. Advance to
  // the select.
  CmpInst::Predicate Pred;
  if (match(I, m_OneUse(m_Cmp(Pred, m_Value(), m_Value())))) {
    if (auto *Select = dyn_cast<SelectInst>(*I->user_begin()))
      return InstDesc(Select, Prev.getRecKind());
  }

  // Only match select with single use cmp condition.
  if (!match(I, m_Select(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())), m_Value(),
                         m_Value())))
    return InstDesc(false, I);

  SelectInst *SI = cast<SelectInst>(I);
  Value *NonPhi = nullptr;

  if (OrigPhi == dyn_cast<PHINode>(SI->getTrueValue()))
    NonPhi = SI->getFalseValue();
  else if (OrigPhi == dyn_cast<PHINode>(SI->getFalseValue()))
    NonPhi = SI->getTrueValue();
  else
    return InstDesc(false, I);

  // We are looking for selects of the form:
  //   select(cmp(), phi, loop_invariant) or
  //   select(cmp(), loop_invariant, phi)
  if (!Loop->isLoopInvariant(NonPhi))
    return InstDesc(false, I);

  return InstDesc(I, isa<ICmpInst>(I->getOperand(0)) ? RecurKind::SelectICmp
                                                     : RecurKind::SelectFCmp);
}

RecurrenceDescriptor::InstDesc
RecurrenceDescriptor::isMinMaxPattern(Instruction *I, RecurKind Kind,
                                      const InstDesc &Prev) {
  assert((isa<CmpInst>(I) || isa<SelectInst>(I) || isa<CallInst>(I)) &&
         "Expected a cmp or select or call instruction");
  if (!isMinMaxRecurrenceKind(Kind))
    return InstDesc(false, I);

  // We must handle the select(cmp()) as a single instruction. Advance to the
  // select.
  CmpInst::Predicate Pred;
  if (match(I, m_OneUse(m_Cmp(Pred, m_Value(), m_Value())))) {
    if (auto *Select = dyn_cast<SelectInst>(*I->user_begin()))
      return InstDesc(Select, Prev.getRecKind());
  }

  // Only match select with single use cmp condition, or a min/max intrinsic.
  if (!isa<IntrinsicInst>(I) &&
      !match(I, m_Select(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())), m_Value(),
                         m_Value())))
    return InstDesc(false, I);

  // Look for a min/max pattern.
  if (match(I, m_UMin(m_Value(), m_Value())))
    return InstDesc(Kind == RecurKind::UMin, I);
  if (match(I, m_UMax(m_Value(), m_Value())))
    return InstDesc(Kind == RecurKind::UMax, I);
  if (match(I, m_SMax(m_Value(), m_Value())))
    return InstDesc(Kind == RecurKind::SMax, I);
  if (match(I, m_SMin(m_Value(), m_Value())))
    return InstDesc(Kind == RecurKind::SMin, I);
  if (match(I, m_OrdFMin(m_Value(), m_Value())))
    return InstDesc(Kind == RecurKind::FMin, I);
  if (match(I, m_OrdFMax(m_Value(), m_Value())))
    return InstDesc(Kind == RecurKind::FMax, I);
  if (match(I, m_UnordFMin(m_Value(), m_Value())))
    return InstDesc(Kind == RecurKind::FMin, I);
  if (match(I, m_UnordFMax(m_Value(), m_Value())))
    return InstDesc(Kind == RecurKind::FMax, I);
  if (match(I, m_Intrinsic<Intrinsic::minnum>(m_Value(), m_Value())))
    return InstDesc(Kind == RecurKind::FMin, I);
  if (match(I, m_Intrinsic<Intrinsic::maxnum>(m_Value(), m_Value())))
    return InstDesc(Kind == RecurKind::FMax, I);

  return InstDesc(false, I);
}

/// Returns true if the select instruction has users in the compare-and-add
/// reduction pattern below. The select instruction argument is the last one
/// in the sequence.
///
///   %sum.1 = phi ...
///   ...
///   %cmp = fcmp pred %0, %CFP
///   %add = fadd %0, %sum.1
///   %sum.2 = select %cmp, %add, %sum.1
RecurrenceDescriptor::InstDesc
RecurrenceDescriptor::isConditionalRdxPattern(RecurKind Kind, Instruction *I) {
  SelectInst *SI = dyn_cast<SelectInst>(I);
  if (!SI)
    return InstDesc(false, I);

  CmpInst *CI = dyn_cast<CmpInst>(SI->getCondition());
  // Only handle single use cases for now.
  if (!CI || !CI->hasOneUse())
    return InstDesc(false, I);

  Value *TrueVal = SI->getTrueValue();
  Value *FalseVal = SI->getFalseValue();
  // For now, handle only the case when exactly one of the select's operands
  // is a PHI node.
  if ((isa<PHINode>(*TrueVal) && isa<PHINode>(*FalseVal)) ||
      (!isa<PHINode>(*TrueVal) && !isa<PHINode>(*FalseVal)))
    return InstDesc(false, I);

  Instruction *I1 =
      isa<PHINode>(*TrueVal) ? dyn_cast<Instruction>(FalseVal)
                             : dyn_cast<Instruction>(TrueVal);
  if (!I1 || !I1->isBinaryOp())
    return InstDesc(false, I);

  Value *Op1, *Op2;
  if ((m_FAdd(m_Value(Op1), m_Value(Op2)).match(I1) ||
       m_FSub(m_Value(Op1), m_Value(Op2)).match(I1)) &&
      I1->isFast())
    return InstDesc(Kind == RecurKind::FAdd, SI);

  if (m_FMul(m_Value(Op1), m_Value(Op2)).match(I1) && (I1->isFast()))
    return InstDesc(Kind == RecurKind::FMul, SI);

  return InstDesc(false, I);
}

RecurrenceDescriptor::InstDesc
RecurrenceDescriptor::isRecurrenceInstr(Loop *L, PHINode *OrigPhi,
                                        Instruction *I, RecurKind Kind,
                                        InstDesc &Prev, FastMathFlags FuncFMF) {
  assert(Prev.getRecKind() == RecurKind::None || Prev.getRecKind() == Kind);
  switch (I->getOpcode()) {
  default:
    return InstDesc(false, I);
  case Instruction::PHI:
    return InstDesc(I, Prev.getRecKind(), Prev.getExactFPMathInst());
  case Instruction::Sub:
  case Instruction::Add:
    return InstDesc(Kind == RecurKind::Add, I);
  case Instruction::Mul:
    return InstDesc(Kind == RecurKind::Mul, I);
  case Instruction::And:
    return InstDesc(Kind == RecurKind::And, I);
  case Instruction::Or:
    return InstDesc(Kind == RecurKind::Or, I);
  case Instruction::Xor:
    return InstDesc(Kind == RecurKind::Xor, I);
  case Instruction::FDiv:
  case Instruction::FMul:
    return InstDesc(Kind == RecurKind::FMul, I,
                    I->hasAllowReassoc() ? nullptr : I);
  case Instruction::FSub:
  case Instruction::FAdd:
    return InstDesc(Kind == RecurKind::FAdd, I,
                    I->hasAllowReassoc() ? nullptr : I);
  case Instruction::Select:
    if (Kind == RecurKind::FAdd || Kind == RecurKind::FMul)
      return isConditionalRdxPattern(Kind, I);
    LLVM_FALLTHROUGH;
  case Instruction::FCmp:
  case Instruction::ICmp:
  case Instruction::Call:
    if (isSelectCmpRecurrenceKind(Kind))
      return isSelectCmpPattern(L, OrigPhi, I, Prev);
    if (isIntMinMaxRecurrenceKind(Kind) ||
        (((FuncFMF.noNaNs() && FuncFMF.noSignedZeros()) ||
          (isa<FPMathOperator>(I) && I->hasNoNaNs() &&
           I->hasNoSignedZeros())) &&
         isFPMinMaxRecurrenceKind(Kind)))
      return isMinMaxPattern(I, Kind, Prev);
    else if (isFMulAddIntrinsic(I))
      return InstDesc(Kind == RecurKind::FMulAdd, I,
                      I->hasAllowReassoc() ? nullptr : I);
    return InstDesc(false, I);
  }
}

bool RecurrenceDescriptor::hasMultipleUsesOf(
    Instruction *I, SmallPtrSetImpl<Instruction *> &Insts,
    unsigned MaxNumUses) {
  unsigned NumUses = 0;
  for (const Use &U : I->operands()) {
    if (Insts.count(dyn_cast<Instruction>(U)))
      ++NumUses;
    if (NumUses > MaxNumUses)
      return true;
  }

  return false;
}

bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
                                          RecurrenceDescriptor &RedDes,
                                          DemandedBits *DB, AssumptionCache *AC,
                                          DominatorTree *DT) {
  BasicBlock *Header = TheLoop->getHeader();
  Function &F = *Header->getParent();
  FastMathFlags FMF;
  FMF.setNoNaNs(
      F.getFnAttribute("no-nans-fp-math").getValueAsBool());
  FMF.setNoSignedZeros(
      F.getFnAttribute("no-signed-zeros-fp-math").getValueAsBool());

  if (AddReductionVar(Phi, RecurKind::Add, TheLoop, FMF, RedDes, DB, AC, DT)) {
    LLVM_DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RecurKind::Mul, TheLoop, FMF, RedDes, DB, AC, DT)) {
    LLVM_DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RecurKind::Or, TheLoop, FMF, RedDes, DB, AC, DT)) {
    LLVM_DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RecurKind::And, TheLoop, FMF, RedDes, DB, AC, DT)) {
    LLVM_DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RecurKind::Xor, TheLoop, FMF, RedDes, DB, AC, DT)) {
    LLVM_DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RecurKind::SMax, TheLoop, FMF, RedDes, DB, AC, DT)) {
    LLVM_DEBUG(dbgs() << "Found a SMAX reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RecurKind::SMin, TheLoop, FMF, RedDes, DB, AC, DT)) {
    LLVM_DEBUG(dbgs() << "Found a SMIN reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RecurKind::UMax, TheLoop, FMF, RedDes, DB, AC, DT)) {
    LLVM_DEBUG(dbgs() << "Found a UMAX reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RecurKind::UMin, TheLoop, FMF, RedDes, DB, AC, DT)) {
    LLVM_DEBUG(dbgs() << "Found a UMIN reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RecurKind::SelectICmp, TheLoop, FMF, RedDes, DB, AC,
                      DT)) {
    LLVM_DEBUG(dbgs() << "Found an integer conditional select reduction PHI."
                      << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RecurKind::FMul, TheLoop, FMF, RedDes, DB, AC, DT)) {
    LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RecurKind::FAdd, TheLoop, FMF, RedDes, DB, AC, DT)) {
    LLVM_DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RecurKind::FMax, TheLoop, FMF, RedDes, DB, AC, DT)) {
    LLVM_DEBUG(dbgs() << "Found a float MAX reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RecurKind::FMin, TheLoop, FMF, RedDes, DB, AC, DT)) {
    LLVM_DEBUG(dbgs() << "Found a float MIN reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RecurKind::SelectFCmp, TheLoop, FMF, RedDes, DB, AC,
                      DT)) {
    LLVM_DEBUG(dbgs() << "Found a float conditional select reduction PHI."
                      << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RecurKind::FMulAdd, TheLoop, FMF, RedDes, DB, AC,
                      DT)) {
    LLVM_DEBUG(dbgs() << "Found an FMulAdd reduction PHI." << *Phi << "\n");
    return true;
  }
  // Not a reduction of known type.
  return false;
}

bool RecurrenceDescriptor::isFirstOrderRecurrence(
    PHINode *Phi, Loop *TheLoop,
    MapVector<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT) {

  // Ensure the phi node is in the loop header and has two incoming values.
  if (Phi->getParent() != TheLoop->getHeader() ||
      Phi->getNumIncomingValues() != 2)
    return false;

  // Ensure the loop has a preheader and a single latch block. The loop
  // vectorizer will need the latch to set up the next iteration of the loop.
  auto *Preheader = TheLoop->getLoopPreheader();
  auto *Latch = TheLoop->getLoopLatch();
  if (!Preheader || !Latch)
    return false;

  // Ensure the phi node's incoming blocks are the loop preheader and latch.
  if (Phi->getBasicBlockIndex(Preheader) < 0 ||
      Phi->getBasicBlockIndex(Latch) < 0)
    return false;

  // Get the previous value. The previous value comes from the latch edge while
  // the initial value comes from the preheader edge.
  auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch));
  if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous) ||
      SinkAfter.count(Previous)) // Cannot rely on dominance due to motion.
    return false;

  // Ensure every user of the phi node (recursively) is dominated by the
  // previous value. The dominance requirement ensures the loop vectorizer will
  // not need to vectorize the initial value prior to the first iteration of
  // the loop.
  // TODO: Consider extending this sinking to handle memory instructions.

  // We optimistically assume we can sink all users after Previous. Keep a set
  // of instructions to sink after Previous ordered by dominance in the common
  // basic block. It will be applied to SinkAfter if all users can be sunk.
  auto CompareByComesBefore = [](const Instruction *A, const Instruction *B) {
    return A->comesBefore(B);
  };
  std::set<Instruction *, decltype(CompareByComesBefore)> InstrsToSink(
      CompareByComesBefore);

  BasicBlock *PhiBB = Phi->getParent();
  SmallVector<Instruction *, 8> WorkList;
  auto TryToPushSinkCandidate = [&](Instruction *SinkCandidate) {
    // SinkCandidate has already been sunk.
    if (SinkCandidate->getParent() == PhiBB &&
        InstrsToSink.find(SinkCandidate) != InstrsToSink.end())
      return true;

    // Cyclic dependence.
    if (Previous == SinkCandidate)
      return false;

    // We are already good without sinking.
    if (DT->dominates(Previous, SinkCandidate))
      return true;

    if (SinkCandidate->getParent() != PhiBB ||
        SinkCandidate->mayHaveSideEffects() ||
        SinkCandidate->mayReadFromMemory() || SinkCandidate->isTerminator())
      return false;

    // Try to sink an instruction after the 'deepest' previous instruction,
    // which has multiple operands for first order recurrences.
    auto It = SinkAfter.find(SinkCandidate);
    if (It != SinkAfter.end()) {
      auto LastPrev = It->second;
      if (LastPrev->getParent() != Previous->getParent())
        return false;

      // If LastPrev comes after the current Previous, SinkCandidate already
      // gets sunk past Previous and there is nothing left to do.
      return Previous->comesBefore(LastPrev);
    }

    // If we reach a PHI node that is not dominated by Previous, we reached a
    // header PHI. No need for sinking.
    if (isa<PHINode>(SinkCandidate))
      return true;

    // Sink SinkCandidate tentatively and check its users.
    InstrsToSink.insert(SinkCandidate);
    WorkList.push_back(SinkCandidate);
    return true;
  };

  WorkList.push_back(Phi);
  // Try to recursively sink instructions and their users after Previous.
  while (!WorkList.empty()) {
    Instruction *Current = WorkList.pop_back_val();
    for (User *User : Current->users()) {
      if (!TryToPushSinkCandidate(cast<Instruction>(User)))
        return false;
    }
  }

  // We can sink all users of Phi. Update the mapping.
  for (Instruction *I : InstrsToSink) {
    SinkAfter[I] = Previous;
    Previous = I;
  }
  return true;
}

/// This function returns the identity element (or neutral element) for
/// the operation K.
Value *RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp,
                                                   FastMathFlags FMF) const {
  switch (K) {
  case RecurKind::Xor:
  case RecurKind::Add:
  case RecurKind::Or:
    // Adding, Xoring, Oring zero to a number does not change it.
    return ConstantInt::get(Tp, 0);
  case RecurKind::Mul:
    // Multiplying a number by 1 does not change it.
    return ConstantInt::get(Tp, 1);
  case RecurKind::And:
    // AND-ing a number with an all-1 value does not change it.
    return ConstantInt::get(Tp, -1, true);
  case RecurKind::FMul:
    // Multiplying a number by 1 does not change it.
    return ConstantFP::get(Tp, 1.0L);
  case RecurKind::FMulAdd:
  case RecurKind::FAdd:
    // Adding zero to a number does not change it.
    // FIXME: Ideally we should not need to check FMF for FAdd and should
    // always use -0.0. However, this will currently result in mixed vectors
    // of 0.0/-0.0. Instead, we should ensure that 1) the FMF from FAdd are
    // propagated to the PHI nodes where possible, and 2) PHIs with the nsz
    // flag + -0.0 use 0.0. This would mean we can then remove the check for
    // noSignedZeros() below (see D98963).
    if (FMF.noSignedZeros())
      return ConstantFP::get(Tp, 0.0L);
    return ConstantFP::get(Tp, -0.0L);
  case RecurKind::UMin:
    return ConstantInt::get(Tp, -1);
  case RecurKind::UMax:
    return ConstantInt::get(Tp, 0);
  case RecurKind::SMin:
    return ConstantInt::get(Tp,
                            APInt::getSignedMaxValue(Tp->getIntegerBitWidth()));
  case RecurKind::SMax:
    return ConstantInt::get(Tp,
                            APInt::getSignedMinValue(Tp->getIntegerBitWidth()));
  case RecurKind::FMin:
    return ConstantFP::getInfinity(Tp, true);
  case RecurKind::FMax:
    return ConstantFP::getInfinity(Tp, false);
  case RecurKind::SelectICmp:
  case RecurKind::SelectFCmp:
    return getRecurrenceStartValue();
  default:
    llvm_unreachable("Unknown recurrence kind");
  }
}

unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) {
  switch (Kind) {
  case RecurKind::Add:
    return Instruction::Add;
  case RecurKind::Mul:
    return Instruction::Mul;
  case RecurKind::Or:
    return Instruction::Or;
  case RecurKind::And:
    return Instruction::And;
  case RecurKind::Xor:
    return Instruction::Xor;
  case RecurKind::FMul:
    return Instruction::FMul;
  case RecurKind::FMulAdd:
  case RecurKind::FAdd:
    return Instruction::FAdd;
  case RecurKind::SMax:
  case RecurKind::SMin:
  case RecurKind::UMax:
  case RecurKind::UMin:
  case RecurKind::SelectICmp:
    return Instruction::ICmp;
  case RecurKind::FMax:
  case RecurKind::FMin:
  case RecurKind::SelectFCmp:
    return Instruction::FCmp;
  default:
    llvm_unreachable("Unknown recurrence operation");
  }
}

SmallVector<Instruction *, 4>
RecurrenceDescriptor::getReductionOpChain(PHINode *Phi, Loop *L) const {
  SmallVector<Instruction *, 4> ReductionOperations;
  unsigned RedOp = getOpcode(Kind);

  // Search down from the Phi to the LoopExitInstr, looking for instructions
  // with a single user of the correct type for the reduction.

  // Note that we check that the type of the operand is correct for each item
  // in the chain, including the last (the loop exit value). This can come up
  // from sub, which would otherwise be treated as an add reduction. MinMax
  // also needs to check for a pair of icmp/select, for which we use the
  // getNextInstruction and isCorrectOpcode functions to step the right number
  // of instructions, and check the icmp/select pair.
  // FIXME: We also do not attempt to look through Phis/Selects yet, which
  // might be part of the reduction chain, or attempt to look through Ands to
  // find a smaller bitwidth. Subs are also currently not allowed (which are
  // usually treated as part of an add reduction) as they are expected to
  // generally be more expensive than out-of-loop reductions, and need to be
  // costed more carefully.
  unsigned ExpectedUses = 1;
  if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp)
    ExpectedUses = 2;

  auto getNextInstruction = [&](Instruction *Cur) {
    if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp) {
      // We are expecting an icmp/select pair, so advance to the next select
      // instruction if we can. We already know that Cur has 2 uses.
      if (isa<SelectInst>(*Cur->user_begin()))
        return cast<Instruction>(*Cur->user_begin());
      else
        return cast<Instruction>(*std::next(Cur->user_begin()));
    }
    return cast<Instruction>(*Cur->user_begin());
  };
  auto isCorrectOpcode = [&](Instruction *Cur) {
    if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp) {
      Value *LHS, *RHS;
      return SelectPatternResult::isMinOrMax(
          matchSelectPattern(Cur, LHS, RHS).Flavor);
    }
    // Recognize a call to the llvm.fmuladd intrinsic.
    if (isFMulAddIntrinsic(Cur))
      return true;

    return Cur->getOpcode() == RedOp;
  };

  // The loop exit instruction we check first (as a quick test) but add last.
  // We check that its opcode is correct (and don't allow it to be a Sub) and
  // that it has the expected number of uses. It will have one use from the
  // phi and one from a LCSSA value, no matter the type.
  if (!isCorrectOpcode(LoopExitInstr) || !LoopExitInstr->hasNUses(2))
    return {};

  // Check that the Phi has one (or two for min/max) uses.
  if (!Phi->hasNUses(ExpectedUses))
    return {};
  Instruction *Cur = getNextInstruction(Phi);

  // Each other instruction in the chain should have the expected number of
  // uses and be the correct opcode.
  while (Cur != LoopExitInstr) {
    if (!isCorrectOpcode(Cur) || !Cur->hasNUses(ExpectedUses))
      return {};

    ReductionOperations.push_back(Cur);
    Cur = getNextInstruction(Cur);
  }

  ReductionOperations.push_back(Cur);
  return ReductionOperations;
}

InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
                                         const SCEV *Step, BinaryOperator *BOp,
                                         Type *ElementType,
                                         SmallVectorImpl<Instruction *> *Casts)
    : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp),
      ElementType(ElementType) {
  assert(IK != IK_NoInduction && "Not an induction");

  // The start value type should match the induction kind and the value
  // itself should not be null.
  assert(StartValue && "StartValue is null");
  assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
         "StartValue is not a pointer for pointer induction");
  assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
         "StartValue is not an integer for integer induction");

  // Check the step value. It should be a non-zero integer value.
  assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) &&
         "Step value is zero");

  assert((IK != IK_PtrInduction || getConstIntStepValue()) &&
         "Step value should be constant for pointer induction");
  assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) &&
         "StepValue is not an integer");

  assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) &&
         "StepValue is not FP for FpInduction");
  assert((IK != IK_FpInduction ||
          (InductionBinOp &&
           (InductionBinOp->getOpcode() == Instruction::FAdd ||
            InductionBinOp->getOpcode() == Instruction::FSub))) &&
         "Binary opcode should be specified for FP induction");

  if (IK == IK_PtrInduction)
    assert(ElementType && "Pointer induction must have element type");
  else
    assert(!ElementType && "Non-pointer induction cannot have element type");

  if (Casts) {
    for (auto &Inst : *Casts) {
      RedundantCasts.push_back(Inst);
    }
  }
}

ConstantInt *InductionDescriptor::getConstIntStepValue() const {
  if (isa<SCEVConstant>(Step))
    return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue());
  return nullptr;
}

bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop,
                                           ScalarEvolution *SE,
                                           InductionDescriptor &D) {

  // Here we only handle FP induction variables.
  assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type");

  if (TheLoop->getHeader() != Phi->getParent())
    return false;

  // The loop may have multiple entrances or multiple exits; we can analyze
  // this phi if it has a unique entry value and a unique backedge value.
  if (Phi->getNumIncomingValues() != 2)
    return false;
  Value *BEValue = nullptr, *StartValue = nullptr;
  if (TheLoop->contains(Phi->getIncomingBlock(0))) {
    BEValue = Phi->getIncomingValue(0);
    StartValue = Phi->getIncomingValue(1);
  } else {
    assert(TheLoop->contains(Phi->getIncomingBlock(1)) &&
           "Unexpected Phi node in the loop");
    BEValue = Phi->getIncomingValue(1);
    StartValue = Phi->getIncomingValue(0);
  }

  BinaryOperator *BOp = dyn_cast<BinaryOperator>(BEValue);
  if (!BOp)
    return false;

  Value *Addend = nullptr;
  if (BOp->getOpcode() == Instruction::FAdd) {
    if (BOp->getOperand(0) == Phi)
      Addend = BOp->getOperand(1);
    else if (BOp->getOperand(1) == Phi)
      Addend = BOp->getOperand(0);
  } else if (BOp->getOpcode() == Instruction::FSub)
    if (BOp->getOperand(0) == Phi)
      Addend = BOp->getOperand(1);

  if (!Addend)
    return false;

  // The addend should be loop invariant.
  if (auto *I = dyn_cast<Instruction>(Addend))
    if (TheLoop->contains(I))
      return false;

  // An FP step has an unknown SCEV.
  const SCEV *Step = SE->getUnknown(Addend);
  D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp);
  return true;
}

/// This function is called when we suspect that the update-chain of a phi node
/// (whose symbolic SCEV expression is in \p PhiScev) contains redundant casts
/// that can be ignored. (This can happen when the PSCEV rewriter adds a
/// runtime predicate P under which the SCEV expression for the phi can be the
/// AddRecurrence \p AR; see createAddRecFromPHIWithCasts.) We want to find the
/// cast instructions that are involved in the update-chain of this induction.
/// A caller that adds the required runtime predicate can be free to drop these
/// cast instructions, and compute the phi using \p AR (instead of some scev
/// expression with casts).
///
/// For example, without a predicate the scev expression can take the following
/// form:
///   (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy)
///
/// It corresponds to the following IR sequence:
///   %for.body:
///     %x = phi i64 [ 0, %ph ], [ %add, %for.body ]
///     %casted_phi = "ExtTrunc i64 %x"
///     %add = add i64 %casted_phi, %step
///
/// where %x is given in \p PN,
/// PSE.getSCEV(%x) is equal to PSE.getSCEV(%casted_phi) under a predicate,
/// and the IR sequence that "ExtTrunc i64 %x" represents can take one of
/// several forms, for example:
///   ExtTrunc1: %casted_phi = and %x, 2^n-1
/// or:
///   ExtTrunc2: %t = shl %x, m
///              %casted_phi = ashr %t, m
///
/// If we are able to find such a sequence, we return the instructions
/// we found, namely %casted_phi and the instructions on its use-def chain up
/// to the phi (not including the phi).
static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE,
                                    const SCEVUnknown *PhiScev,
                                    const SCEVAddRecExpr *AR,
                                    SmallVectorImpl<Instruction *> &CastInsts) {

  assert(CastInsts.empty() && "CastInsts is expected to be empty.");
  auto *PN = cast<PHINode>(PhiScev->getValue());
  assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression");
  const Loop *L = AR->getLoop();

  // Find any cast instructions that participate in the def-use chain of
  // PhiScev in the loop.
  // FORNOW/TODO: We currently expect the def-use chain to include only
  // two-operand instructions, where one of the operands is an invariant.
  // createAddRecFromPHIWithCasts() currently does not support anything more
  // involved than that, so we keep the search simple. This can be
  // extended/generalized as needed.

  auto getDef = [&](const Value *Val) -> Value * {
    const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val);
    if (!BinOp)
      return nullptr;
    Value *Op0 = BinOp->getOperand(0);
    Value *Op1 = BinOp->getOperand(1);
    Value *Def = nullptr;
    if (L->isLoopInvariant(Op0))
      Def = Op1;
    else if (L->isLoopInvariant(Op1))
      Def = Op0;
    return Def;
  };

  // Look for the instruction that defines the induction via the
  // loop backedge.
  BasicBlock *Latch = L->getLoopLatch();
  if (!Latch)
    return false;
  Value *Val = PN->getIncomingValueForBlock(Latch);
  if (!Val)
    return false;

  // Follow the def-use chain until the induction phi is reached.
  // If on the way we encounter a Value that has the same SCEV Expr as the
  // phi node, we can consider the instructions we visit from that point
  // as part of the cast-sequence that can be ignored.
  bool InCastSequence = false;
  auto *Inst = dyn_cast<Instruction>(Val);
  while (Val != PN) {
    // If we encountered a phi node other than PN, or if we left the loop,
    // we bail out.
    if (!Inst || !L->contains(Inst)) {
      return false;
    }
    auto *AddRec = dyn_cast<SCEVAddRecExpr>(PSE.getSCEV(Val));
    if (AddRec && PSE.areAddRecsEqualWithPreds(AddRec, AR))
      InCastSequence = true;
    if (InCastSequence) {
      // Only the last instruction in the cast sequence is expected to have
      // uses outside the induction def-use chain.
      if (!CastInsts.empty())
        if (!Inst->hasOneUse())
          return false;
      CastInsts.push_back(Inst);
    }
    Val = getDef(Val);
    if (!Val)
      return false;
    Inst = dyn_cast<Instruction>(Val);
  }

  return InCastSequence;
}

bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
                                         PredicatedScalarEvolution &PSE,
                                         InductionDescriptor &D, bool Assume) {
  Type *PhiTy = Phi->getType();

  // Handle integer and pointer induction variables. We now also handle FP
  // inductions, but we do not try to build a recurrence expression from the
  // PHI node in place.

  if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() && !PhiTy->isFloatTy() &&
      !PhiTy->isDoubleTy() && !PhiTy->isHalfTy())
    return false;

  if (PhiTy->isFloatingPointTy())
    return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D);

  const SCEV *PhiScev = PSE.getSCEV(Phi);
  const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);

  // We need this expression to be an AddRecExpr.
  if (Assume && !AR)
    AR = PSE.getAsAddRec(Phi);

  if (!AR) {
    LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
    return false;
  }

  // Record any cast instructions that participate in the induction update.
  const auto *SymbolicPhi = dyn_cast<SCEVUnknown>(PhiScev);
  // If we started from an UnknownSCEV, and managed to build an AddRecurrence
  // only after enabling Assume with PSCEV, this means we may have encountered
  // cast instructions that required adding a runtime check in order to
  // guarantee the correctness of the AddRecurrence representation of the
  // induction.
  if (PhiScev != AR && SymbolicPhi) {
    SmallVector<Instruction *, 2> Casts;
    if (getCastsForInductionPHI(PSE, SymbolicPhi, AR, Casts))
      return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR, &Casts);
  }

  return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR);
}

bool InductionDescriptor::isInductionPHI(
    PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE,
    InductionDescriptor &D, const SCEV *Expr,
    SmallVectorImpl<Instruction *> *CastsToIgnore) {
  Type *PhiTy = Phi->getType();
  // We only handle integer and pointer induction variables.
  if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
    return false;

  // Check that the PHI is consecutive.
  const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi);
  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);

  if (!AR) {
    LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
    return false;
  }

  if (AR->getLoop() != TheLoop) {
    // FIXME: We should treat this as a uniform. Unfortunately, we
    // don't currently know how to handle uniform PHIs.
    LLVM_DEBUG(
        dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n");
    return false;
  }

  Value *StartValue =
      Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader());

  BasicBlock *Latch = AR->getLoop()->getLoopLatch();
  if (!Latch)
    return false;

  const SCEV *Step = AR->getStepRecurrence(*SE);
  // Calculate the pointer stride and check if it is consecutive.
  // The stride may be a constant or a loop invariant integer value.
  const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step);
  if (!ConstStep && !SE->isLoopInvariant(Step, TheLoop))
    return false;

  if (PhiTy->isIntegerTy()) {
    BinaryOperator *BOp =
        dyn_cast<BinaryOperator>(Phi->getIncomingValueForBlock(Latch));
    D = InductionDescriptor(StartValue, IK_IntInduction, Step, BOp,
                            /* ElementType */ nullptr, CastsToIgnore);
    return true;
  }

  assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
  // The step of a pointer induction should be a constant.
  if (!ConstStep)
    return false;

  // Always use i8 element type for opaque pointer inductions.
  PointerType *PtrTy = cast<PointerType>(PhiTy);
  Type *ElementType = PtrTy->isOpaque()
                          ? Type::getInt8Ty(PtrTy->getContext())
                          : PtrTy->getNonOpaquePointerElementType();
  if (!ElementType->isSized())
    return false;

  ConstantInt *CV = ConstStep->getValue();
  const DataLayout &DL = Phi->getModule()->getDataLayout();
  TypeSize TySize = DL.getTypeAllocSize(ElementType);
  // TODO: We could potentially support this for scalable vectors if we can
  // prove at compile time that the constant step is always a multiple of
  // the scalable type.
  if (TySize.isZero() || TySize.isScalable())
    return false;

  int64_t Size = static_cast<int64_t>(TySize.getFixedSize());
  int64_t CVSize = CV->getSExtValue();
  if (CVSize % Size)
    return false;
  auto *StepValue =
      SE->getConstant(CV->getType(), CVSize / Size, true /* signed */);
  D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue,
                          /* BinOp */ nullptr, ElementType);
  return true;
}