1 //===-- LoopUtils.cpp - Loop Utility functions -------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines common loop utility functions. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Transforms/Utils/LoopUtils.h" 15 #include "llvm/Analysis/AliasAnalysis.h" 16 #include "llvm/Analysis/BasicAliasAnalysis.h" 17 #include "llvm/Analysis/GlobalsModRef.h" 18 #include "llvm/Analysis/GlobalsModRef.h" 19 #include "llvm/Analysis/LoopInfo.h" 20 #include "llvm/Analysis/LoopPass.h" 21 #include "llvm/Analysis/TargetTransformInfo.h" 22 #include "llvm/Analysis/ScalarEvolution.h" 23 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" 24 #include "llvm/Analysis/ScalarEvolutionExpander.h" 25 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 26 #include "llvm/IR/Dominators.h" 27 #include "llvm/IR/Instructions.h" 28 #include "llvm/IR/Module.h" 29 #include "llvm/IR/PatternMatch.h" 30 #include "llvm/IR/ValueHandle.h" 31 #include "llvm/Pass.h" 32 #include "llvm/Support/Debug.h" 33 34 using namespace llvm; 35 using namespace llvm::PatternMatch; 36 37 #define DEBUG_TYPE "loop-utils" 38 39 bool RecurrenceDescriptor::areAllUsesIn(Instruction *I, 40 SmallPtrSetImpl<Instruction *> &Set) { 41 for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use) 42 if (!Set.count(dyn_cast<Instruction>(*Use))) 43 return false; 44 return true; 45 } 46 47 bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurrenceKind Kind) { 48 switch (Kind) { 49 default: 50 break; 51 case RK_IntegerAdd: 52 case RK_IntegerMult: 53 case RK_IntegerOr: 54 case RK_IntegerAnd: 55 case RK_IntegerXor: 56 case RK_IntegerMinMax: 57 return true; 58 } 59 return false; 60 } 61 62 bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurrenceKind Kind) { 63 return (Kind != RK_NoRecurrence) && !isIntegerRecurrenceKind(Kind); 64 } 65 66 bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurrenceKind Kind) { 67 switch (Kind) { 68 default: 69 break; 70 case RK_IntegerAdd: 71 case RK_IntegerMult: 72 case RK_FloatAdd: 73 case RK_FloatMult: 74 return true; 75 } 76 return false; 77 } 78 79 Instruction * 80 RecurrenceDescriptor::lookThroughAnd(PHINode *Phi, Type *&RT, 81 SmallPtrSetImpl<Instruction *> &Visited, 82 SmallPtrSetImpl<Instruction *> &CI) { 83 if (!Phi->hasOneUse()) 84 return Phi; 85 86 const APInt *M = nullptr; 87 Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser()); 88 89 // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT 90 // with a new integer type of the corresponding bit width. 91 if (match(J, m_CombineOr(m_And(m_Instruction(I), m_APInt(M)), 92 m_And(m_APInt(M), m_Instruction(I))))) { 93 int32_t Bits = (*M + 1).exactLogBase2(); 94 if (Bits > 0) { 95 RT = IntegerType::get(Phi->getContext(), Bits); 96 Visited.insert(Phi); 97 CI.insert(J); 98 return J; 99 } 100 } 101 return Phi; 102 } 103 104 bool RecurrenceDescriptor::getSourceExtensionKind( 105 Instruction *Start, Instruction *Exit, Type *RT, bool &IsSigned, 106 SmallPtrSetImpl<Instruction *> &Visited, 107 SmallPtrSetImpl<Instruction *> &CI) { 108 109 SmallVector<Instruction *, 8> Worklist; 110 bool FoundOneOperand = false; 111 unsigned DstSize = RT->getPrimitiveSizeInBits(); 112 Worklist.push_back(Exit); 113 114 // Traverse the instructions in the reduction expression, beginning with the 115 // exit value. 116 while (!Worklist.empty()) { 117 Instruction *I = Worklist.pop_back_val(); 118 for (Use &U : I->operands()) { 119 120 // Terminate the traversal if the operand is not an instruction, or we 121 // reach the starting value. 122 Instruction *J = dyn_cast<Instruction>(U.get()); 123 if (!J || J == Start) 124 continue; 125 126 // Otherwise, investigate the operation if it is also in the expression. 127 if (Visited.count(J)) { 128 Worklist.push_back(J); 129 continue; 130 } 131 132 // If the operand is not in Visited, it is not a reduction operation, but 133 // it does feed into one. Make sure it is either a single-use sign- or 134 // zero-extend instruction. 135 CastInst *Cast = dyn_cast<CastInst>(J); 136 bool IsSExtInst = isa<SExtInst>(J); 137 if (!Cast || !Cast->hasOneUse() || !(isa<ZExtInst>(J) || IsSExtInst)) 138 return false; 139 140 // Ensure the source type of the extend is no larger than the reduction 141 // type. It is not necessary for the types to be identical. 142 unsigned SrcSize = Cast->getSrcTy()->getPrimitiveSizeInBits(); 143 if (SrcSize > DstSize) 144 return false; 145 146 // Furthermore, ensure that all such extends are of the same kind. 147 if (FoundOneOperand) { 148 if (IsSigned != IsSExtInst) 149 return false; 150 } else { 151 FoundOneOperand = true; 152 IsSigned = IsSExtInst; 153 } 154 155 // Lastly, if the source type of the extend matches the reduction type, 156 // add the extend to CI so that we can avoid accounting for it in the 157 // cost model. 158 if (SrcSize == DstSize) 159 CI.insert(Cast); 160 } 161 } 162 return true; 163 } 164 165 bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, 166 Loop *TheLoop, bool HasFunNoNaNAttr, 167 RecurrenceDescriptor &RedDes) { 168 if (Phi->getNumIncomingValues() != 2) 169 return false; 170 171 // Reduction variables are only found in the loop header block. 172 if (Phi->getParent() != TheLoop->getHeader()) 173 return false; 174 175 // Obtain the reduction start value from the value that comes from the loop 176 // preheader. 177 Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader()); 178 179 // ExitInstruction is the single value which is used outside the loop. 180 // We only allow for a single reduction value to be used outside the loop. 181 // This includes users of the reduction, variables (which form a cycle 182 // which ends in the phi node). 183 Instruction *ExitInstruction = nullptr; 184 // Indicates that we found a reduction operation in our scan. 185 bool FoundReduxOp = false; 186 187 // We start with the PHI node and scan for all of the users of this 188 // instruction. All users must be instructions that can be used as reduction 189 // variables (such as ADD). We must have a single out-of-block user. The cycle 190 // must include the original PHI. 191 bool FoundStartPHI = false; 192 193 // To recognize min/max patterns formed by a icmp select sequence, we store 194 // the number of instruction we saw from the recognized min/max pattern, 195 // to make sure we only see exactly the two instructions. 196 unsigned NumCmpSelectPatternInst = 0; 197 InstDesc ReduxDesc(false, nullptr); 198 199 // Data used for determining if the recurrence has been type-promoted. 200 Type *RecurrenceType = Phi->getType(); 201 SmallPtrSet<Instruction *, 4> CastInsts; 202 Instruction *Start = Phi; 203 bool IsSigned = false; 204 205 SmallPtrSet<Instruction *, 8> VisitedInsts; 206 SmallVector<Instruction *, 8> Worklist; 207 208 // Return early if the recurrence kind does not match the type of Phi. If the 209 // recurrence kind is arithmetic, we attempt to look through AND operations 210 // resulting from the type promotion performed by InstCombine. Vector 211 // operations are not limited to the legal integer widths, so we may be able 212 // to evaluate the reduction in the narrower width. 213 if (RecurrenceType->isFloatingPointTy()) { 214 if (!isFloatingPointRecurrenceKind(Kind)) 215 return false; 216 } else { 217 if (!isIntegerRecurrenceKind(Kind)) 218 return false; 219 if (isArithmeticRecurrenceKind(Kind)) 220 Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts); 221 } 222 223 Worklist.push_back(Start); 224 VisitedInsts.insert(Start); 225 226 // A value in the reduction can be used: 227 // - By the reduction: 228 // - Reduction operation: 229 // - One use of reduction value (safe). 230 // - Multiple use of reduction value (not safe). 231 // - PHI: 232 // - All uses of the PHI must be the reduction (safe). 233 // - Otherwise, not safe. 234 // - By instructions outside of the loop (safe). 235 // * One value may have several outside users, but all outside 236 // uses must be of the same value. 237 // - By an instruction that is not part of the reduction (not safe). 238 // This is either: 239 // * An instruction type other than PHI or the reduction operation. 240 // * A PHI in the header other than the initial PHI. 241 while (!Worklist.empty()) { 242 Instruction *Cur = Worklist.back(); 243 Worklist.pop_back(); 244 245 // No Users. 246 // If the instruction has no users then this is a broken chain and can't be 247 // a reduction variable. 248 if (Cur->use_empty()) 249 return false; 250 251 bool IsAPhi = isa<PHINode>(Cur); 252 253 // A header PHI use other than the original PHI. 254 if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent()) 255 return false; 256 257 // Reductions of instructions such as Div, and Sub is only possible if the 258 // LHS is the reduction variable. 259 if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) && 260 !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) && 261 !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0)))) 262 return false; 263 264 // Any reduction instruction must be of one of the allowed kinds. We ignore 265 // the starting value (the Phi or an AND instruction if the Phi has been 266 // type-promoted). 267 if (Cur != Start) { 268 ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr); 269 if (!ReduxDesc.isRecurrence()) 270 return false; 271 } 272 273 // A reduction operation must only have one use of the reduction value. 274 if (!IsAPhi && Kind != RK_IntegerMinMax && Kind != RK_FloatMinMax && 275 hasMultipleUsesOf(Cur, VisitedInsts)) 276 return false; 277 278 // All inputs to a PHI node must be a reduction value. 279 if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts)) 280 return false; 281 282 if (Kind == RK_IntegerMinMax && 283 (isa<ICmpInst>(Cur) || isa<SelectInst>(Cur))) 284 ++NumCmpSelectPatternInst; 285 if (Kind == RK_FloatMinMax && (isa<FCmpInst>(Cur) || isa<SelectInst>(Cur))) 286 ++NumCmpSelectPatternInst; 287 288 // Check whether we found a reduction operator. 289 FoundReduxOp |= !IsAPhi && Cur != Start; 290 291 // Process users of current instruction. Push non-PHI nodes after PHI nodes 292 // onto the stack. This way we are going to have seen all inputs to PHI 293 // nodes once we get to them. 294 SmallVector<Instruction *, 8> NonPHIs; 295 SmallVector<Instruction *, 8> PHIs; 296 for (User *U : Cur->users()) { 297 Instruction *UI = cast<Instruction>(U); 298 299 // Check if we found the exit user. 300 BasicBlock *Parent = UI->getParent(); 301 if (!TheLoop->contains(Parent)) { 302 // If we already know this instruction is used externally, move on to 303 // the next user. 304 if (ExitInstruction == Cur) 305 continue; 306 307 // Exit if you find multiple values used outside or if the header phi 308 // node is being used. In this case the user uses the value of the 309 // previous iteration, in which case we would loose "VF-1" iterations of 310 // the reduction operation if we vectorize. 311 if (ExitInstruction != nullptr || Cur == Phi) 312 return false; 313 314 // The instruction used by an outside user must be the last instruction 315 // before we feed back to the reduction phi. Otherwise, we loose VF-1 316 // operations on the value. 317 if (!is_contained(Phi->operands(), Cur)) 318 return false; 319 320 ExitInstruction = Cur; 321 continue; 322 } 323 324 // Process instructions only once (termination). Each reduction cycle 325 // value must only be used once, except by phi nodes and min/max 326 // reductions which are represented as a cmp followed by a select. 327 InstDesc IgnoredVal(false, nullptr); 328 if (VisitedInsts.insert(UI).second) { 329 if (isa<PHINode>(UI)) 330 PHIs.push_back(UI); 331 else 332 NonPHIs.push_back(UI); 333 } else if (!isa<PHINode>(UI) && 334 ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) && 335 !isa<SelectInst>(UI)) || 336 !isMinMaxSelectCmpPattern(UI, IgnoredVal).isRecurrence())) 337 return false; 338 339 // Remember that we completed the cycle. 340 if (UI == Phi) 341 FoundStartPHI = true; 342 } 343 Worklist.append(PHIs.begin(), PHIs.end()); 344 Worklist.append(NonPHIs.begin(), NonPHIs.end()); 345 } 346 347 // This means we have seen one but not the other instruction of the 348 // pattern or more than just a select and cmp. 349 if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) && 350 NumCmpSelectPatternInst != 2) 351 return false; 352 353 if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction) 354 return false; 355 356 // If we think Phi may have been type-promoted, we also need to ensure that 357 // all source operands of the reduction are either SExtInsts or ZEstInsts. If 358 // so, we will be able to evaluate the reduction in the narrower bit width. 359 if (Start != Phi) 360 if (!getSourceExtensionKind(Start, ExitInstruction, RecurrenceType, 361 IsSigned, VisitedInsts, CastInsts)) 362 return false; 363 364 // We found a reduction var if we have reached the original phi node and we 365 // only have a single instruction with out-of-loop users. 366 367 // The ExitInstruction(Instruction which is allowed to have out-of-loop users) 368 // is saved as part of the RecurrenceDescriptor. 369 370 // Save the description of this reduction variable. 371 RecurrenceDescriptor RD( 372 RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(), 373 ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts); 374 RedDes = RD; 375 376 return true; 377 } 378 379 /// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction 380 /// pattern corresponding to a min(X, Y) or max(X, Y). 381 RecurrenceDescriptor::InstDesc 382 RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev) { 383 384 assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) && 385 "Expect a select instruction"); 386 Instruction *Cmp = nullptr; 387 SelectInst *Select = nullptr; 388 389 // We must handle the select(cmp()) as a single instruction. Advance to the 390 // select. 391 if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) { 392 if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->user_begin()))) 393 return InstDesc(false, I); 394 return InstDesc(Select, Prev.getMinMaxKind()); 395 } 396 397 // Only handle single use cases for now. 398 if (!(Select = dyn_cast<SelectInst>(I))) 399 return InstDesc(false, I); 400 if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) && 401 !(Cmp = dyn_cast<FCmpInst>(I->getOperand(0)))) 402 return InstDesc(false, I); 403 if (!Cmp->hasOneUse()) 404 return InstDesc(false, I); 405 406 Value *CmpLeft; 407 Value *CmpRight; 408 409 // Look for a min/max pattern. 410 if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 411 return InstDesc(Select, MRK_UIntMin); 412 else if (m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 413 return InstDesc(Select, MRK_UIntMax); 414 else if (m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 415 return InstDesc(Select, MRK_SIntMax); 416 else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 417 return InstDesc(Select, MRK_SIntMin); 418 else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 419 return InstDesc(Select, MRK_FloatMin); 420 else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 421 return InstDesc(Select, MRK_FloatMax); 422 else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 423 return InstDesc(Select, MRK_FloatMin); 424 else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 425 return InstDesc(Select, MRK_FloatMax); 426 427 return InstDesc(false, I); 428 } 429 430 RecurrenceDescriptor::InstDesc 431 RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind, 432 InstDesc &Prev, bool HasFunNoNaNAttr) { 433 bool FP = I->getType()->isFloatingPointTy(); 434 Instruction *UAI = Prev.getUnsafeAlgebraInst(); 435 if (!UAI && FP && !I->hasUnsafeAlgebra()) 436 UAI = I; // Found an unsafe (unvectorizable) algebra instruction. 437 438 switch (I->getOpcode()) { 439 default: 440 return InstDesc(false, I); 441 case Instruction::PHI: 442 return InstDesc(I, Prev.getMinMaxKind(), Prev.getUnsafeAlgebraInst()); 443 case Instruction::Sub: 444 case Instruction::Add: 445 return InstDesc(Kind == RK_IntegerAdd, I); 446 case Instruction::Mul: 447 return InstDesc(Kind == RK_IntegerMult, I); 448 case Instruction::And: 449 return InstDesc(Kind == RK_IntegerAnd, I); 450 case Instruction::Or: 451 return InstDesc(Kind == RK_IntegerOr, I); 452 case Instruction::Xor: 453 return InstDesc(Kind == RK_IntegerXor, I); 454 case Instruction::FMul: 455 return InstDesc(Kind == RK_FloatMult, I, UAI); 456 case Instruction::FSub: 457 case Instruction::FAdd: 458 return InstDesc(Kind == RK_FloatAdd, I, UAI); 459 case Instruction::FCmp: 460 case Instruction::ICmp: 461 case Instruction::Select: 462 if (Kind != RK_IntegerMinMax && 463 (!HasFunNoNaNAttr || Kind != RK_FloatMinMax)) 464 return InstDesc(false, I); 465 return isMinMaxSelectCmpPattern(I, Prev); 466 } 467 } 468 469 bool RecurrenceDescriptor::hasMultipleUsesOf( 470 Instruction *I, SmallPtrSetImpl<Instruction *> &Insts) { 471 unsigned NumUses = 0; 472 for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; 473 ++Use) { 474 if (Insts.count(dyn_cast<Instruction>(*Use))) 475 ++NumUses; 476 if (NumUses > 1) 477 return true; 478 } 479 480 return false; 481 } 482 bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop, 483 RecurrenceDescriptor &RedDes) { 484 485 BasicBlock *Header = TheLoop->getHeader(); 486 Function &F = *Header->getParent(); 487 bool HasFunNoNaNAttr = 488 F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true"; 489 490 if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes)) { 491 DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n"); 492 return true; 493 } 494 if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes)) { 495 DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n"); 496 return true; 497 } 498 if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes)) { 499 DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n"); 500 return true; 501 } 502 if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes)) { 503 DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n"); 504 return true; 505 } 506 if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes)) { 507 DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n"); 508 return true; 509 } 510 if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr, 511 RedDes)) { 512 DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n"); 513 return true; 514 } 515 if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes)) { 516 DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n"); 517 return true; 518 } 519 if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes)) { 520 DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n"); 521 return true; 522 } 523 if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes)) { 524 DEBUG(dbgs() << "Found an float MINMAX reduction PHI." << *Phi << "\n"); 525 return true; 526 } 527 // Not a reduction of known type. 528 return false; 529 } 530 531 bool RecurrenceDescriptor::isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop, 532 DominatorTree *DT) { 533 534 // Ensure the phi node is in the loop header and has two incoming values. 535 if (Phi->getParent() != TheLoop->getHeader() || 536 Phi->getNumIncomingValues() != 2) 537 return false; 538 539 // Ensure the loop has a preheader and a single latch block. The loop 540 // vectorizer will need the latch to set up the next iteration of the loop. 541 auto *Preheader = TheLoop->getLoopPreheader(); 542 auto *Latch = TheLoop->getLoopLatch(); 543 if (!Preheader || !Latch) 544 return false; 545 546 // Ensure the phi node's incoming blocks are the loop preheader and latch. 547 if (Phi->getBasicBlockIndex(Preheader) < 0 || 548 Phi->getBasicBlockIndex(Latch) < 0) 549 return false; 550 551 // Get the previous value. The previous value comes from the latch edge while 552 // the initial value comes form the preheader edge. 553 auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch)); 554 if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous)) 555 return false; 556 557 // Ensure every user of the phi node is dominated by the previous value. 558 // The dominance requirement ensures the loop vectorizer will not need to 559 // vectorize the initial value prior to the first iteration of the loop. 560 for (User *U : Phi->users()) 561 if (auto *I = dyn_cast<Instruction>(U)) { 562 if (!DT->dominates(Previous, I)) 563 return false; 564 } 565 566 return true; 567 } 568 569 /// This function returns the identity element (or neutral element) for 570 /// the operation K. 571 Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurrenceKind K, 572 Type *Tp) { 573 switch (K) { 574 case RK_IntegerXor: 575 case RK_IntegerAdd: 576 case RK_IntegerOr: 577 // Adding, Xoring, Oring zero to a number does not change it. 578 return ConstantInt::get(Tp, 0); 579 case RK_IntegerMult: 580 // Multiplying a number by 1 does not change it. 581 return ConstantInt::get(Tp, 1); 582 case RK_IntegerAnd: 583 // AND-ing a number with an all-1 value does not change it. 584 return ConstantInt::get(Tp, -1, true); 585 case RK_FloatMult: 586 // Multiplying a number by 1 does not change it. 587 return ConstantFP::get(Tp, 1.0L); 588 case RK_FloatAdd: 589 // Adding zero to a number does not change it. 590 return ConstantFP::get(Tp, 0.0L); 591 default: 592 llvm_unreachable("Unknown recurrence kind"); 593 } 594 } 595 596 /// This function translates the recurrence kind to an LLVM binary operator. 597 unsigned RecurrenceDescriptor::getRecurrenceBinOp(RecurrenceKind Kind) { 598 switch (Kind) { 599 case RK_IntegerAdd: 600 return Instruction::Add; 601 case RK_IntegerMult: 602 return Instruction::Mul; 603 case RK_IntegerOr: 604 return Instruction::Or; 605 case RK_IntegerAnd: 606 return Instruction::And; 607 case RK_IntegerXor: 608 return Instruction::Xor; 609 case RK_FloatMult: 610 return Instruction::FMul; 611 case RK_FloatAdd: 612 return Instruction::FAdd; 613 case RK_IntegerMinMax: 614 return Instruction::ICmp; 615 case RK_FloatMinMax: 616 return Instruction::FCmp; 617 default: 618 llvm_unreachable("Unknown recurrence operation"); 619 } 620 } 621 622 Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder, 623 MinMaxRecurrenceKind RK, 624 Value *Left, Value *Right) { 625 CmpInst::Predicate P = CmpInst::ICMP_NE; 626 switch (RK) { 627 default: 628 llvm_unreachable("Unknown min/max recurrence kind"); 629 case MRK_UIntMin: 630 P = CmpInst::ICMP_ULT; 631 break; 632 case MRK_UIntMax: 633 P = CmpInst::ICMP_UGT; 634 break; 635 case MRK_SIntMin: 636 P = CmpInst::ICMP_SLT; 637 break; 638 case MRK_SIntMax: 639 P = CmpInst::ICMP_SGT; 640 break; 641 case MRK_FloatMin: 642 P = CmpInst::FCMP_OLT; 643 break; 644 case MRK_FloatMax: 645 P = CmpInst::FCMP_OGT; 646 break; 647 } 648 649 // We only match FP sequences with unsafe algebra, so we can unconditionally 650 // set it on any generated instructions. 651 IRBuilder<>::FastMathFlagGuard FMFG(Builder); 652 FastMathFlags FMF; 653 FMF.setUnsafeAlgebra(); 654 Builder.setFastMathFlags(FMF); 655 656 Value *Cmp; 657 if (RK == MRK_FloatMin || RK == MRK_FloatMax) 658 Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp"); 659 else 660 Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp"); 661 662 Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select"); 663 return Select; 664 } 665 666 InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K, 667 const SCEV *Step, BinaryOperator *BOp) 668 : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) { 669 assert(IK != IK_NoInduction && "Not an induction"); 670 671 // Start value type should match the induction kind and the value 672 // itself should not be null. 673 assert(StartValue && "StartValue is null"); 674 assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) && 675 "StartValue is not a pointer for pointer induction"); 676 assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) && 677 "StartValue is not an integer for integer induction"); 678 679 // Check the Step Value. It should be non-zero integer value. 680 assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) && 681 "Step value is zero"); 682 683 assert((IK != IK_PtrInduction || getConstIntStepValue()) && 684 "Step value should be constant for pointer induction"); 685 assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) && 686 "StepValue is not an integer"); 687 688 assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) && 689 "StepValue is not FP for FpInduction"); 690 assert((IK != IK_FpInduction || (InductionBinOp && 691 (InductionBinOp->getOpcode() == Instruction::FAdd || 692 InductionBinOp->getOpcode() == Instruction::FSub))) && 693 "Binary opcode should be specified for FP induction"); 694 } 695 696 int InductionDescriptor::getConsecutiveDirection() const { 697 ConstantInt *ConstStep = getConstIntStepValue(); 698 if (ConstStep && (ConstStep->isOne() || ConstStep->isMinusOne())) 699 return ConstStep->getSExtValue(); 700 return 0; 701 } 702 703 ConstantInt *InductionDescriptor::getConstIntStepValue() const { 704 if (isa<SCEVConstant>(Step)) 705 return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue()); 706 return nullptr; 707 } 708 709 Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index, 710 ScalarEvolution *SE, 711 const DataLayout& DL) const { 712 713 SCEVExpander Exp(*SE, DL, "induction"); 714 assert(Index->getType() == Step->getType() && 715 "Index type does not match StepValue type"); 716 switch (IK) { 717 case IK_IntInduction: { 718 assert(Index->getType() == StartValue->getType() && 719 "Index type does not match StartValue type"); 720 721 // FIXME: Theoretically, we can call getAddExpr() of ScalarEvolution 722 // and calculate (Start + Index * Step) for all cases, without 723 // special handling for "isOne" and "isMinusOne". 724 // But in the real life the result code getting worse. We mix SCEV 725 // expressions and ADD/SUB operations and receive redundant 726 // intermediate values being calculated in different ways and 727 // Instcombine is unable to reduce them all. 728 729 if (getConstIntStepValue() && 730 getConstIntStepValue()->isMinusOne()) 731 return B.CreateSub(StartValue, Index); 732 if (getConstIntStepValue() && 733 getConstIntStepValue()->isOne()) 734 return B.CreateAdd(StartValue, Index); 735 const SCEV *S = SE->getAddExpr(SE->getSCEV(StartValue), 736 SE->getMulExpr(Step, SE->getSCEV(Index))); 737 return Exp.expandCodeFor(S, StartValue->getType(), &*B.GetInsertPoint()); 738 } 739 case IK_PtrInduction: { 740 assert(isa<SCEVConstant>(Step) && 741 "Expected constant step for pointer induction"); 742 const SCEV *S = SE->getMulExpr(SE->getSCEV(Index), Step); 743 Index = Exp.expandCodeFor(S, Index->getType(), &*B.GetInsertPoint()); 744 return B.CreateGEP(nullptr, StartValue, Index); 745 } 746 case IK_FpInduction: { 747 assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value"); 748 assert(InductionBinOp && 749 (InductionBinOp->getOpcode() == Instruction::FAdd || 750 InductionBinOp->getOpcode() == Instruction::FSub) && 751 "Original bin op should be defined for FP induction"); 752 753 Value *StepValue = cast<SCEVUnknown>(Step)->getValue(); 754 755 // Floating point operations had to be 'fast' to enable the induction. 756 FastMathFlags Flags; 757 Flags.setUnsafeAlgebra(); 758 759 Value *MulExp = B.CreateFMul(StepValue, Index); 760 if (isa<Instruction>(MulExp)) 761 // We have to check, the MulExp may be a constant. 762 cast<Instruction>(MulExp)->setFastMathFlags(Flags); 763 764 Value *BOp = B.CreateBinOp(InductionBinOp->getOpcode() , StartValue, 765 MulExp, "induction"); 766 if (isa<Instruction>(BOp)) 767 cast<Instruction>(BOp)->setFastMathFlags(Flags); 768 769 return BOp; 770 } 771 case IK_NoInduction: 772 return nullptr; 773 } 774 llvm_unreachable("invalid enum"); 775 } 776 777 bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop, 778 ScalarEvolution *SE, 779 InductionDescriptor &D) { 780 781 // Here we only handle FP induction variables. 782 assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type"); 783 784 if (TheLoop->getHeader() != Phi->getParent()) 785 return false; 786 787 // The loop may have multiple entrances or multiple exits; we can analyze 788 // this phi if it has a unique entry value and a unique backedge value. 789 if (Phi->getNumIncomingValues() != 2) 790 return false; 791 Value *BEValue = nullptr, *StartValue = nullptr; 792 if (TheLoop->contains(Phi->getIncomingBlock(0))) { 793 BEValue = Phi->getIncomingValue(0); 794 StartValue = Phi->getIncomingValue(1); 795 } else { 796 assert(TheLoop->contains(Phi->getIncomingBlock(1)) && 797 "Unexpected Phi node in the loop"); 798 BEValue = Phi->getIncomingValue(1); 799 StartValue = Phi->getIncomingValue(0); 800 } 801 802 BinaryOperator *BOp = dyn_cast<BinaryOperator>(BEValue); 803 if (!BOp) 804 return false; 805 806 Value *Addend = nullptr; 807 if (BOp->getOpcode() == Instruction::FAdd) { 808 if (BOp->getOperand(0) == Phi) 809 Addend = BOp->getOperand(1); 810 else if (BOp->getOperand(1) == Phi) 811 Addend = BOp->getOperand(0); 812 } else if (BOp->getOpcode() == Instruction::FSub) 813 if (BOp->getOperand(0) == Phi) 814 Addend = BOp->getOperand(1); 815 816 if (!Addend) 817 return false; 818 819 // The addend should be loop invariant 820 if (auto *I = dyn_cast<Instruction>(Addend)) 821 if (TheLoop->contains(I)) 822 return false; 823 824 // FP Step has unknown SCEV 825 const SCEV *Step = SE->getUnknown(Addend); 826 D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp); 827 return true; 828 } 829 830 bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop, 831 PredicatedScalarEvolution &PSE, 832 InductionDescriptor &D, 833 bool Assume) { 834 Type *PhiTy = Phi->getType(); 835 836 // Handle integer and pointer inductions variables. 837 // Now we handle also FP induction but not trying to make a 838 // recurrent expression from the PHI node in-place. 839 840 if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() && 841 !PhiTy->isFloatTy() && !PhiTy->isDoubleTy() && !PhiTy->isHalfTy()) 842 return false; 843 844 if (PhiTy->isFloatingPointTy()) 845 return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D); 846 847 const SCEV *PhiScev = PSE.getSCEV(Phi); 848 const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev); 849 850 // We need this expression to be an AddRecExpr. 851 if (Assume && !AR) 852 AR = PSE.getAsAddRec(Phi); 853 854 if (!AR) { 855 DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); 856 return false; 857 } 858 859 return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR); 860 } 861 862 bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop, 863 ScalarEvolution *SE, 864 InductionDescriptor &D, 865 const SCEV *Expr) { 866 Type *PhiTy = Phi->getType(); 867 // We only handle integer and pointer inductions variables. 868 if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy()) 869 return false; 870 871 // Check that the PHI is consecutive. 872 const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi); 873 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev); 874 875 if (!AR) { 876 DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); 877 return false; 878 } 879 880 if (AR->getLoop() != TheLoop) { 881 // FIXME: We should treat this as a uniform. Unfortunately, we 882 // don't currently know how to handled uniform PHIs. 883 DEBUG(dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n"); 884 return false; 885 } 886 887 Value *StartValue = 888 Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader()); 889 const SCEV *Step = AR->getStepRecurrence(*SE); 890 // Calculate the pointer stride and check if it is consecutive. 891 // The stride may be a constant or a loop invariant integer value. 892 const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step); 893 if (!ConstStep && !SE->isLoopInvariant(Step, TheLoop)) 894 return false; 895 896 if (PhiTy->isIntegerTy()) { 897 D = InductionDescriptor(StartValue, IK_IntInduction, Step); 898 return true; 899 } 900 901 assert(PhiTy->isPointerTy() && "The PHI must be a pointer"); 902 // Pointer induction should be a constant. 903 if (!ConstStep) 904 return false; 905 906 ConstantInt *CV = ConstStep->getValue(); 907 Type *PointerElementType = PhiTy->getPointerElementType(); 908 // The pointer stride cannot be determined if the pointer element type is not 909 // sized. 910 if (!PointerElementType->isSized()) 911 return false; 912 913 const DataLayout &DL = Phi->getModule()->getDataLayout(); 914 int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType)); 915 if (!Size) 916 return false; 917 918 int64_t CVSize = CV->getSExtValue(); 919 if (CVSize % Size) 920 return false; 921 auto *StepValue = SE->getConstant(CV->getType(), CVSize / Size, 922 true /* signed */); 923 D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue); 924 return true; 925 } 926 927 /// \brief Returns the instructions that use values defined in the loop. 928 SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) { 929 SmallVector<Instruction *, 8> UsedOutside; 930 931 for (auto *Block : L->getBlocks()) 932 // FIXME: I believe that this could use copy_if if the Inst reference could 933 // be adapted into a pointer. 934 for (auto &Inst : *Block) { 935 auto Users = Inst.users(); 936 if (any_of(Users, [&](User *U) { 937 auto *Use = cast<Instruction>(U); 938 return !L->contains(Use->getParent()); 939 })) 940 UsedOutside.push_back(&Inst); 941 } 942 943 return UsedOutside; 944 } 945 946 void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) { 947 // By definition, all loop passes need the LoopInfo analysis and the 948 // Dominator tree it depends on. Because they all participate in the loop 949 // pass manager, they must also preserve these. 950 AU.addRequired<DominatorTreeWrapperPass>(); 951 AU.addPreserved<DominatorTreeWrapperPass>(); 952 AU.addRequired<LoopInfoWrapperPass>(); 953 AU.addPreserved<LoopInfoWrapperPass>(); 954 955 // We must also preserve LoopSimplify and LCSSA. We locally access their IDs 956 // here because users shouldn't directly get them from this header. 957 extern char &LoopSimplifyID; 958 extern char &LCSSAID; 959 AU.addRequiredID(LoopSimplifyID); 960 AU.addPreservedID(LoopSimplifyID); 961 AU.addRequiredID(LCSSAID); 962 AU.addPreservedID(LCSSAID); 963 // This is used in the LPPassManager to perform LCSSA verification on passes 964 // which preserve lcssa form 965 AU.addRequired<LCSSAVerificationPass>(); 966 AU.addPreserved<LCSSAVerificationPass>(); 967 968 // Loop passes are designed to run inside of a loop pass manager which means 969 // that any function analyses they require must be required by the first loop 970 // pass in the manager (so that it is computed before the loop pass manager 971 // runs) and preserved by all loop pasess in the manager. To make this 972 // reasonably robust, the set needed for most loop passes is maintained here. 973 // If your loop pass requires an analysis not listed here, you will need to 974 // carefully audit the loop pass manager nesting structure that results. 975 AU.addRequired<AAResultsWrapperPass>(); 976 AU.addPreserved<AAResultsWrapperPass>(); 977 AU.addPreserved<BasicAAWrapperPass>(); 978 AU.addPreserved<GlobalsAAWrapperPass>(); 979 AU.addPreserved<SCEVAAWrapperPass>(); 980 AU.addRequired<ScalarEvolutionWrapperPass>(); 981 AU.addPreserved<ScalarEvolutionWrapperPass>(); 982 } 983 984 /// Manually defined generic "LoopPass" dependency initialization. This is used 985 /// to initialize the exact set of passes from above in \c 986 /// getLoopAnalysisUsage. It can be used within a loop pass's initialization 987 /// with: 988 /// 989 /// INITIALIZE_PASS_DEPENDENCY(LoopPass) 990 /// 991 /// As-if "LoopPass" were a pass. 992 void llvm::initializeLoopPassPass(PassRegistry &Registry) { 993 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 994 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 995 INITIALIZE_PASS_DEPENDENCY(LoopSimplify) 996 INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass) 997 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 998 INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass) 999 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) 1000 INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass) 1001 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 1002 } 1003 1004 /// \brief Find string metadata for loop 1005 /// 1006 /// If it has a value (e.g. {"llvm.distribute", 1} return the value as an 1007 /// operand or null otherwise. If the string metadata is not found return 1008 /// Optional's not-a-value. 1009 Optional<const MDOperand *> llvm::findStringMetadataForLoop(Loop *TheLoop, 1010 StringRef Name) { 1011 MDNode *LoopID = TheLoop->getLoopID(); 1012 // Return none if LoopID is false. 1013 if (!LoopID) 1014 return None; 1015 1016 // First operand should refer to the loop id itself. 1017 assert(LoopID->getNumOperands() > 0 && "requires at least one operand"); 1018 assert(LoopID->getOperand(0) == LoopID && "invalid loop id"); 1019 1020 // Iterate over LoopID operands and look for MDString Metadata 1021 for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) { 1022 MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i)); 1023 if (!MD) 1024 continue; 1025 MDString *S = dyn_cast<MDString>(MD->getOperand(0)); 1026 if (!S) 1027 continue; 1028 // Return true if MDString holds expected MetaData. 1029 if (Name.equals(S->getString())) 1030 switch (MD->getNumOperands()) { 1031 case 1: 1032 return nullptr; 1033 case 2: 1034 return &MD->getOperand(1); 1035 default: 1036 llvm_unreachable("loop metadata has 0 or 1 operand"); 1037 } 1038 } 1039 return None; 1040 } 1041 1042 /// Returns true if the instruction in a loop is guaranteed to execute at least 1043 /// once. 1044 bool llvm::isGuaranteedToExecute(const Instruction &Inst, 1045 const DominatorTree *DT, const Loop *CurLoop, 1046 const LoopSafetyInfo *SafetyInfo) { 1047 // We have to check to make sure that the instruction dominates all 1048 // of the exit blocks. If it doesn't, then there is a path out of the loop 1049 // which does not execute this instruction, so we can't hoist it. 1050 1051 // If the instruction is in the header block for the loop (which is very 1052 // common), it is always guaranteed to dominate the exit blocks. Since this 1053 // is a common case, and can save some work, check it now. 1054 if (Inst.getParent() == CurLoop->getHeader()) 1055 // If there's a throw in the header block, we can't guarantee we'll reach 1056 // Inst. 1057 return !SafetyInfo->HeaderMayThrow; 1058 1059 // Somewhere in this loop there is an instruction which may throw and make us 1060 // exit the loop. 1061 if (SafetyInfo->MayThrow) 1062 return false; 1063 1064 // Get the exit blocks for the current loop. 1065 SmallVector<BasicBlock *, 8> ExitBlocks; 1066 CurLoop->getExitBlocks(ExitBlocks); 1067 1068 // Verify that the block dominates each of the exit blocks of the loop. 1069 for (BasicBlock *ExitBlock : ExitBlocks) 1070 if (!DT->dominates(Inst.getParent(), ExitBlock)) 1071 return false; 1072 1073 // As a degenerate case, if the loop is statically infinite then we haven't 1074 // proven anything since there are no exit blocks. 1075 if (ExitBlocks.empty()) 1076 return false; 1077 1078 // FIXME: In general, we have to prove that the loop isn't an infinite loop. 1079 // See http::llvm.org/PR24078 . (The "ExitBlocks.empty()" check above is 1080 // just a special case of this.) 1081 return true; 1082 } 1083 1084 Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) { 1085 // Only support loops with a unique exiting block, and a latch. 1086 if (!L->getExitingBlock()) 1087 return None; 1088 1089 // Get the branch weights for the the loop's backedge. 1090 BranchInst *LatchBR = 1091 dyn_cast<BranchInst>(L->getLoopLatch()->getTerminator()); 1092 if (!LatchBR || LatchBR->getNumSuccessors() != 2) 1093 return None; 1094 1095 assert((LatchBR->getSuccessor(0) == L->getHeader() || 1096 LatchBR->getSuccessor(1) == L->getHeader()) && 1097 "At least one edge out of the latch must go to the header"); 1098 1099 // To estimate the number of times the loop body was executed, we want to 1100 // know the number of times the backedge was taken, vs. the number of times 1101 // we exited the loop. 1102 uint64_t TrueVal, FalseVal; 1103 if (!LatchBR->extractProfMetadata(TrueVal, FalseVal)) 1104 return None; 1105 1106 if (!TrueVal || !FalseVal) 1107 return 0; 1108 1109 // Divide the count of the backedge by the count of the edge exiting the loop, 1110 // rounding to nearest. 1111 if (LatchBR->getSuccessor(0) == L->getHeader()) 1112 return (TrueVal + (FalseVal / 2)) / FalseVal; 1113 else 1114 return (FalseVal + (TrueVal / 2)) / TrueVal; 1115 } 1116 1117 /// \brief Adds a 'fast' flag to floating point operations. 1118 static Value *addFastMathFlag(Value *V) { 1119 if (isa<FPMathOperator>(V)) { 1120 FastMathFlags Flags; 1121 Flags.setUnsafeAlgebra(); 1122 cast<Instruction>(V)->setFastMathFlags(Flags); 1123 } 1124 return V; 1125 } 1126 1127 // Helper to generate a log2 shuffle reduction. 1128 Value * 1129 llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op, 1130 RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind, 1131 ArrayRef<Value *> RedOps) { 1132 unsigned VF = Src->getType()->getVectorNumElements(); 1133 // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles 1134 // and vector ops, reducing the set of values being computed by half each 1135 // round. 1136 assert(isPowerOf2_32(VF) && 1137 "Reduction emission only supported for pow2 vectors!"); 1138 Value *TmpVec = Src; 1139 SmallVector<Constant *, 32> ShuffleMask(VF, nullptr); 1140 for (unsigned i = VF; i != 1; i >>= 1) { 1141 // Move the upper half of the vector to the lower half. 1142 for (unsigned j = 0; j != i / 2; ++j) 1143 ShuffleMask[j] = Builder.getInt32(i / 2 + j); 1144 1145 // Fill the rest of the mask with undef. 1146 std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), 1147 UndefValue::get(Builder.getInt32Ty())); 1148 1149 Value *Shuf = Builder.CreateShuffleVector( 1150 TmpVec, UndefValue::get(TmpVec->getType()), 1151 ConstantVector::get(ShuffleMask), "rdx.shuf"); 1152 1153 if (Op != Instruction::ICmp && Op != Instruction::FCmp) { 1154 // Floating point operations had to be 'fast' to enable the reduction. 1155 TmpVec = addFastMathFlag(Builder.CreateBinOp((Instruction::BinaryOps)Op, 1156 TmpVec, Shuf, "bin.rdx")); 1157 } else { 1158 assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid && 1159 "Invalid min/max"); 1160 TmpVec = RecurrenceDescriptor::createMinMaxOp(Builder, MinMaxKind, TmpVec, 1161 Shuf); 1162 } 1163 if (!RedOps.empty()) 1164 propagateIRFlags(TmpVec, RedOps); 1165 } 1166 // The result is in the first element of the vector. 1167 return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0)); 1168 } 1169 1170 /// Create a simple vector reduction specified by an opcode and some 1171 /// flags (if generating min/max reductions). 1172 Value *llvm::createSimpleTargetReduction( 1173 IRBuilder<> &Builder, const TargetTransformInfo *TTI, unsigned Opcode, 1174 Value *Src, TargetTransformInfo::ReductionFlags Flags, 1175 ArrayRef<Value *> RedOps) { 1176 assert(isa<VectorType>(Src->getType()) && "Type must be a vector"); 1177 1178 Value *ScalarUdf = UndefValue::get(Src->getType()->getVectorElementType()); 1179 std::function<Value*()> BuildFunc; 1180 using RD = RecurrenceDescriptor; 1181 RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid; 1182 // TODO: Support creating ordered reductions. 1183 FastMathFlags FMFUnsafe; 1184 FMFUnsafe.setUnsafeAlgebra(); 1185 1186 switch (Opcode) { 1187 case Instruction::Add: 1188 BuildFunc = [&]() { return Builder.CreateAddReduce(Src); }; 1189 break; 1190 case Instruction::Mul: 1191 BuildFunc = [&]() { return Builder.CreateMulReduce(Src); }; 1192 break; 1193 case Instruction::And: 1194 BuildFunc = [&]() { return Builder.CreateAndReduce(Src); }; 1195 break; 1196 case Instruction::Or: 1197 BuildFunc = [&]() { return Builder.CreateOrReduce(Src); }; 1198 break; 1199 case Instruction::Xor: 1200 BuildFunc = [&]() { return Builder.CreateXorReduce(Src); }; 1201 break; 1202 case Instruction::FAdd: 1203 BuildFunc = [&]() { 1204 auto Rdx = Builder.CreateFAddReduce(ScalarUdf, Src); 1205 cast<CallInst>(Rdx)->setFastMathFlags(FMFUnsafe); 1206 return Rdx; 1207 }; 1208 break; 1209 case Instruction::FMul: 1210 BuildFunc = [&]() { 1211 auto Rdx = Builder.CreateFMulReduce(ScalarUdf, Src); 1212 cast<CallInst>(Rdx)->setFastMathFlags(FMFUnsafe); 1213 return Rdx; 1214 }; 1215 break; 1216 case Instruction::ICmp: 1217 if (Flags.IsMaxOp) { 1218 MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMax : RD::MRK_UIntMax; 1219 BuildFunc = [&]() { 1220 return Builder.CreateIntMaxReduce(Src, Flags.IsSigned); 1221 }; 1222 } else { 1223 MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMin : RD::MRK_UIntMin; 1224 BuildFunc = [&]() { 1225 return Builder.CreateIntMinReduce(Src, Flags.IsSigned); 1226 }; 1227 } 1228 break; 1229 case Instruction::FCmp: 1230 if (Flags.IsMaxOp) { 1231 MinMaxKind = RD::MRK_FloatMax; 1232 BuildFunc = [&]() { return Builder.CreateFPMaxReduce(Src, Flags.NoNaN); }; 1233 } else { 1234 MinMaxKind = RD::MRK_FloatMin; 1235 BuildFunc = [&]() { return Builder.CreateFPMinReduce(Src, Flags.NoNaN); }; 1236 } 1237 break; 1238 default: 1239 llvm_unreachable("Unhandled opcode"); 1240 break; 1241 } 1242 if (TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags)) 1243 return BuildFunc(); 1244 return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, RedOps); 1245 } 1246 1247 /// Create a vector reduction using a given recurrence descriptor. 1248 Value *llvm::createTargetReduction(IRBuilder<> &Builder, 1249 const TargetTransformInfo *TTI, 1250 RecurrenceDescriptor &Desc, Value *Src, 1251 bool NoNaN) { 1252 // TODO: Support in-order reductions based on the recurrence descriptor. 1253 RecurrenceDescriptor::RecurrenceKind RecKind = Desc.getRecurrenceKind(); 1254 TargetTransformInfo::ReductionFlags Flags; 1255 Flags.NoNaN = NoNaN; 1256 auto getSimpleRdx = [&](unsigned Opc) { 1257 return createSimpleTargetReduction(Builder, TTI, Opc, Src, Flags); 1258 }; 1259 switch (RecKind) { 1260 case RecurrenceDescriptor::RK_FloatAdd: 1261 return getSimpleRdx(Instruction::FAdd); 1262 case RecurrenceDescriptor::RK_FloatMult: 1263 return getSimpleRdx(Instruction::FMul); 1264 case RecurrenceDescriptor::RK_IntegerAdd: 1265 return getSimpleRdx(Instruction::Add); 1266 case RecurrenceDescriptor::RK_IntegerMult: 1267 return getSimpleRdx(Instruction::Mul); 1268 case RecurrenceDescriptor::RK_IntegerAnd: 1269 return getSimpleRdx(Instruction::And); 1270 case RecurrenceDescriptor::RK_IntegerOr: 1271 return getSimpleRdx(Instruction::Or); 1272 case RecurrenceDescriptor::RK_IntegerXor: 1273 return getSimpleRdx(Instruction::Xor); 1274 case RecurrenceDescriptor::RK_IntegerMinMax: { 1275 switch (Desc.getMinMaxRecurrenceKind()) { 1276 case RecurrenceDescriptor::MRK_SIntMax: 1277 Flags.IsSigned = true; 1278 Flags.IsMaxOp = true; 1279 break; 1280 case RecurrenceDescriptor::MRK_UIntMax: 1281 Flags.IsMaxOp = true; 1282 break; 1283 case RecurrenceDescriptor::MRK_SIntMin: 1284 Flags.IsSigned = true; 1285 break; 1286 case RecurrenceDescriptor::MRK_UIntMin: 1287 break; 1288 default: 1289 llvm_unreachable("Unhandled MRK"); 1290 } 1291 return getSimpleRdx(Instruction::ICmp); 1292 } 1293 case RecurrenceDescriptor::RK_FloatMinMax: { 1294 Flags.IsMaxOp = 1295 Desc.getMinMaxRecurrenceKind() == RecurrenceDescriptor::MRK_FloatMax; 1296 return getSimpleRdx(Instruction::FCmp); 1297 } 1298 default: 1299 llvm_unreachable("Unhandled RecKind"); 1300 } 1301 } 1302 1303 void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL) { 1304 if (auto *VecOp = dyn_cast<Instruction>(I)) { 1305 if (auto *I0 = dyn_cast<Instruction>(VL[0])) { 1306 // VecOVp is initialized to the 0th scalar, so start counting from index 1307 // '1'. 1308 VecOp->copyIRFlags(I0); 1309 for (int i = 1, e = VL.size(); i < e; ++i) { 1310 if (auto *Scalar = dyn_cast<Instruction>(VL[i])) 1311 VecOp->andIRFlags(Scalar); 1312 } 1313 } 1314 } 1315 } 1316