1 //===- SLPVectorizer.cpp - A bottom up SLP Vectorizer ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // This pass implements the Bottom Up SLP vectorizer. It detects consecutive 10 // stores that can be put together into vector-stores. Next, it attempts to 11 // construct vectorizable tree using the use-def chains. If a profitable tree 12 // was found, the SLP vectorizer performs vectorization on the tree. 13 // 14 // The pass is inspired by the work described in the paper: 15 // "Loop-Aware SLP in GCC" by Ira Rosen, Dorit Nuzman, Ayal Zaks. 16 // 17 //===----------------------------------------------------------------------===// 18 #define SV_NAME "slp-vectorizer" 19 #define DEBUG_TYPE "SLP" 20 21 #include "llvm/Transforms/Vectorize.h" 22 #include "llvm/ADT/MapVector.h" 23 #include "llvm/ADT/PostOrderIterator.h" 24 #include "llvm/ADT/SetVector.h" 25 #include "llvm/Analysis/AliasAnalysis.h" 26 #include "llvm/Analysis/ScalarEvolution.h" 27 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 28 #include "llvm/Analysis/AliasAnalysis.h" 29 #include "llvm/Analysis/TargetTransformInfo.h" 30 #include "llvm/Analysis/Verifier.h" 31 #include "llvm/Analysis/LoopInfo.h" 32 #include "llvm/IR/DataLayout.h" 33 #include "llvm/IR/Instructions.h" 34 #include "llvm/IR/IntrinsicInst.h" 35 #include "llvm/IR/IRBuilder.h" 36 #include "llvm/IR/Module.h" 37 #include "llvm/IR/Type.h" 38 #include "llvm/IR/Value.h" 39 #include "llvm/Pass.h" 40 #include "llvm/Support/CommandLine.h" 41 #include "llvm/Support/Debug.h" 42 #include "llvm/Support/raw_ostream.h" 43 #include <algorithm> 44 #include <map> 45 46 using namespace llvm; 47 48 static cl::opt<int> 49 SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden, 50 cl::desc("Only vectorize if you gain more than this " 51 "number ")); 52 namespace { 53 54 static const unsigned MinVecRegSize = 128; 55 56 static const unsigned RecursionMaxDepth = 12; 57 58 /// RAII pattern to save the insertion point of the IR builder. 59 class BuilderLocGuard { 60 public: 61 BuilderLocGuard(IRBuilder<> &B) : Builder(B), Loc(B.GetInsertPoint()) {} 62 ~BuilderLocGuard() { if (Loc) Builder.SetInsertPoint(Loc); } 63 64 private: 65 // Prevent copying. 66 BuilderLocGuard(const BuilderLocGuard &); 67 BuilderLocGuard &operator=(const BuilderLocGuard &); 68 IRBuilder<> &Builder; 69 BasicBlock::iterator Loc; 70 }; 71 72 /// A helper class for numbering instructions in multible blocks. 73 /// Numbers starts at zero for each basic block. 74 struct BlockNumbering { 75 76 BlockNumbering(BasicBlock *Bb) : BB(Bb), Valid(false) {} 77 78 BlockNumbering() : BB(0), Valid(false) {} 79 80 void numberInstructions() { 81 unsigned Loc = 0; 82 InstrIdx.clear(); 83 InstrVec.clear(); 84 // Number the instructions in the block. 85 for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { 86 InstrIdx[it] = Loc++; 87 InstrVec.push_back(it); 88 assert(InstrVec[InstrIdx[it]] == it && "Invalid allocation"); 89 } 90 Valid = true; 91 } 92 93 int getIndex(Instruction *I) { 94 assert(I->getParent() == BB && "Invalid instruction"); 95 if (!Valid) 96 numberInstructions(); 97 assert(InstrIdx.count(I) && "Unknown instruction"); 98 return InstrIdx[I]; 99 } 100 101 Instruction *getInstruction(unsigned loc) { 102 if (!Valid) 103 numberInstructions(); 104 assert(InstrVec.size() > loc && "Invalid Index"); 105 return InstrVec[loc]; 106 } 107 108 void forget() { Valid = false; } 109 110 private: 111 /// The block we are numbering. 112 BasicBlock *BB; 113 /// Is the block numbered. 114 bool Valid; 115 /// Maps instructions to numbers and back. 116 SmallDenseMap<Instruction *, int> InstrIdx; 117 /// Maps integers to Instructions. 118 std::vector<Instruction *> InstrVec; 119 }; 120 121 /// \returns the parent basic block if all of the instructions in \p VL 122 /// are in the same block or null otherwise. 123 static BasicBlock *getSameBlock(ArrayRef<Value *> VL) { 124 Instruction *I0 = dyn_cast<Instruction>(VL[0]); 125 if (!I0) 126 return 0; 127 BasicBlock *BB = I0->getParent(); 128 for (int i = 1, e = VL.size(); i < e; i++) { 129 Instruction *I = dyn_cast<Instruction>(VL[i]); 130 if (!I) 131 return 0; 132 133 if (BB != I->getParent()) 134 return 0; 135 } 136 return BB; 137 } 138 139 /// \returns True if all of the values in \p VL are constants. 140 static bool allConstant(ArrayRef<Value *> VL) { 141 for (unsigned i = 0, e = VL.size(); i < e; ++i) 142 if (!isa<Constant>(VL[i])) 143 return false; 144 return true; 145 } 146 147 /// \returns True if all of the values in \p VL are identical. 148 static bool isSplat(ArrayRef<Value *> VL) { 149 for (unsigned i = 1, e = VL.size(); i < e; ++i) 150 if (VL[i] != VL[0]) 151 return false; 152 return true; 153 } 154 155 /// \returns The opcode if all of the Instructions in \p VL have the same 156 /// opcode, or zero. 157 static unsigned getSameOpcode(ArrayRef<Value *> VL) { 158 Instruction *I0 = dyn_cast<Instruction>(VL[0]); 159 if (!I0) 160 return 0; 161 unsigned Opcode = I0->getOpcode(); 162 for (int i = 1, e = VL.size(); i < e; i++) { 163 Instruction *I = dyn_cast<Instruction>(VL[i]); 164 if (!I || Opcode != I->getOpcode()) 165 return 0; 166 } 167 return Opcode; 168 } 169 170 /// \returns The type that all of the values in \p VL have or null if there 171 /// are different types. 172 static Type* getSameType(ArrayRef<Value *> VL) { 173 Type *Ty = VL[0]->getType(); 174 for (int i = 1, e = VL.size(); i < e; i++) 175 if (VL[0]->getType() != Ty) 176 return 0; 177 178 return Ty; 179 } 180 181 /// \returns True if the ExtractElement instructions in VL can be vectorized 182 /// to use the original vector. 183 static bool CanReuseExtract(ArrayRef<Value *> VL) { 184 assert(Instruction::ExtractElement == getSameOpcode(VL) && "Invalid opcode"); 185 // Check if all of the extracts come from the same vector and from the 186 // correct offset. 187 Value *VL0 = VL[0]; 188 ExtractElementInst *E0 = cast<ExtractElementInst>(VL0); 189 Value *Vec = E0->getOperand(0); 190 191 // We have to extract from the same vector type. 192 unsigned NElts = Vec->getType()->getVectorNumElements(); 193 194 if (NElts != VL.size()) 195 return false; 196 197 // Check that all of the indices extract from the correct offset. 198 ConstantInt *CI = dyn_cast<ConstantInt>(E0->getOperand(1)); 199 if (!CI || CI->getZExtValue()) 200 return false; 201 202 for (unsigned i = 1, e = VL.size(); i < e; ++i) { 203 ExtractElementInst *E = cast<ExtractElementInst>(VL[i]); 204 ConstantInt *CI = dyn_cast<ConstantInt>(E->getOperand(1)); 205 206 if (!CI || CI->getZExtValue() != i || E->getOperand(0) != Vec) 207 return false; 208 } 209 210 return true; 211 } 212 213 /// Bottom Up SLP Vectorizer. 214 class BoUpSLP { 215 public: 216 typedef SmallVector<Value *, 8> ValueList; 217 typedef SmallVector<Instruction *, 16> InstrList; 218 typedef SmallPtrSet<Value *, 16> ValueSet; 219 typedef SmallVector<StoreInst *, 8> StoreList; 220 221 BoUpSLP(Function *Func, ScalarEvolution *Se, DataLayout *Dl, 222 TargetTransformInfo *Tti, AliasAnalysis *Aa, LoopInfo *Li, 223 DominatorTree *Dt) : 224 F(Func), SE(Se), DL(Dl), TTI(Tti), AA(Aa), LI(Li), DT(Dt), 225 Builder(Se->getContext()) { 226 // Setup the block numbering utility for all of the blocks in the 227 // function. 228 for (Function::iterator it = F->begin(), e = F->end(); it != e; ++it) { 229 BasicBlock *BB = it; 230 BlocksNumbers[BB] = BlockNumbering(BB); 231 } 232 } 233 234 /// \brief Vectorize the tree that starts with the elements in \p VL. 235 void vectorizeTree(); 236 237 /// \returns the vectorization cost of the subtree that starts at \p VL. 238 /// A negative number means that this is profitable. 239 int getTreeCost(); 240 241 /// Construct a vectorizable tree that starts at \p Roots. 242 void buildTree(ArrayRef<Value *> Roots); 243 244 /// Clear the internal data structures that are created by 'buildTree'. 245 void deleteTree() { 246 VectorizableTree.clear(); 247 ScalarToTreeEntry.clear(); 248 MustGather.clear(); 249 MemBarrierIgnoreList.clear(); 250 } 251 252 /// \returns the scalarization cost for this list of values. Assuming that 253 /// this subtree gets vectorized, we may need to extract the values from the 254 /// roots. This method calculates the cost of extracting the values. 255 int getGatherCost(ArrayRef<Value *> VL); 256 257 /// \returns true if the memory operations A and B are consecutive. 258 bool isConsecutiveAccess(Value *A, Value *B); 259 260 /// \brief Perform LICM and CSE on the newly generated gather sequences. 261 void optimizeGatherSequence(); 262 private: 263 struct TreeEntry; 264 265 /// \returns the cost of the vectorizable entry. 266 int getEntryCost(TreeEntry *E); 267 268 /// This is the recursive part of buildTree. 269 void buildTree_rec(ArrayRef<Value *> Roots, unsigned Depth); 270 271 /// Vectorizer a single entry in the tree. 272 Value *vectorizeTree(TreeEntry *E); 273 274 /// Vectorizer a single entry in the tree, starting in \p VL. 275 Value *vectorizeTree(ArrayRef<Value *> VL); 276 277 /// \brief Take the pointer operand from the Load/Store instruction. 278 /// \returns NULL if this is not a valid Load/Store instruction. 279 static Value *getPointerOperand(Value *I); 280 281 /// \brief Take the address space operand from the Load/Store instruction. 282 /// \returns -1 if this is not a valid Load/Store instruction. 283 static unsigned getAddressSpaceOperand(Value *I); 284 285 /// \returns the scalarization cost for this type. Scalarization in this 286 /// context means the creation of vectors from a group of scalars. 287 int getGatherCost(Type *Ty); 288 289 /// \returns the AA location that is being access by the instruction. 290 AliasAnalysis::Location getLocation(Instruction *I); 291 292 /// \brief Checks if it is possible to sink an instruction from 293 /// \p Src to \p Dst. 294 /// \returns the pointer to the barrier instruction if we can't sink. 295 Value *getSinkBarrier(Instruction *Src, Instruction *Dst); 296 297 /// \returns the index of the last instrucion in the BB from \p VL. 298 int getLastIndex(ArrayRef<Value *> VL); 299 300 /// \returns the Instrucion in the bundle \p VL. 301 Instruction *getLastInstruction(ArrayRef<Value *> VL); 302 303 /// \returns the Instruction at index \p Index which is in Block \p BB. 304 Instruction *getInstructionForIndex(unsigned Index, BasicBlock *BB); 305 306 /// \returns the index of the first User of \p VL. 307 int getFirstUserIndex(ArrayRef<Value *> VL); 308 309 /// \returns a vector from a collection of scalars in \p VL. 310 Value *Gather(ArrayRef<Value *> VL, VectorType *Ty); 311 312 struct TreeEntry { 313 TreeEntry() : Scalars(), VectorizedValue(0), LastScalarIndex(0), 314 NeedToGather(0) {} 315 316 /// \returns true if the scalars in VL are equal to this entry. 317 bool isSame(ArrayRef<Value *> VL) { 318 assert(VL.size() == Scalars.size() && "Invalid size"); 319 for (int i = 0, e = VL.size(); i != e; ++i) 320 if (VL[i] != Scalars[i]) 321 return false; 322 return true; 323 } 324 325 /// A vector of scalars. 326 ValueList Scalars; 327 328 /// The Scalars are vectorized into this value. It is initialized to Null. 329 Value *VectorizedValue; 330 331 /// The index in the basic block of the last scalar. 332 int LastScalarIndex; 333 334 /// Do we need to gather this sequence ? 335 bool NeedToGather; 336 }; 337 338 /// Create a new VectorizableTree entry. 339 TreeEntry *newTreeEntry(ArrayRef<Value *> VL, bool Vectorized) { 340 VectorizableTree.push_back(TreeEntry()); 341 int idx = VectorizableTree.size() - 1; 342 TreeEntry *Last = &VectorizableTree[idx]; 343 Last->Scalars.insert(Last->Scalars.begin(), VL.begin(), VL.end()); 344 Last->NeedToGather = !Vectorized; 345 if (Vectorized) { 346 Last->LastScalarIndex = getLastIndex(VL); 347 for (int i = 0, e = VL.size(); i != e; ++i) { 348 assert(!ScalarToTreeEntry.count(VL[i]) && "Scalar already in tree!"); 349 ScalarToTreeEntry[VL[i]] = idx; 350 } 351 } else { 352 Last->LastScalarIndex = 0; 353 MustGather.insert(VL.begin(), VL.end()); 354 } 355 return Last; 356 } 357 358 /// -- Vectorization State -- 359 /// Holds all of the tree entries. 360 std::vector<TreeEntry> VectorizableTree; 361 362 /// Maps a specific scalar to its tree entry. 363 SmallDenseMap<Value*, int> ScalarToTreeEntry; 364 365 /// A list of scalars that we found that we need to keep as scalars. 366 ValueSet MustGather; 367 368 /// A list of instructions to ignore while sinking 369 /// memory instructions. This map must be reset between runs of getCost. 370 ValueSet MemBarrierIgnoreList; 371 372 /// Holds all of the instructions that we gathered. 373 SetVector<Instruction *> GatherSeq; 374 375 /// Numbers instructions in different blocks. 376 std::map<BasicBlock *, BlockNumbering> BlocksNumbers; 377 378 // Analysis and block reference. 379 Function *F; 380 ScalarEvolution *SE; 381 DataLayout *DL; 382 TargetTransformInfo *TTI; 383 AliasAnalysis *AA; 384 LoopInfo *LI; 385 DominatorTree *DT; 386 /// Instruction builder to construct the vectorized tree. 387 IRBuilder<> Builder; 388 }; 389 390 void BoUpSLP::buildTree(ArrayRef<Value *> Roots) { 391 deleteTree(); 392 buildTree_rec(Roots, 0); 393 } 394 395 396 void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) { 397 bool SameTy = getSameType(VL); (void)SameTy; 398 assert(SameTy && "Invalid types!"); 399 400 if (Depth == RecursionMaxDepth) { 401 DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n"); 402 newTreeEntry(VL, false); 403 return; 404 } 405 406 // Don't handle vectors. 407 if (VL[0]->getType()->isVectorTy()) { 408 DEBUG(dbgs() << "SLP: Gathering due to vector type.\n"); 409 newTreeEntry(VL, false); 410 return; 411 } 412 413 if (StoreInst *SI = dyn_cast<StoreInst>(VL[0])) 414 if (SI->getValueOperand()->getType()->isVectorTy()) { 415 DEBUG(dbgs() << "SLP: Gathering due to store vector type.\n"); 416 newTreeEntry(VL, false); 417 return; 418 } 419 420 // If all of the operands are identical or constant we have a simple solution. 421 if (allConstant(VL) || isSplat(VL) || !getSameBlock(VL) || 422 !getSameOpcode(VL)) { 423 DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n"); 424 newTreeEntry(VL, false); 425 return; 426 } 427 428 // We now know that this is a vector of instructions of the same type from 429 // the same block. 430 431 // Check if this is a duplicate of another entry. 432 if (ScalarToTreeEntry.count(VL[0])) { 433 int Idx = ScalarToTreeEntry[VL[0]]; 434 TreeEntry *E = &VectorizableTree[Idx]; 435 for (unsigned i = 0, e = VL.size(); i != e; ++i) { 436 DEBUG(dbgs() << "SLP: \tChecking bundle: " << *VL[i] << ".\n"); 437 if (E->Scalars[i] != VL[i]) { 438 DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n"); 439 newTreeEntry(VL, false); 440 return; 441 } 442 } 443 DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *VL[0] << ".\n"); 444 return; 445 } 446 447 // Check that none of the instructions in the bundle are already in the tree. 448 for (unsigned i = 0, e = VL.size(); i != e; ++i) { 449 if (ScalarToTreeEntry.count(VL[i])) { 450 DEBUG(dbgs() << "SLP: The instruction (" << *VL[i] << 451 ") is already in tree.\n"); 452 newTreeEntry(VL, false); 453 return; 454 } 455 } 456 457 // If any of the scalars appears in the table OR it is marked as a value that 458 // needs to stat scalar then we need to gather the scalars. 459 for (unsigned i = 0, e = VL.size(); i != e; ++i) { 460 if (ScalarToTreeEntry.count(VL[i]) || MustGather.count(VL[i])) { 461 DEBUG(dbgs() << "SLP: Gathering due to gathered scalar. \n"); 462 newTreeEntry(VL, false); 463 return; 464 } 465 } 466 467 // Check that all of the users of the scalars that we want to vectorize are 468 // schedulable. 469 Instruction *VL0 = cast<Instruction>(VL[0]); 470 int MyLastIndex = getLastIndex(VL); 471 BasicBlock *BB = cast<Instruction>(VL0)->getParent(); 472 473 for (unsigned i = 0, e = VL.size(); i != e; ++i) { 474 Instruction *Scalar = cast<Instruction>(VL[i]); 475 DEBUG(dbgs() << "SLP: Checking users of " << *Scalar << ". \n"); 476 for (Value::use_iterator U = Scalar->use_begin(), UE = Scalar->use_end(); 477 U != UE; ++U) { 478 DEBUG(dbgs() << "SLP: \tUser " << **U << ". \n"); 479 Instruction *User = dyn_cast<Instruction>(*U); 480 if (!User) { 481 DEBUG(dbgs() << "SLP: Gathering due unknown user. \n"); 482 newTreeEntry(VL, false); 483 return; 484 } 485 486 // We don't care if the user is in a different basic block. 487 BasicBlock *UserBlock = User->getParent(); 488 if (UserBlock != BB) { 489 DEBUG(dbgs() << "SLP: User from a different basic block " 490 << *User << ". \n"); 491 continue; 492 } 493 494 // If this is a PHINode within this basic block then we can place the 495 // extract wherever we want. 496 if (isa<PHINode>(*User)) { 497 DEBUG(dbgs() << "SLP: \tWe can schedule PHIs:" << *User << ". \n"); 498 continue; 499 } 500 501 // Check if this is a safe in-tree user. 502 if (ScalarToTreeEntry.count(User)) { 503 int Idx = ScalarToTreeEntry[User]; 504 int VecLocation = VectorizableTree[Idx].LastScalarIndex; 505 if (VecLocation <= MyLastIndex) { 506 DEBUG(dbgs() << "SLP: Gathering due to unschedulable vector. \n"); 507 newTreeEntry(VL, false); 508 return; 509 } 510 DEBUG(dbgs() << "SLP: In-tree user (" << *User << ") at #" << 511 VecLocation << " vector value (" << *Scalar << ") at #" 512 << MyLastIndex << ".\n"); 513 continue; 514 } 515 516 // Make sure that we can schedule this unknown user. 517 BlockNumbering &BN = BlocksNumbers[BB]; 518 int UserIndex = BN.getIndex(User); 519 if (UserIndex < MyLastIndex) { 520 521 DEBUG(dbgs() << "SLP: Can't schedule extractelement for " 522 << *User << ". \n"); 523 newTreeEntry(VL, false); 524 return; 525 } 526 } 527 } 528 529 // Check that every instructions appears once in this bundle. 530 for (unsigned i = 0, e = VL.size(); i < e; ++i) 531 for (unsigned j = i+1; j < e; ++j) 532 if (VL[i] == VL[j]) { 533 DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n"); 534 newTreeEntry(VL, false); 535 return; 536 } 537 538 // Check that instructions in this bundle don't reference other instructions. 539 // The runtime of this check is O(N * N-1 * uses(N)) and a typical N is 4. 540 for (unsigned i = 0, e = VL.size(); i < e; ++i) { 541 for (Value::use_iterator U = VL[i]->use_begin(), UE = VL[i]->use_end(); 542 U != UE; ++U) { 543 for (unsigned j = 0; j < e; ++j) { 544 if (i != j && *U == VL[j]) { 545 DEBUG(dbgs() << "SLP: Intra-bundle dependencies!" << **U << ". \n"); 546 newTreeEntry(VL, false); 547 return; 548 } 549 } 550 } 551 } 552 553 DEBUG(dbgs() << "SLP: We are able to schedule this bundle.\n"); 554 555 unsigned Opcode = getSameOpcode(VL); 556 557 // Check if it is safe to sink the loads or the stores. 558 if (Opcode == Instruction::Load || Opcode == Instruction::Store) { 559 Instruction *Last = getLastInstruction(VL); 560 561 for (unsigned i = 0, e = VL.size(); i < e; ++i) { 562 if (VL[i] == Last) 563 continue; 564 Value *Barrier = getSinkBarrier(cast<Instruction>(VL[i]), Last); 565 if (Barrier) { 566 DEBUG(dbgs() << "SLP: Can't sink " << *VL[i] << "\n down to " << *Last 567 << "\n because of " << *Barrier << ". Gathering.\n"); 568 newTreeEntry(VL, false); 569 return; 570 } 571 } 572 } 573 574 switch (Opcode) { 575 case Instruction::PHI: { 576 PHINode *PH = dyn_cast<PHINode>(VL0); 577 newTreeEntry(VL, true); 578 DEBUG(dbgs() << "SLP: added a vector of PHINodes.\n"); 579 580 for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) { 581 ValueList Operands; 582 // Prepare the operand vector. 583 for (unsigned j = 0; j < VL.size(); ++j) 584 Operands.push_back(cast<PHINode>(VL[j])->getIncomingValue(i)); 585 586 buildTree_rec(Operands, Depth + 1); 587 } 588 return; 589 } 590 case Instruction::ExtractElement: { 591 bool Reuse = CanReuseExtract(VL); 592 if (Reuse) { 593 DEBUG(dbgs() << "SLP: Reusing extract sequence.\n"); 594 } 595 newTreeEntry(VL, Reuse); 596 return; 597 } 598 case Instruction::Load: { 599 // Check if the loads are consecutive or of we need to swizzle them. 600 for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) 601 if (!isConsecutiveAccess(VL[i], VL[i + 1])) { 602 newTreeEntry(VL, false); 603 DEBUG(dbgs() << "SLP: Need to swizzle loads.\n"); 604 return; 605 } 606 607 newTreeEntry(VL, true); 608 DEBUG(dbgs() << "SLP: added a vector of loads.\n"); 609 return; 610 } 611 case Instruction::ZExt: 612 case Instruction::SExt: 613 case Instruction::FPToUI: 614 case Instruction::FPToSI: 615 case Instruction::FPExt: 616 case Instruction::PtrToInt: 617 case Instruction::IntToPtr: 618 case Instruction::SIToFP: 619 case Instruction::UIToFP: 620 case Instruction::Trunc: 621 case Instruction::FPTrunc: 622 case Instruction::BitCast: { 623 Type *SrcTy = VL0->getOperand(0)->getType(); 624 for (unsigned i = 0; i < VL.size(); ++i) { 625 Type *Ty = cast<Instruction>(VL[i])->getOperand(0)->getType(); 626 if (Ty != SrcTy || Ty->isAggregateType() || Ty->isVectorTy()) { 627 newTreeEntry(VL, false); 628 DEBUG(dbgs() << "SLP: Gathering casts with different src types.\n"); 629 return; 630 } 631 } 632 newTreeEntry(VL, true); 633 DEBUG(dbgs() << "SLP: added a vector of casts.\n"); 634 635 for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { 636 ValueList Operands; 637 // Prepare the operand vector. 638 for (unsigned j = 0; j < VL.size(); ++j) 639 Operands.push_back(cast<Instruction>(VL[j])->getOperand(i)); 640 641 buildTree_rec(Operands, Depth+1); 642 } 643 return; 644 } 645 case Instruction::ICmp: 646 case Instruction::FCmp: { 647 // Check that all of the compares have the same predicate. 648 CmpInst::Predicate P0 = dyn_cast<CmpInst>(VL0)->getPredicate(); 649 for (unsigned i = 1, e = VL.size(); i < e; ++i) { 650 CmpInst *Cmp = cast<CmpInst>(VL[i]); 651 if (Cmp->getPredicate() != P0) { 652 newTreeEntry(VL, false); 653 DEBUG(dbgs() << "SLP: Gathering cmp with different predicate.\n"); 654 return; 655 } 656 } 657 658 newTreeEntry(VL, true); 659 DEBUG(dbgs() << "SLP: added a vector of compares.\n"); 660 661 for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { 662 ValueList Operands; 663 // Prepare the operand vector. 664 for (unsigned j = 0; j < VL.size(); ++j) 665 Operands.push_back(cast<Instruction>(VL[j])->getOperand(i)); 666 667 buildTree_rec(Operands, Depth+1); 668 } 669 return; 670 } 671 case Instruction::Select: 672 case Instruction::Add: 673 case Instruction::FAdd: 674 case Instruction::Sub: 675 case Instruction::FSub: 676 case Instruction::Mul: 677 case Instruction::FMul: 678 case Instruction::UDiv: 679 case Instruction::SDiv: 680 case Instruction::FDiv: 681 case Instruction::URem: 682 case Instruction::SRem: 683 case Instruction::FRem: 684 case Instruction::Shl: 685 case Instruction::LShr: 686 case Instruction::AShr: 687 case Instruction::And: 688 case Instruction::Or: 689 case Instruction::Xor: { 690 newTreeEntry(VL, true); 691 DEBUG(dbgs() << "SLP: added a vector of bin op.\n"); 692 693 for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { 694 ValueList Operands; 695 // Prepare the operand vector. 696 for (unsigned j = 0; j < VL.size(); ++j) 697 Operands.push_back(cast<Instruction>(VL[j])->getOperand(i)); 698 699 buildTree_rec(Operands, Depth+1); 700 } 701 return; 702 } 703 case Instruction::Store: { 704 // Check if the stores are consecutive or of we need to swizzle them. 705 for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) 706 if (!isConsecutiveAccess(VL[i], VL[i + 1])) { 707 newTreeEntry(VL, false); 708 DEBUG(dbgs() << "SLP: Non consecutive store.\n"); 709 return; 710 } 711 712 newTreeEntry(VL, true); 713 DEBUG(dbgs() << "SLP: added a vector of stores.\n"); 714 715 ValueList Operands; 716 for (unsigned j = 0; j < VL.size(); ++j) 717 Operands.push_back(cast<Instruction>(VL[j])->getOperand(0)); 718 719 // We can ignore these values because we are sinking them down. 720 MemBarrierIgnoreList.insert(VL.begin(), VL.end()); 721 buildTree_rec(Operands, Depth + 1); 722 return; 723 } 724 default: 725 newTreeEntry(VL, false); 726 DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n"); 727 return; 728 } 729 } 730 731 int BoUpSLP::getEntryCost(TreeEntry *E) { 732 ArrayRef<Value*> VL = E->Scalars; 733 734 Type *ScalarTy = VL[0]->getType(); 735 if (StoreInst *SI = dyn_cast<StoreInst>(VL[0])) 736 ScalarTy = SI->getValueOperand()->getType(); 737 VectorType *VecTy = VectorType::get(ScalarTy, VL.size()); 738 739 if (E->NeedToGather) { 740 if (allConstant(VL)) 741 return 0; 742 if (isSplat(VL)) { 743 return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0); 744 } 745 return getGatherCost(E->Scalars); 746 } 747 748 assert(getSameOpcode(VL) && getSameType(VL) && getSameBlock(VL) && 749 "Invalid VL"); 750 Instruction *VL0 = cast<Instruction>(VL[0]); 751 unsigned Opcode = VL0->getOpcode(); 752 switch (Opcode) { 753 case Instruction::PHI: { 754 return 0; 755 } 756 case Instruction::ExtractElement: { 757 if (CanReuseExtract(VL)) 758 return 0; 759 return getGatherCost(VecTy); 760 } 761 case Instruction::ZExt: 762 case Instruction::SExt: 763 case Instruction::FPToUI: 764 case Instruction::FPToSI: 765 case Instruction::FPExt: 766 case Instruction::PtrToInt: 767 case Instruction::IntToPtr: 768 case Instruction::SIToFP: 769 case Instruction::UIToFP: 770 case Instruction::Trunc: 771 case Instruction::FPTrunc: 772 case Instruction::BitCast: { 773 Type *SrcTy = VL0->getOperand(0)->getType(); 774 775 // Calculate the cost of this instruction. 776 int ScalarCost = VL.size() * TTI->getCastInstrCost(VL0->getOpcode(), 777 VL0->getType(), SrcTy); 778 779 VectorType *SrcVecTy = VectorType::get(SrcTy, VL.size()); 780 int VecCost = TTI->getCastInstrCost(VL0->getOpcode(), VecTy, SrcVecTy); 781 return VecCost - ScalarCost; 782 } 783 case Instruction::FCmp: 784 case Instruction::ICmp: 785 case Instruction::Select: 786 case Instruction::Add: 787 case Instruction::FAdd: 788 case Instruction::Sub: 789 case Instruction::FSub: 790 case Instruction::Mul: 791 case Instruction::FMul: 792 case Instruction::UDiv: 793 case Instruction::SDiv: 794 case Instruction::FDiv: 795 case Instruction::URem: 796 case Instruction::SRem: 797 case Instruction::FRem: 798 case Instruction::Shl: 799 case Instruction::LShr: 800 case Instruction::AShr: 801 case Instruction::And: 802 case Instruction::Or: 803 case Instruction::Xor: { 804 // Calculate the cost of this instruction. 805 int ScalarCost = 0; 806 int VecCost = 0; 807 if (Opcode == Instruction::FCmp || Opcode == Instruction::ICmp || 808 Opcode == Instruction::Select) { 809 VectorType *MaskTy = VectorType::get(Builder.getInt1Ty(), VL.size()); 810 ScalarCost = VecTy->getNumElements() * 811 TTI->getCmpSelInstrCost(Opcode, ScalarTy, Builder.getInt1Ty()); 812 VecCost = TTI->getCmpSelInstrCost(Opcode, VecTy, MaskTy); 813 } else { 814 ScalarCost = VecTy->getNumElements() * 815 TTI->getArithmeticInstrCost(Opcode, ScalarTy); 816 VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy); 817 } 818 return VecCost - ScalarCost; 819 } 820 case Instruction::Load: { 821 // Cost of wide load - cost of scalar loads. 822 int ScalarLdCost = VecTy->getNumElements() * 823 TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0); 824 int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0); 825 return VecLdCost - ScalarLdCost; 826 } 827 case Instruction::Store: { 828 // We know that we can merge the stores. Calculate the cost. 829 int ScalarStCost = VecTy->getNumElements() * 830 TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0); 831 int VecStCost = TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0); 832 return VecStCost - ScalarStCost; 833 } 834 default: 835 llvm_unreachable("Unknown instruction"); 836 } 837 } 838 839 int BoUpSLP::getTreeCost() { 840 int Cost = 0; 841 DEBUG(dbgs() << "SLP: Calculating cost for tree of size " << 842 VectorizableTree.size() << ".\n"); 843 844 for (unsigned i = 0, e = VectorizableTree.size(); i != e; ++i) { 845 int C = getEntryCost(&VectorizableTree[i]); 846 DEBUG(dbgs() << "SLP: Adding cost " << C << " for bundle that starts with " 847 << *VectorizableTree[i].Scalars[0] << " .\n"); 848 Cost += C; 849 } 850 DEBUG(dbgs() << "SLP: Total Cost " << Cost << ".\n"); 851 return Cost; 852 } 853 854 int BoUpSLP::getGatherCost(Type *Ty) { 855 int Cost = 0; 856 for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i) 857 Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i); 858 return Cost; 859 } 860 861 int BoUpSLP::getGatherCost(ArrayRef<Value *> VL) { 862 // Find the type of the operands in VL. 863 Type *ScalarTy = VL[0]->getType(); 864 if (StoreInst *SI = dyn_cast<StoreInst>(VL[0])) 865 ScalarTy = SI->getValueOperand()->getType(); 866 VectorType *VecTy = VectorType::get(ScalarTy, VL.size()); 867 // Find the cost of inserting/extracting values from the vector. 868 return getGatherCost(VecTy); 869 } 870 871 AliasAnalysis::Location BoUpSLP::getLocation(Instruction *I) { 872 if (StoreInst *SI = dyn_cast<StoreInst>(I)) 873 return AA->getLocation(SI); 874 if (LoadInst *LI = dyn_cast<LoadInst>(I)) 875 return AA->getLocation(LI); 876 return AliasAnalysis::Location(); 877 } 878 879 Value *BoUpSLP::getPointerOperand(Value *I) { 880 if (LoadInst *LI = dyn_cast<LoadInst>(I)) 881 return LI->getPointerOperand(); 882 if (StoreInst *SI = dyn_cast<StoreInst>(I)) 883 return SI->getPointerOperand(); 884 return 0; 885 } 886 887 unsigned BoUpSLP::getAddressSpaceOperand(Value *I) { 888 if (LoadInst *L = dyn_cast<LoadInst>(I)) 889 return L->getPointerAddressSpace(); 890 if (StoreInst *S = dyn_cast<StoreInst>(I)) 891 return S->getPointerAddressSpace(); 892 return -1; 893 } 894 895 bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) { 896 Value *PtrA = getPointerOperand(A); 897 Value *PtrB = getPointerOperand(B); 898 unsigned ASA = getAddressSpaceOperand(A); 899 unsigned ASB = getAddressSpaceOperand(B); 900 901 // Check that the address spaces match and that the pointers are valid. 902 if (!PtrA || !PtrB || (ASA != ASB)) 903 return false; 904 905 // Check that A and B are of the same type. 906 if (PtrA->getType() != PtrB->getType()) 907 return false; 908 909 // Calculate the distance. 910 const SCEV *PtrSCEVA = SE->getSCEV(PtrA); 911 const SCEV *PtrSCEVB = SE->getSCEV(PtrB); 912 const SCEV *OffsetSCEV = SE->getMinusSCEV(PtrSCEVA, PtrSCEVB); 913 const SCEVConstant *ConstOffSCEV = dyn_cast<SCEVConstant>(OffsetSCEV); 914 915 // Non constant distance. 916 if (!ConstOffSCEV) 917 return false; 918 919 int64_t Offset = ConstOffSCEV->getValue()->getSExtValue(); 920 Type *Ty = cast<PointerType>(PtrA->getType())->getElementType(); 921 // The Instructions are connsecutive if the size of the first load/store is 922 // the same as the offset. 923 int64_t Sz = DL->getTypeStoreSize(Ty); 924 return ((-Offset) == Sz); 925 } 926 927 Value *BoUpSLP::getSinkBarrier(Instruction *Src, Instruction *Dst) { 928 assert(Src->getParent() == Dst->getParent() && "Not the same BB"); 929 BasicBlock::iterator I = Src, E = Dst; 930 /// Scan all of the instruction from SRC to DST and check if 931 /// the source may alias. 932 for (++I; I != E; ++I) { 933 // Ignore store instructions that are marked as 'ignore'. 934 if (MemBarrierIgnoreList.count(I)) 935 continue; 936 if (Src->mayWriteToMemory()) /* Write */ { 937 if (!I->mayReadOrWriteMemory()) 938 continue; 939 } else /* Read */ { 940 if (!I->mayWriteToMemory()) 941 continue; 942 } 943 AliasAnalysis::Location A = getLocation(&*I); 944 AliasAnalysis::Location B = getLocation(Src); 945 946 if (!A.Ptr || !B.Ptr || AA->alias(A, B)) 947 return I; 948 } 949 return 0; 950 } 951 952 int BoUpSLP::getLastIndex(ArrayRef<Value *> VL) { 953 BasicBlock *BB = cast<Instruction>(VL[0])->getParent(); 954 assert(BB == getSameBlock(VL) && BlocksNumbers.count(BB) && "Invalid block"); 955 BlockNumbering &BN = BlocksNumbers[BB]; 956 957 int MaxIdx = BN.getIndex(BB->getFirstNonPHI()); 958 for (unsigned i = 0, e = VL.size(); i < e; ++i) 959 MaxIdx = std::max(MaxIdx, BN.getIndex(cast<Instruction>(VL[i]))); 960 return MaxIdx; 961 } 962 963 Instruction *BoUpSLP::getLastInstruction(ArrayRef<Value *> VL) { 964 BasicBlock *BB = cast<Instruction>(VL[0])->getParent(); 965 assert(BB == getSameBlock(VL) && BlocksNumbers.count(BB) && "Invalid block"); 966 BlockNumbering &BN = BlocksNumbers[BB]; 967 968 int MaxIdx = BN.getIndex(cast<Instruction>(VL[0])); 969 for (unsigned i = 1, e = VL.size(); i < e; ++i) 970 MaxIdx = std::max(MaxIdx, BN.getIndex(cast<Instruction>(VL[i]))); 971 Instruction *I = BN.getInstruction(MaxIdx); 972 assert(I && "bad location"); 973 return I; 974 } 975 976 Instruction *BoUpSLP::getInstructionForIndex(unsigned Index, BasicBlock *BB) { 977 BlockNumbering &BN = BlocksNumbers[BB]; 978 return BN.getInstruction(Index); 979 } 980 981 int BoUpSLP::getFirstUserIndex(ArrayRef<Value *> VL) { 982 BasicBlock *BB = getSameBlock(VL); 983 assert(BB && "All instructions must come from the same block"); 984 BlockNumbering &BN = BlocksNumbers[BB]; 985 986 // Find the first user of the values. 987 int FirstUser = BN.getIndex(BB->getTerminator()); 988 for (unsigned i = 0, e = VL.size(); i < e; ++i) { 989 for (Value::use_iterator U = VL[i]->use_begin(), UE = VL[i]->use_end(); 990 U != UE; ++U) { 991 Instruction *Instr = dyn_cast<Instruction>(*U); 992 993 if (!Instr || Instr->getParent() != BB) 994 continue; 995 996 FirstUser = std::min(FirstUser, BN.getIndex(Instr)); 997 } 998 } 999 return FirstUser; 1000 } 1001 1002 Value *BoUpSLP::Gather(ArrayRef<Value *> VL, VectorType *Ty) { 1003 Value *Vec = UndefValue::get(Ty); 1004 // Generate the 'InsertElement' instruction. 1005 for (unsigned i = 0; i < Ty->getNumElements(); ++i) { 1006 Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i)); 1007 if (Instruction *I = dyn_cast<Instruction>(Vec)) 1008 GatherSeq.insert(I); 1009 } 1010 1011 return Vec; 1012 } 1013 1014 Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) { 1015 if (ScalarToTreeEntry.count(VL[0])) { 1016 int Idx = ScalarToTreeEntry[VL[0]]; 1017 TreeEntry *E = &VectorizableTree[Idx]; 1018 if (E->isSame(VL)) 1019 return vectorizeTree(E); 1020 } 1021 1022 Type *ScalarTy = VL[0]->getType(); 1023 if (StoreInst *SI = dyn_cast<StoreInst>(VL[0])) 1024 ScalarTy = SI->getValueOperand()->getType(); 1025 VectorType *VecTy = VectorType::get(ScalarTy, VL.size()); 1026 1027 return Gather(VL, VecTy); 1028 } 1029 1030 Value *BoUpSLP::vectorizeTree(TreeEntry *E) { 1031 BuilderLocGuard Guard(Builder); 1032 1033 if (E->VectorizedValue) { 1034 DEBUG(dbgs() << "SLP: Diamond merged for " << *E->Scalars[0] << ".\n"); 1035 return E->VectorizedValue; 1036 } 1037 1038 Type *ScalarTy = E->Scalars[0]->getType(); 1039 if (StoreInst *SI = dyn_cast<StoreInst>(E->Scalars[0])) 1040 ScalarTy = SI->getValueOperand()->getType(); 1041 VectorType *VecTy = VectorType::get(ScalarTy, E->Scalars.size()); 1042 1043 if (E->NeedToGather) { 1044 return Gather(E->Scalars, VecTy); 1045 } 1046 1047 Instruction *VL0 = cast<Instruction>(E->Scalars[0]); 1048 unsigned Opcode = VL0->getOpcode(); 1049 assert(Opcode == getSameOpcode(E->Scalars) && "Invalid opcode"); 1050 1051 switch (Opcode) { 1052 case Instruction::PHI: { 1053 PHINode *PH = dyn_cast<PHINode>(VL0); 1054 Builder.SetInsertPoint(PH->getParent()->getFirstInsertionPt()); 1055 PHINode *NewPhi = Builder.CreatePHI(VecTy, PH->getNumIncomingValues()); 1056 E->VectorizedValue = NewPhi; 1057 1058 for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) { 1059 ValueList Operands; 1060 BasicBlock *IBB = PH->getIncomingBlock(i); 1061 1062 // Prepare the operand vector. 1063 for (unsigned j = 0; j < E->Scalars.size(); ++j) 1064 Operands.push_back(cast<PHINode>(E->Scalars[j])-> 1065 getIncomingValueForBlock(IBB)); 1066 1067 Builder.SetInsertPoint(IBB->getTerminator()); 1068 Value *Vec = vectorizeTree(Operands); 1069 NewPhi->addIncoming(Vec, IBB); 1070 } 1071 1072 assert(NewPhi->getNumIncomingValues() == PH->getNumIncomingValues() && 1073 "Invalid number of incoming values"); 1074 return NewPhi; 1075 } 1076 1077 case Instruction::ExtractElement: { 1078 if (CanReuseExtract(E->Scalars)) { 1079 Value *V = VL0->getOperand(0); 1080 E->VectorizedValue = V; 1081 return V; 1082 } 1083 return Gather(E->Scalars, VecTy); 1084 } 1085 case Instruction::ZExt: 1086 case Instruction::SExt: 1087 case Instruction::FPToUI: 1088 case Instruction::FPToSI: 1089 case Instruction::FPExt: 1090 case Instruction::PtrToInt: 1091 case Instruction::IntToPtr: 1092 case Instruction::SIToFP: 1093 case Instruction::UIToFP: 1094 case Instruction::Trunc: 1095 case Instruction::FPTrunc: 1096 case Instruction::BitCast: { 1097 ValueList INVL; 1098 for (int i = 0, e = E->Scalars.size(); i < e; ++i) 1099 INVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0)); 1100 1101 Builder.SetInsertPoint(getLastInstruction(E->Scalars)); 1102 Value *InVec = vectorizeTree(INVL); 1103 CastInst *CI = dyn_cast<CastInst>(VL0); 1104 Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy); 1105 E->VectorizedValue = V; 1106 return V; 1107 } 1108 case Instruction::FCmp: 1109 case Instruction::ICmp: { 1110 ValueList LHSV, RHSV; 1111 for (int i = 0, e = E->Scalars.size(); i < e; ++i) { 1112 LHSV.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0)); 1113 RHSV.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1)); 1114 } 1115 1116 Builder.SetInsertPoint(getLastInstruction(E->Scalars)); 1117 Value *L = vectorizeTree(LHSV); 1118 Value *R = vectorizeTree(RHSV); 1119 Value *V; 1120 1121 CmpInst::Predicate P0 = dyn_cast<CmpInst>(VL0)->getPredicate(); 1122 if (Opcode == Instruction::FCmp) 1123 V = Builder.CreateFCmp(P0, L, R); 1124 else 1125 V = Builder.CreateICmp(P0, L, R); 1126 1127 E->VectorizedValue = V; 1128 return V; 1129 } 1130 case Instruction::Select: { 1131 ValueList TrueVec, FalseVec, CondVec; 1132 for (int i = 0, e = E->Scalars.size(); i < e; ++i) { 1133 CondVec.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0)); 1134 TrueVec.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1)); 1135 FalseVec.push_back(cast<Instruction>(E->Scalars[i])->getOperand(2)); 1136 } 1137 1138 Builder.SetInsertPoint(getLastInstruction(E->Scalars)); 1139 Value *Cond = vectorizeTree(CondVec); 1140 Value *True = vectorizeTree(TrueVec); 1141 Value *False = vectorizeTree(FalseVec); 1142 Value *V = Builder.CreateSelect(Cond, True, False); 1143 E->VectorizedValue = V; 1144 return V; 1145 } 1146 case Instruction::Add: 1147 case Instruction::FAdd: 1148 case Instruction::Sub: 1149 case Instruction::FSub: 1150 case Instruction::Mul: 1151 case Instruction::FMul: 1152 case Instruction::UDiv: 1153 case Instruction::SDiv: 1154 case Instruction::FDiv: 1155 case Instruction::URem: 1156 case Instruction::SRem: 1157 case Instruction::FRem: 1158 case Instruction::Shl: 1159 case Instruction::LShr: 1160 case Instruction::AShr: 1161 case Instruction::And: 1162 case Instruction::Or: 1163 case Instruction::Xor: { 1164 ValueList LHSVL, RHSVL; 1165 for (int i = 0, e = E->Scalars.size(); i < e; ++i) { 1166 LHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0)); 1167 RHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1)); 1168 } 1169 1170 Builder.SetInsertPoint(getLastInstruction(E->Scalars)); 1171 Value *LHS = vectorizeTree(LHSVL); 1172 Value *RHS = vectorizeTree(RHSVL); 1173 1174 if (LHS == RHS && isa<Instruction>(LHS)) { 1175 assert((VL0->getOperand(0) == VL0->getOperand(1)) && "Invalid order"); 1176 } 1177 1178 BinaryOperator *BinOp = cast<BinaryOperator>(VL0); 1179 Value *V = Builder.CreateBinOp(BinOp->getOpcode(), LHS, RHS); 1180 E->VectorizedValue = V; 1181 return V; 1182 } 1183 case Instruction::Load: { 1184 // Loads are inserted at the head of the tree because we don't want to 1185 // sink them all the way down past store instructions. 1186 Builder.SetInsertPoint(getLastInstruction(E->Scalars)); 1187 LoadInst *LI = cast<LoadInst>(VL0); 1188 Value *VecPtr = 1189 Builder.CreateBitCast(LI->getPointerOperand(), VecTy->getPointerTo()); 1190 unsigned Alignment = LI->getAlignment(); 1191 LI = Builder.CreateLoad(VecPtr); 1192 LI->setAlignment(Alignment); 1193 E->VectorizedValue = LI; 1194 return LI; 1195 } 1196 case Instruction::Store: { 1197 StoreInst *SI = cast<StoreInst>(VL0); 1198 unsigned Alignment = SI->getAlignment(); 1199 1200 ValueList ValueOp; 1201 for (int i = 0, e = E->Scalars.size(); i < e; ++i) 1202 ValueOp.push_back(cast<StoreInst>(E->Scalars[i])->getValueOperand()); 1203 1204 Builder.SetInsertPoint(getLastInstruction(E->Scalars)); 1205 Value *VecValue = vectorizeTree(ValueOp); 1206 Value *VecPtr = 1207 Builder.CreateBitCast(SI->getPointerOperand(), VecTy->getPointerTo()); 1208 StoreInst *S = Builder.CreateStore(VecValue, VecPtr); 1209 S->setAlignment(Alignment); 1210 E->VectorizedValue = S; 1211 return S; 1212 } 1213 default: 1214 llvm_unreachable("unknown inst"); 1215 } 1216 return 0; 1217 } 1218 1219 void BoUpSLP::vectorizeTree() { 1220 vectorizeTree(&VectorizableTree[0]); 1221 1222 // For each vectorized value: 1223 for (int EIdx = 0, EE = VectorizableTree.size(); EIdx < EE; ++EIdx) { 1224 TreeEntry *Entry = &VectorizableTree[EIdx]; 1225 1226 // For each lane: 1227 for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) { 1228 Value *Scalar = Entry->Scalars[Lane]; 1229 1230 // No need to handle users of gathered values. 1231 if (Entry->NeedToGather) 1232 continue; 1233 1234 Value *Vec = Entry->VectorizedValue; 1235 assert(Vec && "Can't find vectorizable value"); 1236 1237 SmallVector<User*, 16> Users(Scalar->use_begin(), Scalar->use_end()); 1238 1239 for (SmallVector<User*, 16>::iterator User = Users.begin(), 1240 UE = Users.end(); User != UE; ++User) { 1241 DEBUG(dbgs() << "SLP: \tupdating user " << **User << ".\n"); 1242 1243 bool Gathered = MustGather.count(*User); 1244 1245 // Skip in-tree scalars that become vectors. 1246 if (ScalarToTreeEntry.count(*User) && !Gathered) { 1247 DEBUG(dbgs() << "SLP: \tUser will be removed soon:" << 1248 **User << ".\n"); 1249 int Idx = ScalarToTreeEntry[*User]; (void) Idx; 1250 assert(!VectorizableTree[Idx].NeedToGather && "bad state ?"); 1251 continue; 1252 } 1253 1254 if (!isa<Instruction>(*User)) 1255 continue; 1256 1257 // Generate extracts for out-of-tree users. 1258 // Find the insertion point for the extractelement lane. 1259 Instruction *Loc = 0; 1260 if (PHINode *PN = dyn_cast<PHINode>(Vec)) { 1261 Loc = PN->getParent()->getFirstInsertionPt(); 1262 } else if (Instruction *Iv = dyn_cast<Instruction>(Vec)){ 1263 Loc = ++((BasicBlock::iterator)*Iv); 1264 } else { 1265 Loc = F->getEntryBlock().begin(); 1266 } 1267 1268 Builder.SetInsertPoint(Loc); 1269 Value *Ex = Builder.CreateExtractElement(Vec, Builder.getInt32(Lane)); 1270 (*User)->replaceUsesOfWith(Scalar, Ex); 1271 DEBUG(dbgs() << "SLP: \tupdated user:" << **User << ".\n"); 1272 } 1273 1274 Type *Ty = Scalar->getType(); 1275 if (!Ty->isVoidTy()) { 1276 for (Value::use_iterator User = Scalar->use_begin(), UE = Scalar->use_end(); 1277 User != UE; ++User) { 1278 DEBUG(dbgs() << "SLP: \tvalidating user:" << **User << ".\n"); 1279 assert(!MustGather.count(*User) && 1280 "Replacing gathered value with undef"); 1281 assert(ScalarToTreeEntry.count(*User) && 1282 "Replacing out-of-tree value with undef"); 1283 } 1284 Value *Undef = UndefValue::get(Ty); 1285 Scalar->replaceAllUsesWith(Undef); 1286 } 1287 DEBUG(dbgs() << "SLP: \tErasing scalar:" << *Scalar << ".\n"); 1288 cast<Instruction>(Scalar)->eraseFromParent(); 1289 } 1290 } 1291 1292 for (Function::iterator it = F->begin(), e = F->end(); it != e; ++it) { 1293 BlocksNumbers[it].forget(); 1294 } 1295 Builder.ClearInsertionPoint(); 1296 } 1297 1298 void BoUpSLP::optimizeGatherSequence() { 1299 DEBUG(dbgs() << "SLP: Optimizing " << GatherSeq.size() 1300 << " gather sequences instructions.\n"); 1301 // LICM InsertElementInst sequences. 1302 for (SetVector<Instruction *>::iterator it = GatherSeq.begin(), 1303 e = GatherSeq.end(); it != e; ++it) { 1304 InsertElementInst *Insert = dyn_cast<InsertElementInst>(*it); 1305 1306 if (!Insert) 1307 continue; 1308 1309 // Check if this block is inside a loop. 1310 Loop *L = LI->getLoopFor(Insert->getParent()); 1311 if (!L) 1312 continue; 1313 1314 // Check if it has a preheader. 1315 BasicBlock *PreHeader = L->getLoopPreheader(); 1316 if (!PreHeader) 1317 continue; 1318 1319 // If the vector or the element that we insert into it are 1320 // instructions that are defined in this basic block then we can't 1321 // hoist this instruction. 1322 Instruction *CurrVec = dyn_cast<Instruction>(Insert->getOperand(0)); 1323 Instruction *NewElem = dyn_cast<Instruction>(Insert->getOperand(1)); 1324 if (CurrVec && L->contains(CurrVec)) 1325 continue; 1326 if (NewElem && L->contains(NewElem)) 1327 continue; 1328 1329 // We can hoist this instruction. Move it to the pre-header. 1330 Insert->moveBefore(PreHeader->getTerminator()); 1331 } 1332 1333 // Perform O(N^2) search over the gather sequences and merge identical 1334 // instructions. TODO: We can further optimize this scan if we split the 1335 // instructions into different buckets based on the insert lane. 1336 SmallPtrSet<Instruction*, 16> Visited; 1337 SmallVector<Instruction*, 16> ToRemove; 1338 ReversePostOrderTraversal<Function*> RPOT(F); 1339 for (ReversePostOrderTraversal<Function*>::rpo_iterator I = RPOT.begin(), 1340 E = RPOT.end(); I != E; ++I) { 1341 BasicBlock *BB = *I; 1342 // For all instructions in the function: 1343 for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { 1344 InsertElementInst *Insert = dyn_cast<InsertElementInst>(it); 1345 if (!Insert || !GatherSeq.count(Insert)) 1346 continue; 1347 1348 // Check if we can replace this instruction with any of the 1349 // visited instructions. 1350 for (SmallPtrSet<Instruction*, 16>::iterator v = Visited.begin(), 1351 ve = Visited.end(); v != ve; ++v) { 1352 if (Insert->isIdenticalTo(*v) && 1353 DT->dominates((*v)->getParent(), Insert->getParent())) { 1354 Insert->replaceAllUsesWith(*v); 1355 ToRemove.push_back(Insert); 1356 Insert = 0; 1357 break; 1358 } 1359 } 1360 if (Insert) 1361 Visited.insert(Insert); 1362 } 1363 } 1364 1365 // Erase all of the instructions that we RAUWed. 1366 for (SmallVectorImpl<Instruction *>::iterator v = ToRemove.begin(), 1367 ve = ToRemove.end(); v != ve; ++v) { 1368 assert((*v)->getNumUses() == 0 && "Can't remove instructions with uses"); 1369 (*v)->eraseFromParent(); 1370 } 1371 } 1372 1373 /// The SLPVectorizer Pass. 1374 struct SLPVectorizer : public FunctionPass { 1375 typedef SmallVector<StoreInst *, 8> StoreList; 1376 typedef MapVector<Value *, StoreList> StoreListMap; 1377 1378 /// Pass identification, replacement for typeid 1379 static char ID; 1380 1381 explicit SLPVectorizer() : FunctionPass(ID) { 1382 initializeSLPVectorizerPass(*PassRegistry::getPassRegistry()); 1383 } 1384 1385 ScalarEvolution *SE; 1386 DataLayout *DL; 1387 TargetTransformInfo *TTI; 1388 AliasAnalysis *AA; 1389 LoopInfo *LI; 1390 DominatorTree *DT; 1391 1392 virtual bool runOnFunction(Function &F) { 1393 SE = &getAnalysis<ScalarEvolution>(); 1394 DL = getAnalysisIfAvailable<DataLayout>(); 1395 TTI = &getAnalysis<TargetTransformInfo>(); 1396 AA = &getAnalysis<AliasAnalysis>(); 1397 LI = &getAnalysis<LoopInfo>(); 1398 DT = &getAnalysis<DominatorTree>(); 1399 1400 StoreRefs.clear(); 1401 bool Changed = false; 1402 1403 // Must have DataLayout. We can't require it because some tests run w/o 1404 // triple. 1405 if (!DL) 1406 return false; 1407 1408 DEBUG(dbgs() << "SLP: Analyzing blocks in " << F.getName() << ".\n"); 1409 1410 // Use the bollom up slp vectorizer to construct chains that start with 1411 // he store instructions. 1412 BoUpSLP R(&F, SE, DL, TTI, AA, LI, DT); 1413 1414 // Scan the blocks in the function in post order. 1415 for (po_iterator<BasicBlock*> it = po_begin(&F.getEntryBlock()), 1416 e = po_end(&F.getEntryBlock()); it != e; ++it) { 1417 BasicBlock *BB = *it; 1418 1419 // Vectorize trees that end at reductions. 1420 Changed |= vectorizeChainsInBlock(BB, R); 1421 1422 // Vectorize trees that end at stores. 1423 if (unsigned count = collectStores(BB, R)) { 1424 (void)count; 1425 DEBUG(dbgs() << "SLP: Found " << count << " stores to vectorize.\n"); 1426 Changed |= vectorizeStoreChains(R); 1427 } 1428 } 1429 1430 if (Changed) { 1431 R.optimizeGatherSequence(); 1432 DEBUG(dbgs() << "SLP: vectorized \"" << F.getName() << "\"\n"); 1433 DEBUG(verifyFunction(F)); 1434 } 1435 return Changed; 1436 } 1437 1438 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 1439 FunctionPass::getAnalysisUsage(AU); 1440 AU.addRequired<ScalarEvolution>(); 1441 AU.addRequired<AliasAnalysis>(); 1442 AU.addRequired<TargetTransformInfo>(); 1443 AU.addRequired<LoopInfo>(); 1444 AU.addRequired<DominatorTree>(); 1445 AU.addPreserved<LoopInfo>(); 1446 AU.addPreserved<DominatorTree>(); 1447 AU.setPreservesCFG(); 1448 } 1449 1450 private: 1451 1452 /// \brief Collect memory references and sort them according to their base 1453 /// object. We sort the stores to their base objects to reduce the cost of the 1454 /// quadratic search on the stores. TODO: We can further reduce this cost 1455 /// if we flush the chain creation every time we run into a memory barrier. 1456 unsigned collectStores(BasicBlock *BB, BoUpSLP &R); 1457 1458 /// \brief Try to vectorize a chain that starts at two arithmetic instrs. 1459 bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R); 1460 1461 /// \brief Try to vectorize a list of operands. If \p NeedExtracts is true 1462 /// then we calculate the cost of extracting the scalars from the vector. 1463 /// \returns true if a value was vectorized. 1464 bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R, bool NeedExtracts); 1465 1466 /// \brief Try to vectorize a chain that may start at the operands of \V; 1467 bool tryToVectorize(BinaryOperator *V, BoUpSLP &R); 1468 1469 /// \brief Vectorize the stores that were collected in StoreRefs. 1470 bool vectorizeStoreChains(BoUpSLP &R); 1471 1472 /// \brief Scan the basic block and look for patterns that are likely to start 1473 /// a vectorization chain. 1474 bool vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R); 1475 1476 bool vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold, 1477 BoUpSLP &R); 1478 1479 bool vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold, 1480 BoUpSLP &R); 1481 private: 1482 StoreListMap StoreRefs; 1483 }; 1484 1485 bool SLPVectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain, 1486 int CostThreshold, BoUpSLP &R) { 1487 unsigned ChainLen = Chain.size(); 1488 DEBUG(dbgs() << "SLP: Analyzing a store chain of length " << ChainLen 1489 << "\n"); 1490 Type *StoreTy = cast<StoreInst>(Chain[0])->getValueOperand()->getType(); 1491 unsigned Sz = DL->getTypeSizeInBits(StoreTy); 1492 unsigned VF = MinVecRegSize / Sz; 1493 1494 if (!isPowerOf2_32(Sz) || VF < 2) 1495 return false; 1496 1497 bool Changed = false; 1498 // Look for profitable vectorizable trees at all offsets, starting at zero. 1499 for (unsigned i = 0, e = ChainLen; i < e; ++i) { 1500 if (i + VF > e) 1501 break; 1502 DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << i 1503 << "\n"); 1504 ArrayRef<Value *> Operands = Chain.slice(i, VF); 1505 1506 R.buildTree(Operands); 1507 1508 int Cost = R.getTreeCost(); 1509 1510 DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n"); 1511 if (Cost < CostThreshold) { 1512 DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n"); 1513 R.vectorizeTree(); 1514 1515 // Move to the next bundle. 1516 i += VF - 1; 1517 Changed = true; 1518 } 1519 } 1520 1521 if (Changed || ChainLen > VF) 1522 return Changed; 1523 1524 // Handle short chains. This helps us catch types such as <3 x float> that 1525 // are smaller than vector size. 1526 R.buildTree(Chain); 1527 1528 int Cost = R.getTreeCost(); 1529 1530 if (Cost < CostThreshold) { 1531 DEBUG(dbgs() << "SLP: Found store chain cost = " << Cost 1532 << " for size = " << ChainLen << "\n"); 1533 R.vectorizeTree(); 1534 return true; 1535 } 1536 1537 return false; 1538 } 1539 1540 bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores, 1541 int costThreshold, BoUpSLP &R) { 1542 SetVector<Value *> Heads, Tails; 1543 SmallDenseMap<Value *, Value *> ConsecutiveChain; 1544 1545 // We may run into multiple chains that merge into a single chain. We mark the 1546 // stores that we vectorized so that we don't visit the same store twice. 1547 BoUpSLP::ValueSet VectorizedStores; 1548 bool Changed = false; 1549 1550 // Do a quadratic search on all of the given stores and find 1551 // all of the pairs of loads that follow each other. 1552 for (unsigned i = 0, e = Stores.size(); i < e; ++i) 1553 for (unsigned j = 0; j < e; ++j) { 1554 if (i == j) 1555 continue; 1556 1557 if (R.isConsecutiveAccess(Stores[i], Stores[j])) { 1558 Tails.insert(Stores[j]); 1559 Heads.insert(Stores[i]); 1560 ConsecutiveChain[Stores[i]] = Stores[j]; 1561 } 1562 } 1563 1564 // For stores that start but don't end a link in the chain: 1565 for (SetVector<Value *>::iterator it = Heads.begin(), e = Heads.end(); 1566 it != e; ++it) { 1567 if (Tails.count(*it)) 1568 continue; 1569 1570 // We found a store instr that starts a chain. Now follow the chain and try 1571 // to vectorize it. 1572 BoUpSLP::ValueList Operands; 1573 Value *I = *it; 1574 // Collect the chain into a list. 1575 while (Tails.count(I) || Heads.count(I)) { 1576 if (VectorizedStores.count(I)) 1577 break; 1578 Operands.push_back(I); 1579 // Move to the next value in the chain. 1580 I = ConsecutiveChain[I]; 1581 } 1582 1583 bool Vectorized = vectorizeStoreChain(Operands, costThreshold, R); 1584 1585 // Mark the vectorized stores so that we don't vectorize them again. 1586 if (Vectorized) 1587 VectorizedStores.insert(Operands.begin(), Operands.end()); 1588 Changed |= Vectorized; 1589 } 1590 1591 return Changed; 1592 } 1593 1594 1595 unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) { 1596 unsigned count = 0; 1597 StoreRefs.clear(); 1598 for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { 1599 StoreInst *SI = dyn_cast<StoreInst>(it); 1600 if (!SI) 1601 continue; 1602 1603 // Check that the pointer points to scalars. 1604 Type *Ty = SI->getValueOperand()->getType(); 1605 if (Ty->isAggregateType() || Ty->isVectorTy()) 1606 return 0; 1607 1608 // Find the base of the GEP. 1609 Value *Ptr = SI->getPointerOperand(); 1610 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) 1611 Ptr = GEP->getPointerOperand(); 1612 1613 // Save the store locations. 1614 StoreRefs[Ptr].push_back(SI); 1615 count++; 1616 } 1617 return count; 1618 } 1619 1620 bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) { 1621 if (!A || !B) 1622 return false; 1623 Value *VL[] = { A, B }; 1624 return tryToVectorizeList(VL, R, true); 1625 } 1626 1627 bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R, 1628 bool NeedExtracts) { 1629 if (VL.size() < 2) 1630 return false; 1631 1632 DEBUG(dbgs() << "SLP: Vectorizing a list of length = " << VL.size() << ".\n"); 1633 1634 // Check that all of the parts are scalar instructions of the same type. 1635 Instruction *I0 = dyn_cast<Instruction>(VL[0]); 1636 if (!I0) 1637 return 0; 1638 1639 unsigned Opcode0 = I0->getOpcode(); 1640 1641 for (int i = 0, e = VL.size(); i < e; ++i) { 1642 Type *Ty = VL[i]->getType(); 1643 if (Ty->isAggregateType() || Ty->isVectorTy()) 1644 return 0; 1645 Instruction *Inst = dyn_cast<Instruction>(VL[i]); 1646 if (!Inst || Inst->getOpcode() != Opcode0) 1647 return 0; 1648 } 1649 1650 R.buildTree(VL); 1651 int Cost = R.getTreeCost(); 1652 1653 int ExtrCost = NeedExtracts ? R.getGatherCost(VL) : 0; 1654 DEBUG(dbgs() << "SLP: Cost of pair:" << Cost 1655 << " Cost of extract:" << ExtrCost << ".\n"); 1656 if ((Cost + ExtrCost) >= -SLPCostThreshold) 1657 return false; 1658 DEBUG(dbgs() << "SLP: Vectorizing pair.\n"); 1659 R.vectorizeTree(); 1660 return true; 1661 } 1662 1663 bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) { 1664 if (!V) 1665 return false; 1666 1667 // Try to vectorize V. 1668 if (tryToVectorizePair(V->getOperand(0), V->getOperand(1), R)) 1669 return true; 1670 1671 BinaryOperator *A = dyn_cast<BinaryOperator>(V->getOperand(0)); 1672 BinaryOperator *B = dyn_cast<BinaryOperator>(V->getOperand(1)); 1673 // Try to skip B. 1674 if (B && B->hasOneUse()) { 1675 BinaryOperator *B0 = dyn_cast<BinaryOperator>(B->getOperand(0)); 1676 BinaryOperator *B1 = dyn_cast<BinaryOperator>(B->getOperand(1)); 1677 if (tryToVectorizePair(A, B0, R)) { 1678 B->moveBefore(V); 1679 return true; 1680 } 1681 if (tryToVectorizePair(A, B1, R)) { 1682 B->moveBefore(V); 1683 return true; 1684 } 1685 } 1686 1687 // Try to skip A. 1688 if (A && A->hasOneUse()) { 1689 BinaryOperator *A0 = dyn_cast<BinaryOperator>(A->getOperand(0)); 1690 BinaryOperator *A1 = dyn_cast<BinaryOperator>(A->getOperand(1)); 1691 if (tryToVectorizePair(A0, B, R)) { 1692 A->moveBefore(V); 1693 return true; 1694 } 1695 if (tryToVectorizePair(A1, B, R)) { 1696 A->moveBefore(V); 1697 return true; 1698 } 1699 } 1700 return 0; 1701 } 1702 1703 bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { 1704 bool Changed = false; 1705 for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { 1706 if (isa<DbgInfoIntrinsic>(it)) 1707 continue; 1708 1709 // Try to vectorize reductions that use PHINodes. 1710 if (PHINode *P = dyn_cast<PHINode>(it)) { 1711 // Check that the PHI is a reduction PHI. 1712 if (P->getNumIncomingValues() != 2) 1713 return Changed; 1714 Value *Rdx = 1715 (P->getIncomingBlock(0) == BB 1716 ? (P->getIncomingValue(0)) 1717 : (P->getIncomingBlock(1) == BB ? P->getIncomingValue(1) : 0)); 1718 // Check if this is a Binary Operator. 1719 BinaryOperator *BI = dyn_cast_or_null<BinaryOperator>(Rdx); 1720 if (!BI) 1721 continue; 1722 1723 Value *Inst = BI->getOperand(0); 1724 if (Inst == P) 1725 Inst = BI->getOperand(1); 1726 1727 Changed |= tryToVectorize(dyn_cast<BinaryOperator>(Inst), R); 1728 continue; 1729 } 1730 1731 // Try to vectorize trees that start at compare instructions. 1732 if (CmpInst *CI = dyn_cast<CmpInst>(it)) { 1733 if (tryToVectorizePair(CI->getOperand(0), CI->getOperand(1), R)) { 1734 Changed |= true; 1735 continue; 1736 } 1737 for (int i = 0; i < 2; ++i) 1738 if (BinaryOperator *BI = dyn_cast<BinaryOperator>(CI->getOperand(i))) 1739 Changed |= 1740 tryToVectorizePair(BI->getOperand(0), BI->getOperand(1), R); 1741 continue; 1742 } 1743 } 1744 1745 // Scan the PHINodes in our successors in search for pairing hints. 1746 for (succ_iterator it = succ_begin(BB), e = succ_end(BB); it != e; ++it) { 1747 BasicBlock *Succ = *it; 1748 SmallVector<Value *, 4> Incoming; 1749 1750 // Collect the incoming values from the PHIs. 1751 for (BasicBlock::iterator instr = Succ->begin(), ie = Succ->end(); 1752 instr != ie; ++instr) { 1753 PHINode *P = dyn_cast<PHINode>(instr); 1754 1755 if (!P) 1756 break; 1757 1758 Value *V = P->getIncomingValueForBlock(BB); 1759 if (Instruction *I = dyn_cast<Instruction>(V)) 1760 if (I->getParent() == BB) 1761 Incoming.push_back(I); 1762 } 1763 1764 if (Incoming.size() > 1) 1765 Changed |= tryToVectorizeList(Incoming, R, true); 1766 } 1767 1768 return Changed; 1769 } 1770 1771 bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) { 1772 bool Changed = false; 1773 // Attempt to sort and vectorize each of the store-groups. 1774 for (StoreListMap::iterator it = StoreRefs.begin(), e = StoreRefs.end(); 1775 it != e; ++it) { 1776 if (it->second.size() < 2) 1777 continue; 1778 1779 DEBUG(dbgs() << "SLP: Analyzing a store chain of length " 1780 << it->second.size() << ".\n"); 1781 1782 Changed |= vectorizeStores(it->second, -SLPCostThreshold, R); 1783 } 1784 return Changed; 1785 } 1786 1787 } // end anonymous namespace 1788 1789 char SLPVectorizer::ID = 0; 1790 static const char lv_name[] = "SLP Vectorizer"; 1791 INITIALIZE_PASS_BEGIN(SLPVectorizer, SV_NAME, lv_name, false, false) 1792 INITIALIZE_AG_DEPENDENCY(AliasAnalysis) 1793 INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) 1794 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) 1795 INITIALIZE_PASS_DEPENDENCY(LoopSimplify) 1796 INITIALIZE_PASS_END(SLPVectorizer, SV_NAME, lv_name, false, false) 1797 1798 namespace llvm { 1799 Pass *createSLPVectorizerPass() { return new SLPVectorizer(); } 1800 } 1801