1 //===- SLPVectorizer.cpp - A bottom up SLP Vectorizer ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // This pass implements the Bottom Up SLP vectorizer. It detects consecutive 10 // stores that can be put together into vector-stores. Next, it attempts to 11 // construct vectorizable tree using the use-def chains. If a profitable tree 12 // was found, the SLP vectorizer performs vectorization on the tree. 13 // 14 // The pass is inspired by the work described in the paper: 15 // "Loop-Aware SLP in GCC" by Ira Rosen, Dorit Nuzman, Ayal Zaks. 16 // 17 //===----------------------------------------------------------------------===// 18 #define SV_NAME "slp-vectorizer" 19 #define DEBUG_TYPE SV_NAME 20 21 #include "VecUtils.h" 22 #include "llvm/Transforms/Vectorize.h" 23 #include "llvm/Analysis/AliasAnalysis.h" 24 #include "llvm/Analysis/ScalarEvolution.h" 25 #include "llvm/Analysis/TargetTransformInfo.h" 26 #include "llvm/Analysis/Verifier.h" 27 #include "llvm/Analysis/LoopInfo.h" 28 #include "llvm/IR/DataLayout.h" 29 #include "llvm/IR/Instructions.h" 30 #include "llvm/IR/IntrinsicInst.h" 31 #include "llvm/IR/Module.h" 32 #include "llvm/IR/Type.h" 33 #include "llvm/IR/Value.h" 34 #include "llvm/Pass.h" 35 #include "llvm/Support/CommandLine.h" 36 #include "llvm/Support/Debug.h" 37 #include "llvm/Support/raw_ostream.h" 38 #include <map> 39 40 using namespace llvm; 41 42 static cl::opt<int> 43 SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden, 44 cl::desc("Only vectorize trees if the gain is above this " 45 "number. (gain = -cost of vectorization)")); 46 namespace { 47 48 /// The SLPVectorizer Pass. 49 struct SLPVectorizer : public FunctionPass { 50 typedef std::map<Value*, BoUpSLP::StoreList> StoreListMap; 51 52 /// Pass identification, replacement for typeid 53 static char ID; 54 55 explicit SLPVectorizer() : FunctionPass(ID) { 56 initializeSLPVectorizerPass(*PassRegistry::getPassRegistry()); 57 } 58 59 ScalarEvolution *SE; 60 DataLayout *DL; 61 TargetTransformInfo *TTI; 62 AliasAnalysis *AA; 63 LoopInfo *LI; 64 65 virtual bool runOnFunction(Function &F) { 66 SE = &getAnalysis<ScalarEvolution>(); 67 DL = getAnalysisIfAvailable<DataLayout>(); 68 TTI = &getAnalysis<TargetTransformInfo>(); 69 AA = &getAnalysis<AliasAnalysis>(); 70 LI = &getAnalysis<LoopInfo>(); 71 72 StoreRefs.clear(); 73 bool Changed = false; 74 75 // Must have DataLayout. We can't require it because some tests run w/o 76 // triple. 77 if (!DL) 78 return false; 79 80 for (Function::iterator it = F.begin(), e = F.end(); it != e; ++it) { 81 BasicBlock *BB = it; 82 bool BBChanged = false; 83 84 // Use the bollom up slp vectorizer to construct chains that start with 85 // he store instructions. 86 BoUpSLP R(BB, SE, DL, TTI, AA, LI->getLoopFor(BB)); 87 88 // Vectorize trees that end at reductions. 89 BBChanged |= vectorizeReductions(BB, R); 90 91 // Vectorize trees that end at stores. 92 if (unsigned count = collectStores(BB, R)) { 93 (void)count; 94 DEBUG(dbgs()<<"SLP: Found " << count << " stores to vectorize.\n"); 95 BBChanged |= vectorizeStoreChains(R); 96 } 97 98 // Try to hoist some of the scalarization code to the preheader. 99 if (BBChanged) hoistGatherSequence(LI, BB, R); 100 101 Changed |= BBChanged; 102 } 103 104 if (Changed) { 105 DEBUG(dbgs()<<"SLP: vectorized \""<<F.getName()<<"\"\n"); 106 DEBUG(verifyFunction(F)); 107 } 108 return Changed; 109 } 110 111 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 112 FunctionPass::getAnalysisUsage(AU); 113 AU.addRequired<ScalarEvolution>(); 114 AU.addRequired<AliasAnalysis>(); 115 AU.addRequired<TargetTransformInfo>(); 116 AU.addRequired<LoopInfo>(); 117 } 118 119 private: 120 121 /// \brief Collect memory references and sort them according to their base 122 /// object. We sort the stores to their base objects to reduce the cost of the 123 /// quadratic search on the stores. TODO: We can further reduce this cost 124 /// if we flush the chain creation every time we run into a memory barrier. 125 unsigned collectStores(BasicBlock *BB, BoUpSLP &R); 126 127 /// \brief Try to vectorize a chain that starts at two arithmetic instrs. 128 bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R); 129 130 /// \brief Try to vectorize a list of operands. 131 bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R); 132 133 /// \brief Try to vectorize a chain that may start at the operands of \V; 134 bool tryToVectorize(BinaryOperator *V, BoUpSLP &R); 135 136 /// \brief Vectorize the stores that were collected in StoreRefs. 137 bool vectorizeStoreChains(BoUpSLP &R); 138 139 /// \brief Try to hoist gather sequences outside of the loop in cases where 140 /// all of the sources are loop invariant. 141 void hoistGatherSequence(LoopInfo *LI, BasicBlock *BB, BoUpSLP &R); 142 143 /// \brief Scan the basic block and look for reductions that may start a 144 /// vectorization chain. 145 bool vectorizeReductions(BasicBlock *BB, BoUpSLP &R); 146 147 private: 148 StoreListMap StoreRefs; 149 }; 150 151 unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) { 152 unsigned count = 0; 153 StoreRefs.clear(); 154 for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { 155 StoreInst *SI = dyn_cast<StoreInst>(it); 156 if (!SI) 157 continue; 158 159 // Check that the pointer points to scalars. 160 Type *Ty = SI->getValueOperand()->getType(); 161 if (Ty->isAggregateType() || Ty->isVectorTy()) 162 return 0; 163 164 // Find the base of the GEP. 165 Value *Ptr = SI->getPointerOperand(); 166 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) 167 Ptr = GEP->getPointerOperand(); 168 169 // Save the store locations. 170 StoreRefs[Ptr].push_back(SI); 171 count++; 172 } 173 return count; 174 } 175 176 bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) { 177 if (!A || !B) return false; 178 Value *VL[] = { A, B }; 179 return tryToVectorizeList(VL, R); 180 } 181 182 bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R) { 183 DEBUG(dbgs()<<"SLP: Vectorizing a list of length = " << VL.size() << ".\n"); 184 185 // Check that all of the parts are scalar. 186 for (int i = 0, e = VL.size(); i < e; ++i) { 187 Type *Ty = VL[i]->getType(); 188 if (Ty->isAggregateType() || Ty->isVectorTy()) 189 return 0; 190 } 191 192 int Cost = R.getTreeCost(VL); 193 int ExtrCost = R.getScalarizationCost(VL); 194 DEBUG(dbgs()<<"SLP: Cost of pair:" << Cost << 195 " Cost of extract:" << ExtrCost << ".\n"); 196 if ((Cost+ExtrCost) >= -SLPCostThreshold) return false; 197 DEBUG(dbgs()<<"SLP: Vectorizing pair.\n"); 198 R.vectorizeArith(VL); 199 return true; 200 } 201 202 bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) { 203 if (!V) return false; 204 // Try to vectorize V. 205 if (tryToVectorizePair(V->getOperand(0), V->getOperand(1), R)) 206 return true; 207 208 BinaryOperator *A = dyn_cast<BinaryOperator>(V->getOperand(0)); 209 BinaryOperator *B = dyn_cast<BinaryOperator>(V->getOperand(1)); 210 // Try to skip B. 211 if (B && B->hasOneUse()) { 212 BinaryOperator *B0 = dyn_cast<BinaryOperator>(B->getOperand(0)); 213 BinaryOperator *B1 = dyn_cast<BinaryOperator>(B->getOperand(1)); 214 if (tryToVectorizePair(A, B0, R)) { 215 B->moveBefore(V); 216 return true; 217 } 218 if (tryToVectorizePair(A, B1, R)) { 219 B->moveBefore(V); 220 return true; 221 } 222 } 223 224 // Try to skip A. 225 if (A && A->hasOneUse()) { 226 BinaryOperator *A0 = dyn_cast<BinaryOperator>(A->getOperand(0)); 227 BinaryOperator *A1 = dyn_cast<BinaryOperator>(A->getOperand(1)); 228 if (tryToVectorizePair(A0, B, R)) { 229 A->moveBefore(V); 230 return true; 231 } 232 if (tryToVectorizePair(A1, B, R)) { 233 A->moveBefore(V); 234 return true; 235 } 236 } 237 return 0; 238 } 239 240 bool SLPVectorizer::vectorizeReductions(BasicBlock *BB, BoUpSLP &R) { 241 bool Changed = false; 242 for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { 243 if (isa<DbgInfoIntrinsic>(it)) continue; 244 245 // Try to vectorize reductions that use PHINodes. 246 if (PHINode *P = dyn_cast<PHINode>(it)) { 247 // Check that the PHI is a reduction PHI. 248 if (P->getNumIncomingValues() != 2) return Changed; 249 Value *Rdx = (P->getIncomingBlock(0) == BB ? P->getIncomingValue(0) : 250 (P->getIncomingBlock(1) == BB ? P->getIncomingValue(1) : 251 0)); 252 // Check if this is a Binary Operator. 253 BinaryOperator *BI = dyn_cast_or_null<BinaryOperator>(Rdx); 254 if (!BI) 255 continue; 256 257 Value *Inst = BI->getOperand(0); 258 if (Inst == P) Inst = BI->getOperand(1); 259 Changed |= tryToVectorize(dyn_cast<BinaryOperator>(Inst), R); 260 continue; 261 } 262 263 // Try to vectorize trees that start at compare instructions. 264 if (CmpInst *CI = dyn_cast<CmpInst>(it)) { 265 if (tryToVectorizePair(CI->getOperand(0), CI->getOperand(1), R)) { 266 Changed |= true; 267 continue; 268 } 269 for (int i = 0; i < 2; ++i) 270 if (BinaryOperator *BI = dyn_cast<BinaryOperator>(CI->getOperand(i))) 271 Changed |= tryToVectorizePair(BI->getOperand(0), BI->getOperand(1), R); 272 continue; 273 } 274 } 275 276 return Changed; 277 } 278 279 bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) { 280 bool Changed = false; 281 // Attempt to sort and vectorize each of the store-groups. 282 for (StoreListMap::iterator it = StoreRefs.begin(), e = StoreRefs.end(); 283 it != e; ++it) { 284 if (it->second.size() < 2) 285 continue; 286 287 DEBUG(dbgs()<<"SLP: Analyzing a store chain of length " << 288 it->second.size() << ".\n"); 289 290 Changed |= R.vectorizeStores(it->second, -SLPCostThreshold); 291 } 292 return Changed; 293 } 294 295 void SLPVectorizer::hoistGatherSequence(LoopInfo *LI, BasicBlock *BB, 296 BoUpSLP &R) { 297 // Check if this block is inside a loop. 298 Loop *L = LI->getLoopFor(BB); 299 if (!L) 300 return; 301 302 // Check if it has a preheader. 303 BasicBlock *PreHeader = L->getLoopPreheader(); 304 if (!PreHeader) 305 return; 306 307 // Mark the insertion point for the block. 308 Instruction *Location = PreHeader->getTerminator(); 309 310 BoUpSLP::ValueList &Gathers = R.getGatherSeqInstructions(); 311 for (BoUpSLP::ValueList::iterator it = Gathers.begin(), e = Gathers.end(); 312 it != e; ++it) { 313 InsertElementInst *Insert = dyn_cast<InsertElementInst>(*it); 314 315 // The InsertElement sequence can be simplified into a constant. 316 if (!Insert) 317 continue; 318 319 // If the vector or the element that we insert into it are 320 // instructions that are defined in this basic block then we can't 321 // hoist this instruction. 322 Instruction *CurrVec = dyn_cast<Instruction>(Insert->getOperand(0)); 323 Instruction *NewElem = dyn_cast<Instruction>(Insert->getOperand(1)); 324 if (CurrVec && L->contains(CurrVec)) continue; 325 if (NewElem && L->contains(NewElem)) continue; 326 327 // We can hoist this instruction. Move it to the pre-header. 328 Insert->moveBefore(Location); 329 } 330 } 331 332 } // end anonymous namespace 333 334 char SLPVectorizer::ID = 0; 335 static const char lv_name[] = "SLP Vectorizer"; 336 INITIALIZE_PASS_BEGIN(SLPVectorizer, SV_NAME, lv_name, false, false) 337 INITIALIZE_AG_DEPENDENCY(AliasAnalysis) 338 INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) 339 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) 340 INITIALIZE_PASS_DEPENDENCY(LoopSimplify) 341 INITIALIZE_PASS_END(SLPVectorizer, SV_NAME, lv_name, false, false) 342 343 namespace llvm { 344 Pass *createSLPVectorizerPass() { 345 return new SLPVectorizer(); 346 } 347 } 348 349