1 //===-- StraightLineStrengthReduce.cpp - ------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements straight-line strength reduction (SLSR). Unlike loop 11 // strength reduction, this algorithm is designed to reduce arithmetic 12 // redundancy in straight-line code instead of loops. It has proven to be 13 // effective in simplifying arithmetic statements derived from an unrolled loop. 14 // It can also simplify the logic of SeparateConstOffsetFromGEP. 15 // 16 // There are many optimizations we can perform in the domain of SLSR. This file 17 // for now contains only an initial step. Specifically, we look for strength 18 // reduction candidate in the form of 19 // 20 // (B + i) * S 21 // 22 // where B and S are integer constants or variables, and i is a constant 23 // integer. If we found two such candidates 24 // 25 // S1: X = (B + i) * S S2: Y = (B + i') * S 26 // 27 // and S1 dominates S2, we call S1 a basis of S2, and can replace S2 with 28 // 29 // Y = X + (i' - i) * S 30 // 31 // where (i' - i) * S is folded to the extent possible. When S2 has multiple 32 // bases, we pick the one that is closest to S2, or S2's "immediate" basis. 33 // 34 // TODO: 35 // 36 // - Handle candidates in the form of B + i * S 37 // 38 // - Handle candidates in the form of pointer arithmetics. e.g., B[i * S] 39 // 40 // - Floating point arithmetics when fast math is enabled. 41 // 42 // - SLSR may decrease ILP at the architecture level. Targets that are very 43 // sensitive to ILP may want to disable it. Having SLSR to consider ILP is 44 // left as future work. 45 #include <vector> 46 47 #include "llvm/ADT/DenseSet.h" 48 #include "llvm/IR/Dominators.h" 49 #include "llvm/IR/IRBuilder.h" 50 #include "llvm/IR/Module.h" 51 #include "llvm/IR/PatternMatch.h" 52 #include "llvm/Support/raw_ostream.h" 53 #include "llvm/Transforms/Scalar.h" 54 55 using namespace llvm; 56 using namespace PatternMatch; 57 58 namespace { 59 60 class StraightLineStrengthReduce : public FunctionPass { 61 public: 62 // SLSR candidate. Such a candidate must be in the form of 63 // (Base + Index) * Stride 64 struct Candidate : public ilist_node<Candidate> { 65 Candidate(Value *B = nullptr, ConstantInt *Idx = nullptr, 66 Value *S = nullptr, Instruction *I = nullptr) 67 : Base(B), Index(Idx), Stride(S), Ins(I), Basis(nullptr) {} 68 Value *Base; 69 ConstantInt *Index; 70 Value *Stride; 71 // The instruction this candidate corresponds to. It helps us to rewrite a 72 // candidate with respect to its immediate basis. Note that one instruction 73 // can corresponds to multiple candidates depending on how you associate the 74 // expression. For instance, 75 // 76 // (a + 1) * (b + 2) 77 // 78 // can be treated as 79 // 80 // <Base: a, Index: 1, Stride: b + 2> 81 // 82 // or 83 // 84 // <Base: b, Index: 2, Stride: a + 1> 85 Instruction *Ins; 86 // Points to the immediate basis of this candidate, or nullptr if we cannot 87 // find any basis for this candidate. 88 Candidate *Basis; 89 }; 90 91 static char ID; 92 93 StraightLineStrengthReduce() : FunctionPass(ID), DT(nullptr) { 94 initializeStraightLineStrengthReducePass(*PassRegistry::getPassRegistry()); 95 } 96 97 void getAnalysisUsage(AnalysisUsage &AU) const override { 98 AU.addRequired<DominatorTreeWrapperPass>(); 99 // We do not modify the shape of the CFG. 100 AU.setPreservesCFG(); 101 } 102 103 bool runOnFunction(Function &F) override; 104 105 private: 106 // Returns true if Basis is a basis for C, i.e., Basis dominates C and they 107 // share the same base and stride. 108 bool isBasisFor(const Candidate &Basis, const Candidate &C); 109 // Checks whether I is in a candidate form. If so, adds all the matching forms 110 // to Candidates, and tries to find the immediate basis for each of them. 111 void allocateCandidateAndFindBasis(Instruction *I); 112 // Given that I is in the form of "(B + Idx) * S", adds this form to 113 // Candidates, and finds its immediate basis. 114 void allocateCandidateAndFindBasis(Value *B, ConstantInt *Idx, Value *S, 115 Instruction *I); 116 // Rewrites candidate C with respect to Basis. 117 void rewriteCandidateWithBasis(const Candidate &C, const Candidate &Basis); 118 119 DominatorTree *DT; 120 ilist<Candidate> Candidates; 121 // Temporarily holds all instructions that are unlinked (but not deleted) by 122 // rewriteCandidateWithBasis. These instructions will be actually removed 123 // after all rewriting finishes. 124 DenseSet<Instruction *> UnlinkedInstructions; 125 }; 126 } // anonymous namespace 127 128 char StraightLineStrengthReduce::ID = 0; 129 INITIALIZE_PASS_BEGIN(StraightLineStrengthReduce, "slsr", 130 "Straight line strength reduction", false, false) 131 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 132 INITIALIZE_PASS_END(StraightLineStrengthReduce, "slsr", 133 "Straight line strength reduction", false, false) 134 135 FunctionPass *llvm::createStraightLineStrengthReducePass() { 136 return new StraightLineStrengthReduce(); 137 } 138 139 bool StraightLineStrengthReduce::isBasisFor(const Candidate &Basis, 140 const Candidate &C) { 141 return (Basis.Ins != C.Ins && // skip the same instruction 142 // Basis must dominate C in order to rewrite C with respect to Basis. 143 DT->dominates(Basis.Ins->getParent(), C.Ins->getParent()) && 144 // They share the same base and stride. 145 Basis.Base == C.Base && 146 Basis.Stride == C.Stride); 147 } 148 149 // TODO: We currently implement an algorithm whose time complexity is linear to 150 // the number of existing candidates. However, a better algorithm exists. We 151 // could depth-first search the dominator tree, and maintain a hash table that 152 // contains all candidates that dominate the node being traversed. This hash 153 // table is indexed by the base and the stride of a candidate. Therefore, 154 // finding the immediate basis of a candidate boils down to one hash-table look 155 // up. 156 void StraightLineStrengthReduce::allocateCandidateAndFindBasis(Value *B, 157 ConstantInt *Idx, 158 Value *S, 159 Instruction *I) { 160 Candidate C(B, Idx, S, I); 161 // Try to compute the immediate basis of C. 162 unsigned NumIterations = 0; 163 // Limit the scan radius to avoid running forever. 164 static const unsigned MaxNumIterations = 50; 165 for (auto Basis = Candidates.rbegin(); 166 Basis != Candidates.rend() && NumIterations < MaxNumIterations; 167 ++Basis, ++NumIterations) { 168 if (isBasisFor(*Basis, C)) { 169 C.Basis = &(*Basis); 170 break; 171 } 172 } 173 // Regardless of whether we find a basis for C, we need to push C to the 174 // candidate list. 175 Candidates.push_back(C); 176 } 177 178 void StraightLineStrengthReduce::allocateCandidateAndFindBasis(Instruction *I) { 179 Value *B = nullptr; 180 ConstantInt *Idx = nullptr; 181 // "(Base + Index) * Stride" must be a Mul instruction at the first hand. 182 if (I->getOpcode() == Instruction::Mul) { 183 if (IntegerType *ITy = dyn_cast<IntegerType>(I->getType())) { 184 Value *LHS = I->getOperand(0), *RHS = I->getOperand(1); 185 for (unsigned Swapped = 0; Swapped < 2; ++Swapped) { 186 // Only handle the canonical operand ordering. 187 if (match(LHS, m_Add(m_Value(B), m_ConstantInt(Idx)))) { 188 // If LHS is in the form of "Base + Index", then I is in the form of 189 // "(Base + Index) * RHS". 190 allocateCandidateAndFindBasis(B, Idx, RHS, I); 191 } else { 192 // Otherwise, at least try the form (LHS + 0) * RHS. 193 allocateCandidateAndFindBasis(LHS, ConstantInt::get(ITy, 0), RHS, I); 194 } 195 // Swap LHS and RHS so that we also cover the cases where LHS is the 196 // stride. 197 if (LHS == RHS) 198 break; 199 std::swap(LHS, RHS); 200 } 201 } 202 } 203 } 204 205 void StraightLineStrengthReduce::rewriteCandidateWithBasis( 206 const Candidate &C, const Candidate &Basis) { 207 // An instruction can correspond to multiple candidates. Therefore, instead of 208 // simply deleting an instruction when we rewrite it, we mark its parent as 209 // nullptr (i.e. unlink it) so that we can skip the candidates whose 210 // instruction is already rewritten. 211 if (!C.Ins->getParent()) 212 return; 213 assert(C.Base == Basis.Base && C.Stride == Basis.Stride); 214 // Basis = (B + i) * S 215 // C = (B + i') * S 216 // ==> 217 // C = Basis + (i' - i) * S 218 IRBuilder<> Builder(C.Ins); 219 ConstantInt *IndexOffset = ConstantInt::get( 220 C.Ins->getContext(), C.Index->getValue() - Basis.Index->getValue()); 221 Value *Reduced; 222 // TODO: preserve nsw/nuw in some cases. 223 if (IndexOffset->isOne()) { 224 // If (i' - i) is 1, fold C into Basis + S. 225 Reduced = Builder.CreateAdd(Basis.Ins, C.Stride); 226 } else if (IndexOffset->isMinusOne()) { 227 // If (i' - i) is -1, fold C into Basis - S. 228 Reduced = Builder.CreateSub(Basis.Ins, C.Stride); 229 } else { 230 Value *Bump = Builder.CreateMul(C.Stride, IndexOffset); 231 Reduced = Builder.CreateAdd(Basis.Ins, Bump); 232 } 233 Reduced->takeName(C.Ins); 234 C.Ins->replaceAllUsesWith(Reduced); 235 C.Ins->dropAllReferences(); 236 // Unlink C.Ins so that we can skip other candidates also corresponding to 237 // C.Ins. The actual deletion is postponed to the end of runOnFunction. 238 C.Ins->removeFromParent(); 239 UnlinkedInstructions.insert(C.Ins); 240 } 241 242 bool StraightLineStrengthReduce::runOnFunction(Function &F) { 243 if (skipOptnoneFunction(F)) 244 return false; 245 246 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 247 // Traverse the dominator tree in the depth-first order. This order makes sure 248 // all bases of a candidate are in Candidates when we process it. 249 for (auto node = GraphTraits<DominatorTree *>::nodes_begin(DT); 250 node != GraphTraits<DominatorTree *>::nodes_end(DT); ++node) { 251 BasicBlock *B = node->getBlock(); 252 for (auto I = B->begin(); I != B->end(); ++I) { 253 allocateCandidateAndFindBasis(I); 254 } 255 } 256 257 // Rewrite candidates in the reverse depth-first order. This order makes sure 258 // a candidate being rewritten is not a basis for any other candidate. 259 while (!Candidates.empty()) { 260 const Candidate &C = Candidates.back(); 261 if (C.Basis != nullptr) { 262 rewriteCandidateWithBasis(C, *C.Basis); 263 } 264 Candidates.pop_back(); 265 } 266 267 // Delete all unlink instructions. 268 for (auto I : UnlinkedInstructions) { 269 delete I; 270 } 271 bool Ret = !UnlinkedInstructions.empty(); 272 UnlinkedInstructions.clear(); 273 return Ret; 274 } 275