1 //===- LoopVectorizationPlanner.h - Planner for LoopVectorization ---------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// This file provides a LoopVectorizationPlanner class. 12 /// InnerLoopVectorizer vectorizes loops which contain only one basic 13 /// LoopVectorizationPlanner - drives the vectorization process after having 14 /// passed Legality checks. 15 /// The planner builds and optimizes the Vectorization Plans which record the 16 /// decisions how to vectorize the given loop. In particular, represent the 17 /// control-flow of the vectorized version, the replication of instructions that 18 /// are to be scalarized, and interleave access groups. 19 /// 20 /// Also provides a VPlan-based builder utility analogous to IRBuilder. 21 /// It provides an instruction-level API for generating VPInstructions while 22 /// abstracting away the Recipe manipulation details. 23 //===----------------------------------------------------------------------===// 24 25 #ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H 26 #define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H 27 28 #include "VPlan.h" 29 #include "llvm/Analysis/LoopInfo.h" 30 #include "llvm/Analysis/TargetLibraryInfo.h" 31 #include "llvm/Analysis/TargetTransformInfo.h" 32 33 namespace llvm { 34 35 /// VPlan-based builder utility analogous to IRBuilder. 36 class VPBuilder { 37 private: 38 VPBasicBlock *BB = nullptr; 39 VPBasicBlock::iterator InsertPt = VPBasicBlock::iterator(); 40 41 VPInstruction *createInstruction(unsigned Opcode, 42 std::initializer_list<VPValue *> Operands) { 43 VPInstruction *Instr = new VPInstruction(Opcode, Operands); 44 BB->insert(Instr, InsertPt); 45 return Instr; 46 } 47 48 public: 49 VPBuilder() {} 50 51 /// \brief This specifies that created VPInstructions should be appended to 52 /// the end of the specified block. 53 void setInsertPoint(VPBasicBlock *TheBB) { 54 assert(TheBB && "Attempting to set a null insert point"); 55 BB = TheBB; 56 InsertPt = BB->end(); 57 } 58 59 VPValue *createNot(VPValue *Operand) { 60 return createInstruction(VPInstruction::Not, {Operand}); 61 } 62 63 VPValue *createAnd(VPValue *LHS, VPValue *RHS) { 64 return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}); 65 } 66 67 VPValue *createOr(VPValue *LHS, VPValue *RHS) { 68 return createInstruction(Instruction::BinaryOps::Or, {LHS, RHS}); 69 } 70 }; 71 72 73 /// TODO: The following VectorizationFactor was pulled out of 74 /// LoopVectorizationCostModel class. LV also deals with 75 /// VectorizerParams::VectorizationFactor and VectorizationCostTy. 76 /// We need to streamline them. 77 78 /// Information about vectorization costs 79 struct VectorizationFactor { 80 // Vector width with best cost 81 unsigned Width; 82 // Cost of the loop with that width 83 unsigned Cost; 84 }; 85 86 /// Planner drives the vectorization process after having passed 87 /// Legality checks. 88 class LoopVectorizationPlanner { 89 /// The loop that we evaluate. 90 Loop *OrigLoop; 91 92 /// Loop Info analysis. 93 LoopInfo *LI; 94 95 /// Target Library Info. 96 const TargetLibraryInfo *TLI; 97 98 /// Target Transform Info. 99 const TargetTransformInfo *TTI; 100 101 /// The legality analysis. 102 LoopVectorizationLegality *Legal; 103 104 /// The profitablity analysis. 105 LoopVectorizationCostModel &CM; 106 107 using VPlanPtr = std::unique_ptr<VPlan>; 108 109 SmallVector<VPlanPtr, 4> VPlans; 110 111 /// This class is used to enable the VPlan to invoke a method of ILV. This is 112 /// needed until the method is refactored out of ILV and becomes reusable. 113 struct VPCallbackILV : public VPCallback { 114 InnerLoopVectorizer &ILV; 115 116 VPCallbackILV(InnerLoopVectorizer &ILV) : ILV(ILV) {} 117 118 Value *getOrCreateVectorValues(Value *V, unsigned Part) override; 119 }; 120 121 /// A builder used to construct the current plan. 122 VPBuilder Builder; 123 124 /// When we if-convert we need to create edge masks. We have to cache values 125 /// so that we don't end up with exponential recursion/IR. Note that 126 /// if-conversion currently takes place during VPlan-construction, so these 127 /// caches are only used at that stage. 128 using EdgeMaskCacheTy = 129 DenseMap<std::pair<BasicBlock *, BasicBlock *>, VPValue *>; 130 using BlockMaskCacheTy = DenseMap<BasicBlock *, VPValue *>; 131 EdgeMaskCacheTy EdgeMaskCache; 132 BlockMaskCacheTy BlockMaskCache; 133 134 unsigned BestVF = 0; 135 unsigned BestUF = 0; 136 137 public: 138 LoopVectorizationPlanner(Loop *L, LoopInfo *LI, const TargetLibraryInfo *TLI, 139 const TargetTransformInfo *TTI, 140 LoopVectorizationLegality *Legal, 141 LoopVectorizationCostModel &CM) 142 : OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM) {} 143 144 /// Plan how to best vectorize, return the best VF and its cost. 145 VectorizationFactor plan(bool OptForSize, unsigned UserVF); 146 147 /// Finalize the best decision and dispose of all other VPlans. 148 void setBestPlan(unsigned VF, unsigned UF); 149 150 /// Generate the IR code for the body of the vectorized loop according to the 151 /// best selected VPlan. 152 void executePlan(InnerLoopVectorizer &LB, DominatorTree *DT); 153 154 void printPlans(raw_ostream &O) { 155 for (const auto &Plan : VPlans) 156 O << *Plan; 157 } 158 159 protected: 160 /// Collect the instructions from the original loop that would be trivially 161 /// dead in the vectorized loop if generated. 162 void collectTriviallyDeadInstructions( 163 SmallPtrSetImpl<Instruction *> &DeadInstructions); 164 165 /// A range of powers-of-2 vectorization factors with fixed start and 166 /// adjustable end. The range includes start and excludes end, e.g.,: 167 /// [1, 9) = {1, 2, 4, 8} 168 struct VFRange { 169 // A power of 2. 170 const unsigned Start; 171 172 // Need not be a power of 2. If End <= Start range is empty. 173 unsigned End; 174 }; 175 176 /// Test a \p Predicate on a \p Range of VF's. Return the value of applying 177 /// \p Predicate on Range.Start, possibly decreasing Range.End such that the 178 /// returned value holds for the entire \p Range. 179 bool getDecisionAndClampRange(const std::function<bool(unsigned)> &Predicate, 180 VFRange &Range); 181 182 /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive, 183 /// according to the information gathered by Legal when it checked if it is 184 /// legal to vectorize the loop. 185 void buildVPlans(unsigned MinVF, unsigned MaxVF); 186 187 private: 188 /// A helper function that computes the predicate of the block BB, assuming 189 /// that the header block of the loop is set to True. It returns the *entry* 190 /// mask for the block BB. 191 VPValue *createBlockInMask(BasicBlock *BB, VPlanPtr &Plan); 192 193 /// A helper function that computes the predicate of the edge between SRC 194 /// and DST. 195 VPValue *createEdgeMask(BasicBlock *Src, BasicBlock *Dst, VPlanPtr &Plan); 196 197 /// Check if \I belongs to an Interleave Group within the given VF \p Range, 198 /// \return true in the first returned value if so and false otherwise. 199 /// Build a new VPInterleaveGroup Recipe if \I is the primary member of an IG 200 /// for \p Range.Start, and provide it as the second returned value. 201 /// Note that if \I is an adjunct member of an IG for \p Range.Start, the 202 /// \return value is <true, nullptr>, as it is handled by another recipe. 203 /// \p Range.End may be decreased to ensure same decision from \p Range.Start 204 /// to \p Range.End. 205 VPInterleaveRecipe *tryToInterleaveMemory(Instruction *I, VFRange &Range); 206 207 // Check if \I is a memory instruction to be widened for \p Range.Start and 208 // potentially masked. Such instructions are handled by a recipe that takes an 209 // additional VPInstruction for the mask. 210 VPWidenMemoryInstructionRecipe *tryToWidenMemory(Instruction *I, 211 VFRange &Range, 212 VPlanPtr &Plan); 213 214 /// Check if an induction recipe should be constructed for \I within the given 215 /// VF \p Range. If so build and return it. If not, return null. \p Range.End 216 /// may be decreased to ensure same decision from \p Range.Start to 217 /// \p Range.End. 218 VPWidenIntOrFpInductionRecipe *tryToOptimizeInduction(Instruction *I, 219 VFRange &Range); 220 221 /// Handle non-loop phi nodes. Currently all such phi nodes are turned into 222 /// a sequence of select instructions as the vectorizer currently performs 223 /// full if-conversion. 224 VPBlendRecipe *tryToBlend(Instruction *I, VPlanPtr &Plan); 225 226 /// Check if \p I can be widened within the given VF \p Range. If \p I can be 227 /// widened for \p Range.Start, check if the last recipe of \p VPBB can be 228 /// extended to include \p I or else build a new VPWidenRecipe for it and 229 /// append it to \p VPBB. Return true if \p I can be widened for Range.Start, 230 /// false otherwise. Range.End may be decreased to ensure same decision from 231 /// \p Range.Start to \p Range.End. 232 bool tryToWiden(Instruction *I, VPBasicBlock *VPBB, VFRange &Range); 233 234 /// Build a VPReplicationRecipe for \p I and enclose it within a Region if it 235 /// is predicated. \return \p VPBB augmented with this new recipe if \p I is 236 /// not predicated, otherwise \return a new VPBasicBlock that succeeds the new 237 /// Region. Update the packing decision of predicated instructions if they 238 /// feed \p I. Range.End may be decreased to ensure same recipe behavior from 239 /// \p Range.Start to \p Range.End. 240 VPBasicBlock *handleReplication( 241 Instruction *I, VFRange &Range, VPBasicBlock *VPBB, 242 DenseMap<Instruction *, VPReplicateRecipe *> &PredInst2Recipe, 243 VPlanPtr &Plan); 244 245 /// Create a replicating region for instruction \p I that requires 246 /// predication. \p PredRecipe is a VPReplicateRecipe holding \p I. 247 VPRegionBlock *createReplicateRegion(Instruction *I, VPRecipeBase *PredRecipe, 248 VPlanPtr &Plan); 249 250 /// Build a VPlan according to the information gathered by Legal. \return a 251 /// VPlan for vectorization factors \p Range.Start and up to \p Range.End 252 /// exclusive, possibly decreasing \p Range.End. 253 VPlanPtr buildVPlan(VFRange &Range, 254 const SmallPtrSetImpl<Value *> &NeedDef); 255 }; 256 257 } // namespace llvm 258 259 #endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H 260