1 //===- LoopVectorizationPlanner.h - Planner for LoopVectorization ---------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// This file provides a LoopVectorizationPlanner class. 12 /// InnerLoopVectorizer vectorizes loops which contain only one basic 13 /// LoopVectorizationPlanner - drives the vectorization process after having 14 /// passed Legality checks. 15 /// The planner builds and optimizes the Vectorization Plans which record the 16 /// decisions how to vectorize the given loop. In particular, represent the 17 /// control-flow of the vectorized version, the replication of instructions that 18 /// are to be scalarized, and interleave access groups. 19 /// 20 /// Also provides a VPlan-based builder utility analogous to IRBuilder. 21 /// It provides an instruction-level API for generating VPInstructions while 22 /// abstracting away the Recipe manipulation details. 23 //===----------------------------------------------------------------------===// 24 25 #ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H 26 #define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H 27 28 #include "VPlan.h" 29 30 namespace llvm { 31 32 /// VPlan-based builder utility analogous to IRBuilder. 33 class VPBuilder { 34 private: 35 VPBasicBlock *BB = nullptr; 36 VPBasicBlock::iterator InsertPt = VPBasicBlock::iterator(); 37 38 VPInstruction *createInstruction(unsigned Opcode, 39 std::initializer_list<VPValue *> Operands) { 40 VPInstruction *Instr = new VPInstruction(Opcode, Operands); 41 BB->insert(Instr, InsertPt); 42 return Instr; 43 } 44 45 public: 46 VPBuilder() {} 47 48 /// \brief This specifies that created VPInstructions should be appended to 49 /// the end of the specified block. 50 void setInsertPoint(VPBasicBlock *TheBB) { 51 assert(TheBB && "Attempting to set a null insert point"); 52 BB = TheBB; 53 InsertPt = BB->end(); 54 } 55 56 VPValue *createNot(VPValue *Operand) { 57 return createInstruction(VPInstruction::Not, {Operand}); 58 } 59 60 VPValue *createAnd(VPValue *LHS, VPValue *RHS) { 61 return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}); 62 } 63 64 VPValue *createOr(VPValue *LHS, VPValue *RHS) { 65 return createInstruction(Instruction::BinaryOps::Or, {LHS, RHS}); 66 } 67 }; 68 69 70 /// TODO: The following VectorizationFactor was pulled out of 71 /// LoopVectorizationCostModel class. LV also deals with 72 /// VectorizerParams::VectorizationFactor and VectorizationCostTy. 73 /// We need to streamline them. 74 75 /// Information about vectorization costs 76 struct VectorizationFactor { 77 // Vector width with best cost 78 unsigned Width; 79 // Cost of the loop with that width 80 unsigned Cost; 81 }; 82 83 /// Planner drives the vectorization process after having passed 84 /// Legality checks. 85 class LoopVectorizationPlanner { 86 /// The loop that we evaluate. 87 Loop *OrigLoop; 88 89 /// Loop Info analysis. 90 LoopInfo *LI; 91 92 /// Target Library Info. 93 const TargetLibraryInfo *TLI; 94 95 /// Target Transform Info. 96 const TargetTransformInfo *TTI; 97 98 /// The legality analysis. 99 LoopVectorizationLegality *Legal; 100 101 /// The profitablity analysis. 102 LoopVectorizationCostModel &CM; 103 104 using VPlanPtr = std::unique_ptr<VPlan>; 105 106 SmallVector<VPlanPtr, 4> VPlans; 107 108 /// This class is used to enable the VPlan to invoke a method of ILV. This is 109 /// needed until the method is refactored out of ILV and becomes reusable. 110 struct VPCallbackILV : public VPCallback { 111 InnerLoopVectorizer &ILV; 112 113 VPCallbackILV(InnerLoopVectorizer &ILV) : ILV(ILV) {} 114 115 Value *getOrCreateVectorValues(Value *V, unsigned Part) override; 116 }; 117 118 /// A builder used to construct the current plan. 119 VPBuilder Builder; 120 121 /// When we if-convert we need to create edge masks. We have to cache values 122 /// so that we don't end up with exponential recursion/IR. Note that 123 /// if-conversion currently takes place during VPlan-construction, so these 124 /// caches are only used at that stage. 125 using EdgeMaskCacheTy = 126 DenseMap<std::pair<BasicBlock *, BasicBlock *>, VPValue *>; 127 using BlockMaskCacheTy = DenseMap<BasicBlock *, VPValue *>; 128 EdgeMaskCacheTy EdgeMaskCache; 129 BlockMaskCacheTy BlockMaskCache; 130 131 unsigned BestVF = 0; 132 unsigned BestUF = 0; 133 134 public: 135 LoopVectorizationPlanner(Loop *L, LoopInfo *LI, const TargetLibraryInfo *TLI, 136 const TargetTransformInfo *TTI, 137 LoopVectorizationLegality *Legal, 138 LoopVectorizationCostModel &CM) 139 : OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM) {} 140 141 /// Plan how to best vectorize, return the best VF and its cost. 142 VectorizationFactor plan(bool OptForSize, unsigned UserVF); 143 144 /// Finalize the best decision and dispose of all other VPlans. 145 void setBestPlan(unsigned VF, unsigned UF); 146 147 /// Generate the IR code for the body of the vectorized loop according to the 148 /// best selected VPlan. 149 void executePlan(InnerLoopVectorizer &LB, DominatorTree *DT); 150 151 void printPlans(raw_ostream &O) { 152 for (const auto &Plan : VPlans) 153 O << *Plan; 154 } 155 156 protected: 157 /// Collect the instructions from the original loop that would be trivially 158 /// dead in the vectorized loop if generated. 159 void collectTriviallyDeadInstructions( 160 SmallPtrSetImpl<Instruction *> &DeadInstructions); 161 162 /// A range of powers-of-2 vectorization factors with fixed start and 163 /// adjustable end. The range includes start and excludes end, e.g.,: 164 /// [1, 9) = {1, 2, 4, 8} 165 struct VFRange { 166 // A power of 2. 167 const unsigned Start; 168 169 // Need not be a power of 2. If End <= Start range is empty. 170 unsigned End; 171 }; 172 173 /// Test a \p Predicate on a \p Range of VF's. Return the value of applying 174 /// \p Predicate on Range.Start, possibly decreasing Range.End such that the 175 /// returned value holds for the entire \p Range. 176 bool getDecisionAndClampRange(const std::function<bool(unsigned)> &Predicate, 177 VFRange &Range); 178 179 /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive, 180 /// according to the information gathered by Legal when it checked if it is 181 /// legal to vectorize the loop. 182 void buildVPlans(unsigned MinVF, unsigned MaxVF); 183 184 private: 185 /// A helper function that computes the predicate of the block BB, assuming 186 /// that the header block of the loop is set to True. It returns the *entry* 187 /// mask for the block BB. 188 VPValue *createBlockInMask(BasicBlock *BB, VPlanPtr &Plan); 189 190 /// A helper function that computes the predicate of the edge between SRC 191 /// and DST. 192 VPValue *createEdgeMask(BasicBlock *Src, BasicBlock *Dst, VPlanPtr &Plan); 193 194 /// Check if \I belongs to an Interleave Group within the given VF \p Range, 195 /// \return true in the first returned value if so and false otherwise. 196 /// Build a new VPInterleaveGroup Recipe if \I is the primary member of an IG 197 /// for \p Range.Start, and provide it as the second returned value. 198 /// Note that if \I is an adjunct member of an IG for \p Range.Start, the 199 /// \return value is <true, nullptr>, as it is handled by another recipe. 200 /// \p Range.End may be decreased to ensure same decision from \p Range.Start 201 /// to \p Range.End. 202 VPInterleaveRecipe *tryToInterleaveMemory(Instruction *I, VFRange &Range); 203 204 // Check if \I is a memory instruction to be widened for \p Range.Start and 205 // potentially masked. Such instructions are handled by a recipe that takes an 206 // additional VPInstruction for the mask. 207 VPWidenMemoryInstructionRecipe *tryToWidenMemory(Instruction *I, 208 VFRange &Range, 209 VPlanPtr &Plan); 210 211 /// Check if an induction recipe should be constructed for \I within the given 212 /// VF \p Range. If so build and return it. If not, return null. \p Range.End 213 /// may be decreased to ensure same decision from \p Range.Start to 214 /// \p Range.End. 215 VPWidenIntOrFpInductionRecipe *tryToOptimizeInduction(Instruction *I, 216 VFRange &Range); 217 218 /// Handle non-loop phi nodes. Currently all such phi nodes are turned into 219 /// a sequence of select instructions as the vectorizer currently performs 220 /// full if-conversion. 221 VPBlendRecipe *tryToBlend(Instruction *I, VPlanPtr &Plan); 222 223 /// Check if \p I can be widened within the given VF \p Range. If \p I can be 224 /// widened for \p Range.Start, check if the last recipe of \p VPBB can be 225 /// extended to include \p I or else build a new VPWidenRecipe for it and 226 /// append it to \p VPBB. Return true if \p I can be widened for Range.Start, 227 /// false otherwise. Range.End may be decreased to ensure same decision from 228 /// \p Range.Start to \p Range.End. 229 bool tryToWiden(Instruction *I, VPBasicBlock *VPBB, VFRange &Range); 230 231 /// Build a VPReplicationRecipe for \p I and enclose it within a Region if it 232 /// is predicated. \return \p VPBB augmented with this new recipe if \p I is 233 /// not predicated, otherwise \return a new VPBasicBlock that succeeds the new 234 /// Region. Update the packing decision of predicated instructions if they 235 /// feed \p I. Range.End may be decreased to ensure same recipe behavior from 236 /// \p Range.Start to \p Range.End. 237 VPBasicBlock *handleReplication( 238 Instruction *I, VFRange &Range, VPBasicBlock *VPBB, 239 DenseMap<Instruction *, VPReplicateRecipe *> &PredInst2Recipe, 240 VPlanPtr &Plan); 241 242 /// Create a replicating region for instruction \p I that requires 243 /// predication. \p PredRecipe is a VPReplicateRecipe holding \p I. 244 VPRegionBlock *createReplicateRegion(Instruction *I, VPRecipeBase *PredRecipe, 245 VPlanPtr &Plan); 246 247 /// Build a VPlan according to the information gathered by Legal. \return a 248 /// VPlan for vectorization factors \p Range.Start and up to \p Range.End 249 /// exclusive, possibly decreasing \p Range.End. 250 VPlanPtr buildVPlan(VFRange &Range, 251 const SmallPtrSetImpl<Value *> &NeedDef); 252 }; 253 254 } // namespace llvm 255 256 #endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H 257