1 //===- LoopVectorizationPlanner.h - Planner for LoopVectorization ---------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// This file provides a LoopVectorizationPlanner class. 12 /// InnerLoopVectorizer vectorizes loops which contain only one basic 13 /// LoopVectorizationPlanner - drives the vectorization process after having 14 /// passed Legality checks. 15 /// The planner builds and optimizes the Vectorization Plans which record the 16 /// decisions how to vectorize the given loop. In particular, represent the 17 /// control-flow of the vectorized version, the replication of instructions that 18 /// are to be scalarized, and interleave access groups. 19 /// 20 /// Also provides a VPlan-based builder utility analogous to IRBuilder. 21 /// It provides an instruction-level API for generating VPInstructions while 22 /// abstracting away the Recipe manipulation details. 23 //===----------------------------------------------------------------------===// 24 25 #ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H 26 #define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H 27 28 #include "VPlan.h" 29 #include "llvm/Analysis/LoopInfo.h" 30 #include "llvm/Analysis/TargetLibraryInfo.h" 31 #include "llvm/Analysis/TargetTransformInfo.h" 32 33 namespace llvm { 34 35 /// VPlan-based builder utility analogous to IRBuilder. 36 class VPBuilder { 37 private: 38 VPBasicBlock *BB = nullptr; 39 VPBasicBlock::iterator InsertPt = VPBasicBlock::iterator(); 40 41 VPInstruction *createInstruction(unsigned Opcode, 42 ArrayRef<VPValue *> Operands) { 43 VPInstruction *Instr = new VPInstruction(Opcode, Operands); 44 if (BB) 45 BB->insert(Instr, InsertPt); 46 return Instr; 47 } 48 49 VPInstruction *createInstruction(unsigned Opcode, 50 std::initializer_list<VPValue *> Operands) { 51 return createInstruction(Opcode, ArrayRef<VPValue *>(Operands)); 52 } 53 54 public: 55 VPBuilder() {} 56 57 /// Clear the insertion point: created instructions will not be inserted into 58 /// a block. 59 void clearInsertionPoint() { 60 BB = nullptr; 61 InsertPt = VPBasicBlock::iterator(); 62 } 63 64 VPBasicBlock *getInsertBlock() const { return BB; } 65 VPBasicBlock::iterator getInsertPoint() const { return InsertPt; } 66 67 /// InsertPoint - A saved insertion point. 68 class VPInsertPoint { 69 VPBasicBlock *Block = nullptr; 70 VPBasicBlock::iterator Point; 71 72 public: 73 /// Creates a new insertion point which doesn't point to anything. 74 VPInsertPoint() = default; 75 76 /// Creates a new insertion point at the given location. 77 VPInsertPoint(VPBasicBlock *InsertBlock, VPBasicBlock::iterator InsertPoint) 78 : Block(InsertBlock), Point(InsertPoint) {} 79 80 /// Returns true if this insert point is set. 81 bool isSet() const { return Block != nullptr; } 82 83 VPBasicBlock *getBlock() const { return Block; } 84 VPBasicBlock::iterator getPoint() const { return Point; } 85 }; 86 87 /// Sets the current insert point to a previously-saved location. 88 void restoreIP(VPInsertPoint IP) { 89 if (IP.isSet()) 90 setInsertPoint(IP.getBlock(), IP.getPoint()); 91 else 92 clearInsertionPoint(); 93 } 94 95 /// This specifies that created VPInstructions should be appended to the end 96 /// of the specified block. 97 void setInsertPoint(VPBasicBlock *TheBB) { 98 assert(TheBB && "Attempting to set a null insert point"); 99 BB = TheBB; 100 InsertPt = BB->end(); 101 } 102 103 /// This specifies that created instructions should be inserted at the 104 /// specified point. 105 void setInsertPoint(VPBasicBlock *TheBB, VPBasicBlock::iterator IP) { 106 BB = TheBB; 107 InsertPt = IP; 108 } 109 110 /// Insert and return the specified instruction. 111 VPInstruction *insert(VPInstruction *I) const { 112 BB->insert(I, InsertPt); 113 return I; 114 } 115 116 /// Create an N-ary operation with \p Opcode, \p Operands and set \p Inst as 117 /// its underlying Instruction. 118 VPValue *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands, 119 Instruction *Inst = nullptr) { 120 VPInstruction *NewVPInst = createInstruction(Opcode, Operands); 121 NewVPInst->setUnderlyingValue(Inst); 122 return NewVPInst; 123 } 124 VPValue *createNaryOp(unsigned Opcode, 125 std::initializer_list<VPValue *> Operands, 126 Instruction *Inst = nullptr) { 127 return createNaryOp(Opcode, ArrayRef<VPValue *>(Operands), Inst); 128 } 129 130 VPValue *createNot(VPValue *Operand) { 131 return createInstruction(VPInstruction::Not, {Operand}); 132 } 133 134 VPValue *createAnd(VPValue *LHS, VPValue *RHS) { 135 return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}); 136 } 137 138 VPValue *createOr(VPValue *LHS, VPValue *RHS) { 139 return createInstruction(Instruction::BinaryOps::Or, {LHS, RHS}); 140 } 141 142 //===--------------------------------------------------------------------===// 143 // RAII helpers. 144 //===--------------------------------------------------------------------===// 145 146 /// RAII object that stores the current insertion point and restores it when 147 /// the object is destroyed. 148 class InsertPointGuard { 149 VPBuilder &Builder; 150 VPBasicBlock *Block; 151 VPBasicBlock::iterator Point; 152 153 public: 154 InsertPointGuard(VPBuilder &B) 155 : Builder(B), Block(B.getInsertBlock()), Point(B.getInsertPoint()) {} 156 157 InsertPointGuard(const InsertPointGuard &) = delete; 158 InsertPointGuard &operator=(const InsertPointGuard &) = delete; 159 160 ~InsertPointGuard() { Builder.restoreIP(VPInsertPoint(Block, Point)); } 161 }; 162 }; 163 164 /// TODO: The following VectorizationFactor was pulled out of 165 /// LoopVectorizationCostModel class. LV also deals with 166 /// VectorizerParams::VectorizationFactor and VectorizationCostTy. 167 /// We need to streamline them. 168 169 /// Information about vectorization costs 170 struct VectorizationFactor { 171 // Vector width with best cost 172 unsigned Width; 173 // Cost of the loop with that width 174 unsigned Cost; 175 }; 176 177 /// Planner drives the vectorization process after having passed 178 /// Legality checks. 179 class LoopVectorizationPlanner { 180 /// The loop that we evaluate. 181 Loop *OrigLoop; 182 183 /// Loop Info analysis. 184 LoopInfo *LI; 185 186 /// Target Library Info. 187 const TargetLibraryInfo *TLI; 188 189 /// Target Transform Info. 190 const TargetTransformInfo *TTI; 191 192 /// The legality analysis. 193 LoopVectorizationLegality *Legal; 194 195 /// The profitablity analysis. 196 LoopVectorizationCostModel &CM; 197 198 using VPlanPtr = std::unique_ptr<VPlan>; 199 200 SmallVector<VPlanPtr, 4> VPlans; 201 202 /// This class is used to enable the VPlan to invoke a method of ILV. This is 203 /// needed until the method is refactored out of ILV and becomes reusable. 204 struct VPCallbackILV : public VPCallback { 205 InnerLoopVectorizer &ILV; 206 207 VPCallbackILV(InnerLoopVectorizer &ILV) : ILV(ILV) {} 208 209 Value *getOrCreateVectorValues(Value *V, unsigned Part) override; 210 }; 211 212 /// A builder used to construct the current plan. 213 VPBuilder Builder; 214 215 /// When we if-convert we need to create edge masks. We have to cache values 216 /// so that we don't end up with exponential recursion/IR. Note that 217 /// if-conversion currently takes place during VPlan-construction, so these 218 /// caches are only used at that stage. 219 using EdgeMaskCacheTy = 220 DenseMap<std::pair<BasicBlock *, BasicBlock *>, VPValue *>; 221 using BlockMaskCacheTy = DenseMap<BasicBlock *, VPValue *>; 222 EdgeMaskCacheTy EdgeMaskCache; 223 BlockMaskCacheTy BlockMaskCache; 224 225 unsigned BestVF = 0; 226 unsigned BestUF = 0; 227 228 public: 229 LoopVectorizationPlanner(Loop *L, LoopInfo *LI, const TargetLibraryInfo *TLI, 230 const TargetTransformInfo *TTI, 231 LoopVectorizationLegality *Legal, 232 LoopVectorizationCostModel &CM) 233 : OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM) {} 234 235 /// Plan how to best vectorize, return the best VF and its cost. 236 VectorizationFactor plan(bool OptForSize, unsigned UserVF); 237 238 /// Use the VPlan-native path to plan how to best vectorize, return the best 239 /// VF and its cost. 240 VectorizationFactor planInVPlanNativePath(bool OptForSize, unsigned UserVF); 241 242 /// Finalize the best decision and dispose of all other VPlans. 243 void setBestPlan(unsigned VF, unsigned UF); 244 245 /// Generate the IR code for the body of the vectorized loop according to the 246 /// best selected VPlan. 247 void executePlan(InnerLoopVectorizer &LB, DominatorTree *DT); 248 249 void printPlans(raw_ostream &O) { 250 for (const auto &Plan : VPlans) 251 O << *Plan; 252 } 253 254 protected: 255 /// Collect the instructions from the original loop that would be trivially 256 /// dead in the vectorized loop if generated. 257 void collectTriviallyDeadInstructions( 258 SmallPtrSetImpl<Instruction *> &DeadInstructions); 259 260 /// A range of powers-of-2 vectorization factors with fixed start and 261 /// adjustable end. The range includes start and excludes end, e.g.,: 262 /// [1, 9) = {1, 2, 4, 8} 263 struct VFRange { 264 // A power of 2. 265 const unsigned Start; 266 267 // Need not be a power of 2. If End <= Start range is empty. 268 unsigned End; 269 }; 270 271 /// Test a \p Predicate on a \p Range of VF's. Return the value of applying 272 /// \p Predicate on Range.Start, possibly decreasing Range.End such that the 273 /// returned value holds for the entire \p Range. 274 bool getDecisionAndClampRange(const std::function<bool(unsigned)> &Predicate, 275 VFRange &Range); 276 277 /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive, 278 /// according to the information gathered by Legal when it checked if it is 279 /// legal to vectorize the loop. 280 void buildVPlans(unsigned MinVF, unsigned MaxVF); 281 282 private: 283 /// A helper function that computes the predicate of the block BB, assuming 284 /// that the header block of the loop is set to True. It returns the *entry* 285 /// mask for the block BB. 286 VPValue *createBlockInMask(BasicBlock *BB, VPlanPtr &Plan); 287 288 /// A helper function that computes the predicate of the edge between SRC 289 /// and DST. 290 VPValue *createEdgeMask(BasicBlock *Src, BasicBlock *Dst, VPlanPtr &Plan); 291 292 /// Check if \I belongs to an Interleave Group within the given VF \p Range, 293 /// \return true in the first returned value if so and false otherwise. 294 /// Build a new VPInterleaveGroup Recipe if \I is the primary member of an IG 295 /// for \p Range.Start, and provide it as the second returned value. 296 /// Note that if \I is an adjunct member of an IG for \p Range.Start, the 297 /// \return value is <true, nullptr>, as it is handled by another recipe. 298 /// \p Range.End may be decreased to ensure same decision from \p Range.Start 299 /// to \p Range.End. 300 VPInterleaveRecipe *tryToInterleaveMemory(Instruction *I, VFRange &Range); 301 302 // Check if \I is a memory instruction to be widened for \p Range.Start and 303 // potentially masked. Such instructions are handled by a recipe that takes an 304 // additional VPInstruction for the mask. 305 VPWidenMemoryInstructionRecipe *tryToWidenMemory(Instruction *I, 306 VFRange &Range, 307 VPlanPtr &Plan); 308 309 /// Check if an induction recipe should be constructed for \I within the given 310 /// VF \p Range. If so build and return it. If not, return null. \p Range.End 311 /// may be decreased to ensure same decision from \p Range.Start to 312 /// \p Range.End. 313 VPWidenIntOrFpInductionRecipe *tryToOptimizeInduction(Instruction *I, 314 VFRange &Range); 315 316 /// Handle non-loop phi nodes. Currently all such phi nodes are turned into 317 /// a sequence of select instructions as the vectorizer currently performs 318 /// full if-conversion. 319 VPBlendRecipe *tryToBlend(Instruction *I, VPlanPtr &Plan); 320 321 /// Check if \p I can be widened within the given VF \p Range. If \p I can be 322 /// widened for \p Range.Start, check if the last recipe of \p VPBB can be 323 /// extended to include \p I or else build a new VPWidenRecipe for it and 324 /// append it to \p VPBB. Return true if \p I can be widened for Range.Start, 325 /// false otherwise. Range.End may be decreased to ensure same decision from 326 /// \p Range.Start to \p Range.End. 327 bool tryToWiden(Instruction *I, VPBasicBlock *VPBB, VFRange &Range); 328 329 /// Build a VPReplicationRecipe for \p I and enclose it within a Region if it 330 /// is predicated. \return \p VPBB augmented with this new recipe if \p I is 331 /// not predicated, otherwise \return a new VPBasicBlock that succeeds the new 332 /// Region. Update the packing decision of predicated instructions if they 333 /// feed \p I. Range.End may be decreased to ensure same recipe behavior from 334 /// \p Range.Start to \p Range.End. 335 VPBasicBlock *handleReplication( 336 Instruction *I, VFRange &Range, VPBasicBlock *VPBB, 337 DenseMap<Instruction *, VPReplicateRecipe *> &PredInst2Recipe, 338 VPlanPtr &Plan); 339 340 /// Create a replicating region for instruction \p I that requires 341 /// predication. \p PredRecipe is a VPReplicateRecipe holding \p I. 342 VPRegionBlock *createReplicateRegion(Instruction *I, VPRecipeBase *PredRecipe, 343 VPlanPtr &Plan); 344 345 /// Build a VPlan according to the information gathered by Legal. \return a 346 /// VPlan for vectorization factors \p Range.Start and up to \p Range.End 347 /// exclusive, possibly decreasing \p Range.End. 348 VPlanPtr buildVPlan(VFRange &Range, 349 const SmallPtrSetImpl<Value *> &NeedDef); 350 }; 351 352 } // namespace llvm 353 354 #endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H 355