1 //===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// This file contains the declarations of the Vectorization Plan base classes: 12 /// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual 13 /// VPBlockBase, together implementing a Hierarchical CFG; 14 /// 2. Specializations of GraphTraits that allow VPBlockBase graphs to be 15 /// treated as proper graphs for generic algorithms; 16 /// 3. Pure virtual VPRecipeBase serving as the base class for recipes contained 17 /// within VPBasicBlocks; 18 /// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned 19 /// instruction; 20 /// 5. The VPlan class holding a candidate for vectorization; 21 /// 6. The VPlanPrinter class providing a way to print a plan in dot format; 22 /// These are documented in docs/VectorizationPlan.rst. 23 // 24 //===----------------------------------------------------------------------===// 25 26 #ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H 27 #define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H 28 29 #include "VPlanLoopInfo.h" 30 #include "VPlanValue.h" 31 #include "llvm/ADT/DenseMap.h" 32 #include "llvm/ADT/DepthFirstIterator.h" 33 #include "llvm/ADT/GraphTraits.h" 34 #include "llvm/ADT/Optional.h" 35 #include "llvm/ADT/SmallPtrSet.h" 36 #include "llvm/ADT/SmallSet.h" 37 #include "llvm/ADT/SmallVector.h" 38 #include "llvm/ADT/Twine.h" 39 #include "llvm/ADT/ilist.h" 40 #include "llvm/ADT/ilist_node.h" 41 #include "llvm/Analysis/VectorUtils.h" 42 #include "llvm/IR/IRBuilder.h" 43 #include <algorithm> 44 #include <cassert> 45 #include <cstddef> 46 #include <map> 47 #include <string> 48 49 namespace llvm { 50 51 class LoopVectorizationLegality; 52 class LoopVectorizationCostModel; 53 class BasicBlock; 54 class DominatorTree; 55 class InnerLoopVectorizer; 56 template <class T> class InterleaveGroup; 57 class LoopInfo; 58 class raw_ostream; 59 class Value; 60 class VPBasicBlock; 61 class VPRegionBlock; 62 class VPlan; 63 class VPlanSlp; 64 65 /// A range of powers-of-2 vectorization factors with fixed start and 66 /// adjustable end. The range includes start and excludes end, e.g.,: 67 /// [1, 9) = {1, 2, 4, 8} 68 struct VFRange { 69 // A power of 2. 70 const unsigned Start; 71 72 // Need not be a power of 2. If End <= Start range is empty. 73 unsigned End; 74 }; 75 76 using VPlanPtr = std::unique_ptr<VPlan>; 77 78 /// In what follows, the term "input IR" refers to code that is fed into the 79 /// vectorizer whereas the term "output IR" refers to code that is generated by 80 /// the vectorizer. 81 82 /// VPIteration represents a single point in the iteration space of the output 83 /// (vectorized and/or unrolled) IR loop. 84 struct VPIteration { 85 /// in [0..UF) 86 unsigned Part; 87 88 /// in [0..VF) 89 unsigned Lane; 90 }; 91 92 /// This is a helper struct for maintaining vectorization state. It's used for 93 /// mapping values from the original loop to their corresponding values in 94 /// the new loop. Two mappings are maintained: one for vectorized values and 95 /// one for scalarized values. Vectorized values are represented with UF 96 /// vector values in the new loop, and scalarized values are represented with 97 /// UF x VF scalar values in the new loop. UF and VF are the unroll and 98 /// vectorization factors, respectively. 99 /// 100 /// Entries can be added to either map with setVectorValue and setScalarValue, 101 /// which assert that an entry was not already added before. If an entry is to 102 /// replace an existing one, call resetVectorValue and resetScalarValue. This is 103 /// currently needed to modify the mapped values during "fix-up" operations that 104 /// occur once the first phase of widening is complete. These operations include 105 /// type truncation and the second phase of recurrence widening. 106 /// 107 /// Entries from either map can be retrieved using the getVectorValue and 108 /// getScalarValue functions, which assert that the desired value exists. 109 struct VectorizerValueMap { 110 friend struct VPTransformState; 111 112 private: 113 /// The unroll factor. Each entry in the vector map contains UF vector values. 114 unsigned UF; 115 116 /// The vectorization factor. Each entry in the scalar map contains UF x VF 117 /// scalar values. 118 unsigned VF; 119 120 /// The vector and scalar map storage. We use std::map and not DenseMap 121 /// because insertions to DenseMap invalidate its iterators. 122 using VectorParts = SmallVector<Value *, 2>; 123 using ScalarParts = SmallVector<SmallVector<Value *, 4>, 2>; 124 std::map<Value *, VectorParts> VectorMapStorage; 125 std::map<Value *, ScalarParts> ScalarMapStorage; 126 127 public: 128 /// Construct an empty map with the given unroll and vectorization factors. VectorizerValueMapVectorizerValueMap129 VectorizerValueMap(unsigned UF, unsigned VF) : UF(UF), VF(VF) {} 130 131 /// \return True if the map has any vector entry for \p Key. hasAnyVectorValueVectorizerValueMap132 bool hasAnyVectorValue(Value *Key) const { 133 return VectorMapStorage.count(Key); 134 } 135 136 /// \return True if the map has a vector entry for \p Key and \p Part. hasVectorValueVectorizerValueMap137 bool hasVectorValue(Value *Key, unsigned Part) const { 138 assert(Part < UF && "Queried Vector Part is too large."); 139 if (!hasAnyVectorValue(Key)) 140 return false; 141 const VectorParts &Entry = VectorMapStorage.find(Key)->second; 142 assert(Entry.size() == UF && "VectorParts has wrong dimensions."); 143 return Entry[Part] != nullptr; 144 } 145 146 /// \return True if the map has any scalar entry for \p Key. hasAnyScalarValueVectorizerValueMap147 bool hasAnyScalarValue(Value *Key) const { 148 return ScalarMapStorage.count(Key); 149 } 150 151 /// \return True if the map has a scalar entry for \p Key and \p Instance. hasScalarValueVectorizerValueMap152 bool hasScalarValue(Value *Key, const VPIteration &Instance) const { 153 assert(Instance.Part < UF && "Queried Scalar Part is too large."); 154 assert(Instance.Lane < VF && "Queried Scalar Lane is too large."); 155 if (!hasAnyScalarValue(Key)) 156 return false; 157 const ScalarParts &Entry = ScalarMapStorage.find(Key)->second; 158 assert(Entry.size() == UF && "ScalarParts has wrong dimensions."); 159 assert(Entry[Instance.Part].size() == VF && 160 "ScalarParts has wrong dimensions."); 161 return Entry[Instance.Part][Instance.Lane] != nullptr; 162 } 163 164 /// Retrieve the existing vector value that corresponds to \p Key and 165 /// \p Part. getVectorValueVectorizerValueMap166 Value *getVectorValue(Value *Key, unsigned Part) { 167 assert(hasVectorValue(Key, Part) && "Getting non-existent value."); 168 return VectorMapStorage[Key][Part]; 169 } 170 171 /// Retrieve the existing scalar value that corresponds to \p Key and 172 /// \p Instance. getScalarValueVectorizerValueMap173 Value *getScalarValue(Value *Key, const VPIteration &Instance) { 174 assert(hasScalarValue(Key, Instance) && "Getting non-existent value."); 175 return ScalarMapStorage[Key][Instance.Part][Instance.Lane]; 176 } 177 178 /// Set a vector value associated with \p Key and \p Part. Assumes such a 179 /// value is not already set. If it is, use resetVectorValue() instead. setVectorValueVectorizerValueMap180 void setVectorValue(Value *Key, unsigned Part, Value *Vector) { 181 assert(!hasVectorValue(Key, Part) && "Vector value already set for part"); 182 if (!VectorMapStorage.count(Key)) { 183 VectorParts Entry(UF); 184 VectorMapStorage[Key] = Entry; 185 } 186 VectorMapStorage[Key][Part] = Vector; 187 } 188 189 /// Set a scalar value associated with \p Key and \p Instance. Assumes such a 190 /// value is not already set. setScalarValueVectorizerValueMap191 void setScalarValue(Value *Key, const VPIteration &Instance, Value *Scalar) { 192 assert(!hasScalarValue(Key, Instance) && "Scalar value already set"); 193 if (!ScalarMapStorage.count(Key)) { 194 ScalarParts Entry(UF); 195 // TODO: Consider storing uniform values only per-part, as they occupy 196 // lane 0 only, keeping the other VF-1 redundant entries null. 197 for (unsigned Part = 0; Part < UF; ++Part) 198 Entry[Part].resize(VF, nullptr); 199 ScalarMapStorage[Key] = Entry; 200 } 201 ScalarMapStorage[Key][Instance.Part][Instance.Lane] = Scalar; 202 } 203 204 /// Reset the vector value associated with \p Key for the given \p Part. 205 /// This function can be used to update values that have already been 206 /// vectorized. This is the case for "fix-up" operations including type 207 /// truncation and the second phase of recurrence vectorization. resetVectorValueVectorizerValueMap208 void resetVectorValue(Value *Key, unsigned Part, Value *Vector) { 209 assert(hasVectorValue(Key, Part) && "Vector value not set for part"); 210 VectorMapStorage[Key][Part] = Vector; 211 } 212 213 /// Reset the scalar value associated with \p Key for \p Part and \p Lane. 214 /// This function can be used to update values that have already been 215 /// scalarized. This is the case for "fix-up" operations including scalar phi 216 /// nodes for scalarized and predicated instructions. resetScalarValueVectorizerValueMap217 void resetScalarValue(Value *Key, const VPIteration &Instance, 218 Value *Scalar) { 219 assert(hasScalarValue(Key, Instance) && 220 "Scalar value not set for part and lane"); 221 ScalarMapStorage[Key][Instance.Part][Instance.Lane] = Scalar; 222 } 223 }; 224 225 /// This class is used to enable the VPlan to invoke a method of ILV. This is 226 /// needed until the method is refactored out of ILV and becomes reusable. 227 struct VPCallback { ~VPCallbackVPCallback228 virtual ~VPCallback() {} 229 virtual Value *getOrCreateVectorValues(Value *V, unsigned Part) = 0; 230 }; 231 232 /// VPTransformState holds information passed down when "executing" a VPlan, 233 /// needed for generating the output IR. 234 struct VPTransformState { VPTransformStateVPTransformState235 VPTransformState(unsigned VF, unsigned UF, LoopInfo *LI, DominatorTree *DT, 236 IRBuilder<> &Builder, VectorizerValueMap &ValueMap, 237 InnerLoopVectorizer *ILV, VPCallback &Callback) 238 : VF(VF), UF(UF), Instance(), LI(LI), DT(DT), Builder(Builder), 239 ValueMap(ValueMap), ILV(ILV), Callback(Callback) {} 240 241 /// The chosen Vectorization and Unroll Factors of the loop being vectorized. 242 unsigned VF; 243 unsigned UF; 244 245 /// Hold the indices to generate specific scalar instructions. Null indicates 246 /// that all instances are to be generated, using either scalar or vector 247 /// instructions. 248 Optional<VPIteration> Instance; 249 250 struct DataState { 251 /// A type for vectorized values in the new loop. Each value from the 252 /// original loop, when vectorized, is represented by UF vector values in 253 /// the new unrolled loop, where UF is the unroll factor. 254 typedef SmallVector<Value *, 2> PerPartValuesTy; 255 256 DenseMap<VPValue *, PerPartValuesTy> PerPartOutput; 257 } Data; 258 259 /// Get the generated Value for a given VPValue and a given Part. Note that 260 /// as some Defs are still created by ILV and managed in its ValueMap, this 261 /// method will delegate the call to ILV in such cases in order to provide 262 /// callers a consistent API. 263 /// \see set. getVPTransformState264 Value *get(VPValue *Def, unsigned Part) { 265 // If Values have been set for this Def return the one relevant for \p Part. 266 if (Data.PerPartOutput.count(Def)) 267 return Data.PerPartOutput[Def][Part]; 268 // Def is managed by ILV: bring the Values from ValueMap. 269 return Callback.getOrCreateVectorValues(VPValue2Value[Def], Part); 270 } 271 272 /// Set the generated Value for a given VPValue and a given Part. setVPTransformState273 void set(VPValue *Def, Value *V, unsigned Part) { 274 if (!Data.PerPartOutput.count(Def)) { 275 DataState::PerPartValuesTy Entry(UF); 276 Data.PerPartOutput[Def] = Entry; 277 } 278 Data.PerPartOutput[Def][Part] = V; 279 } 280 281 /// Hold state information used when constructing the CFG of the output IR, 282 /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks. 283 struct CFGState { 284 /// The previous VPBasicBlock visited. Initially set to null. 285 VPBasicBlock *PrevVPBB = nullptr; 286 287 /// The previous IR BasicBlock created or used. Initially set to the new 288 /// header BasicBlock. 289 BasicBlock *PrevBB = nullptr; 290 291 /// The last IR BasicBlock in the output IR. Set to the new latch 292 /// BasicBlock, used for placing the newly created BasicBlocks. 293 BasicBlock *LastBB = nullptr; 294 295 /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case 296 /// of replication, maps the BasicBlock of the last replica created. 297 SmallDenseMap<VPBasicBlock *, BasicBlock *> VPBB2IRBB; 298 299 /// Vector of VPBasicBlocks whose terminator instruction needs to be fixed 300 /// up at the end of vector code generation. 301 SmallVector<VPBasicBlock *, 8> VPBBsToFix; 302 303 CFGState() = default; 304 } CFG; 305 306 /// Hold a pointer to LoopInfo to register new basic blocks in the loop. 307 LoopInfo *LI; 308 309 /// Hold a pointer to Dominator Tree to register new basic blocks in the loop. 310 DominatorTree *DT; 311 312 /// Hold a reference to the IRBuilder used to generate output IR code. 313 IRBuilder<> &Builder; 314 315 /// Hold a reference to the Value state information used when generating the 316 /// Values of the output IR. 317 VectorizerValueMap &ValueMap; 318 319 /// Hold a reference to a mapping between VPValues in VPlan and original 320 /// Values they correspond to. 321 VPValue2ValueTy VPValue2Value; 322 323 /// Hold the trip count of the scalar loop. 324 Value *TripCount = nullptr; 325 326 /// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods. 327 InnerLoopVectorizer *ILV; 328 329 VPCallback &Callback; 330 }; 331 332 /// VPBlockBase is the building block of the Hierarchical Control-Flow Graph. 333 /// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock. 334 class VPBlockBase { 335 friend class VPBlockUtils; 336 337 private: 338 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast). 339 340 /// An optional name for the block. 341 std::string Name; 342 343 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if 344 /// it is a topmost VPBlockBase. 345 VPRegionBlock *Parent = nullptr; 346 347 /// List of predecessor blocks. 348 SmallVector<VPBlockBase *, 1> Predecessors; 349 350 /// List of successor blocks. 351 SmallVector<VPBlockBase *, 1> Successors; 352 353 /// Successor selector, null for zero or single successor blocks. 354 VPValue *CondBit = nullptr; 355 356 /// Add \p Successor as the last successor to this block. appendSuccessor(VPBlockBase * Successor)357 void appendSuccessor(VPBlockBase *Successor) { 358 assert(Successor && "Cannot add nullptr successor!"); 359 Successors.push_back(Successor); 360 } 361 362 /// Add \p Predecessor as the last predecessor to this block. appendPredecessor(VPBlockBase * Predecessor)363 void appendPredecessor(VPBlockBase *Predecessor) { 364 assert(Predecessor && "Cannot add nullptr predecessor!"); 365 Predecessors.push_back(Predecessor); 366 } 367 368 /// Remove \p Predecessor from the predecessors of this block. removePredecessor(VPBlockBase * Predecessor)369 void removePredecessor(VPBlockBase *Predecessor) { 370 auto Pos = std::find(Predecessors.begin(), Predecessors.end(), Predecessor); 371 assert(Pos && "Predecessor does not exist"); 372 Predecessors.erase(Pos); 373 } 374 375 /// Remove \p Successor from the successors of this block. removeSuccessor(VPBlockBase * Successor)376 void removeSuccessor(VPBlockBase *Successor) { 377 auto Pos = std::find(Successors.begin(), Successors.end(), Successor); 378 assert(Pos && "Successor does not exist"); 379 Successors.erase(Pos); 380 } 381 382 protected: VPBlockBase(const unsigned char SC,const std::string & N)383 VPBlockBase(const unsigned char SC, const std::string &N) 384 : SubclassID(SC), Name(N) {} 385 386 public: 387 /// An enumeration for keeping track of the concrete subclass of VPBlockBase 388 /// that are actually instantiated. Values of this enumeration are kept in the 389 /// SubclassID field of the VPBlockBase objects. They are used for concrete 390 /// type identification. 391 using VPBlockTy = enum { VPBasicBlockSC, VPRegionBlockSC }; 392 393 using VPBlocksTy = SmallVectorImpl<VPBlockBase *>; 394 395 virtual ~VPBlockBase() = default; 396 getName()397 const std::string &getName() const { return Name; } 398 setName(const Twine & newName)399 void setName(const Twine &newName) { Name = newName.str(); } 400 401 /// \return an ID for the concrete type of this object. 402 /// This is used to implement the classof checks. This should not be used 403 /// for any other purpose, as the values may change as LLVM evolves. getVPBlockID()404 unsigned getVPBlockID() const { return SubclassID; } 405 getParent()406 VPRegionBlock *getParent() { return Parent; } getParent()407 const VPRegionBlock *getParent() const { return Parent; } 408 setParent(VPRegionBlock * P)409 void setParent(VPRegionBlock *P) { Parent = P; } 410 411 /// \return the VPBasicBlock that is the entry of this VPBlockBase, 412 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this 413 /// VPBlockBase is a VPBasicBlock, it is returned. 414 const VPBasicBlock *getEntryBasicBlock() const; 415 VPBasicBlock *getEntryBasicBlock(); 416 417 /// \return the VPBasicBlock that is the exit of this VPBlockBase, 418 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this 419 /// VPBlockBase is a VPBasicBlock, it is returned. 420 const VPBasicBlock *getExitBasicBlock() const; 421 VPBasicBlock *getExitBasicBlock(); 422 getSuccessors()423 const VPBlocksTy &getSuccessors() const { return Successors; } getSuccessors()424 VPBlocksTy &getSuccessors() { return Successors; } 425 getPredecessors()426 const VPBlocksTy &getPredecessors() const { return Predecessors; } getPredecessors()427 VPBlocksTy &getPredecessors() { return Predecessors; } 428 429 /// \return the successor of this VPBlockBase if it has a single successor. 430 /// Otherwise return a null pointer. getSingleSuccessor()431 VPBlockBase *getSingleSuccessor() const { 432 return (Successors.size() == 1 ? *Successors.begin() : nullptr); 433 } 434 435 /// \return the predecessor of this VPBlockBase if it has a single 436 /// predecessor. Otherwise return a null pointer. getSinglePredecessor()437 VPBlockBase *getSinglePredecessor() const { 438 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr); 439 } 440 getNumSuccessors()441 size_t getNumSuccessors() const { return Successors.size(); } getNumPredecessors()442 size_t getNumPredecessors() const { return Predecessors.size(); } 443 444 /// An Enclosing Block of a block B is any block containing B, including B 445 /// itself. \return the closest enclosing block starting from "this", which 446 /// has successors. \return the root enclosing block if all enclosing blocks 447 /// have no successors. 448 VPBlockBase *getEnclosingBlockWithSuccessors(); 449 450 /// \return the closest enclosing block starting from "this", which has 451 /// predecessors. \return the root enclosing block if all enclosing blocks 452 /// have no predecessors. 453 VPBlockBase *getEnclosingBlockWithPredecessors(); 454 455 /// \return the successors either attached directly to this VPBlockBase or, if 456 /// this VPBlockBase is the exit block of a VPRegionBlock and has no 457 /// successors of its own, search recursively for the first enclosing 458 /// VPRegionBlock that has successors and return them. If no such 459 /// VPRegionBlock exists, return the (empty) successors of the topmost 460 /// VPBlockBase reached. getHierarchicalSuccessors()461 const VPBlocksTy &getHierarchicalSuccessors() { 462 return getEnclosingBlockWithSuccessors()->getSuccessors(); 463 } 464 465 /// \return the hierarchical successor of this VPBlockBase if it has a single 466 /// hierarchical successor. Otherwise return a null pointer. getSingleHierarchicalSuccessor()467 VPBlockBase *getSingleHierarchicalSuccessor() { 468 return getEnclosingBlockWithSuccessors()->getSingleSuccessor(); 469 } 470 471 /// \return the predecessors either attached directly to this VPBlockBase or, 472 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no 473 /// predecessors of its own, search recursively for the first enclosing 474 /// VPRegionBlock that has predecessors and return them. If no such 475 /// VPRegionBlock exists, return the (empty) predecessors of the topmost 476 /// VPBlockBase reached. getHierarchicalPredecessors()477 const VPBlocksTy &getHierarchicalPredecessors() { 478 return getEnclosingBlockWithPredecessors()->getPredecessors(); 479 } 480 481 /// \return the hierarchical predecessor of this VPBlockBase if it has a 482 /// single hierarchical predecessor. Otherwise return a null pointer. getSingleHierarchicalPredecessor()483 VPBlockBase *getSingleHierarchicalPredecessor() { 484 return getEnclosingBlockWithPredecessors()->getSinglePredecessor(); 485 } 486 487 /// \return the condition bit selecting the successor. getCondBit()488 VPValue *getCondBit() { return CondBit; } 489 getCondBit()490 const VPValue *getCondBit() const { return CondBit; } 491 setCondBit(VPValue * CV)492 void setCondBit(VPValue *CV) { CondBit = CV; } 493 494 /// Set a given VPBlockBase \p Successor as the single successor of this 495 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor. 496 /// This VPBlockBase must have no successors. setOneSuccessor(VPBlockBase * Successor)497 void setOneSuccessor(VPBlockBase *Successor) { 498 assert(Successors.empty() && "Setting one successor when others exist."); 499 appendSuccessor(Successor); 500 } 501 502 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two 503 /// successors of this VPBlockBase. \p Condition is set as the successor 504 /// selector. This VPBlockBase is not added as predecessor of \p IfTrue or \p 505 /// IfFalse. This VPBlockBase must have no successors. setTwoSuccessors(VPBlockBase * IfTrue,VPBlockBase * IfFalse,VPValue * Condition)506 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse, 507 VPValue *Condition) { 508 assert(Successors.empty() && "Setting two successors when others exist."); 509 assert(Condition && "Setting two successors without condition!"); 510 CondBit = Condition; 511 appendSuccessor(IfTrue); 512 appendSuccessor(IfFalse); 513 } 514 515 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase. 516 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added 517 /// as successor of any VPBasicBlock in \p NewPreds. setPredecessors(ArrayRef<VPBlockBase * > NewPreds)518 void setPredecessors(ArrayRef<VPBlockBase *> NewPreds) { 519 assert(Predecessors.empty() && "Block predecessors already set."); 520 for (auto *Pred : NewPreds) 521 appendPredecessor(Pred); 522 } 523 524 /// The method which generates the output IR that correspond to this 525 /// VPBlockBase, thereby "executing" the VPlan. 526 virtual void execute(struct VPTransformState *State) = 0; 527 528 /// Delete all blocks reachable from a given VPBlockBase, inclusive. 529 static void deleteCFG(VPBlockBase *Entry); 530 printAsOperand(raw_ostream & OS,bool PrintType)531 void printAsOperand(raw_ostream &OS, bool PrintType) const { 532 OS << getName(); 533 } 534 print(raw_ostream & OS)535 void print(raw_ostream &OS) const { 536 // TODO: Only printing VPBB name for now since we only have dot printing 537 // support for VPInstructions/Recipes. 538 printAsOperand(OS, false); 539 } 540 541 /// Return true if it is legal to hoist instructions into this block. isLegalToHoistInto()542 bool isLegalToHoistInto() { 543 // There are currently no constraints that prevent an instruction to be 544 // hoisted into a VPBlockBase. 545 return true; 546 } 547 }; 548 549 /// VPRecipeBase is a base class modeling a sequence of one or more output IR 550 /// instructions. 551 class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock> { 552 friend VPBasicBlock; 553 554 private: 555 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast). 556 557 /// Each VPRecipe belongs to a single VPBasicBlock. 558 VPBasicBlock *Parent = nullptr; 559 560 public: 561 /// An enumeration for keeping track of the concrete subclass of VPRecipeBase 562 /// that is actually instantiated. Values of this enumeration are kept in the 563 /// SubclassID field of the VPRecipeBase objects. They are used for concrete 564 /// type identification. 565 using VPRecipeTy = enum { 566 VPBlendSC, 567 VPBranchOnMaskSC, 568 VPInstructionSC, 569 VPInterleaveSC, 570 VPPredInstPHISC, 571 VPReplicateSC, 572 VPWidenIntOrFpInductionSC, 573 VPWidenMemoryInstructionSC, 574 VPWidenPHISC, 575 VPWidenSC, 576 }; 577 VPRecipeBase(const unsigned char SC)578 VPRecipeBase(const unsigned char SC) : SubclassID(SC) {} 579 virtual ~VPRecipeBase() = default; 580 581 /// \return an ID for the concrete type of this object. 582 /// This is used to implement the classof checks. This should not be used 583 /// for any other purpose, as the values may change as LLVM evolves. getVPRecipeID()584 unsigned getVPRecipeID() const { return SubclassID; } 585 586 /// \return the VPBasicBlock which this VPRecipe belongs to. getParent()587 VPBasicBlock *getParent() { return Parent; } getParent()588 const VPBasicBlock *getParent() const { return Parent; } 589 590 /// The method which generates the output IR instructions that correspond to 591 /// this VPRecipe, thereby "executing" the VPlan. 592 virtual void execute(struct VPTransformState &State) = 0; 593 594 /// Each recipe prints itself. 595 virtual void print(raw_ostream &O, const Twine &Indent) const = 0; 596 597 /// Insert an unlinked recipe into a basic block immediately before 598 /// the specified recipe. 599 void insertBefore(VPRecipeBase *InsertPos); 600 601 /// This method unlinks 'this' from the containing basic block and deletes it. 602 /// 603 /// \returns an iterator pointing to the element after the erased one 604 iplist<VPRecipeBase>::iterator eraseFromParent(); 605 }; 606 607 /// This is a concrete Recipe that models a single VPlan-level instruction. 608 /// While as any Recipe it may generate a sequence of IR instructions when 609 /// executed, these instructions would always form a single-def expression as 610 /// the VPInstruction is also a single def-use vertex. 611 class VPInstruction : public VPUser, public VPRecipeBase { 612 friend class VPlanHCFGTransforms; 613 friend class VPlanSlp; 614 615 public: 616 /// VPlan opcodes, extending LLVM IR with idiomatics instructions. 617 enum { 618 Not = Instruction::OtherOpsEnd + 1, 619 ICmpULE, 620 SLPLoad, 621 SLPStore, 622 }; 623 624 private: 625 typedef unsigned char OpcodeTy; 626 OpcodeTy Opcode; 627 628 /// Utility method serving execute(): generates a single instance of the 629 /// modeled instruction. 630 void generateInstruction(VPTransformState &State, unsigned Part); 631 632 protected: getUnderlyingInstr()633 Instruction *getUnderlyingInstr() { 634 return cast_or_null<Instruction>(getUnderlyingValue()); 635 } 636 setUnderlyingInstr(Instruction * I)637 void setUnderlyingInstr(Instruction *I) { setUnderlyingValue(I); } 638 639 public: VPInstruction(unsigned Opcode,ArrayRef<VPValue * > Operands)640 VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands) 641 : VPUser(VPValue::VPInstructionSC, Operands), 642 VPRecipeBase(VPRecipeBase::VPInstructionSC), Opcode(Opcode) {} 643 VPInstruction(unsigned Opcode,std::initializer_list<VPValue * > Operands)644 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands) 645 : VPInstruction(Opcode, ArrayRef<VPValue *>(Operands)) {} 646 647 /// Method to support type inquiry through isa, cast, and dyn_cast. classof(const VPValue * V)648 static inline bool classof(const VPValue *V) { 649 return V->getVPValueID() == VPValue::VPInstructionSC; 650 } 651 clone()652 VPInstruction *clone() const { 653 SmallVector<VPValue *, 2> Operands(operands()); 654 return new VPInstruction(Opcode, Operands); 655 } 656 657 /// Method to support type inquiry through isa, cast, and dyn_cast. classof(const VPRecipeBase * R)658 static inline bool classof(const VPRecipeBase *R) { 659 return R->getVPRecipeID() == VPRecipeBase::VPInstructionSC; 660 } 661 getOpcode()662 unsigned getOpcode() const { return Opcode; } 663 664 /// Generate the instruction. 665 /// TODO: We currently execute only per-part unless a specific instance is 666 /// provided. 667 void execute(VPTransformState &State) override; 668 669 /// Print the Recipe. 670 void print(raw_ostream &O, const Twine &Indent) const override; 671 672 /// Print the VPInstruction. 673 void print(raw_ostream &O) const; 674 675 /// Return true if this instruction may modify memory. mayWriteToMemory()676 bool mayWriteToMemory() const { 677 // TODO: we can use attributes of the called function to rule out memory 678 // modifications. 679 return Opcode == Instruction::Store || Opcode == Instruction::Call || 680 Opcode == Instruction::Invoke || Opcode == SLPStore; 681 } 682 }; 683 684 /// VPWidenRecipe is a recipe for producing a copy of vector type for each 685 /// Instruction in its ingredients independently, in order. This recipe covers 686 /// most of the traditional vectorization cases where each ingredient transforms 687 /// into a vectorized version of itself. 688 class VPWidenRecipe : public VPRecipeBase { 689 private: 690 /// Hold the ingredients by pointing to their original BasicBlock location. 691 BasicBlock::iterator Begin; 692 BasicBlock::iterator End; 693 694 public: VPWidenRecipe(Instruction * I)695 VPWidenRecipe(Instruction *I) : VPRecipeBase(VPWidenSC) { 696 End = I->getIterator(); 697 Begin = End++; 698 } 699 700 ~VPWidenRecipe() override = default; 701 702 /// Method to support type inquiry through isa, cast, and dyn_cast. classof(const VPRecipeBase * V)703 static inline bool classof(const VPRecipeBase *V) { 704 return V->getVPRecipeID() == VPRecipeBase::VPWidenSC; 705 } 706 707 /// Produce widened copies of all Ingredients. 708 void execute(VPTransformState &State) override; 709 710 /// Augment the recipe to include Instr, if it lies at its End. appendInstruction(Instruction * Instr)711 bool appendInstruction(Instruction *Instr) { 712 if (End != Instr->getIterator()) 713 return false; 714 End++; 715 return true; 716 } 717 718 /// Print the recipe. 719 void print(raw_ostream &O, const Twine &Indent) const override; 720 }; 721 722 /// A recipe for handling phi nodes of integer and floating-point inductions, 723 /// producing their vector and scalar values. 724 class VPWidenIntOrFpInductionRecipe : public VPRecipeBase { 725 private: 726 PHINode *IV; 727 TruncInst *Trunc; 728 729 public: 730 VPWidenIntOrFpInductionRecipe(PHINode *IV, TruncInst *Trunc = nullptr) VPRecipeBase(VPWidenIntOrFpInductionSC)731 : VPRecipeBase(VPWidenIntOrFpInductionSC), IV(IV), Trunc(Trunc) {} 732 ~VPWidenIntOrFpInductionRecipe() override = default; 733 734 /// Method to support type inquiry through isa, cast, and dyn_cast. classof(const VPRecipeBase * V)735 static inline bool classof(const VPRecipeBase *V) { 736 return V->getVPRecipeID() == VPRecipeBase::VPWidenIntOrFpInductionSC; 737 } 738 739 /// Generate the vectorized and scalarized versions of the phi node as 740 /// needed by their users. 741 void execute(VPTransformState &State) override; 742 743 /// Print the recipe. 744 void print(raw_ostream &O, const Twine &Indent) const override; 745 }; 746 747 /// A recipe for handling all phi nodes except for integer and FP inductions. 748 class VPWidenPHIRecipe : public VPRecipeBase { 749 private: 750 PHINode *Phi; 751 752 public: VPWidenPHIRecipe(PHINode * Phi)753 VPWidenPHIRecipe(PHINode *Phi) : VPRecipeBase(VPWidenPHISC), Phi(Phi) {} 754 ~VPWidenPHIRecipe() override = default; 755 756 /// Method to support type inquiry through isa, cast, and dyn_cast. classof(const VPRecipeBase * V)757 static inline bool classof(const VPRecipeBase *V) { 758 return V->getVPRecipeID() == VPRecipeBase::VPWidenPHISC; 759 } 760 761 /// Generate the phi/select nodes. 762 void execute(VPTransformState &State) override; 763 764 /// Print the recipe. 765 void print(raw_ostream &O, const Twine &Indent) const override; 766 }; 767 768 /// A recipe for vectorizing a phi-node as a sequence of mask-based select 769 /// instructions. 770 class VPBlendRecipe : public VPRecipeBase { 771 private: 772 PHINode *Phi; 773 774 /// The blend operation is a User of a mask, if not null. 775 std::unique_ptr<VPUser> User; 776 777 public: VPBlendRecipe(PHINode * Phi,ArrayRef<VPValue * > Masks)778 VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Masks) 779 : VPRecipeBase(VPBlendSC), Phi(Phi) { 780 assert((Phi->getNumIncomingValues() == 1 || 781 Phi->getNumIncomingValues() == Masks.size()) && 782 "Expected the same number of incoming values and masks"); 783 if (!Masks.empty()) 784 User.reset(new VPUser(Masks)); 785 } 786 787 /// Method to support type inquiry through isa, cast, and dyn_cast. classof(const VPRecipeBase * V)788 static inline bool classof(const VPRecipeBase *V) { 789 return V->getVPRecipeID() == VPRecipeBase::VPBlendSC; 790 } 791 792 /// Generate the phi/select nodes. 793 void execute(VPTransformState &State) override; 794 795 /// Print the recipe. 796 void print(raw_ostream &O, const Twine &Indent) const override; 797 }; 798 799 /// VPInterleaveRecipe is a recipe for transforming an interleave group of load 800 /// or stores into one wide load/store and shuffles. 801 class VPInterleaveRecipe : public VPRecipeBase { 802 private: 803 const InterleaveGroup<Instruction> *IG; 804 std::unique_ptr<VPUser> User; 805 806 public: VPInterleaveRecipe(const InterleaveGroup<Instruction> * IG,VPValue * Mask)807 VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Mask) 808 : VPRecipeBase(VPInterleaveSC), IG(IG) { 809 if (Mask) // Create a VPInstruction to register as a user of the mask. 810 User.reset(new VPUser({Mask})); 811 } 812 ~VPInterleaveRecipe() override = default; 813 814 /// Method to support type inquiry through isa, cast, and dyn_cast. classof(const VPRecipeBase * V)815 static inline bool classof(const VPRecipeBase *V) { 816 return V->getVPRecipeID() == VPRecipeBase::VPInterleaveSC; 817 } 818 819 /// Generate the wide load or store, and shuffles. 820 void execute(VPTransformState &State) override; 821 822 /// Print the recipe. 823 void print(raw_ostream &O, const Twine &Indent) const override; 824 getInterleaveGroup()825 const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; } 826 }; 827 828 /// VPReplicateRecipe replicates a given instruction producing multiple scalar 829 /// copies of the original scalar type, one per lane, instead of producing a 830 /// single copy of widened type for all lanes. If the instruction is known to be 831 /// uniform only one copy, per lane zero, will be generated. 832 class VPReplicateRecipe : public VPRecipeBase { 833 private: 834 /// The instruction being replicated. 835 Instruction *Ingredient; 836 837 /// Indicator if only a single replica per lane is needed. 838 bool IsUniform; 839 840 /// Indicator if the replicas are also predicated. 841 bool IsPredicated; 842 843 /// Indicator if the scalar values should also be packed into a vector. 844 bool AlsoPack; 845 846 public: 847 VPReplicateRecipe(Instruction *I, bool IsUniform, bool IsPredicated = false) VPRecipeBase(VPReplicateSC)848 : VPRecipeBase(VPReplicateSC), Ingredient(I), IsUniform(IsUniform), 849 IsPredicated(IsPredicated) { 850 // Retain the previous behavior of predicateInstructions(), where an 851 // insert-element of a predicated instruction got hoisted into the 852 // predicated basic block iff it was its only user. This is achieved by 853 // having predicated instructions also pack their values into a vector by 854 // default unless they have a replicated user which uses their scalar value. 855 AlsoPack = IsPredicated && !I->use_empty(); 856 } 857 858 ~VPReplicateRecipe() override = default; 859 860 /// Method to support type inquiry through isa, cast, and dyn_cast. classof(const VPRecipeBase * V)861 static inline bool classof(const VPRecipeBase *V) { 862 return V->getVPRecipeID() == VPRecipeBase::VPReplicateSC; 863 } 864 865 /// Generate replicas of the desired Ingredient. Replicas will be generated 866 /// for all parts and lanes unless a specific part and lane are specified in 867 /// the \p State. 868 void execute(VPTransformState &State) override; 869 setAlsoPack(bool Pack)870 void setAlsoPack(bool Pack) { AlsoPack = Pack; } 871 872 /// Print the recipe. 873 void print(raw_ostream &O, const Twine &Indent) const override; 874 }; 875 876 /// A recipe for generating conditional branches on the bits of a mask. 877 class VPBranchOnMaskRecipe : public VPRecipeBase { 878 private: 879 std::unique_ptr<VPUser> User; 880 881 public: VPBranchOnMaskRecipe(VPValue * BlockInMask)882 VPBranchOnMaskRecipe(VPValue *BlockInMask) : VPRecipeBase(VPBranchOnMaskSC) { 883 if (BlockInMask) // nullptr means all-one mask. 884 User.reset(new VPUser({BlockInMask})); 885 } 886 887 /// Method to support type inquiry through isa, cast, and dyn_cast. classof(const VPRecipeBase * V)888 static inline bool classof(const VPRecipeBase *V) { 889 return V->getVPRecipeID() == VPRecipeBase::VPBranchOnMaskSC; 890 } 891 892 /// Generate the extraction of the appropriate bit from the block mask and the 893 /// conditional branch. 894 void execute(VPTransformState &State) override; 895 896 /// Print the recipe. print(raw_ostream & O,const Twine & Indent)897 void print(raw_ostream &O, const Twine &Indent) const override { 898 O << " +\n" << Indent << "\"BRANCH-ON-MASK "; 899 if (User) 900 O << *User->getOperand(0); 901 else 902 O << " All-One"; 903 O << "\\l\""; 904 } 905 }; 906 907 /// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when 908 /// control converges back from a Branch-on-Mask. The phi nodes are needed in 909 /// order to merge values that are set under such a branch and feed their uses. 910 /// The phi nodes can be scalar or vector depending on the users of the value. 911 /// This recipe works in concert with VPBranchOnMaskRecipe. 912 class VPPredInstPHIRecipe : public VPRecipeBase { 913 private: 914 Instruction *PredInst; 915 916 public: 917 /// Construct a VPPredInstPHIRecipe given \p PredInst whose value needs a phi 918 /// nodes after merging back from a Branch-on-Mask. VPPredInstPHIRecipe(Instruction * PredInst)919 VPPredInstPHIRecipe(Instruction *PredInst) 920 : VPRecipeBase(VPPredInstPHISC), PredInst(PredInst) {} 921 ~VPPredInstPHIRecipe() override = default; 922 923 /// Method to support type inquiry through isa, cast, and dyn_cast. classof(const VPRecipeBase * V)924 static inline bool classof(const VPRecipeBase *V) { 925 return V->getVPRecipeID() == VPRecipeBase::VPPredInstPHISC; 926 } 927 928 /// Generates phi nodes for live-outs as needed to retain SSA form. 929 void execute(VPTransformState &State) override; 930 931 /// Print the recipe. 932 void print(raw_ostream &O, const Twine &Indent) const override; 933 }; 934 935 /// A Recipe for widening load/store operations. 936 /// TODO: We currently execute only per-part unless a specific instance is 937 /// provided. 938 class VPWidenMemoryInstructionRecipe : public VPRecipeBase { 939 private: 940 Instruction &Instr; 941 std::unique_ptr<VPUser> User; 942 943 public: VPWidenMemoryInstructionRecipe(Instruction & Instr,VPValue * Mask)944 VPWidenMemoryInstructionRecipe(Instruction &Instr, VPValue *Mask) 945 : VPRecipeBase(VPWidenMemoryInstructionSC), Instr(Instr) { 946 if (Mask) // Create a VPInstruction to register as a user of the mask. 947 User.reset(new VPUser({Mask})); 948 } 949 950 /// Method to support type inquiry through isa, cast, and dyn_cast. classof(const VPRecipeBase * V)951 static inline bool classof(const VPRecipeBase *V) { 952 return V->getVPRecipeID() == VPRecipeBase::VPWidenMemoryInstructionSC; 953 } 954 955 /// Generate the wide load/store. 956 void execute(VPTransformState &State) override; 957 958 /// Print the recipe. 959 void print(raw_ostream &O, const Twine &Indent) const override; 960 }; 961 962 /// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It 963 /// holds a sequence of zero or more VPRecipe's each representing a sequence of 964 /// output IR instructions. 965 class VPBasicBlock : public VPBlockBase { 966 public: 967 using RecipeListTy = iplist<VPRecipeBase>; 968 969 private: 970 /// The VPRecipes held in the order of output instructions to generate. 971 RecipeListTy Recipes; 972 973 public: 974 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr) 975 : VPBlockBase(VPBasicBlockSC, Name.str()) { 976 if (Recipe) 977 appendRecipe(Recipe); 978 } 979 ~VPBasicBlock()980 ~VPBasicBlock() override { Recipes.clear(); } 981 982 /// Instruction iterators... 983 using iterator = RecipeListTy::iterator; 984 using const_iterator = RecipeListTy::const_iterator; 985 using reverse_iterator = RecipeListTy::reverse_iterator; 986 using const_reverse_iterator = RecipeListTy::const_reverse_iterator; 987 988 //===--------------------------------------------------------------------===// 989 /// Recipe iterator methods 990 /// begin()991 inline iterator begin() { return Recipes.begin(); } begin()992 inline const_iterator begin() const { return Recipes.begin(); } end()993 inline iterator end() { return Recipes.end(); } end()994 inline const_iterator end() const { return Recipes.end(); } 995 rbegin()996 inline reverse_iterator rbegin() { return Recipes.rbegin(); } rbegin()997 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); } rend()998 inline reverse_iterator rend() { return Recipes.rend(); } rend()999 inline const_reverse_iterator rend() const { return Recipes.rend(); } 1000 size()1001 inline size_t size() const { return Recipes.size(); } empty()1002 inline bool empty() const { return Recipes.empty(); } front()1003 inline const VPRecipeBase &front() const { return Recipes.front(); } front()1004 inline VPRecipeBase &front() { return Recipes.front(); } back()1005 inline const VPRecipeBase &back() const { return Recipes.back(); } back()1006 inline VPRecipeBase &back() { return Recipes.back(); } 1007 1008 /// Returns a reference to the list of recipes. getRecipeList()1009 RecipeListTy &getRecipeList() { return Recipes; } 1010 1011 /// Returns a pointer to a member of the recipe list. getSublistAccess(VPRecipeBase *)1012 static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) { 1013 return &VPBasicBlock::Recipes; 1014 } 1015 1016 /// Method to support type inquiry through isa, cast, and dyn_cast. classof(const VPBlockBase * V)1017 static inline bool classof(const VPBlockBase *V) { 1018 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC; 1019 } 1020 insert(VPRecipeBase * Recipe,iterator InsertPt)1021 void insert(VPRecipeBase *Recipe, iterator InsertPt) { 1022 assert(Recipe && "No recipe to append."); 1023 assert(!Recipe->Parent && "Recipe already in VPlan"); 1024 Recipe->Parent = this; 1025 Recipes.insert(InsertPt, Recipe); 1026 } 1027 1028 /// Augment the existing recipes of a VPBasicBlock with an additional 1029 /// \p Recipe as the last recipe. appendRecipe(VPRecipeBase * Recipe)1030 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); } 1031 1032 /// The method which generates the output IR instructions that correspond to 1033 /// this VPBasicBlock, thereby "executing" the VPlan. 1034 void execute(struct VPTransformState *State) override; 1035 1036 private: 1037 /// Create an IR BasicBlock to hold the output instructions generated by this 1038 /// VPBasicBlock, and return it. Update the CFGState accordingly. 1039 BasicBlock *createEmptyBasicBlock(VPTransformState::CFGState &CFG); 1040 }; 1041 1042 /// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks 1043 /// which form a Single-Entry-Single-Exit subgraph of the output IR CFG. 1044 /// A VPRegionBlock may indicate that its contents are to be replicated several 1045 /// times. This is designed to support predicated scalarization, in which a 1046 /// scalar if-then code structure needs to be generated VF * UF times. Having 1047 /// this replication indicator helps to keep a single model for multiple 1048 /// candidate VF's. The actual replication takes place only once the desired VF 1049 /// and UF have been determined. 1050 class VPRegionBlock : public VPBlockBase { 1051 private: 1052 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock. 1053 VPBlockBase *Entry; 1054 1055 /// Hold the Single Exit of the SESE region modelled by the VPRegionBlock. 1056 VPBlockBase *Exit; 1057 1058 /// An indicator whether this region is to generate multiple replicated 1059 /// instances of output IR corresponding to its VPBlockBases. 1060 bool IsReplicator; 1061 1062 public: 1063 VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exit, 1064 const std::string &Name = "", bool IsReplicator = false) VPBlockBase(VPRegionBlockSC,Name)1065 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exit(Exit), 1066 IsReplicator(IsReplicator) { 1067 assert(Entry->getPredecessors().empty() && "Entry block has predecessors."); 1068 assert(Exit->getSuccessors().empty() && "Exit block has successors."); 1069 Entry->setParent(this); 1070 Exit->setParent(this); 1071 } 1072 VPRegionBlock(const std::string &Name = "", bool IsReplicator = false) VPBlockBase(VPRegionBlockSC,Name)1073 : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exit(nullptr), 1074 IsReplicator(IsReplicator) {} 1075 ~VPRegionBlock()1076 ~VPRegionBlock() override { 1077 if (Entry) 1078 deleteCFG(Entry); 1079 } 1080 1081 /// Method to support type inquiry through isa, cast, and dyn_cast. classof(const VPBlockBase * V)1082 static inline bool classof(const VPBlockBase *V) { 1083 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC; 1084 } 1085 getEntry()1086 const VPBlockBase *getEntry() const { return Entry; } getEntry()1087 VPBlockBase *getEntry() { return Entry; } 1088 1089 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p 1090 /// EntryBlock must have no predecessors. setEntry(VPBlockBase * EntryBlock)1091 void setEntry(VPBlockBase *EntryBlock) { 1092 assert(EntryBlock->getPredecessors().empty() && 1093 "Entry block cannot have predecessors."); 1094 Entry = EntryBlock; 1095 EntryBlock->setParent(this); 1096 } 1097 1098 // FIXME: DominatorTreeBase is doing 'A->getParent()->front()'. 'front' is a 1099 // specific interface of llvm::Function, instead of using 1100 // GraphTraints::getEntryNode. We should add a new template parameter to 1101 // DominatorTreeBase representing the Graph type. front()1102 VPBlockBase &front() const { return *Entry; } 1103 getExit()1104 const VPBlockBase *getExit() const { return Exit; } getExit()1105 VPBlockBase *getExit() { return Exit; } 1106 1107 /// Set \p ExitBlock as the exit VPBlockBase of this VPRegionBlock. \p 1108 /// ExitBlock must have no successors. setExit(VPBlockBase * ExitBlock)1109 void setExit(VPBlockBase *ExitBlock) { 1110 assert(ExitBlock->getSuccessors().empty() && 1111 "Exit block cannot have successors."); 1112 Exit = ExitBlock; 1113 ExitBlock->setParent(this); 1114 } 1115 1116 /// An indicator whether this region is to generate multiple replicated 1117 /// instances of output IR corresponding to its VPBlockBases. isReplicator()1118 bool isReplicator() const { return IsReplicator; } 1119 1120 /// The method which generates the output IR instructions that correspond to 1121 /// this VPRegionBlock, thereby "executing" the VPlan. 1122 void execute(struct VPTransformState *State) override; 1123 }; 1124 1125 /// VPlan models a candidate for vectorization, encoding various decisions take 1126 /// to produce efficient output IR, including which branches, basic-blocks and 1127 /// output IR instructions to generate, and their cost. VPlan holds a 1128 /// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry 1129 /// VPBlock. 1130 class VPlan { 1131 friend class VPlanPrinter; 1132 1133 private: 1134 /// Hold the single entry to the Hierarchical CFG of the VPlan. 1135 VPBlockBase *Entry; 1136 1137 /// Holds the VFs applicable to this VPlan. 1138 SmallSet<unsigned, 2> VFs; 1139 1140 /// Holds the name of the VPlan, for printing. 1141 std::string Name; 1142 1143 /// Holds all the external definitions created for this VPlan. 1144 // TODO: Introduce a specific representation for external definitions in 1145 // VPlan. External definitions must be immutable and hold a pointer to its 1146 // underlying IR that will be used to implement its structural comparison 1147 // (operators '==' and '<'). 1148 SmallPtrSet<VPValue *, 16> VPExternalDefs; 1149 1150 /// Represents the backedge taken count of the original loop, for folding 1151 /// the tail. 1152 VPValue *BackedgeTakenCount = nullptr; 1153 1154 /// Holds a mapping between Values and their corresponding VPValue inside 1155 /// VPlan. 1156 Value2VPValueTy Value2VPValue; 1157 1158 /// Holds the VPLoopInfo analysis for this VPlan. 1159 VPLoopInfo VPLInfo; 1160 1161 /// Holds the condition bit values built during VPInstruction to VPRecipe transformation. 1162 SmallVector<VPValue *, 4> VPCBVs; 1163 1164 public: Entry(Entry)1165 VPlan(VPBlockBase *Entry = nullptr) : Entry(Entry) {} 1166 ~VPlan()1167 ~VPlan() { 1168 if (Entry) 1169 VPBlockBase::deleteCFG(Entry); 1170 for (auto &MapEntry : Value2VPValue) 1171 if (MapEntry.second != BackedgeTakenCount) 1172 delete MapEntry.second; 1173 if (BackedgeTakenCount) 1174 delete BackedgeTakenCount; // Delete once, if in Value2VPValue or not. 1175 for (VPValue *Def : VPExternalDefs) 1176 delete Def; 1177 for (VPValue *CBV : VPCBVs) 1178 delete CBV; 1179 } 1180 1181 /// Generate the IR code for this VPlan. 1182 void execute(struct VPTransformState *State); 1183 getEntry()1184 VPBlockBase *getEntry() { return Entry; } getEntry()1185 const VPBlockBase *getEntry() const { return Entry; } 1186 setEntry(VPBlockBase * Block)1187 VPBlockBase *setEntry(VPBlockBase *Block) { return Entry = Block; } 1188 1189 /// The backedge taken count of the original loop. getOrCreateBackedgeTakenCount()1190 VPValue *getOrCreateBackedgeTakenCount() { 1191 if (!BackedgeTakenCount) 1192 BackedgeTakenCount = new VPValue(); 1193 return BackedgeTakenCount; 1194 } 1195 addVF(unsigned VF)1196 void addVF(unsigned VF) { VFs.insert(VF); } 1197 hasVF(unsigned VF)1198 bool hasVF(unsigned VF) { return VFs.count(VF); } 1199 getName()1200 const std::string &getName() const { return Name; } 1201 setName(const Twine & newName)1202 void setName(const Twine &newName) { Name = newName.str(); } 1203 1204 /// Add \p VPVal to the pool of external definitions if it's not already 1205 /// in the pool. addExternalDef(VPValue * VPVal)1206 void addExternalDef(VPValue *VPVal) { 1207 VPExternalDefs.insert(VPVal); 1208 } 1209 1210 /// Add \p CBV to the vector of condition bit values. addCBV(VPValue * CBV)1211 void addCBV(VPValue *CBV) { 1212 VPCBVs.push_back(CBV); 1213 } 1214 addVPValue(Value * V)1215 void addVPValue(Value *V) { 1216 assert(V && "Trying to add a null Value to VPlan"); 1217 assert(!Value2VPValue.count(V) && "Value already exists in VPlan"); 1218 Value2VPValue[V] = new VPValue(); 1219 } 1220 getVPValue(Value * V)1221 VPValue *getVPValue(Value *V) { 1222 assert(V && "Trying to get the VPValue of a null Value"); 1223 assert(Value2VPValue.count(V) && "Value does not exist in VPlan"); 1224 return Value2VPValue[V]; 1225 } 1226 1227 /// Return the VPLoopInfo analysis for this VPlan. getVPLoopInfo()1228 VPLoopInfo &getVPLoopInfo() { return VPLInfo; } getVPLoopInfo()1229 const VPLoopInfo &getVPLoopInfo() const { return VPLInfo; } 1230 1231 private: 1232 /// Add to the given dominator tree the header block and every new basic block 1233 /// that was created between it and the latch block, inclusive. 1234 static void updateDominatorTree(DominatorTree *DT, 1235 BasicBlock *LoopPreHeaderBB, 1236 BasicBlock *LoopLatchBB); 1237 }; 1238 1239 /// VPlanPrinter prints a given VPlan to a given output stream. The printing is 1240 /// indented and follows the dot format. 1241 class VPlanPrinter { 1242 friend inline raw_ostream &operator<<(raw_ostream &OS, VPlan &Plan); 1243 friend inline raw_ostream &operator<<(raw_ostream &OS, 1244 const struct VPlanIngredient &I); 1245 1246 private: 1247 raw_ostream &OS; 1248 VPlan &Plan; 1249 unsigned Depth; 1250 unsigned TabWidth = 2; 1251 std::string Indent; 1252 unsigned BID = 0; 1253 SmallDenseMap<const VPBlockBase *, unsigned> BlockID; 1254 VPlanPrinter(raw_ostream & O,VPlan & P)1255 VPlanPrinter(raw_ostream &O, VPlan &P) : OS(O), Plan(P) {} 1256 1257 /// Handle indentation. bumpIndent(int b)1258 void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); } 1259 1260 /// Print a given \p Block of the Plan. 1261 void dumpBlock(const VPBlockBase *Block); 1262 1263 /// Print the information related to the CFG edges going out of a given 1264 /// \p Block, followed by printing the successor blocks themselves. 1265 void dumpEdges(const VPBlockBase *Block); 1266 1267 /// Print a given \p BasicBlock, including its VPRecipes, followed by printing 1268 /// its successor blocks. 1269 void dumpBasicBlock(const VPBasicBlock *BasicBlock); 1270 1271 /// Print a given \p Region of the Plan. 1272 void dumpRegion(const VPRegionBlock *Region); 1273 getOrCreateBID(const VPBlockBase * Block)1274 unsigned getOrCreateBID(const VPBlockBase *Block) { 1275 return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++; 1276 } 1277 1278 const Twine getOrCreateName(const VPBlockBase *Block); 1279 1280 const Twine getUID(const VPBlockBase *Block); 1281 1282 /// Print the information related to a CFG edge between two VPBlockBases. 1283 void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden, 1284 const Twine &Label); 1285 1286 void dump(); 1287 1288 static void printAsIngredient(raw_ostream &O, Value *V); 1289 }; 1290 1291 struct VPlanIngredient { 1292 Value *V; 1293 VPlanIngredientVPlanIngredient1294 VPlanIngredient(Value *V) : V(V) {} 1295 }; 1296 1297 inline raw_ostream &operator<<(raw_ostream &OS, const VPlanIngredient &I) { 1298 VPlanPrinter::printAsIngredient(OS, I.V); 1299 return OS; 1300 } 1301 1302 inline raw_ostream &operator<<(raw_ostream &OS, VPlan &Plan) { 1303 VPlanPrinter Printer(OS, Plan); 1304 Printer.dump(); 1305 return OS; 1306 } 1307 1308 //===----------------------------------------------------------------------===// 1309 // GraphTraits specializations for VPlan Hierarchical Control-Flow Graphs // 1310 //===----------------------------------------------------------------------===// 1311 1312 // The following set of template specializations implement GraphTraits to treat 1313 // any VPBlockBase as a node in a graph of VPBlockBases. It's important to note 1314 // that VPBlockBase traits don't recurse into VPRegioBlocks, i.e., if the 1315 // VPBlockBase is a VPRegionBlock, this specialization provides access to its 1316 // successors/predecessors but not to the blocks inside the region. 1317 1318 template <> struct GraphTraits<VPBlockBase *> { 1319 using NodeRef = VPBlockBase *; 1320 using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::iterator; 1321 1322 static NodeRef getEntryNode(NodeRef N) { return N; } 1323 1324 static inline ChildIteratorType child_begin(NodeRef N) { 1325 return N->getSuccessors().begin(); 1326 } 1327 1328 static inline ChildIteratorType child_end(NodeRef N) { 1329 return N->getSuccessors().end(); 1330 } 1331 }; 1332 1333 template <> struct GraphTraits<const VPBlockBase *> { 1334 using NodeRef = const VPBlockBase *; 1335 using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::const_iterator; 1336 1337 static NodeRef getEntryNode(NodeRef N) { return N; } 1338 1339 static inline ChildIteratorType child_begin(NodeRef N) { 1340 return N->getSuccessors().begin(); 1341 } 1342 1343 static inline ChildIteratorType child_end(NodeRef N) { 1344 return N->getSuccessors().end(); 1345 } 1346 }; 1347 1348 // Inverse order specialization for VPBasicBlocks. Predecessors are used instead 1349 // of successors for the inverse traversal. 1350 template <> struct GraphTraits<Inverse<VPBlockBase *>> { 1351 using NodeRef = VPBlockBase *; 1352 using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::iterator; 1353 1354 static NodeRef getEntryNode(Inverse<NodeRef> B) { return B.Graph; } 1355 1356 static inline ChildIteratorType child_begin(NodeRef N) { 1357 return N->getPredecessors().begin(); 1358 } 1359 1360 static inline ChildIteratorType child_end(NodeRef N) { 1361 return N->getPredecessors().end(); 1362 } 1363 }; 1364 1365 // The following set of template specializations implement GraphTraits to 1366 // treat VPRegionBlock as a graph and recurse inside its nodes. It's important 1367 // to note that the blocks inside the VPRegionBlock are treated as VPBlockBases 1368 // (i.e., no dyn_cast is performed, VPBlockBases specialization is used), so 1369 // there won't be automatic recursion into other VPBlockBases that turn to be 1370 // VPRegionBlocks. 1371 1372 template <> 1373 struct GraphTraits<VPRegionBlock *> : public GraphTraits<VPBlockBase *> { 1374 using GraphRef = VPRegionBlock *; 1375 using nodes_iterator = df_iterator<NodeRef>; 1376 1377 static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); } 1378 1379 static nodes_iterator nodes_begin(GraphRef N) { 1380 return nodes_iterator::begin(N->getEntry()); 1381 } 1382 1383 static nodes_iterator nodes_end(GraphRef N) { 1384 // df_iterator::end() returns an empty iterator so the node used doesn't 1385 // matter. 1386 return nodes_iterator::end(N); 1387 } 1388 }; 1389 1390 template <> 1391 struct GraphTraits<const VPRegionBlock *> 1392 : public GraphTraits<const VPBlockBase *> { 1393 using GraphRef = const VPRegionBlock *; 1394 using nodes_iterator = df_iterator<NodeRef>; 1395 1396 static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); } 1397 1398 static nodes_iterator nodes_begin(GraphRef N) { 1399 return nodes_iterator::begin(N->getEntry()); 1400 } 1401 1402 static nodes_iterator nodes_end(GraphRef N) { 1403 // df_iterator::end() returns an empty iterator so the node used doesn't 1404 // matter. 1405 return nodes_iterator::end(N); 1406 } 1407 }; 1408 1409 template <> 1410 struct GraphTraits<Inverse<VPRegionBlock *>> 1411 : public GraphTraits<Inverse<VPBlockBase *>> { 1412 using GraphRef = VPRegionBlock *; 1413 using nodes_iterator = df_iterator<NodeRef>; 1414 1415 static NodeRef getEntryNode(Inverse<GraphRef> N) { 1416 return N.Graph->getExit(); 1417 } 1418 1419 static nodes_iterator nodes_begin(GraphRef N) { 1420 return nodes_iterator::begin(N->getExit()); 1421 } 1422 1423 static nodes_iterator nodes_end(GraphRef N) { 1424 // df_iterator::end() returns an empty iterator so the node used doesn't 1425 // matter. 1426 return nodes_iterator::end(N); 1427 } 1428 }; 1429 1430 //===----------------------------------------------------------------------===// 1431 // VPlan Utilities 1432 //===----------------------------------------------------------------------===// 1433 1434 /// Class that provides utilities for VPBlockBases in VPlan. 1435 class VPBlockUtils { 1436 public: 1437 VPBlockUtils() = delete; 1438 1439 /// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p 1440 /// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p 1441 /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. If \p BlockPtr 1442 /// has more than one successor, its conditional bit is propagated to \p 1443 /// NewBlock. \p NewBlock must have neither successors nor predecessors. 1444 static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) { 1445 assert(NewBlock->getSuccessors().empty() && 1446 "Can't insert new block with successors."); 1447 // TODO: move successors from BlockPtr to NewBlock when this functionality 1448 // is necessary. For now, setBlockSingleSuccessor will assert if BlockPtr 1449 // already has successors. 1450 BlockPtr->setOneSuccessor(NewBlock); 1451 NewBlock->setPredecessors({BlockPtr}); 1452 NewBlock->setParent(BlockPtr->getParent()); 1453 } 1454 1455 /// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p 1456 /// BlockPtr. Add \p IfTrue and \p IfFalse as succesors of \p BlockPtr and \p 1457 /// BlockPtr as predecessor of \p IfTrue and \p IfFalse. Propagate \p BlockPtr 1458 /// parent to \p IfTrue and \p IfFalse. \p Condition is set as the successor 1459 /// selector. \p BlockPtr must have no successors and \p IfTrue and \p IfFalse 1460 /// must have neither successors nor predecessors. 1461 static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, 1462 VPValue *Condition, VPBlockBase *BlockPtr) { 1463 assert(IfTrue->getSuccessors().empty() && 1464 "Can't insert IfTrue with successors."); 1465 assert(IfFalse->getSuccessors().empty() && 1466 "Can't insert IfFalse with successors."); 1467 BlockPtr->setTwoSuccessors(IfTrue, IfFalse, Condition); 1468 IfTrue->setPredecessors({BlockPtr}); 1469 IfFalse->setPredecessors({BlockPtr}); 1470 IfTrue->setParent(BlockPtr->getParent()); 1471 IfFalse->setParent(BlockPtr->getParent()); 1472 } 1473 1474 /// Connect VPBlockBases \p From and \p To bi-directionally. Append \p To to 1475 /// the successors of \p From and \p From to the predecessors of \p To. Both 1476 /// VPBlockBases must have the same parent, which can be null. Both 1477 /// VPBlockBases can be already connected to other VPBlockBases. 1478 static void connectBlocks(VPBlockBase *From, VPBlockBase *To) { 1479 assert((From->getParent() == To->getParent()) && 1480 "Can't connect two block with different parents"); 1481 assert(From->getNumSuccessors() < 2 && 1482 "Blocks can't have more than two successors."); 1483 From->appendSuccessor(To); 1484 To->appendPredecessor(From); 1485 } 1486 1487 /// Disconnect VPBlockBases \p From and \p To bi-directionally. Remove \p To 1488 /// from the successors of \p From and \p From from the predecessors of \p To. 1489 static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To) { 1490 assert(To && "Successor to disconnect is null."); 1491 From->removeSuccessor(To); 1492 To->removePredecessor(From); 1493 } 1494 }; 1495 1496 class VPInterleavedAccessInfo { 1497 private: 1498 DenseMap<VPInstruction *, InterleaveGroup<VPInstruction> *> 1499 InterleaveGroupMap; 1500 1501 /// Type for mapping of instruction based interleave groups to VPInstruction 1502 /// interleave groups 1503 using Old2NewTy = DenseMap<InterleaveGroup<Instruction> *, 1504 InterleaveGroup<VPInstruction> *>; 1505 1506 /// Recursively \p Region and populate VPlan based interleave groups based on 1507 /// \p IAI. 1508 void visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New, 1509 InterleavedAccessInfo &IAI); 1510 /// Recursively traverse \p Block and populate VPlan based interleave groups 1511 /// based on \p IAI. 1512 void visitBlock(VPBlockBase *Block, Old2NewTy &Old2New, 1513 InterleavedAccessInfo &IAI); 1514 1515 public: 1516 VPInterleavedAccessInfo(VPlan &Plan, InterleavedAccessInfo &IAI); 1517 1518 ~VPInterleavedAccessInfo() { 1519 SmallPtrSet<InterleaveGroup<VPInstruction> *, 4> DelSet; 1520 // Avoid releasing a pointer twice. 1521 for (auto &I : InterleaveGroupMap) 1522 DelSet.insert(I.second); 1523 for (auto *Ptr : DelSet) 1524 delete Ptr; 1525 } 1526 1527 /// Get the interleave group that \p Instr belongs to. 1528 /// 1529 /// \returns nullptr if doesn't have such group. 1530 InterleaveGroup<VPInstruction> * 1531 getInterleaveGroup(VPInstruction *Instr) const { 1532 if (InterleaveGroupMap.count(Instr)) 1533 return InterleaveGroupMap.find(Instr)->second; 1534 return nullptr; 1535 } 1536 }; 1537 1538 /// Class that maps (parts of) an existing VPlan to trees of combined 1539 /// VPInstructions. 1540 class VPlanSlp { 1541 private: 1542 enum class OpMode { Failed, Load, Opcode }; 1543 1544 /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as 1545 /// DenseMap keys. 1546 struct BundleDenseMapInfo { 1547 static SmallVector<VPValue *, 4> getEmptyKey() { 1548 return {reinterpret_cast<VPValue *>(-1)}; 1549 } 1550 1551 static SmallVector<VPValue *, 4> getTombstoneKey() { 1552 return {reinterpret_cast<VPValue *>(-2)}; 1553 } 1554 1555 static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) { 1556 return static_cast<unsigned>(hash_combine_range(V.begin(), V.end())); 1557 } 1558 1559 static bool isEqual(const SmallVector<VPValue *, 4> &LHS, 1560 const SmallVector<VPValue *, 4> &RHS) { 1561 return LHS == RHS; 1562 } 1563 }; 1564 1565 /// Mapping of values in the original VPlan to a combined VPInstruction. 1566 DenseMap<SmallVector<VPValue *, 4>, VPInstruction *, BundleDenseMapInfo> 1567 BundleToCombined; 1568 1569 VPInterleavedAccessInfo &IAI; 1570 1571 /// Basic block to operate on. For now, only instructions in a single BB are 1572 /// considered. 1573 const VPBasicBlock &BB; 1574 1575 /// Indicates whether we managed to combine all visited instructions or not. 1576 bool CompletelySLP = true; 1577 1578 /// Width of the widest combined bundle in bits. 1579 unsigned WidestBundleBits = 0; 1580 1581 using MultiNodeOpTy = 1582 typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>; 1583 1584 // Input operand bundles for the current multi node. Each multi node operand 1585 // bundle contains values not matching the multi node's opcode. They will 1586 // be reordered in reorderMultiNodeOps, once we completed building a 1587 // multi node. 1588 SmallVector<MultiNodeOpTy, 4> MultiNodeOps; 1589 1590 /// Indicates whether we are building a multi node currently. 1591 bool MultiNodeActive = false; 1592 1593 /// Check if we can vectorize Operands together. 1594 bool areVectorizable(ArrayRef<VPValue *> Operands) const; 1595 1596 /// Add combined instruction \p New for the bundle \p Operands. 1597 void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New); 1598 1599 /// Indicate we hit a bundle we failed to combine. Returns nullptr for now. 1600 VPInstruction *markFailed(); 1601 1602 /// Reorder operands in the multi node to maximize sequential memory access 1603 /// and commutative operations. 1604 SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps(); 1605 1606 /// Choose the best candidate to use for the lane after \p Last. The set of 1607 /// candidates to choose from are values with an opcode matching \p Last's 1608 /// or loads consecutive to \p Last. 1609 std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last, 1610 SmallPtrSetImpl<VPValue *> &Candidates, 1611 VPInterleavedAccessInfo &IAI); 1612 1613 /// Print bundle \p Values to dbgs(). 1614 void dumpBundle(ArrayRef<VPValue *> Values); 1615 1616 public: 1617 VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {} 1618 1619 ~VPlanSlp() { 1620 for (auto &KV : BundleToCombined) 1621 delete KV.second; 1622 } 1623 1624 /// Tries to build an SLP tree rooted at \p Operands and returns a 1625 /// VPInstruction combining \p Operands, if they can be combined. 1626 VPInstruction *buildGraph(ArrayRef<VPValue *> Operands); 1627 1628 /// Return the width of the widest combined bundle in bits. 1629 unsigned getWidestBundleBits() const { return WidestBundleBits; } 1630 1631 /// Return true if all visited instruction can be combined. 1632 bool isCompletelySLP() const { return CompletelySLP; } 1633 }; 1634 } // end namespace llvm 1635 1636 #endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H 1637