1 //===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file contains the declarations of the Vectorization Plan base classes:
11 /// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12 ///    VPBlockBase, together implementing a Hierarchical CFG;
13 /// 2. Specializations of GraphTraits that allow VPBlockBase graphs to be
14 ///    treated as proper graphs for generic algorithms;
15 /// 3. Pure virtual VPRecipeBase serving as the base class for recipes contained
16 ///    within VPBasicBlocks;
17 /// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18 ///    instruction;
19 /// 5. The VPlan class holding a candidate for vectorization;
20 /// 6. The VPlanPrinter class providing a way to print a plan in dot format;
21 /// These are documented in docs/VectorizationPlan.rst.
22 //
23 //===----------------------------------------------------------------------===//
24 
25 #ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26 #define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
27 
28 #include "VPlanLoopInfo.h"
29 #include "VPlanValue.h"
30 #include "llvm/ADT/DenseMap.h"
31 #include "llvm/ADT/DepthFirstIterator.h"
32 #include "llvm/ADT/GraphTraits.h"
33 #include "llvm/ADT/Optional.h"
34 #include "llvm/ADT/SmallBitVector.h"
35 #include "llvm/ADT/SmallPtrSet.h"
36 #include "llvm/ADT/SmallSet.h"
37 #include "llvm/ADT/SmallVector.h"
38 #include "llvm/ADT/Twine.h"
39 #include "llvm/ADT/ilist.h"
40 #include "llvm/ADT/ilist_node.h"
41 #include "llvm/Analysis/VectorUtils.h"
42 #include "llvm/IR/IRBuilder.h"
43 #include <algorithm>
44 #include <cassert>
45 #include <cstddef>
46 #include <map>
47 #include <string>
48 
49 namespace llvm {
50 
51 class BasicBlock;
52 class DominatorTree;
53 class InnerLoopVectorizer;
54 class LoopInfo;
55 class raw_ostream;
56 class RecurrenceDescriptor;
57 class Value;
58 class VPBasicBlock;
59 class VPRegionBlock;
60 class VPlan;
61 class VPlanSlp;
62 
63 /// Returns a calculation for the total number of elements for a given \p VF.
64 /// For fixed width vectors this value is a constant, whereas for scalable
65 /// vectors it is an expression determined at runtime.
66 Value *getRuntimeVF(IRBuilder<> &B, Type *Ty, ElementCount VF);
67 
/// A range of powers-of-2 vectorization factors with fixed start and
/// adjustable end. The range includes start and excludes end, e.g.,:
/// [1, 9) = {1, 2, 4, 8}
struct VFRange {
  // A power of 2.
  const ElementCount Start;

  // Need not be a power of 2. If End <= Start range is empty.
  ElementCount End;

  /// Returns true if the range contains no vectorization factors, i.e. when
  /// End <= Start (comparing the known minimum element counts).
  bool isEmpty() const {
    return End.getKnownMinValue() <= Start.getKnownMinValue();
  }

  /// Construct a range [Start, End). Both bounds must agree on scalability and
  /// Start must be a power of 2.
  VFRange(const ElementCount &Start, const ElementCount &End)
      : Start(Start), End(End) {
    assert(Start.isScalable() == End.isScalable() &&
           "Both Start and End should have the same scalable flag");
    assert(isPowerOf2_32(Start.getKnownMinValue()) &&
           "Expected Start to be a power of 2");
  }
};
90 
91 using VPlanPtr = std::unique_ptr<VPlan>;
92 
93 /// In what follows, the term "input IR" refers to code that is fed into the
94 /// vectorizer whereas the term "output IR" refers to code that is generated by
95 /// the vectorizer.
96 
97 /// VPLane provides a way to access lanes in both fixed width and scalable
98 /// vectors, where for the latter the lane index sometimes needs calculating
99 /// as a runtime expression.
100 class VPLane {
101 public:
102   /// Kind describes how to interpret Lane.
103   enum class Kind : uint8_t {
104     /// For First, Lane is the index into the first N elements of a
105     /// fixed-vector <N x <ElTy>> or a scalable vector <vscale x N x <ElTy>>.
106     First,
107     /// For ScalableLast, Lane is the offset from the start of the last
108     /// N-element subvector in a scalable vector <vscale x N x <ElTy>>. For
109     /// example, a Lane of 0 corresponds to lane `(vscale - 1) * N`, a Lane of
110     /// 1 corresponds to `((vscale - 1) * N) + 1`, etc.
111     ScalableLast
112   };
113 
114 private:
115   /// in [0..VF)
116   unsigned Lane;
117 
118   /// Indicates how the Lane should be interpreted, as described above.
119   Kind LaneKind;
120 
121 public:
122   VPLane(unsigned Lane, Kind LaneKind) : Lane(Lane), LaneKind(LaneKind) {}
123 
124   static VPLane getFirstLane() { return VPLane(0, VPLane::Kind::First); }
125 
126   static VPLane getLastLaneForVF(const ElementCount &VF) {
127     unsigned LaneOffset = VF.getKnownMinValue() - 1;
128     Kind LaneKind;
129     if (VF.isScalable())
130       // In this case 'LaneOffset' refers to the offset from the start of the
131       // last subvector with VF.getKnownMinValue() elements.
132       LaneKind = VPLane::Kind::ScalableLast;
133     else
134       LaneKind = VPLane::Kind::First;
135     return VPLane(LaneOffset, LaneKind);
136   }
137 
138   /// Returns a compile-time known value for the lane index and asserts if the
139   /// lane can only be calculated at runtime.
140   unsigned getKnownLane() const {
141     assert(LaneKind == Kind::First);
142     return Lane;
143   }
144 
145   /// Returns an expression describing the lane index that can be used at
146   /// runtime.
147   Value *getAsRuntimeExpr(IRBuilder<> &Builder, const ElementCount &VF) const;
148 
149   /// Returns the Kind of lane offset.
150   Kind getKind() const { return LaneKind; }
151 
152   /// Returns true if this is the first lane of the whole vector.
153   bool isFirstLane() const { return Lane == 0 && LaneKind == Kind::First; }
154 
155   /// Maps the lane to a cache index based on \p VF.
156   unsigned mapToCacheIndex(const ElementCount &VF) const {
157     switch (LaneKind) {
158     case VPLane::Kind::ScalableLast:
159       assert(VF.isScalable() && Lane < VF.getKnownMinValue());
160       return VF.getKnownMinValue() + Lane;
161     default:
162       assert(Lane < VF.getKnownMinValue());
163       return Lane;
164     }
165   }
166 
167   /// Returns the maxmimum number of lanes that we are able to consider
168   /// caching for \p VF.
169   static unsigned getNumCachedLanes(const ElementCount &VF) {
170     return VF.getKnownMinValue() * (VF.isScalable() ? 2 : 1);
171   }
172 };
173 
/// VPIteration represents a single point in the iteration space of the output
/// (vectorized and/or unrolled) IR loop.
struct VPIteration {
  /// in [0..UF)
  unsigned Part;

  /// The lane within part \p Part.
  VPLane Lane;

  /// Construct from a part and a compile-time known lane index (by default
  /// interpreted relative to the start of the vector, Kind::First).
  VPIteration(unsigned Part, unsigned Lane,
              VPLane::Kind Kind = VPLane::Kind::First)
      : Part(Part), Lane(Lane, Kind) {}

  /// Construct from a part and an already-formed VPLane.
  VPIteration(unsigned Part, const VPLane &Lane) : Part(Part), Lane(Lane) {}

  /// Returns true for the very first iteration point: part 0, lane 0.
  bool isFirstIteration() const { return Part == 0 && Lane.isFirstLane(); }
};
190 
191 /// VPTransformState holds information passed down when "executing" a VPlan,
192 /// needed for generating the output IR.
193 struct VPTransformState {
194   VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI,
195                    DominatorTree *DT, IRBuilder<> &Builder,
196                    InnerLoopVectorizer *ILV, VPlan *Plan)
197       : VF(VF), UF(UF), Instance(), LI(LI), DT(DT), Builder(Builder), ILV(ILV),
198         Plan(Plan) {}
199 
200   /// The chosen Vectorization and Unroll Factors of the loop being vectorized.
201   ElementCount VF;
202   unsigned UF;
203 
204   /// Hold the indices to generate specific scalar instructions. Null indicates
205   /// that all instances are to be generated, using either scalar or vector
206   /// instructions.
207   Optional<VPIteration> Instance;
208 
209   struct DataState {
210     /// A type for vectorized values in the new loop. Each value from the
211     /// original loop, when vectorized, is represented by UF vector values in
212     /// the new unrolled loop, where UF is the unroll factor.
213     typedef SmallVector<Value *, 2> PerPartValuesTy;
214 
215     DenseMap<VPValue *, PerPartValuesTy> PerPartOutput;
216 
217     using ScalarsPerPartValuesTy = SmallVector<SmallVector<Value *, 4>, 2>;
218     DenseMap<VPValue *, ScalarsPerPartValuesTy> PerPartScalars;
219   } Data;
220 
221   /// Get the generated Value for a given VPValue and a given Part. Note that
222   /// as some Defs are still created by ILV and managed in its ValueMap, this
223   /// method will delegate the call to ILV in such cases in order to provide
224   /// callers a consistent API.
225   /// \see set.
226   Value *get(VPValue *Def, unsigned Part);
227 
228   /// Get the generated Value for a given VPValue and given Part and Lane.
229   Value *get(VPValue *Def, const VPIteration &Instance);
230 
231   bool hasVectorValue(VPValue *Def, unsigned Part) {
232     auto I = Data.PerPartOutput.find(Def);
233     return I != Data.PerPartOutput.end() && Part < I->second.size() &&
234            I->second[Part];
235   }
236 
237   bool hasAnyVectorValue(VPValue *Def) const {
238     return Data.PerPartOutput.find(Def) != Data.PerPartOutput.end();
239   }
240 
241   bool hasScalarValue(VPValue *Def, VPIteration Instance) {
242     auto I = Data.PerPartScalars.find(Def);
243     if (I == Data.PerPartScalars.end())
244       return false;
245     unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
246     return Instance.Part < I->second.size() &&
247            CacheIdx < I->second[Instance.Part].size() &&
248            I->second[Instance.Part][CacheIdx];
249   }
250 
251   /// Set the generated Value for a given VPValue and a given Part.
252   void set(VPValue *Def, Value *V, unsigned Part) {
253     if (!Data.PerPartOutput.count(Def)) {
254       DataState::PerPartValuesTy Entry(UF);
255       Data.PerPartOutput[Def] = Entry;
256     }
257     Data.PerPartOutput[Def][Part] = V;
258   }
259   /// Reset an existing vector value for \p Def and a given \p Part.
260   void reset(VPValue *Def, Value *V, unsigned Part) {
261     auto Iter = Data.PerPartOutput.find(Def);
262     assert(Iter != Data.PerPartOutput.end() &&
263            "need to overwrite existing value");
264     Iter->second[Part] = V;
265   }
266 
267   /// Set the generated scalar \p V for \p Def and the given \p Instance.
268   void set(VPValue *Def, Value *V, const VPIteration &Instance) {
269     auto Iter = Data.PerPartScalars.insert({Def, {}});
270     auto &PerPartVec = Iter.first->second;
271     while (PerPartVec.size() <= Instance.Part)
272       PerPartVec.emplace_back();
273     auto &Scalars = PerPartVec[Instance.Part];
274     unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
275     while (Scalars.size() <= CacheIdx)
276       Scalars.push_back(nullptr);
277     assert(!Scalars[CacheIdx] && "should overwrite existing value");
278     Scalars[CacheIdx] = V;
279   }
280 
281   /// Reset an existing scalar value for \p Def and a given \p Instance.
282   void reset(VPValue *Def, Value *V, const VPIteration &Instance) {
283     auto Iter = Data.PerPartScalars.find(Def);
284     assert(Iter != Data.PerPartScalars.end() &&
285            "need to overwrite existing value");
286     assert(Instance.Part < Iter->second.size() &&
287            "need to overwrite existing value");
288     unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
289     assert(CacheIdx < Iter->second[Instance.Part].size() &&
290            "need to overwrite existing value");
291     Iter->second[Instance.Part][CacheIdx] = V;
292   }
293 
294   /// Hold state information used when constructing the CFG of the output IR,
295   /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
296   struct CFGState {
297     /// The previous VPBasicBlock visited. Initially set to null.
298     VPBasicBlock *PrevVPBB = nullptr;
299 
300     /// The previous IR BasicBlock created or used. Initially set to the new
301     /// header BasicBlock.
302     BasicBlock *PrevBB = nullptr;
303 
304     /// The last IR BasicBlock in the output IR. Set to the new latch
305     /// BasicBlock, used for placing the newly created BasicBlocks.
306     BasicBlock *LastBB = nullptr;
307 
308     /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
309     /// of replication, maps the BasicBlock of the last replica created.
310     SmallDenseMap<VPBasicBlock *, BasicBlock *> VPBB2IRBB;
311 
312     /// Vector of VPBasicBlocks whose terminator instruction needs to be fixed
313     /// up at the end of vector code generation.
314     SmallVector<VPBasicBlock *, 8> VPBBsToFix;
315 
316     CFGState() = default;
317   } CFG;
318 
319   /// Hold a pointer to LoopInfo to register new basic blocks in the loop.
320   LoopInfo *LI;
321 
322   /// Hold a pointer to Dominator Tree to register new basic blocks in the loop.
323   DominatorTree *DT;
324 
325   /// Hold a reference to the IRBuilder used to generate output IR code.
326   IRBuilder<> &Builder;
327 
328   VPValue2ValueTy VPValue2Value;
329 
330   /// Hold the canonical scalar IV of the vector loop (start=0, step=VF*UF).
331   Value *CanonicalIV = nullptr;
332 
333   /// Hold the trip count of the scalar loop.
334   Value *TripCount = nullptr;
335 
336   /// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
337   InnerLoopVectorizer *ILV;
338 
339   /// Pointer to the VPlan code is generated for.
340   VPlan *Plan;
341 };
342 
/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
class VPBlockBase {
  friend class VPBlockUtils;

  const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).

  /// An optional name for the block.
  std::string Name;

  /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
  /// it is a topmost VPBlockBase.
  VPRegionBlock *Parent = nullptr;

  /// List of predecessor blocks.
  SmallVector<VPBlockBase *, 1> Predecessors;

  /// List of successor blocks.
  SmallVector<VPBlockBase *, 1> Successors;

  /// Successor selector managed by a VPUser. For blocks with zero or one
  /// successors, there is no operand. Otherwise there is exactly one operand
  /// which is the branch condition.
  VPUser CondBitUser;

  /// If the block is predicated, its predicate is stored as an operand of this
  /// VPUser to maintain the def-use relations. Otherwise there is no operand
  /// here.
  VPUser PredicateUser;

  /// VPlan containing the block. Can only be set on the entry block of the
  /// plan.
  VPlan *Plan = nullptr;

  /// Add \p Successor as the last successor to this block.
  void appendSuccessor(VPBlockBase *Successor) {
    assert(Successor && "Cannot add nullptr successor!");
    Successors.push_back(Successor);
  }

  /// Add \p Predecessor as the last predecessor to this block.
  void appendPredecessor(VPBlockBase *Predecessor) {
    assert(Predecessor && "Cannot add nullptr predecessor!");
    Predecessors.push_back(Predecessor);
  }

  /// Remove \p Predecessor from the predecessors of this block.
  void removePredecessor(VPBlockBase *Predecessor) {
    auto Pos = find(Predecessors, Predecessor);
    assert(Pos && "Predecessor does not exist");
    Predecessors.erase(Pos);
  }

  /// Remove \p Successor from the successors of this block.
  void removeSuccessor(VPBlockBase *Successor) {
    auto Pos = find(Successors, Successor);
    assert(Pos && "Successor does not exist");
    Successors.erase(Pos);
  }

protected:
  /// Construct a block with the given subclass ID \p SC and name \p N; only
  /// callable by concrete subclasses.
  VPBlockBase(const unsigned char SC, const std::string &N)
      : SubclassID(SC), Name(N) {}

public:
  /// An enumeration for keeping track of the concrete subclass of VPBlockBase
  /// that are actually instantiated. Values of this enumeration are kept in the
  /// SubclassID field of the VPBlockBase objects. They are used for concrete
  /// type identification.
  using VPBlockTy = enum { VPBasicBlockSC, VPRegionBlockSC };

  using VPBlocksTy = SmallVectorImpl<VPBlockBase *>;

  virtual ~VPBlockBase() = default;

  const std::string &getName() const { return Name; }

  void setName(const Twine &newName) { Name = newName.str(); }

  /// \return an ID for the concrete type of this object.
  /// This is used to implement the classof checks. This should not be used
  /// for any other purpose, as the values may change as LLVM evolves.
  unsigned getVPBlockID() const { return SubclassID; }

  VPRegionBlock *getParent() { return Parent; }
  const VPRegionBlock *getParent() const { return Parent; }

  /// \return A pointer to the plan containing the current block.
  VPlan *getPlan();
  const VPlan *getPlan() const;

  /// Sets the pointer of the plan containing the block. The block must be the
  /// entry block into the VPlan.
  void setPlan(VPlan *ParentPlan);

  void setParent(VPRegionBlock *P) { Parent = P; }

  /// \return the VPBasicBlock that is the entry of this VPBlockBase,
  /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
  /// VPBlockBase is a VPBasicBlock, it is returned.
  const VPBasicBlock *getEntryBasicBlock() const;
  VPBasicBlock *getEntryBasicBlock();

  /// \return the VPBasicBlock that is the exit of this VPBlockBase,
  /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
  /// VPBlockBase is a VPBasicBlock, it is returned.
  const VPBasicBlock *getExitBasicBlock() const;
  VPBasicBlock *getExitBasicBlock();

  const VPBlocksTy &getSuccessors() const { return Successors; }
  VPBlocksTy &getSuccessors() { return Successors; }

  const VPBlocksTy &getPredecessors() const { return Predecessors; }
  VPBlocksTy &getPredecessors() { return Predecessors; }

  /// \return the successor of this VPBlockBase if it has a single successor.
  /// Otherwise return a null pointer.
  VPBlockBase *getSingleSuccessor() const {
    return (Successors.size() == 1 ? *Successors.begin() : nullptr);
  }

  /// \return the predecessor of this VPBlockBase if it has a single
  /// predecessor. Otherwise return a null pointer.
  VPBlockBase *getSinglePredecessor() const {
    return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
  }

  size_t getNumSuccessors() const { return Successors.size(); }
  size_t getNumPredecessors() const { return Predecessors.size(); }

  /// An Enclosing Block of a block B is any block containing B, including B
  /// itself. \return the closest enclosing block starting from "this", which
  /// has successors. \return the root enclosing block if all enclosing blocks
  /// have no successors.
  VPBlockBase *getEnclosingBlockWithSuccessors();

  /// \return the closest enclosing block starting from "this", which has
  /// predecessors. \return the root enclosing block if all enclosing blocks
  /// have no predecessors.
  VPBlockBase *getEnclosingBlockWithPredecessors();

  /// \return the successors either attached directly to this VPBlockBase or, if
  /// this VPBlockBase is the exit block of a VPRegionBlock and has no
  /// successors of its own, search recursively for the first enclosing
  /// VPRegionBlock that has successors and return them. If no such
  /// VPRegionBlock exists, return the (empty) successors of the topmost
  /// VPBlockBase reached.
  const VPBlocksTy &getHierarchicalSuccessors() {
    return getEnclosingBlockWithSuccessors()->getSuccessors();
  }

  /// \return the hierarchical successor of this VPBlockBase if it has a single
  /// hierarchical successor. Otherwise return a null pointer.
  VPBlockBase *getSingleHierarchicalSuccessor() {
    return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
  }

  /// \return the predecessors either attached directly to this VPBlockBase or,
  /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
  /// predecessors of its own, search recursively for the first enclosing
  /// VPRegionBlock that has predecessors and return them. If no such
  /// VPRegionBlock exists, return the (empty) predecessors of the topmost
  /// VPBlockBase reached.
  const VPBlocksTy &getHierarchicalPredecessors() {
    return getEnclosingBlockWithPredecessors()->getPredecessors();
  }

  /// \return the hierarchical predecessor of this VPBlockBase if it has a
  /// single hierarchical predecessor. Otherwise return a null pointer.
  VPBlockBase *getSingleHierarchicalPredecessor() {
    return getEnclosingBlockWithPredecessors()->getSinglePredecessor();
  }

  /// \return the condition bit selecting the successor.
  VPValue *getCondBit();
  /// \return the condition bit selecting the successor.
  const VPValue *getCondBit() const;
  /// Set the condition bit selecting the successor.
  void setCondBit(VPValue *CV);

  /// \return the block's predicate.
  VPValue *getPredicate();
  /// \return the block's predicate.
  const VPValue *getPredicate() const;
  /// Set the block's predicate.
  void setPredicate(VPValue *Pred);

  /// Set a given VPBlockBase \p Successor as the single successor of this
  /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
  /// This VPBlockBase must have no successors.
  void setOneSuccessor(VPBlockBase *Successor) {
    assert(Successors.empty() && "Setting one successor when others exist.");
    appendSuccessor(Successor);
  }

  /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
  /// successors of this VPBlockBase. \p Condition is set as the successor
  /// selector. This VPBlockBase is not added as predecessor of \p IfTrue or \p
  /// IfFalse. This VPBlockBase must have no successors.
  void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse,
                        VPValue *Condition) {
    assert(Successors.empty() && "Setting two successors when others exist.");
    assert(Condition && "Setting two successors without condition!");
    setCondBit(Condition);
    appendSuccessor(IfTrue);
    appendSuccessor(IfFalse);
  }

  /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
  /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
  /// as successor of any VPBasicBlock in \p NewPreds.
  void setPredecessors(ArrayRef<VPBlockBase *> NewPreds) {
    assert(Predecessors.empty() && "Block predecessors already set.");
    for (auto *Pred : NewPreds)
      appendPredecessor(Pred);
  }

  /// Remove all the predecessors of this block.
  void clearPredecessors() { Predecessors.clear(); }

  /// Remove all the successors of this block and set its condition bit to
  /// null.
  void clearSuccessors() {
    Successors.clear();
    setCondBit(nullptr);
  }

  /// The method which generates the output IR that correspond to this
  /// VPBlockBase, thereby "executing" the VPlan.
  virtual void execute(struct VPTransformState *State) = 0;

  /// Delete all blocks reachable from a given VPBlockBase, inclusive.
  static void deleteCFG(VPBlockBase *Entry);

  /// Print the block's name as an operand; \p PrintType is currently ignored.
  void printAsOperand(raw_ostream &OS, bool PrintType) const {
    OS << getName();
  }

  void print(raw_ostream &OS) const {
    // TODO: Only printing VPBB name for now since we only have dot printing
    // support for VPInstructions/Recipes.
    printAsOperand(OS, false);
  }

  /// Return true if it is legal to hoist instructions into this block.
  bool isLegalToHoistInto() {
    // There are currently no constraints that prevent an instruction to be
    // hoisted into a VPBlockBase.
    return true;
  }

  /// Replace all operands of VPUsers in the block with \p NewValue and also
  /// replaces all uses of VPValues defined in the block with NewValue.
  virtual void dropAllReferences(VPValue *NewValue) = 0;
};
597 
/// VPRecipeBase is a base class modeling a sequence of one or more output IR
/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
/// and is responsible for deleting its defined values. Single-value
/// VPRecipeBases that also inherit from VPValue must make sure to inherit from
/// VPRecipeBase before VPValue.
class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
                     public VPDef,
                     public VPUser {
  friend VPBasicBlock;
  friend class VPBlockUtils;


  /// Each VPRecipe belongs to a single VPBasicBlock.
  VPBasicBlock *Parent = nullptr;

public:
  /// Construct a recipe with subclass ID \p SC using \p Operands as the
  /// VPUser operands.
  VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands)
      : VPDef(SC), VPUser(Operands) {}

  /// Construct a recipe with subclass ID \p SC from an iterator range of
  /// operands.
  template <typename IterT>
  VPRecipeBase(const unsigned char SC, iterator_range<IterT> Operands)
      : VPDef(SC), VPUser(Operands) {}
  virtual ~VPRecipeBase() = default;

  /// \return the VPBasicBlock which this VPRecipe belongs to.
  VPBasicBlock *getParent() { return Parent; }
  const VPBasicBlock *getParent() const { return Parent; }

  /// The method which generates the output IR instructions that correspond to
  /// this VPRecipe, thereby "executing" the VPlan.
  virtual void execute(struct VPTransformState &State) = 0;

  /// Insert an unlinked recipe into a basic block immediately before
  /// the specified recipe.
  void insertBefore(VPRecipeBase *InsertPos);

  /// Insert an unlinked Recipe into a basic block immediately after
  /// the specified Recipe.
  void insertAfter(VPRecipeBase *InsertPos);

  /// Unlink this recipe from its current VPBasicBlock and insert it into
  /// the VPBasicBlock that MovePos lives in, right after MovePos.
  void moveAfter(VPRecipeBase *MovePos);

  /// Unlink this recipe and insert into BB before I.
  ///
  /// \pre I is a valid iterator into BB.
  void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);

  /// This method unlinks 'this' from the containing basic block, but does not
  /// delete it.
  void removeFromParent();

  /// This method unlinks 'this' from the containing basic block and deletes it.
  ///
  /// \returns an iterator pointing to the element after the erased one
  iplist<VPRecipeBase>::iterator eraseFromParent();

  /// Returns the underlying instruction, if the recipe is a VPValue or nullptr
  /// otherwise.
  Instruction *getUnderlyingInstr() {
    return cast<Instruction>(getVPValue()->getUnderlyingValue());
  }
  const Instruction *getUnderlyingInstr() const {
    return cast<Instruction>(getVPValue()->getUnderlyingValue());
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    // All VPDefs are also VPRecipeBases.
    return true;
  }
};
671 
672 inline bool VPUser::classof(const VPDef *Def) {
673   return Def->getVPDefID() == VPRecipeBase::VPInstructionSC ||
674          Def->getVPDefID() == VPRecipeBase::VPWidenSC ||
675          Def->getVPDefID() == VPRecipeBase::VPWidenCallSC ||
676          Def->getVPDefID() == VPRecipeBase::VPWidenSelectSC ||
677          Def->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
678          Def->getVPDefID() == VPRecipeBase::VPBlendSC ||
679          Def->getVPDefID() == VPRecipeBase::VPInterleaveSC ||
680          Def->getVPDefID() == VPRecipeBase::VPReplicateSC ||
681          Def->getVPDefID() == VPRecipeBase::VPReductionSC ||
682          Def->getVPDefID() == VPRecipeBase::VPBranchOnMaskSC ||
683          Def->getVPDefID() == VPRecipeBase::VPWidenMemoryInstructionSC;
684 }
685 
/// This is a concrete Recipe that models a single VPlan-level instruction.
/// While as any Recipe it may generate a sequence of IR instructions when
/// executed, these instructions would always form a single-def expression as
/// the VPInstruction is also a single def-use vertex.
class VPInstruction : public VPRecipeBase, public VPValue {
  friend class VPlanSlp;

public:
  /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
  enum {
    Not = Instruction::OtherOpsEnd + 1,
    ICmpULE,
    SLPLoad,
    SLPStore,
    ActiveLaneMask,
  };

private:
  typedef unsigned char OpcodeTy;
  OpcodeTy Opcode;

  /// Utility method serving execute(): generates a single instance of the
  /// modeled instruction.
  void generateInstruction(VPTransformState &State, unsigned Part);

protected:
  /// Attach the IR instruction \p I this VPInstruction was created from.
  void setUnderlyingInstr(Instruction *I) { setUnderlyingValue(I); }

public:
  VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands)
      : VPRecipeBase(VPRecipeBase::VPInstructionSC, Operands),
        VPValue(VPValue::VPVInstructionSC, nullptr, this), Opcode(Opcode) {}

  /// Construct from VPInstruction operands; each operand contributes its
  /// defined VPValue.
  VPInstruction(unsigned Opcode, ArrayRef<VPInstruction *> Operands)
      : VPRecipeBase(VPRecipeBase::VPInstructionSC, {}),
        VPValue(VPValue::VPVInstructionSC, nullptr, this), Opcode(Opcode) {
    for (auto *I : Operands)
      addOperand(I->getVPValue());
  }

  VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands)
      : VPInstruction(Opcode, ArrayRef<VPValue *>(Operands)) {}

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPValue *V) {
    return V->getVPValueID() == VPValue::VPVInstructionSC;
  }

  /// Create a fresh VPInstruction with the same opcode and operands.
  VPInstruction *clone() const {
    SmallVector<VPValue *, 2> Operands(operands());
    return new VPInstruction(Opcode, Operands);
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *R) {
    return R->getVPDefID() == VPRecipeBase::VPInstructionSC;
  }

  unsigned getOpcode() const { return Opcode; }

  /// Generate the instruction.
  /// TODO: We currently execute only per-part unless a specific instance is
  /// provided.
  void execute(VPTransformState &State) override;

  /// Print the VPInstruction to \p O.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;

  /// Print the VPInstruction to dbgs() (for debugging).
  void dump() const;

  /// Return true if this instruction may modify memory.
  bool mayWriteToMemory() const {
    // TODO: we can use attributes of the called function to rule out memory
    //       modifications.
    return Opcode == Instruction::Store || Opcode == Instruction::Call ||
           Opcode == Instruction::Invoke || Opcode == SLPStore;
  }

  /// Returns true if the instruction defines a value.
  bool hasResult() const {
    // CallInst may or may not have a result, depending on the called function.
    // Conservatively assume that calls have results for now.
    switch (getOpcode()) {
    case Instruction::Ret:
    case Instruction::Br:
    case Instruction::Store:
    case Instruction::Switch:
    case Instruction::IndirectBr:
    case Instruction::Resume:
    case Instruction::CatchRet:
    case Instruction::Unreachable:
    case Instruction::Fence:
    case Instruction::AtomicRMW:
      return false;
    default:
      return true;
    }
  }
};
786 
/// VPWidenRecipe is a recipe for producing a widened (vector) copy of its
/// ingredient. This recipe covers most of the traditional vectorization cases
/// where each ingredient transforms into a vectorized version of itself.
class VPWidenRecipe : public VPRecipeBase, public VPValue {
public:
  /// Construct a widening recipe for ingredient \p I with the given operand
  /// range; \p I also serves as the recipe's underlying IR value.
  template <typename IterT>
  VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands)
      : VPRecipeBase(VPRecipeBase::VPWidenSC, Operands),
        VPValue(VPValue::VPVWidenSC, &I, this) {}

  ~VPWidenRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    return D->getVPDefID() == VPRecipeBase::VPWidenSC;
  }
  /// Method to support type inquiry through isa, cast, and dyn_cast via the
  /// VPValue base class.
  static inline bool classof(const VPValue *V) {
    return V->getVPValueID() == VPValue::VPVWidenSC;
  }

  /// Produce widened copies of all Ingredients.
  void execute(VPTransformState &State) override;

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
};
814 
815 /// A recipe for widening Call instructions.
816 class VPWidenCallRecipe : public VPRecipeBase, public VPValue {
817 
818 public:
819   template <typename IterT>
820   VPWidenCallRecipe(CallInst &I, iterator_range<IterT> CallArguments)
821       : VPRecipeBase(VPRecipeBase::VPWidenCallSC, CallArguments),
822         VPValue(VPValue::VPVWidenCallSC, &I, this) {}
823 
824   ~VPWidenCallRecipe() override = default;
825 
826   /// Method to support type inquiry through isa, cast, and dyn_cast.
827   static inline bool classof(const VPDef *D) {
828     return D->getVPDefID() == VPRecipeBase::VPWidenCallSC;
829   }
830 
831   /// Produce a widened version of the call instruction.
832   void execute(VPTransformState &State) override;
833 
834   /// Print the recipe.
835   void print(raw_ostream &O, const Twine &Indent,
836              VPSlotTracker &SlotTracker) const override;
837 };
838 
839 /// A recipe for widening select instructions.
840 class VPWidenSelectRecipe : public VPRecipeBase, public VPValue {
841 
842   /// Is the condition of the select loop invariant?
843   bool InvariantCond;
844 
845 public:
846   template <typename IterT>
847   VPWidenSelectRecipe(SelectInst &I, iterator_range<IterT> Operands,
848                       bool InvariantCond)
849       : VPRecipeBase(VPRecipeBase::VPWidenSelectSC, Operands),
850         VPValue(VPValue::VPVWidenSelectSC, &I, this),
851         InvariantCond(InvariantCond) {}
852 
853   ~VPWidenSelectRecipe() override = default;
854 
855   /// Method to support type inquiry through isa, cast, and dyn_cast.
856   static inline bool classof(const VPDef *D) {
857     return D->getVPDefID() == VPRecipeBase::VPWidenSelectSC;
858   }
859 
860   /// Produce a widened version of the select instruction.
861   void execute(VPTransformState &State) override;
862 
863   /// Print the recipe.
864   void print(raw_ostream &O, const Twine &Indent,
865              VPSlotTracker &SlotTracker) const override;
866 };
867 
868 /// A recipe for handling GEP instructions.
869 class VPWidenGEPRecipe : public VPRecipeBase, public VPValue {
870   bool IsPtrLoopInvariant;
871   SmallBitVector IsIndexLoopInvariant;
872 
873 public:
874   template <typename IterT>
875   VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range<IterT> Operands)
876       : VPRecipeBase(VPRecipeBase::VPWidenGEPSC, Operands),
877         VPValue(VPWidenGEPSC, GEP, this),
878         IsIndexLoopInvariant(GEP->getNumIndices(), false) {}
879 
880   template <typename IterT>
881   VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range<IterT> Operands,
882                    Loop *OrigLoop)
883       : VPRecipeBase(VPRecipeBase::VPWidenGEPSC, Operands),
884         VPValue(VPValue::VPVWidenGEPSC, GEP, this),
885         IsIndexLoopInvariant(GEP->getNumIndices(), false) {
886     IsPtrLoopInvariant = OrigLoop->isLoopInvariant(GEP->getPointerOperand());
887     for (auto Index : enumerate(GEP->indices()))
888       IsIndexLoopInvariant[Index.index()] =
889           OrigLoop->isLoopInvariant(Index.value().get());
890   }
891   ~VPWidenGEPRecipe() override = default;
892 
893   /// Method to support type inquiry through isa, cast, and dyn_cast.
894   static inline bool classof(const VPDef *D) {
895     return D->getVPDefID() == VPRecipeBase::VPWidenGEPSC;
896   }
897 
898   /// Generate the gep nodes.
899   void execute(VPTransformState &State) override;
900 
901   /// Print the recipe.
902   void print(raw_ostream &O, const Twine &Indent,
903              VPSlotTracker &SlotTracker) const override;
904 };
905 
/// A recipe for handling phi nodes of integer and floating-point inductions,
/// producing their vector and scalar values.
class VPWidenIntOrFpInductionRecipe : public VPRecipeBase {
  /// The induction phi from the original (scalar) loop.
  PHINode *IV;

public:
  /// Create a recipe for induction phi \p IV with start value \p Start. The
  /// recipe's first defined value models \p Trunc when one is provided and
  /// \p IV otherwise; when \p Cast is non-null a second value is defined for
  /// it.
  VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, Instruction *Cast,
                                TruncInst *Trunc = nullptr)
      : VPRecipeBase(VPWidenIntOrFpInductionSC, {Start}), IV(IV) {
    // Each `new VPValue(X, this)` creates a value defined by this recipe.
    if (Trunc)
      new VPValue(Trunc, this);
    else
      new VPValue(IV, this);

    if (Cast)
      new VPValue(Cast, this);
  }
  ~VPWidenIntOrFpInductionRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    return D->getVPDefID() == VPRecipeBase::VPWidenIntOrFpInductionSC;
  }

  /// Generate the vectorized and scalarized versions of the phi node as
  /// needed by their users.
  void execute(VPTransformState &State) override;

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;

  /// Returns the start value of the induction.
  VPValue *getStartValue() { return getOperand(0); }

  /// Returns the cast VPValue, if one is attached, or nullptr otherwise.
  VPValue *getCastValue() {
    // The cast, when present, is the second value defined in the constructor.
    if (getNumDefinedValues() != 2)
      return nullptr;
    return getVPValue(1);
  }

  /// Returns the first defined value as TruncInst, if it is one or nullptr
  /// otherwise.
  TruncInst *getTruncInst() {
    return dyn_cast_or_null<TruncInst>(getVPValue(0)->getUnderlyingValue());
  }
  const TruncInst *getTruncInst() const {
    return dyn_cast_or_null<TruncInst>(getVPValue(0)->getUnderlyingValue());
  }
};
957 
/// A recipe for handling all phi nodes except for integer and FP inductions.
/// For reduction PHIs, RdxDesc must point to the corresponding recurrence
/// descriptor and the start value is the first operand of the recipe.
/// In the VPlan native path, all incoming VPValues & VPBasicBlock pairs are
/// managed in the recipe directly.
class VPWidenPHIRecipe : public VPRecipeBase, public VPValue {
  /// Descriptor for a reduction PHI; remains null for non-reduction phis.
  RecurrenceDescriptor *RdxDesc = nullptr;

  /// List of incoming blocks. Only used in the VPlan native path.
  SmallVector<VPBasicBlock *, 2> IncomingBlocks;

public:
  /// Create a new VPWidenPHIRecipe for the reduction \p Phi described by \p
  /// RdxDesc. \p Start becomes operand 0 of the recipe.
  VPWidenPHIRecipe(PHINode *Phi, RecurrenceDescriptor &RdxDesc, VPValue &Start)
      : VPWidenPHIRecipe(Phi) {
    this->RdxDesc = &RdxDesc;
    addOperand(&Start);
  }

  /// Create a VPWidenPHIRecipe for \p Phi, without reduction information and
  /// without operands.
  VPWidenPHIRecipe(PHINode *Phi)
      : VPRecipeBase(VPWidenPHISC, {}),
        VPValue(VPValue::VPVWidenPHISC, Phi, this) {}
  ~VPWidenPHIRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    return D->getVPDefID() == VPRecipeBase::VPWidenPHISC;
  }
  static inline bool classof(const VPValue *V) {
    return V->getVPValueID() == VPValue::VPVWidenPHISC;
  }

  /// Generate the phi/select nodes.
  void execute(VPTransformState &State) override;

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;

  /// Returns the start value of the phi, if it is a reduction.
  VPValue *getStartValue() {
    return getNumOperands() == 0 ? nullptr : getOperand(0);
  }

  /// Adds a pair (\p IncomingV, \p IncomingBlock) to the phi.
  void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock) {
    addOperand(IncomingV);
    IncomingBlocks.push_back(IncomingBlock);
  }

  /// Returns the \p I th incoming VPValue.
  /// NOTE(review): indexes operands directly, which assumes no reduction
  /// start operand precedes the incoming values (VPlan native path) -- confirm.
  VPValue *getIncomingValue(unsigned I) { return getOperand(I); }

  /// Returns the \p I th incoming VPBasicBlock.
  VPBasicBlock *getIncomingBlock(unsigned I) { return IncomingBlocks[I]; }
};
1017 
/// A recipe for vectorizing a phi-node as a sequence of mask-based select
/// instructions.
class VPBlendRecipe : public VPRecipeBase, public VPValue {
  /// The original phi node being blended.
  PHINode *Phi;

public:
  /// The blend operation is a User of the incoming values and of their
  /// respective masks, ordered [I0, M0, I1, M1, ...]. Note that a single value
  /// might be incoming with a full mask for which there is no VPValue.
  VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands)
      : VPRecipeBase(VPBlendSC, Operands),
        VPValue(VPValue::VPVBlendSC, Phi, this), Phi(Phi) {
    assert(Operands.size() > 0 &&
           ((Operands.size() == 1) || (Operands.size() % 2 == 0)) &&
           "Expected either a single incoming value or a positive even number "
           "of operands");
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    return D->getVPDefID() == VPRecipeBase::VPBlendSC;
  }

  /// Return the number of incoming values, taking into account that a single
  /// incoming value has no mask.
  unsigned getNumIncomingValues() const { return (getNumOperands() + 1) / 2; }

  /// Return incoming value number \p Idx.
  VPValue *getIncomingValue(unsigned Idx) const { return getOperand(Idx * 2); }

  /// Return mask number \p Idx.
  VPValue *getMask(unsigned Idx) const { return getOperand(Idx * 2 + 1); }

  /// Generate the phi/select nodes.
  void execute(VPTransformState &State) override;

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
};
1058 
1059 /// VPInterleaveRecipe is a recipe for transforming an interleave group of load
1060 /// or stores into one wide load/store and shuffles. The first operand of a
1061 /// VPInterleave recipe is the address, followed by the stored values, followed
1062 /// by an optional mask.
1063 class VPInterleaveRecipe : public VPRecipeBase {
1064   const InterleaveGroup<Instruction> *IG;
1065 
1066   bool HasMask = false;
1067 
1068 public:
1069   VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,
1070                      ArrayRef<VPValue *> StoredValues, VPValue *Mask)
1071       : VPRecipeBase(VPInterleaveSC, {Addr}), IG(IG) {
1072     for (unsigned i = 0; i < IG->getFactor(); ++i)
1073       if (Instruction *I = IG->getMember(i)) {
1074         if (I->getType()->isVoidTy())
1075           continue;
1076         new VPValue(I, this);
1077       }
1078 
1079     for (auto *SV : StoredValues)
1080       addOperand(SV);
1081     if (Mask) {
1082       HasMask = true;
1083       addOperand(Mask);
1084     }
1085   }
1086   ~VPInterleaveRecipe() override = default;
1087 
1088   /// Method to support type inquiry through isa, cast, and dyn_cast.
1089   static inline bool classof(const VPDef *D) {
1090     return D->getVPDefID() == VPRecipeBase::VPInterleaveSC;
1091   }
1092 
1093   /// Return the address accessed by this recipe.
1094   VPValue *getAddr() const {
1095     return getOperand(0); // Address is the 1st, mandatory operand.
1096   }
1097 
1098   /// Return the mask used by this recipe. Note that a full mask is represented
1099   /// by a nullptr.
1100   VPValue *getMask() const {
1101     // Mask is optional and therefore the last, currently 2nd operand.
1102     return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
1103   }
1104 
1105   /// Return the VPValues stored by this interleave group. If it is a load
1106   /// interleave group, return an empty ArrayRef.
1107   ArrayRef<VPValue *> getStoredValues() const {
1108     // The first operand is the address, followed by the stored values, followed
1109     // by an optional mask.
1110     return ArrayRef<VPValue *>(op_begin(), getNumOperands())
1111         .slice(1, getNumOperands() - (HasMask ? 2 : 1));
1112   }
1113 
1114   /// Generate the wide load or store, and shuffles.
1115   void execute(VPTransformState &State) override;
1116 
1117   /// Print the recipe.
1118   void print(raw_ostream &O, const Twine &Indent,
1119              VPSlotTracker &SlotTracker) const override;
1120 
1121   const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; }
1122 };
1123 
/// A recipe to represent inloop reduction operations, performing a reduction on
/// a vector operand into a scalar value, and adding the result to a chain.
/// The Operands are {ChainOp, VecOp, [Condition]}.
class VPReductionRecipe : public VPRecipeBase, public VPValue {
  /// The recurrence descriptor for the reduction in question.
  RecurrenceDescriptor *RdxDesc;
  /// Pointer to the TTI, needed to create the target reduction.
  const TargetTransformInfo *TTI;

public:
  /// Construct a reduction recipe for instruction \p I described by \p R.
  /// \p CondOp is optional; when non-null it becomes the third operand.
  VPReductionRecipe(RecurrenceDescriptor *R, Instruction *I, VPValue *ChainOp,
                    VPValue *VecOp, VPValue *CondOp,
                    const TargetTransformInfo *TTI)
      : VPRecipeBase(VPRecipeBase::VPReductionSC, {ChainOp, VecOp}),
        VPValue(VPValue::VPVReductionSC, I, this), RdxDesc(R), TTI(TTI) {
    if (CondOp)
      addOperand(CondOp);
  }

  ~VPReductionRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPValue *V) {
    return V->getVPValueID() == VPValue::VPVReductionSC;
  }

  static inline bool classof(const VPDef *D) {
    return D->getVPDefID() == VPRecipeBase::VPReductionSC;
  }

  /// Generate the reduction in the loop.
  void execute(VPTransformState &State) override;

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;

  /// The VPValue of the scalar Chain being accumulated.
  VPValue *getChainOp() const { return getOperand(0); }
  /// The VPValue of the vector value to be reduced.
  VPValue *getVecOp() const { return getOperand(1); }
  /// The VPValue of the condition for the block; nullptr when no condition
  /// operand was provided.
  VPValue *getCondOp() const {
    return getNumOperands() > 2 ? getOperand(2) : nullptr;
  }
};
1170 
1171 /// VPReplicateRecipe replicates a given instruction producing multiple scalar
1172 /// copies of the original scalar type, one per lane, instead of producing a
1173 /// single copy of widened type for all lanes. If the instruction is known to be
1174 /// uniform only one copy, per lane zero, will be generated.
1175 class VPReplicateRecipe : public VPRecipeBase, public VPValue {
1176   /// Indicator if only a single replica per lane is needed.
1177   bool IsUniform;
1178 
1179   /// Indicator if the replicas are also predicated.
1180   bool IsPredicated;
1181 
1182   /// Indicator if the scalar values should also be packed into a vector.
1183   bool AlsoPack;
1184 
1185 public:
1186   template <typename IterT>
1187   VPReplicateRecipe(Instruction *I, iterator_range<IterT> Operands,
1188                     bool IsUniform, bool IsPredicated = false)
1189       : VPRecipeBase(VPReplicateSC, Operands), VPValue(VPVReplicateSC, I, this),
1190         IsUniform(IsUniform), IsPredicated(IsPredicated) {
1191     // Retain the previous behavior of predicateInstructions(), where an
1192     // insert-element of a predicated instruction got hoisted into the
1193     // predicated basic block iff it was its only user. This is achieved by
1194     // having predicated instructions also pack their values into a vector by
1195     // default unless they have a replicated user which uses their scalar value.
1196     AlsoPack = IsPredicated && !I->use_empty();
1197   }
1198 
1199   ~VPReplicateRecipe() override = default;
1200 
1201   /// Method to support type inquiry through isa, cast, and dyn_cast.
1202   static inline bool classof(const VPDef *D) {
1203     return D->getVPDefID() == VPRecipeBase::VPReplicateSC;
1204   }
1205 
1206   static inline bool classof(const VPValue *V) {
1207     return V->getVPValueID() == VPValue::VPVReplicateSC;
1208   }
1209 
1210   /// Generate replicas of the desired Ingredient. Replicas will be generated
1211   /// for all parts and lanes unless a specific part and lane are specified in
1212   /// the \p State.
1213   void execute(VPTransformState &State) override;
1214 
1215   void setAlsoPack(bool Pack) { AlsoPack = Pack; }
1216 
1217   /// Print the recipe.
1218   void print(raw_ostream &O, const Twine &Indent,
1219              VPSlotTracker &SlotTracker) const override;
1220 
1221   bool isUniform() const { return IsUniform; }
1222 
1223   bool isPacked() const { return AlsoPack; }
1224 };
1225 
/// A recipe for generating conditional branches on the bits of a mask.
class VPBranchOnMaskRecipe : public VPRecipeBase {
public:
  /// Construct the recipe; a null \p BlockInMask denotes an all-one mask, in
  /// which case no operand is added.
  VPBranchOnMaskRecipe(VPValue *BlockInMask)
      : VPRecipeBase(VPBranchOnMaskSC, {}) {
    if (BlockInMask) // nullptr means all-one mask.
      addOperand(BlockInMask);
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    return D->getVPDefID() == VPRecipeBase::VPBranchOnMaskSC;
  }

  /// Generate the extraction of the appropriate bit from the block mask and the
  /// conditional branch.
  void execute(VPTransformState &State) override;

  /// Print the recipe (in the dot-format used by VPlanPrinter).
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override {
    O << " +\n" << Indent << "\"BRANCH-ON-MASK ";
    if (VPValue *Mask = getMask())
      Mask->printAsOperand(O, SlotTracker);
    else
      O << " All-One";
    O << "\\l\"";
  }

  /// Return the mask used by this recipe. Note that a full mask is represented
  /// by a nullptr.
  VPValue *getMask() const {
    assert(getNumOperands() <= 1 && "should have either 0 or 1 operands");
    // Mask is optional.
    return getNumOperands() == 1 ? getOperand(0) : nullptr;
  }
};
1263 
/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
/// control converges back from a Branch-on-Mask. The phi nodes are needed in
/// order to merge values that are set under such a branch and feed their uses.
/// The phi nodes can be scalar or vector depending on the users of the value.
/// This recipe works in concert with VPBranchOnMaskRecipe.
class VPPredInstPHIRecipe : public VPRecipeBase, public VPValue {
public:
  /// Construct a VPPredInstPHIRecipe given \p PredV, the predicated value
  /// whose uses need phi nodes after control merges back from a
  /// Branch-on-Mask.
  VPPredInstPHIRecipe(VPValue *PredV)
      : VPRecipeBase(VPPredInstPHISC, PredV),
        VPValue(VPValue::VPVPredInstPHI, nullptr, this) {}
  ~VPPredInstPHIRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    return D->getVPDefID() == VPRecipeBase::VPPredInstPHISC;
  }

  /// Generates phi nodes for live-outs as needed to retain SSA form.
  void execute(VPTransformState &State) override;

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
};
1290 
/// A Recipe for widening load/store operations.
/// The recipe uses the following VPValues:
/// - For load: Address, optional mask
/// - For store: Address, stored value, optional mask
/// TODO: We currently execute only per-part unless a specific instance is
/// provided.
class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
  /// The original load or store being widened.
  Instruction &Ingredient;

  /// Add \p Mask as the optional last operand; a null mask denotes an
  /// all-one mask and adds no operand.
  void setMask(VPValue *Mask) {
    if (!Mask)
      return;
    addOperand(Mask);
  }

  /// Returns true if a mask operand was added: loads carry {Addr[, Mask]} and
  /// stores carry {Addr, StoredValue[, Mask]}.
  bool isMasked() const {
    return isStore() ? getNumOperands() == 3 : getNumOperands() == 2;
  }

public:
  /// Construct a recipe widening \p Load; the load defines a result value.
  VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask)
      : VPRecipeBase(VPWidenMemoryInstructionSC, {Addr}), Ingredient(Load) {
    new VPValue(VPValue::VPVMemoryInstructionSC, &Load, this);
    setMask(Mask);
  }

  /// Construct a recipe widening \p Store; no result value is defined since
  /// stores produce none.
  VPWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr,
                                 VPValue *StoredValue, VPValue *Mask)
      : VPRecipeBase(VPWidenMemoryInstructionSC, {Addr, StoredValue}),
        Ingredient(Store) {
    setMask(Mask);
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    return D->getVPDefID() == VPRecipeBase::VPWidenMemoryInstructionSC;
  }

  /// Return the address accessed by this recipe.
  VPValue *getAddr() const {
    return getOperand(0); // Address is the 1st, mandatory operand.
  }

  /// Return the mask used by this recipe. Note that a full mask is represented
  /// by a nullptr.
  VPValue *getMask() const {
    // Mask is optional and therefore the last operand.
    return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
  }

  /// Returns true if this recipe is a store.
  bool isStore() const { return isa<StoreInst>(Ingredient); }

  /// Return the value stored by this recipe.
  VPValue *getStoredValue() const {
    assert(isStore() && "Stored value only available for store instructions");
    return getOperand(1); // Stored value is the 2nd, mandatory operand.
  }

  /// Generate the wide load/store.
  void execute(VPTransformState &State) override;

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
};
1357 
/// A Recipe for widening the canonical induction variable of the vector loop.
class VPWidenCanonicalIVRecipe : public VPRecipeBase {
public:
  VPWidenCanonicalIVRecipe() : VPRecipeBase(VPWidenCanonicalIVSC, {}) {
    // The recipe defines a single result value with no underlying IR value.
    new VPValue(nullptr, this);
  }

  ~VPWidenCanonicalIVRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    return D->getVPDefID() == VPRecipeBase::VPWidenCanonicalIVSC;
  }

  /// Generate a canonical vector induction variable of the vector loop, with
  /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
  /// step = <VF*UF, VF*UF, ..., VF*UF>.
  void execute(VPTransformState &State) override;

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
};
1381 
/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
/// holds a sequence of zero or more VPRecipe's each representing a sequence of
/// output IR instructions.
class VPBasicBlock : public VPBlockBase {
public:
  using RecipeListTy = iplist<VPRecipeBase>;

private:
  /// The VPRecipes held in the order of output instructions to generate.
  RecipeListTy Recipes;

public:
  /// Construct a VPBasicBlock named \p Name, optionally seeded with an
  /// initial \p Recipe.
  VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
      : VPBlockBase(VPBasicBlockSC, Name.str()) {
    if (Recipe)
      appendRecipe(Recipe);
  }

  ~VPBasicBlock() override {
    // The block owns its recipes via the intrusive list; pop each one to
    // release it.
    while (!Recipes.empty())
      Recipes.pop_back();
  }

  /// Instruction iterators...
  using iterator = RecipeListTy::iterator;
  using const_iterator = RecipeListTy::const_iterator;
  using reverse_iterator = RecipeListTy::reverse_iterator;
  using const_reverse_iterator = RecipeListTy::const_reverse_iterator;

  //===--------------------------------------------------------------------===//
  /// Recipe iterator methods
  ///
  inline iterator begin() { return Recipes.begin(); }
  inline const_iterator begin() const { return Recipes.begin(); }
  inline iterator end() { return Recipes.end(); }
  inline const_iterator end() const { return Recipes.end(); }

  inline reverse_iterator rbegin() { return Recipes.rbegin(); }
  inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
  inline reverse_iterator rend() { return Recipes.rend(); }
  inline const_reverse_iterator rend() const { return Recipes.rend(); }

  inline size_t size() const { return Recipes.size(); }
  inline bool empty() const { return Recipes.empty(); }
  inline const VPRecipeBase &front() const { return Recipes.front(); }
  inline VPRecipeBase &front() { return Recipes.front(); }
  inline const VPRecipeBase &back() const { return Recipes.back(); }
  inline VPRecipeBase &back() { return Recipes.back(); }

  /// Returns a reference to the list of recipes.
  RecipeListTy &getRecipeList() { return Recipes; }

  /// Returns a pointer to a member of the recipe list; used by the ilist
  /// machinery to locate the list from a contained recipe.
  static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
    return &VPBasicBlock::Recipes;
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPBlockBase *V) {
    return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC;
  }

  /// Insert \p Recipe before \p InsertPt; the block takes ownership and
  /// becomes the recipe's parent.
  void insert(VPRecipeBase *Recipe, iterator InsertPt) {
    assert(Recipe && "No recipe to append.");
    assert(!Recipe->Parent && "Recipe already in VPlan");
    Recipe->Parent = this;
    Recipes.insert(InsertPt, Recipe);
  }

  /// Augment the existing recipes of a VPBasicBlock with an additional
  /// \p Recipe as the last recipe.
  void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }

  /// The method which generates the output IR instructions that correspond to
  /// this VPBasicBlock, thereby "executing" the VPlan.
  void execute(struct VPTransformState *State) override;

  /// Return the position of the first non-phi node recipe in the block.
  iterator getFirstNonPhi();

  void dropAllReferences(VPValue *NewValue) override;

private:
  /// Create an IR BasicBlock to hold the output instructions generated by this
  /// VPBasicBlock, and return it. Update the CFGState accordingly.
  BasicBlock *createEmptyBasicBlock(VPTransformState::CFGState &CFG);
};
1469 
1470 /// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
1471 /// which form a Single-Entry-Single-Exit subgraph of the output IR CFG.
1472 /// A VPRegionBlock may indicate that its contents are to be replicated several
1473 /// times. This is designed to support predicated scalarization, in which a
1474 /// scalar if-then code structure needs to be generated VF * UF times. Having
1475 /// this replication indicator helps to keep a single model for multiple
1476 /// candidate VF's. The actual replication takes place only once the desired VF
1477 /// and UF have been determined.
1478 class VPRegionBlock : public VPBlockBase {
1479   /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
1480   VPBlockBase *Entry;
1481 
1482   /// Hold the Single Exit of the SESE region modelled by the VPRegionBlock.
1483   VPBlockBase *Exit;
1484 
1485   /// An indicator whether this region is to generate multiple replicated
1486   /// instances of output IR corresponding to its VPBlockBases.
1487   bool IsReplicator;
1488 
1489 public:
1490   VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exit,
1491                 const std::string &Name = "", bool IsReplicator = false)
1492       : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exit(Exit),
1493         IsReplicator(IsReplicator) {
1494     assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
1495     assert(Exit->getSuccessors().empty() && "Exit block has successors.");
1496     Entry->setParent(this);
1497     Exit->setParent(this);
1498   }
1499   VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
1500       : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exit(nullptr),
1501         IsReplicator(IsReplicator) {}
1502 
1503   ~VPRegionBlock() override {
1504     if (Entry) {
1505       VPValue DummyValue;
1506       Entry->dropAllReferences(&DummyValue);
1507       deleteCFG(Entry);
1508     }
1509   }
1510 
1511   /// Method to support type inquiry through isa, cast, and dyn_cast.
1512   static inline bool classof(const VPBlockBase *V) {
1513     return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
1514   }
1515 
1516   const VPBlockBase *getEntry() const { return Entry; }
1517   VPBlockBase *getEntry() { return Entry; }
1518 
1519   /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
1520   /// EntryBlock must have no predecessors.
1521   void setEntry(VPBlockBase *EntryBlock) {
1522     assert(EntryBlock->getPredecessors().empty() &&
1523            "Entry block cannot have predecessors.");
1524     Entry = EntryBlock;
1525     EntryBlock->setParent(this);
1526   }
1527 
  // FIXME: DominatorTreeBase is doing 'A->getParent()->front()'. 'front' is a
  // specific interface of llvm::Function, instead of using
  // GraphTraits::getEntryNode. We should add a new template parameter to
  // DominatorTreeBase representing the Graph type.
  VPBlockBase &front() const { return *Entry; }

  /// Return the exit VPBlockBase of this region (null until set).
  const VPBlockBase *getExit() const { return Exit; }
  VPBlockBase *getExit() { return Exit; }

  /// Set \p ExitBlock as the exit VPBlockBase of this VPRegionBlock. \p
  /// ExitBlock must have no successors.
  void setExit(VPBlockBase *ExitBlock) {
    assert(ExitBlock->getSuccessors().empty() &&
           "Exit block cannot have successors.");
    Exit = ExitBlock;
    ExitBlock->setParent(this);
  }
1545 
  /// An indicator whether this region is to generate multiple replicated
  /// instances of output IR corresponding to its VPBlockBases.
  bool isReplicator() const { return IsReplicator; }

  /// The method which generates the output IR instructions that correspond to
  /// this VPRegionBlock, thereby "executing" the VPlan.
  void execute(struct VPTransformState *State) override;

  /// Drop all references held by blocks inside this region, replacing uses
  /// with \p NewValue (used before deleting the region's contents).
  void dropAllReferences(VPValue *NewValue) override;
1555 };
1556 
1557 //===----------------------------------------------------------------------===//
1558 // GraphTraits specializations for VPlan Hierarchical Control-Flow Graphs     //
1559 //===----------------------------------------------------------------------===//
1560 
1561 // The following set of template specializations implement GraphTraits to treat
1562 // any VPBlockBase as a node in a graph of VPBlockBases. It's important to note
// that VPBlockBase traits don't recurse into VPRegionBlocks, i.e., if the
1564 // VPBlockBase is a VPRegionBlock, this specialization provides access to its
1565 // successors/predecessors but not to the blocks inside the region.
1566 
1567 template <> struct GraphTraits<VPBlockBase *> {
1568   using NodeRef = VPBlockBase *;
1569   using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::iterator;
1570 
1571   static NodeRef getEntryNode(NodeRef N) { return N; }
1572 
1573   static inline ChildIteratorType child_begin(NodeRef N) {
1574     return N->getSuccessors().begin();
1575   }
1576 
1577   static inline ChildIteratorType child_end(NodeRef N) {
1578     return N->getSuccessors().end();
1579   }
1580 };
1581 
1582 template <> struct GraphTraits<const VPBlockBase *> {
1583   using NodeRef = const VPBlockBase *;
1584   using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::const_iterator;
1585 
1586   static NodeRef getEntryNode(NodeRef N) { return N; }
1587 
1588   static inline ChildIteratorType child_begin(NodeRef N) {
1589     return N->getSuccessors().begin();
1590   }
1591 
1592   static inline ChildIteratorType child_end(NodeRef N) {
1593     return N->getSuccessors().end();
1594   }
1595 };
1596 
// Inverse order specialization for VPBlockBases. Predecessors are used instead
// of successors for the inverse traversal.
1599 template <> struct GraphTraits<Inverse<VPBlockBase *>> {
1600   using NodeRef = VPBlockBase *;
1601   using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::iterator;
1602 
1603   static NodeRef getEntryNode(Inverse<NodeRef> B) { return B.Graph; }
1604 
1605   static inline ChildIteratorType child_begin(NodeRef N) {
1606     return N->getPredecessors().begin();
1607   }
1608 
1609   static inline ChildIteratorType child_end(NodeRef N) {
1610     return N->getPredecessors().end();
1611   }
1612 };
1613 
1614 // The following set of template specializations implement GraphTraits to
1615 // treat VPRegionBlock as a graph and recurse inside its nodes. It's important
1616 // to note that the blocks inside the VPRegionBlock are treated as VPBlockBases
1617 // (i.e., no dyn_cast is performed, VPBlockBases specialization is used), so
// there won't be automatic recursion into other VPBlockBases that turn out to
// be VPRegionBlocks.
1620 
1621 template <>
1622 struct GraphTraits<VPRegionBlock *> : public GraphTraits<VPBlockBase *> {
1623   using GraphRef = VPRegionBlock *;
1624   using nodes_iterator = df_iterator<NodeRef>;
1625 
1626   static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); }
1627 
1628   static nodes_iterator nodes_begin(GraphRef N) {
1629     return nodes_iterator::begin(N->getEntry());
1630   }
1631 
1632   static nodes_iterator nodes_end(GraphRef N) {
1633     // df_iterator::end() returns an empty iterator so the node used doesn't
1634     // matter.
1635     return nodes_iterator::end(N);
1636   }
1637 };
1638 
1639 template <>
1640 struct GraphTraits<const VPRegionBlock *>
1641     : public GraphTraits<const VPBlockBase *> {
1642   using GraphRef = const VPRegionBlock *;
1643   using nodes_iterator = df_iterator<NodeRef>;
1644 
1645   static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); }
1646 
1647   static nodes_iterator nodes_begin(GraphRef N) {
1648     return nodes_iterator::begin(N->getEntry());
1649   }
1650 
1651   static nodes_iterator nodes_end(GraphRef N) {
1652     // df_iterator::end() returns an empty iterator so the node used doesn't
1653     // matter.
1654     return nodes_iterator::end(N);
1655   }
1656 };
1657 
1658 template <>
1659 struct GraphTraits<Inverse<VPRegionBlock *>>
1660     : public GraphTraits<Inverse<VPBlockBase *>> {
1661   using GraphRef = VPRegionBlock *;
1662   using nodes_iterator = df_iterator<NodeRef>;
1663 
1664   static NodeRef getEntryNode(Inverse<GraphRef> N) {
1665     return N.Graph->getExit();
1666   }
1667 
1668   static nodes_iterator nodes_begin(GraphRef N) {
1669     return nodes_iterator::begin(N->getExit());
1670   }
1671 
1672   static nodes_iterator nodes_end(GraphRef N) {
1673     // df_iterator::end() returns an empty iterator so the node used doesn't
1674     // matter.
1675     return nodes_iterator::end(N);
1676   }
1677 };
1678 
/// VPlan models a candidate for vectorization, encoding various decisions
/// taken to produce efficient output IR, including which branches,
/// basic-blocks and output IR instructions to generate, and their cost. VPlan
/// holds a Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an
/// Entry VPBlock.
1684 class VPlan {
1685   friend class VPlanPrinter;
1686   friend class VPSlotTracker;
1687 
1688   /// Hold the single entry to the Hierarchical CFG of the VPlan.
1689   VPBlockBase *Entry;
1690 
1691   /// Holds the VFs applicable to this VPlan.
1692   SmallSetVector<ElementCount, 2> VFs;
1693 
1694   /// Holds the name of the VPlan, for printing.
1695   std::string Name;
1696 
1697   /// Holds all the external definitions created for this VPlan.
1698   // TODO: Introduce a specific representation for external definitions in
1699   // VPlan. External definitions must be immutable and hold a pointer to its
1700   // underlying IR that will be used to implement its structural comparison
1701   // (operators '==' and '<').
1702   SmallPtrSet<VPValue *, 16> VPExternalDefs;
1703 
1704   /// Represents the backedge taken count of the original loop, for folding
1705   /// the tail.
1706   VPValue *BackedgeTakenCount = nullptr;
1707 
1708   /// Holds a mapping between Values and their corresponding VPValue inside
1709   /// VPlan.
1710   Value2VPValueTy Value2VPValue;
1711 
1712   /// Contains all VPValues that been allocated by addVPValue directly and need
1713   /// to be free when the plan's destructor is called.
1714   SmallVector<VPValue *, 16> VPValuesToFree;
1715 
1716   /// Holds the VPLoopInfo analysis for this VPlan.
1717   VPLoopInfo VPLInfo;
1718 
1719 public:
1720   VPlan(VPBlockBase *Entry = nullptr) : Entry(Entry) {
1721     if (Entry)
1722       Entry->setPlan(this);
1723   }
1724 
1725   ~VPlan() {
1726     if (Entry) {
1727       VPValue DummyValue;
1728       for (VPBlockBase *Block : depth_first(Entry))
1729         Block->dropAllReferences(&DummyValue);
1730 
1731       VPBlockBase::deleteCFG(Entry);
1732     }
1733     for (VPValue *VPV : VPValuesToFree)
1734       delete VPV;
1735     if (BackedgeTakenCount)
1736       delete BackedgeTakenCount;
1737     for (VPValue *Def : VPExternalDefs)
1738       delete Def;
1739   }
1740 
1741   /// Generate the IR code for this VPlan.
1742   void execute(struct VPTransformState *State);
1743 
1744   VPBlockBase *getEntry() { return Entry; }
1745   const VPBlockBase *getEntry() const { return Entry; }
1746 
1747   VPBlockBase *setEntry(VPBlockBase *Block) {
1748     Entry = Block;
1749     Block->setPlan(this);
1750     return Entry;
1751   }
1752 
1753   /// The backedge taken count of the original loop.
1754   VPValue *getOrCreateBackedgeTakenCount() {
1755     if (!BackedgeTakenCount)
1756       BackedgeTakenCount = new VPValue();
1757     return BackedgeTakenCount;
1758   }
1759 
1760   void addVF(ElementCount VF) { VFs.insert(VF); }
1761 
1762   bool hasVF(ElementCount VF) { return VFs.count(VF); }
1763 
1764   const std::string &getName() const { return Name; }
1765 
1766   void setName(const Twine &newName) { Name = newName.str(); }
1767 
1768   /// Add \p VPVal to the pool of external definitions if it's not already
1769   /// in the pool.
1770   void addExternalDef(VPValue *VPVal) {
1771     VPExternalDefs.insert(VPVal);
1772   }
1773 
1774   void addVPValue(Value *V) {
1775     assert(V && "Trying to add a null Value to VPlan");
1776     assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
1777     VPValue *VPV = new VPValue(V);
1778     Value2VPValue[V] = VPV;
1779     VPValuesToFree.push_back(VPV);
1780   }
1781 
1782   void addVPValue(Value *V, VPValue *VPV) {
1783     assert(V && "Trying to add a null Value to VPlan");
1784     assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
1785     Value2VPValue[V] = VPV;
1786   }
1787 
1788   VPValue *getVPValue(Value *V) {
1789     assert(V && "Trying to get the VPValue of a null Value");
1790     assert(Value2VPValue.count(V) && "Value does not exist in VPlan");
1791     return Value2VPValue[V];
1792   }
1793 
1794   VPValue *getOrAddVPValue(Value *V) {
1795     assert(V && "Trying to get or add the VPValue of a null Value");
1796     if (!Value2VPValue.count(V))
1797       addVPValue(V);
1798     return getVPValue(V);
1799   }
1800 
1801   void removeVPValueFor(Value *V) { Value2VPValue.erase(V); }
1802 
1803   /// Return the VPLoopInfo analysis for this VPlan.
1804   VPLoopInfo &getVPLoopInfo() { return VPLInfo; }
1805   const VPLoopInfo &getVPLoopInfo() const { return VPLInfo; }
1806 
1807   /// Dump the plan to stderr (for debugging).
1808   void dump() const;
1809 
1810   /// Returns a range mapping the values the range \p Operands to their
1811   /// corresponding VPValues.
1812   iterator_range<mapped_iterator<Use *, std::function<VPValue *(Value *)>>>
1813   mapToVPValues(User::op_range Operands) {
1814     std::function<VPValue *(Value *)> Fn = [this](Value *Op) {
1815       return getOrAddVPValue(Op);
1816     };
1817     return map_range(Operands, Fn);
1818   }
1819 
1820 private:
1821   /// Add to the given dominator tree the header block and every new basic block
1822   /// that was created between it and the latch block, inclusive.
1823   static void updateDominatorTree(DominatorTree *DT, BasicBlock *LoopLatchBB,
1824                                   BasicBlock *LoopPreHeaderBB,
1825                                   BasicBlock *LoopExitBB);
1826 };
1827 
1828 /// VPlanPrinter prints a given VPlan to a given output stream. The printing is
1829 /// indented and follows the dot format.
1830 class VPlanPrinter {
1831   friend inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan);
1832   friend inline raw_ostream &operator<<(raw_ostream &OS,
1833                                         const struct VPlanIngredient &I);
1834 
1835 private:
1836   raw_ostream &OS;
1837   const VPlan &Plan;
1838   unsigned Depth = 0;
1839   unsigned TabWidth = 2;
1840   std::string Indent;
1841   unsigned BID = 0;
1842   SmallDenseMap<const VPBlockBase *, unsigned> BlockID;
1843 
1844   VPSlotTracker SlotTracker;
1845 
1846   VPlanPrinter(raw_ostream &O, const VPlan &P)
1847       : OS(O), Plan(P), SlotTracker(&P) {}
1848 
1849   /// Handle indentation.
1850   void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); }
1851 
1852   /// Print a given \p Block of the Plan.
1853   void dumpBlock(const VPBlockBase *Block);
1854 
1855   /// Print the information related to the CFG edges going out of a given
1856   /// \p Block, followed by printing the successor blocks themselves.
1857   void dumpEdges(const VPBlockBase *Block);
1858 
1859   /// Print a given \p BasicBlock, including its VPRecipes, followed by printing
1860   /// its successor blocks.
1861   void dumpBasicBlock(const VPBasicBlock *BasicBlock);
1862 
1863   /// Print a given \p Region of the Plan.
1864   void dumpRegion(const VPRegionBlock *Region);
1865 
1866   unsigned getOrCreateBID(const VPBlockBase *Block) {
1867     return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++;
1868   }
1869 
1870   const Twine getOrCreateName(const VPBlockBase *Block);
1871 
1872   const Twine getUID(const VPBlockBase *Block);
1873 
1874   /// Print the information related to a CFG edge between two VPBlockBases.
1875   void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden,
1876                 const Twine &Label);
1877 
1878   void dump();
1879 
1880   static void printAsIngredient(raw_ostream &O, const Value *V);
1881 };
1882 
/// Lightweight wrapper selecting the "ingredient" printing format for an IR
/// Value when streamed via the operator<< defined below.
struct VPlanIngredient {
  const Value *V;

  VPlanIngredient(const Value *V) : V(V) {}
};
1888 
/// Print \p I's wrapped Value in VPlan-ingredient form.
inline raw_ostream &operator<<(raw_ostream &OS, const VPlanIngredient &I) {
  VPlanPrinter::printAsIngredient(OS, I.V);
  return OS;
}
1893 
1894 inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
1895   VPlanPrinter Printer(OS, Plan);
1896   Printer.dump();
1897   return OS;
1898 }
1899 
1900 //===----------------------------------------------------------------------===//
1901 // VPlan Utilities
1902 //===----------------------------------------------------------------------===//
1903 
1904 /// Class that provides utilities for VPBlockBases in VPlan.
1905 class VPBlockUtils {
1906 public:
1907   VPBlockUtils() = delete;
1908 
1909   /// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p
1910   /// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p
1911   /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. If \p BlockPtr
1912   /// has more than one successor, its conditional bit is propagated to \p
1913   /// NewBlock. \p NewBlock must have neither successors nor predecessors.
1914   static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) {
1915     assert(NewBlock->getSuccessors().empty() &&
1916            "Can't insert new block with successors.");
1917     // TODO: move successors from BlockPtr to NewBlock when this functionality
1918     // is necessary. For now, setBlockSingleSuccessor will assert if BlockPtr
1919     // already has successors.
1920     BlockPtr->setOneSuccessor(NewBlock);
1921     NewBlock->setPredecessors({BlockPtr});
1922     NewBlock->setParent(BlockPtr->getParent());
1923   }
1924 
1925   /// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p
1926   /// BlockPtr. Add \p IfTrue and \p IfFalse as succesors of \p BlockPtr and \p
1927   /// BlockPtr as predecessor of \p IfTrue and \p IfFalse. Propagate \p BlockPtr
1928   /// parent to \p IfTrue and \p IfFalse. \p Condition is set as the successor
1929   /// selector. \p BlockPtr must have no successors and \p IfTrue and \p IfFalse
1930   /// must have neither successors nor predecessors.
1931   static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse,
1932                                    VPValue *Condition, VPBlockBase *BlockPtr) {
1933     assert(IfTrue->getSuccessors().empty() &&
1934            "Can't insert IfTrue with successors.");
1935     assert(IfFalse->getSuccessors().empty() &&
1936            "Can't insert IfFalse with successors.");
1937     BlockPtr->setTwoSuccessors(IfTrue, IfFalse, Condition);
1938     IfTrue->setPredecessors({BlockPtr});
1939     IfFalse->setPredecessors({BlockPtr});
1940     IfTrue->setParent(BlockPtr->getParent());
1941     IfFalse->setParent(BlockPtr->getParent());
1942   }
1943 
1944   /// Connect VPBlockBases \p From and \p To bi-directionally. Append \p To to
1945   /// the successors of \p From and \p From to the predecessors of \p To. Both
1946   /// VPBlockBases must have the same parent, which can be null. Both
1947   /// VPBlockBases can be already connected to other VPBlockBases.
1948   static void connectBlocks(VPBlockBase *From, VPBlockBase *To) {
1949     assert((From->getParent() == To->getParent()) &&
1950            "Can't connect two block with different parents");
1951     assert(From->getNumSuccessors() < 2 &&
1952            "Blocks can't have more than two successors.");
1953     From->appendSuccessor(To);
1954     To->appendPredecessor(From);
1955   }
1956 
1957   /// Disconnect VPBlockBases \p From and \p To bi-directionally. Remove \p To
1958   /// from the successors of \p From and \p From from the predecessors of \p To.
1959   static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To) {
1960     assert(To && "Successor to disconnect is null.");
1961     From->removeSuccessor(To);
1962     To->removePredecessor(From);
1963   }
1964 
1965   /// Returns true if the edge \p FromBlock -> \p ToBlock is a back-edge.
1966   static bool isBackEdge(const VPBlockBase *FromBlock,
1967                          const VPBlockBase *ToBlock, const VPLoopInfo *VPLI) {
1968     assert(FromBlock->getParent() == ToBlock->getParent() &&
1969            FromBlock->getParent() && "Must be in same region");
1970     const VPLoop *FromLoop = VPLI->getLoopFor(FromBlock);
1971     const VPLoop *ToLoop = VPLI->getLoopFor(ToBlock);
1972     if (!FromLoop || !ToLoop || FromLoop != ToLoop)
1973       return false;
1974 
1975     // A back-edge is a branch from the loop latch to its header.
1976     return ToLoop->isLoopLatch(FromBlock) && ToBlock == ToLoop->getHeader();
1977   }
1978 
1979   /// Returns true if \p Block is a loop latch
1980   static bool blockIsLoopLatch(const VPBlockBase *Block,
1981                                const VPLoopInfo *VPLInfo) {
1982     if (const VPLoop *ParentVPL = VPLInfo->getLoopFor(Block))
1983       return ParentVPL->isLoopLatch(Block);
1984 
1985     return false;
1986   }
1987 
1988   /// Count and return the number of succesors of \p PredBlock excluding any
1989   /// backedges.
1990   static unsigned countSuccessorsNoBE(VPBlockBase *PredBlock,
1991                                       VPLoopInfo *VPLI) {
1992     unsigned Count = 0;
1993     for (VPBlockBase *SuccBlock : PredBlock->getSuccessors()) {
1994       if (!VPBlockUtils::isBackEdge(PredBlock, SuccBlock, VPLI))
1995         Count++;
1996     }
1997     return Count;
1998   }
1999 };
2000 
class VPInterleavedAccessInfo {
  // Maps each VPInstruction to the interleave group it belongs to.
  DenseMap<VPInstruction *, InterleaveGroup<VPInstruction> *>
      InterleaveGroupMap;

  /// Type for mapping of instruction based interleave groups to VPInstruction
  /// interleave groups
  using Old2NewTy = DenseMap<InterleaveGroup<Instruction> *,
                             InterleaveGroup<VPInstruction> *>;

  /// Recursively traverse \p Region and populate VPlan based interleave groups
  /// based on \p IAI.
  void visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New,
                   InterleavedAccessInfo &IAI);
  /// Recursively traverse \p Block and populate VPlan based interleave groups
  /// based on \p IAI.
  void visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
                  InterleavedAccessInfo &IAI);

public:
  VPInterleavedAccessInfo(VPlan &Plan, InterleavedAccessInfo &IAI);

  ~VPInterleavedAccessInfo() {
    SmallPtrSet<InterleaveGroup<VPInstruction> *, 4> DelSet;
    // Avoid releasing a pointer twice: multiple instructions may map to the
    // same group, so deduplicate through a set before deleting.
    for (auto &I : InterleaveGroupMap)
      DelSet.insert(I.second);
    for (auto *Ptr : DelSet)
      delete Ptr;
  }

  /// Get the interleave group that \p Instr belongs to.
  ///
  /// \returns nullptr if doesn't have such group.
  InterleaveGroup<VPInstruction> *
  getInterleaveGroup(VPInstruction *Instr) const {
    return InterleaveGroupMap.lookup(Instr);
  }
};
2039 
2040 /// Class that maps (parts of) an existing VPlan to trees of combined
2041 /// VPInstructions.
class VPlanSlp {
  // Classification of a candidate operand bundle: combining failed, operands
  // are loads, or operands share a common opcode (see getBest).
  enum class OpMode { Failed, Load, Opcode };

  /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as
  /// DenseMap keys.
  struct BundleDenseMapInfo {
    // Sentinel bundles; -1/-2 are never valid VPValue pointers.
    static SmallVector<VPValue *, 4> getEmptyKey() {
      return {reinterpret_cast<VPValue *>(-1)};
    }

    static SmallVector<VPValue *, 4> getTombstoneKey() {
      return {reinterpret_cast<VPValue *>(-2)};
    }

    static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) {
      return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
    }

    static bool isEqual(const SmallVector<VPValue *, 4> &LHS,
                        const SmallVector<VPValue *, 4> &RHS) {
      return LHS == RHS;
    }
  };

  /// Mapping of values in the original VPlan to a combined VPInstruction.
  DenseMap<SmallVector<VPValue *, 4>, VPInstruction *, BundleDenseMapInfo>
      BundleToCombined;

  VPInterleavedAccessInfo &IAI;

  /// Basic block to operate on. For now, only instructions in a single BB are
  /// considered.
  const VPBasicBlock &BB;

  /// Indicates whether we managed to combine all visited instructions or not.
  bool CompletelySLP = true;

  /// Width of the widest combined bundle in bits.
  unsigned WidestBundleBits = 0;

  using MultiNodeOpTy =
      typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;

  // Input operand bundles for the current multi node. Each multi node operand
  // bundle contains values not matching the multi node's opcode. They will
  // be reordered in reorderMultiNodeOps, once we completed building a
  // multi node.
  SmallVector<MultiNodeOpTy, 4> MultiNodeOps;

  /// Indicates whether we are building a multi node currently.
  bool MultiNodeActive = false;

  /// Check if we can vectorize Operands together.
  bool areVectorizable(ArrayRef<VPValue *> Operands) const;

  /// Add combined instruction \p New for the bundle \p Operands.
  void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);

  /// Indicate we hit a bundle we failed to combine. Returns nullptr for now.
  VPInstruction *markFailed();

  /// Reorder operands in the multi node to maximize sequential memory access
  /// and commutative operations.
  SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();

  /// Choose the best candidate to use for the lane after \p Last. The set of
  /// candidates to choose from are values with an opcode matching \p Last's
  /// or loads consecutive to \p Last.
  std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,
                                       SmallPtrSetImpl<VPValue *> &Candidates,
                                       VPInterleavedAccessInfo &IAI);

  /// Print bundle \p Values to dbgs().
  void dumpBundle(ArrayRef<VPValue *> Values);

public:
  VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}

  ~VPlanSlp() = default;

  /// Tries to build an SLP tree rooted at \p Operands and returns a
  /// VPInstruction combining \p Operands, if they can be combined.
  VPInstruction *buildGraph(ArrayRef<VPValue *> Operands);

  /// Return the width of the widest combined bundle in bits.
  unsigned getWidestBundleBits() const { return WidestBundleBits; }

  /// Return true if all visited instructions can be combined.
  bool isCompletelySLP() const { return CompletelySLP; }
};
2132 } // end namespace llvm
2133 
2134 #endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
2135