1 //===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file contains the declarations of the Vectorization Plan base classes:
11 /// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12 ///    VPBlockBase, together implementing a Hierarchical CFG;
13 /// 2. Specializations of GraphTraits that allow VPBlockBase graphs to be
14 ///    treated as proper graphs for generic algorithms;
15 /// 3. Pure virtual VPRecipeBase serving as the base class for recipes contained
16 ///    within VPBasicBlocks;
17 /// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18 ///    instruction;
19 /// 5. The VPlan class holding a candidate for vectorization;
20 /// 6. The VPlanPrinter class providing a way to print a plan in dot format;
21 /// These are documented in docs/VectorizationPlan.rst.
22 //
23 //===----------------------------------------------------------------------===//
24 
25 #ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26 #define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
27 
28 #include "VPlanValue.h"
29 #include "llvm/ADT/DenseMap.h"
30 #include "llvm/ADT/DepthFirstIterator.h"
31 #include "llvm/ADT/GraphTraits.h"
32 #include "llvm/ADT/MapVector.h"
33 #include "llvm/ADT/Optional.h"
34 #include "llvm/ADT/SmallBitVector.h"
35 #include "llvm/ADT/SmallPtrSet.h"
36 #include "llvm/ADT/SmallVector.h"
37 #include "llvm/ADT/Twine.h"
38 #include "llvm/ADT/ilist.h"
39 #include "llvm/ADT/ilist_node.h"
40 #include "llvm/Analysis/LoopInfo.h"
41 #include "llvm/Analysis/VectorUtils.h"
42 #include "llvm/IR/DebugLoc.h"
43 #include "llvm/IR/FMF.h"
44 #include <algorithm>
45 #include <cassert>
46 #include <cstddef>
47 #include <string>
48 
49 namespace llvm {
50 
51 class BasicBlock;
52 class DominatorTree;
53 class InductionDescriptor;
54 class InnerLoopVectorizer;
55 class IRBuilderBase;
56 class LoopInfo;
57 class raw_ostream;
58 class RecurrenceDescriptor;
59 class Value;
60 class VPBasicBlock;
61 class VPRegionBlock;
62 class VPlan;
63 class VPReplicateRecipe;
64 class VPlanSlp;
65 
66 /// Returns a calculation for the total number of elements for a given \p VF.
67 /// For fixed width vectors this value is a constant, whereas for scalable
68 /// vectors it is an expression determined at runtime.
69 Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF);
70 
71 /// Return a value for Step multiplied by VF.
72 Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
73                        int64_t Step);
74 
75 /// A range of powers-of-2 vectorization factors with fixed start and
76 /// adjustable end. The range includes start and excludes end, e.g.,:
77 /// [1, 9) = {1, 2, 4, 8}
78 struct VFRange {
79   // A power of 2.
80   const ElementCount Start;
81 
82   // Need not be a power of 2. If End <= Start range is empty.
83   ElementCount End;
84 
85   bool isEmpty() const {
86     return End.getKnownMinValue() <= Start.getKnownMinValue();
87   }
88 
89   VFRange(const ElementCount &Start, const ElementCount &End)
90       : Start(Start), End(End) {
91     assert(Start.isScalable() == End.isScalable() &&
92            "Both Start and End should have the same scalable flag");
93     assert(isPowerOf2_32(Start.getKnownMinValue()) &&
94            "Expected Start to be a power of 2");
95   }
96 };
97 
98 using VPlanPtr = std::unique_ptr<VPlan>;
99 
100 /// In what follows, the term "input IR" refers to code that is fed into the
101 /// vectorizer whereas the term "output IR" refers to code that is generated by
102 /// the vectorizer.
103 
104 /// VPLane provides a way to access lanes in both fixed width and scalable
105 /// vectors, where for the latter the lane index sometimes needs calculating
106 /// as a runtime expression.
107 class VPLane {
108 public:
109   /// Kind describes how to interpret Lane.
110   enum class Kind : uint8_t {
111     /// For First, Lane is the index into the first N elements of a
112     /// fixed-vector <N x <ElTy>> or a scalable vector <vscale x N x <ElTy>>.
113     First,
114     /// For ScalableLast, Lane is the offset from the start of the last
115     /// N-element subvector in a scalable vector <vscale x N x <ElTy>>. For
116     /// example, a Lane of 0 corresponds to lane `(vscale - 1) * N`, a Lane of
117     /// 1 corresponds to `((vscale - 1) * N) + 1`, etc.
118     ScalableLast
119   };
120 
121 private:
122   /// in [0..VF)
123   unsigned Lane;
124 
125   /// Indicates how the Lane should be interpreted, as described above.
126   Kind LaneKind;
127 
128 public:
129   VPLane(unsigned Lane, Kind LaneKind) : Lane(Lane), LaneKind(LaneKind) {}
130 
131   static VPLane getFirstLane() { return VPLane(0, VPLane::Kind::First); }
132 
133   static VPLane getLastLaneForVF(const ElementCount &VF) {
134     unsigned LaneOffset = VF.getKnownMinValue() - 1;
135     Kind LaneKind;
136     if (VF.isScalable())
137       // In this case 'LaneOffset' refers to the offset from the start of the
138       // last subvector with VF.getKnownMinValue() elements.
139       LaneKind = VPLane::Kind::ScalableLast;
140     else
141       LaneKind = VPLane::Kind::First;
142     return VPLane(LaneOffset, LaneKind);
143   }
144 
145   /// Returns a compile-time known value for the lane index and asserts if the
146   /// lane can only be calculated at runtime.
147   unsigned getKnownLane() const {
148     assert(LaneKind == Kind::First);
149     return Lane;
150   }
151 
152   /// Returns an expression describing the lane index that can be used at
153   /// runtime.
154   Value *getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const;
155 
156   /// Returns the Kind of lane offset.
157   Kind getKind() const { return LaneKind; }
158 
159   /// Returns true if this is the first lane of the whole vector.
160   bool isFirstLane() const { return Lane == 0 && LaneKind == Kind::First; }
161 
162   /// Maps the lane to a cache index based on \p VF.
163   unsigned mapToCacheIndex(const ElementCount &VF) const {
164     switch (LaneKind) {
165     case VPLane::Kind::ScalableLast:
166       assert(VF.isScalable() && Lane < VF.getKnownMinValue());
167       return VF.getKnownMinValue() + Lane;
168     default:
169       assert(Lane < VF.getKnownMinValue());
170       return Lane;
171     }
172   }
173 
174   /// Returns the maxmimum number of lanes that we are able to consider
175   /// caching for \p VF.
176   static unsigned getNumCachedLanes(const ElementCount &VF) {
177     return VF.getKnownMinValue() * (VF.isScalable() ? 2 : 1);
178   }
179 };
180 
181 /// VPIteration represents a single point in the iteration space of the output
182 /// (vectorized and/or unrolled) IR loop.
183 struct VPIteration {
184   /// in [0..UF)
185   unsigned Part;
186 
187   VPLane Lane;
188 
189   VPIteration(unsigned Part, unsigned Lane,
190               VPLane::Kind Kind = VPLane::Kind::First)
191       : Part(Part), Lane(Lane, Kind) {}
192 
193   VPIteration(unsigned Part, const VPLane &Lane) : Part(Part), Lane(Lane) {}
194 
195   bool isFirstIteration() const { return Part == 0 && Lane.isFirstLane(); }
196 };
197 
198 /// VPTransformState holds information passed down when "executing" a VPlan,
199 /// needed for generating the output IR.
200 struct VPTransformState {
201   VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI,
202                    DominatorTree *DT, IRBuilderBase &Builder,
203                    InnerLoopVectorizer *ILV, VPlan *Plan)
204       : VF(VF), UF(UF), LI(LI), DT(DT), Builder(Builder), ILV(ILV), Plan(Plan) {
205   }
206 
207   /// The chosen Vectorization and Unroll Factors of the loop being vectorized.
208   ElementCount VF;
209   unsigned UF;
210 
211   /// Hold the indices to generate specific scalar instructions. Null indicates
212   /// that all instances are to be generated, using either scalar or vector
213   /// instructions.
214   Optional<VPIteration> Instance;
215 
216   struct DataState {
217     /// A type for vectorized values in the new loop. Each value from the
218     /// original loop, when vectorized, is represented by UF vector values in
219     /// the new unrolled loop, where UF is the unroll factor.
220     typedef SmallVector<Value *, 2> PerPartValuesTy;
221 
222     DenseMap<VPValue *, PerPartValuesTy> PerPartOutput;
223 
224     using ScalarsPerPartValuesTy = SmallVector<SmallVector<Value *, 4>, 2>;
225     DenseMap<VPValue *, ScalarsPerPartValuesTy> PerPartScalars;
226   } Data;
227 
228   /// Get the generated Value for a given VPValue and a given Part. Note that
229   /// as some Defs are still created by ILV and managed in its ValueMap, this
230   /// method will delegate the call to ILV in such cases in order to provide
231   /// callers a consistent API.
232   /// \see set.
233   Value *get(VPValue *Def, unsigned Part);
234 
235   /// Get the generated Value for a given VPValue and given Part and Lane.
236   Value *get(VPValue *Def, const VPIteration &Instance);
237 
238   bool hasVectorValue(VPValue *Def, unsigned Part) {
239     auto I = Data.PerPartOutput.find(Def);
240     return I != Data.PerPartOutput.end() && Part < I->second.size() &&
241            I->second[Part];
242   }
243 
244   bool hasAnyVectorValue(VPValue *Def) const {
245     return Data.PerPartOutput.find(Def) != Data.PerPartOutput.end();
246   }
247 
248   bool hasScalarValue(VPValue *Def, VPIteration Instance) {
249     auto I = Data.PerPartScalars.find(Def);
250     if (I == Data.PerPartScalars.end())
251       return false;
252     unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
253     return Instance.Part < I->second.size() &&
254            CacheIdx < I->second[Instance.Part].size() &&
255            I->second[Instance.Part][CacheIdx];
256   }
257 
258   /// Set the generated Value for a given VPValue and a given Part.
259   void set(VPValue *Def, Value *V, unsigned Part) {
260     if (!Data.PerPartOutput.count(Def)) {
261       DataState::PerPartValuesTy Entry(UF);
262       Data.PerPartOutput[Def] = Entry;
263     }
264     Data.PerPartOutput[Def][Part] = V;
265   }
266   /// Reset an existing vector value for \p Def and a given \p Part.
267   void reset(VPValue *Def, Value *V, unsigned Part) {
268     auto Iter = Data.PerPartOutput.find(Def);
269     assert(Iter != Data.PerPartOutput.end() &&
270            "need to overwrite existing value");
271     Iter->second[Part] = V;
272   }
273 
274   /// Set the generated scalar \p V for \p Def and the given \p Instance.
275   void set(VPValue *Def, Value *V, const VPIteration &Instance) {
276     auto Iter = Data.PerPartScalars.insert({Def, {}});
277     auto &PerPartVec = Iter.first->second;
278     while (PerPartVec.size() <= Instance.Part)
279       PerPartVec.emplace_back();
280     auto &Scalars = PerPartVec[Instance.Part];
281     unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
282     while (Scalars.size() <= CacheIdx)
283       Scalars.push_back(nullptr);
284     assert(!Scalars[CacheIdx] && "should overwrite existing value");
285     Scalars[CacheIdx] = V;
286   }
287 
288   /// Reset an existing scalar value for \p Def and a given \p Instance.
289   void reset(VPValue *Def, Value *V, const VPIteration &Instance) {
290     auto Iter = Data.PerPartScalars.find(Def);
291     assert(Iter != Data.PerPartScalars.end() &&
292            "need to overwrite existing value");
293     assert(Instance.Part < Iter->second.size() &&
294            "need to overwrite existing value");
295     unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
296     assert(CacheIdx < Iter->second[Instance.Part].size() &&
297            "need to overwrite existing value");
298     Iter->second[Instance.Part][CacheIdx] = V;
299   }
300 
301   /// Hold state information used when constructing the CFG of the output IR,
302   /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
303   struct CFGState {
304     /// The previous VPBasicBlock visited. Initially set to null.
305     VPBasicBlock *PrevVPBB = nullptr;
306 
307     /// The previous IR BasicBlock created or used. Initially set to the new
308     /// header BasicBlock.
309     BasicBlock *PrevBB = nullptr;
310 
311     /// The last IR BasicBlock in the output IR. Set to the exit block of the
312     /// vector loop.
313     BasicBlock *ExitBB = nullptr;
314 
315     /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
316     /// of replication, maps the BasicBlock of the last replica created.
317     SmallDenseMap<VPBasicBlock *, BasicBlock *> VPBB2IRBB;
318 
319     CFGState() = default;
320 
321     /// Returns the BasicBlock* mapped to the pre-header of the loop region
322     /// containing \p R.
323     BasicBlock *getPreheaderBBFor(VPRecipeBase *R);
324   } CFG;
325 
326   /// Hold a pointer to LoopInfo to register new basic blocks in the loop.
327   LoopInfo *LI;
328 
329   /// Hold a pointer to Dominator Tree to register new basic blocks in the loop.
330   DominatorTree *DT;
331 
332   /// Hold a reference to the IRBuilder used to generate output IR code.
333   IRBuilderBase &Builder;
334 
335   VPValue2ValueTy VPValue2Value;
336 
337   /// Hold the canonical scalar IV of the vector loop (start=0, step=VF*UF).
338   Value *CanonicalIV = nullptr;
339 
340   /// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
341   InnerLoopVectorizer *ILV;
342 
343   /// Pointer to the VPlan code is generated for.
344   VPlan *Plan;
345 
346   /// Holds recipes that may generate a poison value that is used after
347   /// vectorization, even when their operands are not poison.
348   SmallPtrSet<VPRecipeBase *, 16> MayGeneratePoisonRecipes;
349 
350   /// The loop object for the current parent region, or nullptr.
351   Loop *CurrentVectorLoop = nullptr;
352 };
353 
354 /// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
355 /// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
356 class VPBlockBase {
357   friend class VPBlockUtils;
358 
359   const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
360 
361   /// An optional name for the block.
362   std::string Name;
363 
364   /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
365   /// it is a topmost VPBlockBase.
366   VPRegionBlock *Parent = nullptr;
367 
368   /// List of predecessor blocks.
369   SmallVector<VPBlockBase *, 1> Predecessors;
370 
371   /// List of successor blocks.
372   SmallVector<VPBlockBase *, 1> Successors;
373 
374   /// VPlan containing the block. Can only be set on the entry block of the
375   /// plan.
376   VPlan *Plan = nullptr;
377 
378   /// Add \p Successor as the last successor to this block.
379   void appendSuccessor(VPBlockBase *Successor) {
380     assert(Successor && "Cannot add nullptr successor!");
381     Successors.push_back(Successor);
382   }
383 
384   /// Add \p Predecessor as the last predecessor to this block.
385   void appendPredecessor(VPBlockBase *Predecessor) {
386     assert(Predecessor && "Cannot add nullptr predecessor!");
387     Predecessors.push_back(Predecessor);
388   }
389 
390   /// Remove \p Predecessor from the predecessors of this block.
391   void removePredecessor(VPBlockBase *Predecessor) {
392     auto Pos = find(Predecessors, Predecessor);
393     assert(Pos && "Predecessor does not exist");
394     Predecessors.erase(Pos);
395   }
396 
397   /// Remove \p Successor from the successors of this block.
398   void removeSuccessor(VPBlockBase *Successor) {
399     auto Pos = find(Successors, Successor);
400     assert(Pos && "Successor does not exist");
401     Successors.erase(Pos);
402   }
403 
404 protected:
405   VPBlockBase(const unsigned char SC, const std::string &N)
406       : SubclassID(SC), Name(N) {}
407 
408 public:
409   /// An enumeration for keeping track of the concrete subclass of VPBlockBase
410   /// that are actually instantiated. Values of this enumeration are kept in the
411   /// SubclassID field of the VPBlockBase objects. They are used for concrete
412   /// type identification.
413   using VPBlockTy = enum { VPBasicBlockSC, VPRegionBlockSC };
414 
415   using VPBlocksTy = SmallVectorImpl<VPBlockBase *>;
416 
417   virtual ~VPBlockBase() = default;
418 
419   const std::string &getName() const { return Name; }
420 
421   void setName(const Twine &newName) { Name = newName.str(); }
422 
423   /// \return an ID for the concrete type of this object.
424   /// This is used to implement the classof checks. This should not be used
425   /// for any other purpose, as the values may change as LLVM evolves.
426   unsigned getVPBlockID() const { return SubclassID; }
427 
428   VPRegionBlock *getParent() { return Parent; }
429   const VPRegionBlock *getParent() const { return Parent; }
430 
431   /// \return A pointer to the plan containing the current block.
432   VPlan *getPlan();
433   const VPlan *getPlan() const;
434 
435   /// Sets the pointer of the plan containing the block. The block must be the
436   /// entry block into the VPlan.
437   void setPlan(VPlan *ParentPlan);
438 
439   void setParent(VPRegionBlock *P) { Parent = P; }
440 
441   /// \return the VPBasicBlock that is the entry of this VPBlockBase,
442   /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
443   /// VPBlockBase is a VPBasicBlock, it is returned.
444   const VPBasicBlock *getEntryBasicBlock() const;
445   VPBasicBlock *getEntryBasicBlock();
446 
447   /// \return the VPBasicBlock that is the exiting this VPBlockBase,
448   /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
449   /// VPBlockBase is a VPBasicBlock, it is returned.
450   const VPBasicBlock *getExitingBasicBlock() const;
451   VPBasicBlock *getExitingBasicBlock();
452 
453   const VPBlocksTy &getSuccessors() const { return Successors; }
454   VPBlocksTy &getSuccessors() { return Successors; }
455 
456   iterator_range<VPBlockBase **> successors() { return Successors; }
457 
458   const VPBlocksTy &getPredecessors() const { return Predecessors; }
459   VPBlocksTy &getPredecessors() { return Predecessors; }
460 
461   /// \return the successor of this VPBlockBase if it has a single successor.
462   /// Otherwise return a null pointer.
463   VPBlockBase *getSingleSuccessor() const {
464     return (Successors.size() == 1 ? *Successors.begin() : nullptr);
465   }
466 
467   /// \return the predecessor of this VPBlockBase if it has a single
468   /// predecessor. Otherwise return a null pointer.
469   VPBlockBase *getSinglePredecessor() const {
470     return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
471   }
472 
473   size_t getNumSuccessors() const { return Successors.size(); }
474   size_t getNumPredecessors() const { return Predecessors.size(); }
475 
476   /// An Enclosing Block of a block B is any block containing B, including B
477   /// itself. \return the closest enclosing block starting from "this", which
478   /// has successors. \return the root enclosing block if all enclosing blocks
479   /// have no successors.
480   VPBlockBase *getEnclosingBlockWithSuccessors();
481 
482   /// \return the closest enclosing block starting from "this", which has
483   /// predecessors. \return the root enclosing block if all enclosing blocks
484   /// have no predecessors.
485   VPBlockBase *getEnclosingBlockWithPredecessors();
486 
487   /// \return the successors either attached directly to this VPBlockBase or, if
488   /// this VPBlockBase is the exit block of a VPRegionBlock and has no
489   /// successors of its own, search recursively for the first enclosing
490   /// VPRegionBlock that has successors and return them. If no such
491   /// VPRegionBlock exists, return the (empty) successors of the topmost
492   /// VPBlockBase reached.
493   const VPBlocksTy &getHierarchicalSuccessors() {
494     return getEnclosingBlockWithSuccessors()->getSuccessors();
495   }
496 
497   /// \return the hierarchical successor of this VPBlockBase if it has a single
498   /// hierarchical successor. Otherwise return a null pointer.
499   VPBlockBase *getSingleHierarchicalSuccessor() {
500     return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
501   }
502 
503   /// \return the predecessors either attached directly to this VPBlockBase or,
504   /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
505   /// predecessors of its own, search recursively for the first enclosing
506   /// VPRegionBlock that has predecessors and return them. If no such
507   /// VPRegionBlock exists, return the (empty) predecessors of the topmost
508   /// VPBlockBase reached.
509   const VPBlocksTy &getHierarchicalPredecessors() {
510     return getEnclosingBlockWithPredecessors()->getPredecessors();
511   }
512 
513   /// \return the hierarchical predecessor of this VPBlockBase if it has a
514   /// single hierarchical predecessor. Otherwise return a null pointer.
515   VPBlockBase *getSingleHierarchicalPredecessor() {
516     return getEnclosingBlockWithPredecessors()->getSinglePredecessor();
517   }
518 
519   /// Set a given VPBlockBase \p Successor as the single successor of this
520   /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
521   /// This VPBlockBase must have no successors.
522   void setOneSuccessor(VPBlockBase *Successor) {
523     assert(Successors.empty() && "Setting one successor when others exist.");
524     appendSuccessor(Successor);
525   }
526 
527   /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
528   /// successors of this VPBlockBase. This VPBlockBase is not added as
529   /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
530   /// successors.
531   void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
532     assert(Successors.empty() && "Setting two successors when others exist.");
533     appendSuccessor(IfTrue);
534     appendSuccessor(IfFalse);
535   }
536 
537   /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
538   /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
539   /// as successor of any VPBasicBlock in \p NewPreds.
540   void setPredecessors(ArrayRef<VPBlockBase *> NewPreds) {
541     assert(Predecessors.empty() && "Block predecessors already set.");
542     for (auto *Pred : NewPreds)
543       appendPredecessor(Pred);
544   }
545 
546   /// Remove all the predecessor of this block.
547   void clearPredecessors() { Predecessors.clear(); }
548 
549   /// Remove all the successors of this block.
550   void clearSuccessors() { Successors.clear(); }
551 
552   /// The method which generates the output IR that correspond to this
553   /// VPBlockBase, thereby "executing" the VPlan.
554   virtual void execute(struct VPTransformState *State) = 0;
555 
556   /// Delete all blocks reachable from a given VPBlockBase, inclusive.
557   static void deleteCFG(VPBlockBase *Entry);
558 
559   /// Return true if it is legal to hoist instructions into this block.
560   bool isLegalToHoistInto() {
561     // There are currently no constraints that prevent an instruction to be
562     // hoisted into a VPBlockBase.
563     return true;
564   }
565 
566   /// Replace all operands of VPUsers in the block with \p NewValue and also
567   /// replaces all uses of VPValues defined in the block with NewValue.
568   virtual void dropAllReferences(VPValue *NewValue) = 0;
569 
570 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
571   void printAsOperand(raw_ostream &OS, bool PrintType) const {
572     OS << getName();
573   }
574 
575   /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
576   /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
577   /// consequtive numbers.
578   ///
579   /// Note that the numbering is applied to the whole VPlan, so printing
580   /// individual blocks is consistent with the whole VPlan printing.
581   virtual void print(raw_ostream &O, const Twine &Indent,
582                      VPSlotTracker &SlotTracker) const = 0;
583 
584   /// Print plain-text dump of this VPlan to \p O.
585   void print(raw_ostream &O) const {
586     VPSlotTracker SlotTracker(getPlan());
587     print(O, "", SlotTracker);
588   }
589 
590   /// Print the successors of this block to \p O, prefixing all lines with \p
591   /// Indent.
592   void printSuccessors(raw_ostream &O, const Twine &Indent) const;
593 
594   /// Dump this VPBlockBase to dbgs().
595   LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
596 #endif
597 };
598 
599 /// A value that is used outside the VPlan. The operand of the user needs to be
600 /// added to the associated LCSSA phi node.
601 class VPLiveOut : public VPUser {
602   PHINode *Phi;
603 
604 public:
605   VPLiveOut(PHINode *Phi, VPValue *Op)
606       : VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi) {}
607 
608   /// Fixup the wrapped LCSSA phi node in the unique exit block.  This simply
609   /// means we need to add the appropriate incoming value from the middle
610   /// block as exiting edges from the scalar epilogue loop (if present) are
611   /// already in place, and we exit the vector loop exclusively to the middle
612   /// block.
613   void fixPhi(VPlan &Plan, VPTransformState &State);
614 
615   /// Returns true if the VPLiveOut uses scalars of operand \p Op.
616   bool usesScalars(const VPValue *Op) const override {
617     assert(is_contained(operands(), Op) &&
618            "Op must be an operand of the recipe");
619     return true;
620   }
621 
622   PHINode *getPhi() const { return Phi; }
623 };
624 
/// VPRecipeBase is a base class modeling a sequence of one or more output IR
/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
/// and is responsible for deleting its defined values. Single-value
/// VPRecipeBases that also inherit from VPValue must make sure to inherit from
/// VPRecipeBase before VPValue.
class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
                     public VPDef,
                     public VPUser {
  friend VPBasicBlock;
  friend class VPBlockUtils;

  /// Each VPRecipe belongs to a single VPBasicBlock.
  VPBasicBlock *Parent = nullptr;

public:
  /// Construct a recipe with subclass ID \p SC and the given \p Operands.
  VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands)
      : VPDef(SC), VPUser(Operands, VPUser::VPUserID::Recipe) {}

  /// Construct a recipe with subclass ID \p SC from an iterator range of
  /// operands.
  template <typename IterT>
  VPRecipeBase(const unsigned char SC, iterator_range<IterT> Operands)
      : VPDef(SC), VPUser(Operands, VPUser::VPUserID::Recipe) {}
  virtual ~VPRecipeBase() = default;

  /// \return the VPBasicBlock which this VPRecipe belongs to.
  VPBasicBlock *getParent() { return Parent; }
  const VPBasicBlock *getParent() const { return Parent; }

  /// The method which generates the output IR instructions that correspond to
  /// this VPRecipe, thereby "executing" the VPlan.
  virtual void execute(struct VPTransformState &State) = 0;

  /// Insert an unlinked recipe into a basic block immediately before
  /// the specified recipe.
  void insertBefore(VPRecipeBase *InsertPos);
  /// Insert an unlinked recipe into \p BB immediately before the insertion
  /// point \p IP;
  void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);

  /// Insert an unlinked Recipe into a basic block immediately after
  /// the specified Recipe.
  void insertAfter(VPRecipeBase *InsertPos);

  /// Unlink this recipe from its current VPBasicBlock and insert it into
  /// the VPBasicBlock that MovePos lives in, right after MovePos.
  void moveAfter(VPRecipeBase *MovePos);

  /// Unlink this recipe and insert into BB before I.
  ///
  /// \pre I is a valid iterator into BB.
  void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);

  /// This method unlinks 'this' from the containing basic block, but does not
  /// delete it.
  void removeFromParent();

  /// This method unlinks 'this' from the containing basic block and deletes it.
  ///
  /// \returns an iterator pointing to the element after the erased one
  iplist<VPRecipeBase>::iterator eraseFromParent();

  /// Returns the underlying instruction, if the recipe is a VPValue or nullptr
  /// otherwise.
  Instruction *getUnderlyingInstr() {
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue());
  }
  const Instruction *getUnderlyingInstr() const {
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue());
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    // All VPDefs are also VPRecipeBases.
    return true;
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast from
  /// VPUser: a VPUser is a recipe iff it carries the Recipe user ID.
  static inline bool classof(const VPUser *U) {
    return U->getVPUserID() == VPUser::VPUserID::Recipe;
  }

  /// Returns true if the recipe may have side-effects.
  bool mayHaveSideEffects() const;

  /// Returns true for PHI-like recipes. Relies on PHI recipe subclass IDs
  /// occupying the contiguous [VPFirstPHISC, VPLastPHISC] range.
  bool isPhi() const {
    return getVPDefID() >= VPFirstPHISC && getVPDefID() <= VPLastPHISC;
  }

  /// Returns true if the recipe may read from memory.
  bool mayReadFromMemory() const;

  /// Returns true if the recipe may write to memory.
  bool mayWriteToMemory() const;

  /// Returns true if the recipe may read from or write to memory.
  bool mayReadOrWriteMemory() const {
    return mayReadFromMemory() || mayWriteToMemory();
  }
};
723 
724 inline bool VPUser::classof(const VPDef *Def) {
725   return Def->getVPDefID() == VPRecipeBase::VPInstructionSC ||
726          Def->getVPDefID() == VPRecipeBase::VPWidenSC ||
727          Def->getVPDefID() == VPRecipeBase::VPWidenCallSC ||
728          Def->getVPDefID() == VPRecipeBase::VPWidenSelectSC ||
729          Def->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
730          Def->getVPDefID() == VPRecipeBase::VPBlendSC ||
731          Def->getVPDefID() == VPRecipeBase::VPInterleaveSC ||
732          Def->getVPDefID() == VPRecipeBase::VPReplicateSC ||
733          Def->getVPDefID() == VPRecipeBase::VPReductionSC ||
734          Def->getVPDefID() == VPRecipeBase::VPBranchOnMaskSC ||
735          Def->getVPDefID() == VPRecipeBase::VPWidenMemoryInstructionSC;
736 }
737 
738 /// This is a concrete Recipe that models a single VPlan-level instruction.
739 /// While as any Recipe it may generate a sequence of IR instructions when
740 /// executed, these instructions would always form a single-def expression as
741 /// the VPInstruction is also a single def-use vertex.
742 class VPInstruction : public VPRecipeBase, public VPValue {
743   friend class VPlanSlp;
744 
745 public:
  /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
747   enum {
748     FirstOrderRecurrenceSplice =
749         Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
750                                       // values of a first-order recurrence.
751     Not,
752     ICmpULE,
753     SLPLoad,
754     SLPStore,
755     ActiveLaneMask,
756     CanonicalIVIncrement,
757     CanonicalIVIncrementNUW,
758     BranchOnCount,
759     BranchOnCond
760   };
761 
762 private:
763   typedef unsigned char OpcodeTy;
764   OpcodeTy Opcode;
765   FastMathFlags FMF;
766   DebugLoc DL;
767 
768   /// Utility method serving execute(): generates a single instance of the
769   /// modeled instruction.
770   void generateInstruction(VPTransformState &State, unsigned Part);
771 
772 protected:
773   void setUnderlyingInstr(Instruction *I) { setUnderlyingValue(I); }
774 
775 public:
776   VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands, DebugLoc DL)
777       : VPRecipeBase(VPRecipeBase::VPInstructionSC, Operands),
778         VPValue(VPValue::VPVInstructionSC, nullptr, this), Opcode(Opcode),
779         DL(DL) {}
780 
781   VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
782                 DebugLoc DL = {})
783       : VPInstruction(Opcode, ArrayRef<VPValue *>(Operands), DL) {}
784 
785   /// Method to support type inquiry through isa, cast, and dyn_cast.
786   static inline bool classof(const VPValue *V) {
787     return V->getVPValueID() == VPValue::VPVInstructionSC;
788   }
789 
790   VPInstruction *clone() const {
791     SmallVector<VPValue *, 2> Operands(operands());
792     return new VPInstruction(Opcode, Operands, DL);
793   }
794 
795   /// Method to support type inquiry through isa, cast, and dyn_cast.
796   static inline bool classof(const VPDef *R) {
797     return R->getVPDefID() == VPRecipeBase::VPInstructionSC;
798   }
799 
800   /// Extra classof implementations to allow directly casting from VPUser ->
801   /// VPInstruction.
802   static inline bool classof(const VPUser *U) {
803     auto *R = dyn_cast<VPRecipeBase>(U);
804     return R && R->getVPDefID() == VPRecipeBase::VPInstructionSC;
805   }
806   static inline bool classof(const VPRecipeBase *R) {
807     return R->getVPDefID() == VPRecipeBase::VPInstructionSC;
808   }
809 
810   unsigned getOpcode() const { return Opcode; }
811 
812   /// Generate the instruction.
813   /// TODO: We currently execute only per-part unless a specific instance is
814   /// provided.
815   void execute(VPTransformState &State) override;
816 
817 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
818   /// Print the VPInstruction to \p O.
819   void print(raw_ostream &O, const Twine &Indent,
820              VPSlotTracker &SlotTracker) const override;
821 
822   /// Print the VPInstruction to dbgs() (for debugging).
823   LLVM_DUMP_METHOD void dump() const;
824 #endif
825 
826   /// Return true if this instruction may modify memory.
827   bool mayWriteToMemory() const {
828     // TODO: we can use attributes of the called function to rule out memory
829     //       modifications.
830     return Opcode == Instruction::Store || Opcode == Instruction::Call ||
831            Opcode == Instruction::Invoke || Opcode == SLPStore;
832   }
833 
834   bool hasResult() const {
835     // CallInst may or may not have a result, depending on the called function.
836     // Conservatively return calls have results for now.
837     switch (getOpcode()) {
838     case Instruction::Ret:
839     case Instruction::Br:
840     case Instruction::Store:
841     case Instruction::Switch:
842     case Instruction::IndirectBr:
843     case Instruction::Resume:
844     case Instruction::CatchRet:
845     case Instruction::Unreachable:
846     case Instruction::Fence:
847     case Instruction::AtomicRMW:
848     case VPInstruction::BranchOnCond:
849     case VPInstruction::BranchOnCount:
850       return false;
851     default:
852       return true;
853     }
854   }
855 
856   /// Set the fast-math flags.
857   void setFastMathFlags(FastMathFlags FMFNew);
858 
859   /// Returns true if the recipe only uses the first lane of operand \p Op.
860   bool onlyFirstLaneUsed(const VPValue *Op) const override {
861     assert(is_contained(operands(), Op) &&
862            "Op must be an operand of the recipe");
863     if (getOperand(0) != Op)
864       return false;
865     switch (getOpcode()) {
866     default:
867       return false;
868     case VPInstruction::ActiveLaneMask:
869     case VPInstruction::CanonicalIVIncrement:
870     case VPInstruction::CanonicalIVIncrementNUW:
871     case VPInstruction::BranchOnCount:
872       return true;
873     };
874     llvm_unreachable("switch should return");
875   }
876 };
877 
/// VPWidenRecipe is a recipe for producing a widened (vector-typed) copy of
/// its ingredient. This recipe covers most of the traditional vectorization
/// cases where each ingredient transforms into a vectorized version of itself.
class VPWidenRecipe : public VPRecipeBase, public VPValue {
public:
  /// Create a widening recipe for \p I, with \p Operands as the recipe's
  /// operands (one VPValue per IR operand of \p I).
  template <typename IterT>
  VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands)
      : VPRecipeBase(VPRecipeBase::VPWidenSC, Operands),
        VPValue(VPValue::VPVWidenSC, &I, this) {}

  ~VPWidenRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    return D->getVPDefID() == VPRecipeBase::VPWidenSC;
  }
  static inline bool classof(const VPValue *V) {
    return V->getVPValueID() == VPValue::VPVWidenSC;
  }

  /// Produce widened copies of all Ingredients.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};
907 
908 /// A recipe for widening Call instructions.
909 class VPWidenCallRecipe : public VPRecipeBase, public VPValue {
910 
911 public:
912   template <typename IterT>
913   VPWidenCallRecipe(CallInst &I, iterator_range<IterT> CallArguments)
914       : VPRecipeBase(VPRecipeBase::VPWidenCallSC, CallArguments),
915         VPValue(VPValue::VPVWidenCallSC, &I, this) {}
916 
917   ~VPWidenCallRecipe() override = default;
918 
919   /// Method to support type inquiry through isa, cast, and dyn_cast.
920   static inline bool classof(const VPDef *D) {
921     return D->getVPDefID() == VPRecipeBase::VPWidenCallSC;
922   }
923 
924   /// Produce a widened version of the call instruction.
925   void execute(VPTransformState &State) override;
926 
927 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
928   /// Print the recipe.
929   void print(raw_ostream &O, const Twine &Indent,
930              VPSlotTracker &SlotTracker) const override;
931 #endif
932 };
933 
934 /// A recipe for widening select instructions.
935 class VPWidenSelectRecipe : public VPRecipeBase, public VPValue {
936 
937   /// Is the condition of the select loop invariant?
938   bool InvariantCond;
939 
940 public:
941   template <typename IterT>
942   VPWidenSelectRecipe(SelectInst &I, iterator_range<IterT> Operands,
943                       bool InvariantCond)
944       : VPRecipeBase(VPRecipeBase::VPWidenSelectSC, Operands),
945         VPValue(VPValue::VPVWidenSelectSC, &I, this),
946         InvariantCond(InvariantCond) {}
947 
948   ~VPWidenSelectRecipe() override = default;
949 
950   /// Method to support type inquiry through isa, cast, and dyn_cast.
951   static inline bool classof(const VPDef *D) {
952     return D->getVPDefID() == VPRecipeBase::VPWidenSelectSC;
953   }
954 
955   /// Produce a widened version of the select instruction.
956   void execute(VPTransformState &State) override;
957 
958 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
959   /// Print the recipe.
960   void print(raw_ostream &O, const Twine &Indent,
961              VPSlotTracker &SlotTracker) const override;
962 #endif
963 };
964 
965 /// A recipe for handling GEP instructions.
966 class VPWidenGEPRecipe : public VPRecipeBase, public VPValue {
967   bool IsPtrLoopInvariant;
968   SmallBitVector IsIndexLoopInvariant;
969 
970 public:
971   template <typename IterT>
972   VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range<IterT> Operands)
973       : VPRecipeBase(VPRecipeBase::VPWidenGEPSC, Operands),
974         VPValue(VPWidenGEPSC, GEP, this),
975         IsIndexLoopInvariant(GEP->getNumIndices(), false) {}
976 
977   template <typename IterT>
978   VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range<IterT> Operands,
979                    Loop *OrigLoop)
980       : VPRecipeBase(VPRecipeBase::VPWidenGEPSC, Operands),
981         VPValue(VPValue::VPVWidenGEPSC, GEP, this),
982         IsIndexLoopInvariant(GEP->getNumIndices(), false) {
983     IsPtrLoopInvariant = OrigLoop->isLoopInvariant(GEP->getPointerOperand());
984     for (auto Index : enumerate(GEP->indices()))
985       IsIndexLoopInvariant[Index.index()] =
986           OrigLoop->isLoopInvariant(Index.value().get());
987   }
988   ~VPWidenGEPRecipe() override = default;
989 
990   /// Method to support type inquiry through isa, cast, and dyn_cast.
991   static inline bool classof(const VPDef *D) {
992     return D->getVPDefID() == VPRecipeBase::VPWidenGEPSC;
993   }
994 
995   /// Generate the gep nodes.
996   void execute(VPTransformState &State) override;
997 
998 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
999   /// Print the recipe.
1000   void print(raw_ostream &O, const Twine &Indent,
1001              VPSlotTracker &SlotTracker) const override;
1002 #endif
1003 };
1004 
/// A recipe for handling phi nodes of integer and floating-point inductions,
/// producing their vector values.
class VPWidenIntOrFpInductionRecipe : public VPRecipeBase, public VPValue {
  // The original scalar induction phi in the input IR.
  PHINode *IV;
  // Describes the induction (see getInductionDescriptor()).
  const InductionDescriptor &IndDesc;
  // Whether a vector phi must be created for this induction; see
  // needsVectorIV().
  bool NeedsVectorIV;

public:
  /// Create a recipe for \p IV with operands {\p Start, \p Step}. The recipe's
  /// defined value is modeled on \p IV itself.
  VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
                                const InductionDescriptor &IndDesc,
                                bool NeedsVectorIV)
      : VPRecipeBase(VPWidenIntOrFpInductionSC, {Start, Step}),
        VPValue(IV, this), IV(IV), IndDesc(IndDesc),
        NeedsVectorIV(NeedsVectorIV) {}

  /// As above, but the defined value is modeled on \p Trunc, a truncated use
  /// of the induction; getTruncInst() then returns \p Trunc.
  VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
                                const InductionDescriptor &IndDesc,
                                TruncInst *Trunc, bool NeedsVectorIV)
      : VPRecipeBase(VPWidenIntOrFpInductionSC, {Start, Step}),
        VPValue(Trunc, this), IV(IV), IndDesc(IndDesc),
        NeedsVectorIV(NeedsVectorIV) {}

  ~VPWidenIntOrFpInductionRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    return D->getVPDefID() == VPRecipeBase::VPWidenIntOrFpInductionSC;
  }

  /// Generate the vectorized and scalarized versions of the phi node as
  /// needed by their users.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns the start value of the induction.
  VPValue *getStartValue() { return getOperand(0); }
  const VPValue *getStartValue() const { return getOperand(0); }

  /// Returns the step value of the induction.
  VPValue *getStepValue() { return getOperand(1); }
  const VPValue *getStepValue() const { return getOperand(1); }

  /// Returns the first defined value as TruncInst, if it is one or nullptr
  /// otherwise, i.e. the truncate passed to the second constructor above.
  TruncInst *getTruncInst() {
    return dyn_cast_or_null<TruncInst>(getVPValue(0)->getUnderlyingValue());
  }
  const TruncInst *getTruncInst() const {
    return dyn_cast_or_null<TruncInst>(getVPValue(0)->getUnderlyingValue());
  }

  PHINode *getPHINode() { return IV; }

  /// Returns the induction descriptor for the recipe.
  const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }

  /// Returns true if the induction is canonical, i.e. starting at 0 and
  /// incremented by UF * VF (= the original IV is incremented by 1).
  bool isCanonical() const;

  /// Returns the scalar type of the induction: the truncate's (narrower) type
  /// when a truncate models the defined value, the phi's type otherwise.
  const Type *getScalarType() const {
    const TruncInst *TruncI = getTruncInst();
    return TruncI ? TruncI->getType() : IV->getType();
  }

  /// Returns true if a vector phi needs to be created for the induction.
  bool needsVectorIV() const { return NeedsVectorIV; }
};
1079 
1080 /// A pure virtual base class for all recipes modeling header phis, including
1081 /// phis for first order recurrences, pointer inductions and reductions. The
1082 /// start value is the first operand of the recipe and the incoming value from
1083 /// the backedge is the second operand.
1084 class VPHeaderPHIRecipe : public VPRecipeBase, public VPValue {
1085 protected:
1086   VPHeaderPHIRecipe(unsigned char VPVID, unsigned char VPDefID, PHINode *Phi,
1087                     VPValue *Start = nullptr)
1088       : VPRecipeBase(VPDefID, {}), VPValue(VPVID, Phi, this) {
1089     if (Start)
1090       addOperand(Start);
1091   }
1092 
1093 public:
1094   ~VPHeaderPHIRecipe() override = default;
1095 
1096   /// Method to support type inquiry through isa, cast, and dyn_cast.
1097   static inline bool classof(const VPRecipeBase *B) {
1098     return B->getVPDefID() == VPRecipeBase::VPCanonicalIVPHISC ||
1099            B->getVPDefID() == VPRecipeBase::VPFirstOrderRecurrencePHISC ||
1100            B->getVPDefID() == VPRecipeBase::VPReductionPHISC ||
1101            B->getVPDefID() == VPRecipeBase::VPWidenIntOrFpInductionSC ||
1102            B->getVPDefID() == VPRecipeBase::VPWidenPHISC;
1103   }
1104   static inline bool classof(const VPValue *V) {
1105     return V->getVPValueID() == VPValue::VPVCanonicalIVPHISC ||
1106            V->getVPValueID() == VPValue::VPVFirstOrderRecurrencePHISC ||
1107            V->getVPValueID() == VPValue::VPVReductionPHISC ||
1108            V->getVPValueID() == VPValue::VPVWidenIntOrFpInductionSC ||
1109            V->getVPValueID() == VPValue::VPVWidenPHISC;
1110   }
1111 
1112   /// Generate the phi nodes.
1113   void execute(VPTransformState &State) override = 0;
1114 
1115 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1116   /// Print the recipe.
1117   void print(raw_ostream &O, const Twine &Indent,
1118              VPSlotTracker &SlotTracker) const override = 0;
1119 #endif
1120 
1121   /// Returns the start value of the phi, if one is set.
1122   VPValue *getStartValue() {
1123     return getNumOperands() == 0 ? nullptr : getOperand(0);
1124   }
1125   VPValue *getStartValue() const {
1126     return getNumOperands() == 0 ? nullptr : getOperand(0);
1127   }
1128 
1129   /// Returns the incoming value from the loop backedge.
1130   VPValue *getBackedgeValue() {
1131     return getOperand(1);
1132   }
1133 
1134   /// Returns the backedge value as a recipe. The backedge value is guaranteed
1135   /// to be a recipe.
1136   VPRecipeBase *getBackedgeRecipe() {
1137     return cast<VPRecipeBase>(getBackedgeValue()->getDef());
1138   }
1139 };
1140 
/// A header-phi recipe for pointer inductions, producing their vector values.
class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe {
  // Describes the pointer induction (kind, start, step).
  const InductionDescriptor &IndDesc;

  /// SCEV used to expand step.
  /// FIXME: move expansion of step to the pre-header, once it is modeled
  /// explicitly.
  ScalarEvolution &SE;

public:
  /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
  /// Start.
  VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start,
                                const InductionDescriptor &IndDesc,
                                ScalarEvolution &SE)
      : VPHeaderPHIRecipe(VPVWidenPointerInductionSC, VPWidenPointerInductionSC,
                          Phi),
        IndDesc(IndDesc), SE(SE) {
    // Start becomes operand 0, matching the base-class convention.
    addOperand(Start);
  }

  ~VPWidenPointerInductionRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPRecipeBase *B) {
    return B->getVPDefID() == VPRecipeBase::VPWidenPointerInductionSC;
  }
  static inline bool classof(const VPHeaderPHIRecipe *R) {
    return R->getVPDefID() == VPRecipeBase::VPWidenPointerInductionSC;
  }
  static inline bool classof(const VPValue *V) {
    return V->getVPValueID() == VPValue::VPVWidenPointerInductionSC;
  }

  /// Generate vector values for the pointer induction.
  void execute(VPTransformState &State) override;

  /// Returns true if only scalar values will be generated.
  bool onlyScalarsGenerated(ElementCount VF);

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};
1186 
/// A recipe for handling header phis that are widened in the vector loop.
/// In the VPlan native path, all incoming VPValues & VPBasicBlock pairs are
/// managed in the recipe directly.
class VPWidenPHIRecipe : public VPHeaderPHIRecipe {
  /// List of incoming blocks. Only used in the VPlan native path.
  /// Kept parallel to the operand list by addIncoming().
  SmallVector<VPBasicBlock *, 2> IncomingBlocks;

public:
  /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start.
  /// When \p Start is non-null it becomes operand 0.
  VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr)
      : VPHeaderPHIRecipe(VPVWidenPHISC, VPWidenPHISC, Phi) {
    if (Start)
      addOperand(Start);
  }

  ~VPWidenPHIRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPRecipeBase *B) {
    return B->getVPDefID() == VPRecipeBase::VPWidenPHISC;
  }
  static inline bool classof(const VPHeaderPHIRecipe *R) {
    return R->getVPDefID() == VPRecipeBase::VPWidenPHISC;
  }
  static inline bool classof(const VPValue *V) {
    return V->getVPValueID() == VPValue::VPVWidenPHISC;
  }

  /// Generate the phi/select nodes.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Adds a pair (\p IncomingV, \p IncomingBlock) to the phi. The value is
  /// appended as an operand and the block to IncomingBlocks, keeping the two
  /// lists parallel.
  void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock) {
    addOperand(IncomingV);
    IncomingBlocks.push_back(IncomingBlock);
  }

  /// Returns the \p I th incoming VPBasicBlock.
  VPBasicBlock *getIncomingBlock(unsigned I) { return IncomingBlocks[I]; }

  /// Returns the \p I th incoming VPValue.
  /// NOTE(review): when a start value was passed to the constructor it
  /// occupies operand 0, offsetting operand indices relative to
  /// IncomingBlocks — confirm callers account for this.
  VPValue *getIncomingValue(unsigned I) { return getOperand(I); }
};
1236 
/// A recipe for handling first-order recurrence phis. The start value is the
/// first operand of the recipe and the incoming value from the backedge is the
/// second operand.
struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
  /// Create a recurrence-phi recipe for \p Phi with \p Start as the start
  /// value (operand 0).
  VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start)
      : VPHeaderPHIRecipe(VPVFirstOrderRecurrencePHISC,
                          VPFirstOrderRecurrencePHISC, Phi, &Start) {}

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPRecipeBase *R) {
    return R->getVPDefID() == VPRecipeBase::VPFirstOrderRecurrencePHISC;
  }
  static inline bool classof(const VPHeaderPHIRecipe *R) {
    return R->getVPDefID() == VPRecipeBase::VPFirstOrderRecurrencePHISC;
  }
  static inline bool classof(const VPValue *V) {
    return V->getVPValueID() == VPValue::VPVFirstOrderRecurrencePHISC;
  }

  /// Generate the phi for the recurrence.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};
1264 
/// A recipe for handling reduction phis. The start value is the first operand
/// of the recipe and the incoming value from the backedge is the second
/// operand.
class VPReductionPHIRecipe : public VPHeaderPHIRecipe {
  /// Descriptor for the reduction.
  const RecurrenceDescriptor &RdxDesc;

  /// The phi is part of an in-loop reduction.
  bool IsInLoop;

  /// The phi is part of an ordered reduction. Requires IsInLoop to be true.
  bool IsOrdered;

public:
  /// Create a new VPReductionPHIRecipe for the reduction \p Phi described by \p
  /// RdxDesc. \p Start becomes operand 0; the IsOrdered => IsInLoop invariant
  /// is enforced by the assert below.
  VPReductionPHIRecipe(PHINode *Phi, const RecurrenceDescriptor &RdxDesc,
                       VPValue &Start, bool IsInLoop = false,
                       bool IsOrdered = false)
      : VPHeaderPHIRecipe(VPVReductionPHISC, VPReductionPHISC, Phi, &Start),
        RdxDesc(RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered) {
    assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
  }

  ~VPReductionPHIRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPRecipeBase *R) {
    return R->getVPDefID() == VPRecipeBase::VPReductionPHISC;
  }
  static inline bool classof(const VPHeaderPHIRecipe *R) {
    return R->getVPDefID() == VPRecipeBase::VPReductionPHISC;
  }
  static inline bool classof(const VPValue *V) {
    return V->getVPValueID() == VPValue::VPVReductionPHISC;
  }

  /// Generate the phi/select nodes.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns the descriptor for the reduction.
  const RecurrenceDescriptor &getRecurrenceDescriptor() const {
    return RdxDesc;
  }

  /// Returns true, if the phi is part of an ordered reduction.
  bool isOrdered() const { return IsOrdered; }

  /// Returns true, if the phi is part of an in-loop reduction.
  bool isInLoop() const { return IsInLoop; }
};
1321 
/// A recipe for vectorizing a phi-node as a sequence of mask-based select
/// instructions.
class VPBlendRecipe : public VPRecipeBase, public VPValue {
  // The original phi being vectorized; also the recipe's underlying value.
  PHINode *Phi;

public:
  /// The blend operation is a User of the incoming values and of their
  /// respective masks, ordered [I0, M0, I1, M1, ...]. Note that a single value
  /// might be incoming with a full mask for which there is no VPValue.
  VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands)
      : VPRecipeBase(VPBlendSC, Operands),
        VPValue(VPValue::VPVBlendSC, Phi, this), Phi(Phi) {
    assert(Operands.size() > 0 &&
           ((Operands.size() == 1) || (Operands.size() % 2 == 0)) &&
           "Expected either a single incoming value or a positive even number "
           "of operands");
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    return D->getVPDefID() == VPRecipeBase::VPBlendSC;
  }

  /// Return the number of incoming values, taking into account that a single
  /// incoming value has no mask.
  unsigned getNumIncomingValues() const { return (getNumOperands() + 1) / 2; }

  /// Return incoming value number \p Idx.
  VPValue *getIncomingValue(unsigned Idx) const { return getOperand(Idx * 2); }

  /// Return mask number \p Idx. Only valid when the operands carry masks,
  /// i.e. the recipe has an even number of operands.
  VPValue *getMask(unsigned Idx) const { return getOperand(Idx * 2 + 1); }

  /// Generate the phi/select nodes.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    // Recursing through Blend recipes only, must terminate at header phi's the
    // latest.
    return all_of(users(),
                  [this](VPUser *U) { return U->onlyFirstLaneUsed(this); });
  }
};
1374 
/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
/// or stores into one wide load/store and shuffles. The first operand of a
/// VPInterleave recipe is the address, followed by the stored values, followed
/// by an optional mask.
class VPInterleaveRecipe : public VPRecipeBase {
  // The interleave group of memory accesses modeled by this recipe.
  const InterleaveGroup<Instruction> *IG;

  // True if the last operand is a mask; see getMask().
  bool HasMask = false;

public:
  /// Create a recipe for \p IG accessing \p Addr, storing \p StoredValues and
  /// optionally masked by \p Mask (nullptr means an all-ones mask).
  VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,
                     ArrayRef<VPValue *> StoredValues, VPValue *Mask)
      : VPRecipeBase(VPInterleaveSC, {Addr}), IG(IG) {
    // Define one VPValue per group member that produces a result; void-typed
    // members (e.g. stores) define no value and are skipped. The VPValue
    // constructor registers the value with this recipe as its def —
    // presumably the VPDef side owns and deallocates it; confirm against
    // VPlanValue.h.
    for (unsigned i = 0; i < IG->getFactor(); ++i)
      if (Instruction *I = IG->getMember(i)) {
        if (I->getType()->isVoidTy())
          continue;
        new VPValue(I, this);
      }

    for (auto *SV : StoredValues)
      addOperand(SV);
    if (Mask) {
      HasMask = true;
      addOperand(Mask);
    }
  }
  ~VPInterleaveRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    return D->getVPDefID() == VPRecipeBase::VPInterleaveSC;
  }

  /// Return the address accessed by this recipe.
  VPValue *getAddr() const {
    return getOperand(0); // Address is the 1st, mandatory operand.
  }

  /// Return the mask used by this recipe. Note that a full mask is represented
  /// by a nullptr.
  VPValue *getMask() const {
    // Mask is optional and therefore the last, currently 2nd operand.
    return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
  }

  /// Return the VPValues stored by this interleave group. If it is a load
  /// interleave group, return an empty ArrayRef.
  ArrayRef<VPValue *> getStoredValues() const {
    // The first operand is the address, followed by the stored values, followed
    // by an optional mask.
    return ArrayRef<VPValue *>(op_begin(), getNumOperands())
        .slice(1, getNumStoreOperands());
  }

  /// Generate the wide load or store, and shuffles.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; }

  /// Returns the number of stored operands of this interleave group. Returns 0
  /// for load interleave groups.
  unsigned getNumStoreOperands() const {
    // Total operands minus the address and, when present, the mask.
    return getNumOperands() - (HasMask ? 2 : 1);
  }

  /// The recipe only uses the first lane of the address.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    // Only true when \p Op is the address and does not double as a stored
    // value.
    return Op == getAddr() && all_of(getStoredValues(), [Op](VPValue *StoredV) {
             return Op != StoredV;
           });
  }
};
1456 
1457 /// A recipe to represent inloop reduction operations, performing a reduction on
1458 /// a vector operand into a scalar value, and adding the result to a chain.
1459 /// The Operands are {ChainOp, VecOp, [Condition]}.
1460 class VPReductionRecipe : public VPRecipeBase, public VPValue {
1461   /// The recurrence decriptor for the reduction in question.
1462   const RecurrenceDescriptor *RdxDesc;
1463   /// Pointer to the TTI, needed to create the target reduction
1464   const TargetTransformInfo *TTI;
1465 
1466 public:
1467   VPReductionRecipe(const RecurrenceDescriptor *R, Instruction *I,
1468                     VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
1469                     const TargetTransformInfo *TTI)
1470       : VPRecipeBase(VPRecipeBase::VPReductionSC, {ChainOp, VecOp}),
1471         VPValue(VPValue::VPVReductionSC, I, this), RdxDesc(R), TTI(TTI) {
1472     if (CondOp)
1473       addOperand(CondOp);
1474   }
1475 
1476   ~VPReductionRecipe() override = default;
1477 
1478   /// Method to support type inquiry through isa, cast, and dyn_cast.
1479   static inline bool classof(const VPValue *V) {
1480     return V->getVPValueID() == VPValue::VPVReductionSC;
1481   }
1482 
1483   /// Generate the reduction in the loop
1484   void execute(VPTransformState &State) override;
1485 
1486 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1487   /// Print the recipe.
1488   void print(raw_ostream &O, const Twine &Indent,
1489              VPSlotTracker &SlotTracker) const override;
1490 #endif
1491 
1492   /// The VPValue of the scalar Chain being accumulated.
1493   VPValue *getChainOp() const { return getOperand(0); }
1494   /// The VPValue of the vector value to be reduced.
1495   VPValue *getVecOp() const { return getOperand(1); }
1496   /// The VPValue of the condition for the block.
1497   VPValue *getCondOp() const {
1498     return getNumOperands() > 2 ? getOperand(2) : nullptr;
1499   }
1500 };
1501 
/// VPReplicateRecipe replicates a given instruction producing multiple scalar
/// copies of the original scalar type, one per lane, instead of producing a
/// single copy of widened type for all lanes. If the instruction is known to be
/// uniform only one copy, per lane zero, will be generated.
class VPReplicateRecipe : public VPRecipeBase, public VPValue {
  /// Indicator if only a single replica per lane is needed.
  bool IsUniform;

  /// Indicator if the replicas are also predicated.
  bool IsPredicated;

  /// Indicator if the scalar values should also be packed into a vector.
  /// Initialized in the constructor; may be overridden via setAlsoPack().
  bool AlsoPack;

public:
  /// Create a replicating recipe for \p I with \p Operands as the recipe's
  /// operands. \p IsUniform requests a single per-part copy; \p IsPredicated
  /// marks the replicas as guarded by a mask.
  template <typename IterT>
  VPReplicateRecipe(Instruction *I, iterator_range<IterT> Operands,
                    bool IsUniform, bool IsPredicated = false)
      : VPRecipeBase(VPReplicateSC, Operands), VPValue(VPVReplicateSC, I, this),
        IsUniform(IsUniform), IsPredicated(IsPredicated) {
    // Retain the previous behavior of predicateInstructions(), where an
    // insert-element of a predicated instruction got hoisted into the
    // predicated basic block iff it was its only user. This is achieved by
    // having predicated instructions also pack their values into a vector by
    // default unless they have a replicated user which uses their scalar value.
    AlsoPack = IsPredicated && !I->use_empty();
  }

  ~VPReplicateRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    return D->getVPDefID() == VPRecipeBase::VPReplicateSC;
  }

  static inline bool classof(const VPValue *V) {
    return V->getVPValueID() == VPValue::VPVReplicateSC;
  }

  /// Generate replicas of the desired Ingredient. Replicas will be generated
  /// for all parts and lanes unless a specific part and lane are specified in
  /// the \p State.
  void execute(VPTransformState &State) override;

  /// Override whether the scalar values should also be packed into a vector.
  void setAlsoPack(bool Pack) { AlsoPack = Pack; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns true if only a single replica per lane is generated.
  bool isUniform() const { return IsUniform; }

  /// Returns true if the scalar values are also packed into a vector.
  bool isPacked() const { return AlsoPack; }

  /// Returns true if the replicas are predicated.
  bool isPredicated() const { return IsPredicated; }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    // A uniform replicate only emits the per-part (lane zero) copy.
    return isUniform();
  }

  /// Returns true if the recipe uses scalars of operand \p Op.
  bool usesScalars(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }
};
1574 
/// A recipe for generating conditional branches on the bits of a mask.
class VPBranchOnMaskRecipe : public VPRecipeBase {
public:
  /// Construct a branch-on-mask recipe. A nullptr \p BlockInMask represents an
  /// all-one mask and results in a recipe with no operands.
  VPBranchOnMaskRecipe(VPValue *BlockInMask)
      : VPRecipeBase(VPBranchOnMaskSC, {}) {
    if (BlockInMask) // nullptr means all-one mask.
      addOperand(BlockInMask);
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    return D->getVPDefID() == VPRecipeBase::VPBranchOnMaskSC;
  }

  /// Generate the extraction of the appropriate bit from the block mask and the
  /// conditional branch.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override {
    O << Indent << "BRANCH-ON-MASK ";
    if (VPValue *Mask = getMask())
      Mask->printAsOperand(O, SlotTracker);
    else
      O << " All-One";
  }
#endif

  /// Return the mask used by this recipe. Note that a full mask is represented
  /// by a nullptr.
  VPValue *getMask() const {
    assert(getNumOperands() <= 1 && "should have either 0 or 1 operands");
    // Mask is optional.
    return getNumOperands() == 1 ? getOperand(0) : nullptr;
  }

  /// Returns true if the recipe uses scalars of operand \p Op.
  bool usesScalars(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }
};
1620 
/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
/// control converges back from a Branch-on-Mask. The phi nodes are needed in
/// order to merge values that are set under such a branch and feed their uses.
/// The phi nodes can be scalar or vector depending on the users of the value.
/// This recipe works in concert with VPBranchOnMaskRecipe.
class VPPredInstPHIRecipe : public VPRecipeBase, public VPValue {
public:
  /// Construct a VPPredInstPHIRecipe given \p PredV, the predicated value
  /// which needs phi nodes after merging back from a Branch-on-Mask.
  VPPredInstPHIRecipe(VPValue *PredV)
      : VPRecipeBase(VPPredInstPHISC, PredV),
        VPValue(VPValue::VPVPredInstPHI, nullptr, this) {}
  ~VPPredInstPHIRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    return D->getVPDefID() == VPRecipeBase::VPPredInstPHISC;
  }

  /// Generates phi nodes for live-outs as needed to retain SSA form.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns true if the recipe uses scalars of operand \p Op.
  bool usesScalars(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }
};
1656 
/// A Recipe for widening load/store operations.
/// The recipe uses the following VPValues:
/// - For load: Address, optional mask
/// - For store: Address, stored value, optional mask
/// TODO: We currently execute only per-part unless a specific instance is
/// provided.
class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
  /// The underlying load or store instruction being widened.
  Instruction &Ingredient;

  // Whether the loaded-from / stored-to addresses are consecutive.
  bool Consecutive;

  // Whether the consecutive loaded/stored addresses are in reverse order.
  bool Reverse;

  /// Add \p Mask as the last operand, if it is non-null. A nullptr mask
  /// represents a full (all-one) mask and adds no operand.
  void setMask(VPValue *Mask) {
    if (!Mask)
      return;
    addOperand(Mask);
  }

  /// Returns true if a mask operand is present: masked stores have 3 operands
  /// (address, stored value, mask), masked loads have 2 (address, mask).
  bool isMasked() const {
    return isStore() ? getNumOperands() == 3 : getNumOperands() == 2;
  }

public:
  /// Construct a recipe widening \p Load from \p Addr, optionally masked by
  /// \p Mask (nullptr means unmasked).
  VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
                                 bool Consecutive, bool Reverse)
      : VPRecipeBase(VPWidenMemoryInstructionSC, {Addr}), Ingredient(Load),
        Consecutive(Consecutive), Reverse(Reverse) {
    assert((Consecutive || !Reverse) && "Reverse implies consecutive");
    new VPValue(VPValue::VPVMemoryInstructionSC, &Load, this);
    setMask(Mask);
  }

  /// Construct a recipe widening \p Store of \p StoredValue to \p Addr,
  /// optionally masked by \p Mask (nullptr means unmasked).
  VPWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr,
                                 VPValue *StoredValue, VPValue *Mask,
                                 bool Consecutive, bool Reverse)
      : VPRecipeBase(VPWidenMemoryInstructionSC, {Addr, StoredValue}),
        Ingredient(Store), Consecutive(Consecutive), Reverse(Reverse) {
    assert((Consecutive || !Reverse) && "Reverse implies consecutive");
    setMask(Mask);
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    return D->getVPDefID() == VPRecipeBase::VPWidenMemoryInstructionSC;
  }

  /// Return the address accessed by this recipe.
  VPValue *getAddr() const {
    return getOperand(0); // Address is the 1st, mandatory operand.
  }

  /// Return the mask used by this recipe. Note that a full mask is represented
  /// by a nullptr.
  VPValue *getMask() const {
    // Mask is optional and therefore the last operand.
    return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
  }

  /// Returns true if this recipe is a store.
  bool isStore() const { return isa<StoreInst>(Ingredient); }

  /// Return the value stored by this recipe.
  VPValue *getStoredValue() const {
    assert(isStore() && "Stored value only available for store instructions");
    return getOperand(1); // Stored value is the 2nd, mandatory operand.
  }

  // Return whether the loaded-from / stored-to addresses are consecutive.
  bool isConsecutive() const { return Consecutive; }

  // Return whether the consecutive loaded/stored addresses are in reverse
  // order.
  bool isReverse() const { return Reverse; }

  /// Generate the wide load/store.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");

    // Widened, consecutive memory operations only demand the first lane of
    // their address, unless the same operand is also stored. That latter can
    // happen with opaque pointers.
    return Op == getAddr() && isConsecutive() &&
           (!isStore() || Op != getStoredValue());
  }

  /// Return the underlying load or store instruction.
  Instruction &getIngredient() const { return Ingredient; }
};
1757 
/// Recipe to expand a SCEV expression.
class VPExpandSCEVRecipe : public VPRecipeBase, public VPValue {
  /// The SCEV expression to expand.
  const SCEV *Expr;
  /// ScalarEvolution used to expand the expression.
  ScalarEvolution &SE;

public:
  VPExpandSCEVRecipe(const SCEV *Expr, ScalarEvolution &SE)
      : VPRecipeBase(VPExpandSCEVSC, {}), VPValue(nullptr, this), Expr(Expr),
        SE(SE) {}

  ~VPExpandSCEVRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    return D->getVPDefID() == VPExpandSCEVSC;
  }

  /// Generate code expanding the SCEV expression.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Return the SCEV expression to expand.
  const SCEV *getSCEV() const { return Expr; }
};
1786 
/// Canonical scalar induction phi of the vector loop. Starting at the specified
/// start value (either 0 or the resume value when vectorizing the epilogue
/// loop). VPWidenCanonicalIVRecipe represents the vector version of the
/// canonical induction variable.
class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe {
  /// Debug location to attach to the generated phi.
  DebugLoc DL;

public:
  VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL)
      : VPHeaderPHIRecipe(VPValue::VPVCanonicalIVPHISC, VPCanonicalIVPHISC,
                          nullptr, StartV),
        DL(DL) {}

  ~VPCanonicalIVPHIRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    return D->getVPDefID() == VPCanonicalIVPHISC;
  }
  static inline bool classof(const VPHeaderPHIRecipe *D) {
    return D->getVPDefID() == VPCanonicalIVPHISC;
  }
  static inline bool classof(const VPValue *V) {
    return V->getVPValueID() == VPValue::VPVCanonicalIVPHISC;
  }

  /// Generate the canonical scalar induction phi of the vector loop.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns the scalar type of the induction, taken from the type of the
  /// live-in start value (operand 0).
  const Type *getScalarType() const {
    return getOperand(0)->getLiveInIRValue()->getType();
  }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }
};
1834 
/// A Recipe for widening the canonical induction variable of the vector loop.
class VPWidenCanonicalIVRecipe : public VPRecipeBase, public VPValue {
public:
  VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
      : VPRecipeBase(VPWidenCanonicalIVSC, {CanonicalIV}),
        VPValue(VPValue::VPVWidenCanonicalIVSC, nullptr, this) {}

  ~VPWidenCanonicalIVRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    return D->getVPDefID() == VPRecipeBase::VPWidenCanonicalIVSC;
  }

  /// Extra classof implementations to allow directly casting from VPUser ->
  /// VPWidenCanonicalIVRecipe.
  static inline bool classof(const VPUser *U) {
    auto *R = dyn_cast<VPRecipeBase>(U);
    return R && R->getVPDefID() == VPRecipeBase::VPWidenCanonicalIVSC;
  }
  static inline bool classof(const VPRecipeBase *R) {
    return R->getVPDefID() == VPRecipeBase::VPWidenCanonicalIVSC;
  }

  /// Generate a canonical vector induction variable of the vector loop, with
  /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
  /// step = <VF*UF, VF*UF, ..., VF*UF>.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns the scalar type of the induction, forwarded from the underlying
  /// canonical IV phi (operand 0).
  const Type *getScalarType() const {
    return cast<VPCanonicalIVPHIRecipe>(getOperand(0)->getDef())
        ->getScalarType();
  }
};
1876 
/// A recipe for handling phi nodes of integer and floating-point inductions,
/// producing their scalar values.
/// Operands: {CanonicalIV, Start, Step}.
class VPScalarIVStepsRecipe : public VPRecipeBase, public VPValue {
  /// Scalar type to use for the generated values.
  Type *Ty;
  /// If not nullptr, truncate the generated values to TruncToTy.
  Type *TruncToTy;
  /// Descriptor of the induction being expanded.
  const InductionDescriptor &IndDesc;

public:
  VPScalarIVStepsRecipe(Type *Ty, const InductionDescriptor &IndDesc,
                        VPValue *CanonicalIV, VPValue *Start, VPValue *Step,
                        Type *TruncToTy)
      : VPRecipeBase(VPScalarIVStepsSC, {CanonicalIV, Start, Step}),
        VPValue(nullptr, this), Ty(Ty), TruncToTy(TruncToTy), IndDesc(IndDesc) {
  }

  ~VPScalarIVStepsRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    return D->getVPDefID() == VPRecipeBase::VPScalarIVStepsSC;
  }
  /// Extra classof implementations to allow directly casting from VPUser ->
  /// VPScalarIVStepsRecipe.
  static inline bool classof(const VPUser *U) {
    auto *R = dyn_cast<VPRecipeBase>(U);
    return R && R->getVPDefID() == VPRecipeBase::VPScalarIVStepsSC;
  }
  static inline bool classof(const VPRecipeBase *R) {
    return R->getVPDefID() == VPRecipeBase::VPScalarIVStepsSC;
  }

  /// Generate the scalarized versions of the phi node as needed by their users.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns true if the induction is canonical, i.e. starting at 0 and
  /// incremented by UF * VF (= the original IV is incremented by 1).
  bool isCanonical() const;

  /// Returns the canonical IV recipe (operand 0).
  VPCanonicalIVPHIRecipe *getCanonicalIV() const;
  /// Returns the start value of the induction (operand 1).
  VPValue *getStartValue() const { return getOperand(1); }
  /// Returns the step of the induction (operand 2).
  VPValue *getStepValue() const { return getOperand(2); }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }
};
1934 
/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
/// holds a sequence of zero or more VPRecipe's each representing a sequence of
/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
class VPBasicBlock : public VPBlockBase {
public:
  using RecipeListTy = iplist<VPRecipeBase>;

private:
  /// The VPRecipes held in the order of output instructions to generate.
  RecipeListTy Recipes;

public:
  VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
      : VPBlockBase(VPBasicBlockSC, Name.str()) {
    if (Recipe)
      appendRecipe(Recipe);
  }

  ~VPBasicBlock() override {
    // Remove recipes one by one, from the last to the first.
    while (!Recipes.empty())
      Recipes.pop_back();
  }

  /// Instruction iterators...
  using iterator = RecipeListTy::iterator;
  using const_iterator = RecipeListTy::const_iterator;
  using reverse_iterator = RecipeListTy::reverse_iterator;
  using const_reverse_iterator = RecipeListTy::const_reverse_iterator;

  //===--------------------------------------------------------------------===//
  /// Recipe iterator methods
  ///
  inline iterator begin() { return Recipes.begin(); }
  inline const_iterator begin() const { return Recipes.begin(); }
  inline iterator end() { return Recipes.end(); }
  inline const_iterator end() const { return Recipes.end(); }

  inline reverse_iterator rbegin() { return Recipes.rbegin(); }
  inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
  inline reverse_iterator rend() { return Recipes.rend(); }
  inline const_reverse_iterator rend() const { return Recipes.rend(); }

  inline size_t size() const { return Recipes.size(); }
  inline bool empty() const { return Recipes.empty(); }
  inline const VPRecipeBase &front() const { return Recipes.front(); }
  inline VPRecipeBase &front() { return Recipes.front(); }
  inline const VPRecipeBase &back() const { return Recipes.back(); }
  inline VPRecipeBase &back() { return Recipes.back(); }

  /// Returns a reference to the list of recipes.
  RecipeListTy &getRecipeList() { return Recipes; }

  /// Returns a pointer to a member of the recipe list.
  static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
    return &VPBasicBlock::Recipes;
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPBlockBase *V) {
    return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC;
  }

  /// Insert \p Recipe before \p InsertPt, taking ownership and setting this
  /// block as its parent. \p Recipe must not already belong to a VPlan.
  void insert(VPRecipeBase *Recipe, iterator InsertPt) {
    assert(Recipe && "No recipe to append.");
    assert(!Recipe->Parent && "Recipe already in VPlan");
    Recipe->Parent = this;
    Recipes.insert(InsertPt, Recipe);
  }

  /// Augment the existing recipes of a VPBasicBlock with an additional
  /// \p Recipe as the last recipe.
  void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }

  /// The method which generates the output IR instructions that correspond to
  /// this VPBasicBlock, thereby "executing" the VPlan.
  void execute(struct VPTransformState *State) override;

  /// Return the position of the first non-phi node recipe in the block.
  iterator getFirstNonPhi();

  /// Returns an iterator range over the PHI-like recipes in the block.
  iterator_range<iterator> phis() {
    return make_range(begin(), getFirstNonPhi());
  }

  void dropAllReferences(VPValue *NewValue) override;

  /// Split current block at \p SplitAt by inserting a new block between the
  /// current block and its successors and moving all recipes starting at
  /// SplitAt to the new block. Returns the new block.
  VPBasicBlock *splitAt(iterator SplitAt);

  /// Returns the closest enclosing loop region, if any.
  VPRegionBlock *getEnclosingLoopRegion();

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
  /// SlotTracker is used to print unnamed VPValue's using consecutive numbers.
  ///
  /// Note that the numbering is applied to the whole VPlan, so printing
  /// individual blocks is consistent with the whole VPlan printing.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
  using VPBlockBase::print; // Get the print(raw_stream &O) version.
#endif

  /// If the block has multiple successors, return the branch recipe terminating
  /// the block. If there are no or only a single successor, return nullptr;
  VPRecipeBase *getTerminator();
  const VPRecipeBase *getTerminator() const;

  /// Returns true if the block is exiting its parent region.
  bool isExiting() const;

private:
  /// Create an IR BasicBlock to hold the output instructions generated by this
  /// VPBasicBlock, and return it. Update the CFGState accordingly.
  BasicBlock *createEmptyBasicBlock(VPTransformState::CFGState &CFG);
};
2053 
/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
/// A VPRegionBlock may indicate that its contents are to be replicated several
/// times. This is designed to support predicated scalarization, in which a
/// scalar if-then code structure needs to be generated VF * UF times. Having
/// this replication indicator helps to keep a single model for multiple
/// candidate VF's. The actual replication takes place only once the desired VF
/// and UF have been determined.
class VPRegionBlock : public VPBlockBase {
  /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
  VPBlockBase *Entry;

  /// Hold the Single Exiting block of the SESE region modelled by the
  /// VPRegionBlock.
  VPBlockBase *Exiting;

  /// An indicator whether this region is to generate multiple replicated
  /// instances of output IR corresponding to its VPBlockBases.
  bool IsReplicator;

public:
  VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
                const std::string &Name = "", bool IsReplicator = false)
      : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
        IsReplicator(IsReplicator) {
    assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
    assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
    Entry->setParent(this);
    Exiting->setParent(this);
  }
  /// Construct an empty region; entry and exiting blocks are set later.
  VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
      : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
        IsReplicator(IsReplicator) {}

  ~VPRegionBlock() override {
    if (Entry) {
      // Replace all remaining uses of values defined inside the region with a
      // dummy value before deleting the nested CFG.
      VPValue DummyValue;
      Entry->dropAllReferences(&DummyValue);
      deleteCFG(Entry);
    }
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPBlockBase *V) {
    return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
  }

  const VPBlockBase *getEntry() const { return Entry; }
  VPBlockBase *getEntry() { return Entry; }

  /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
  /// EntryBlock must have no predecessors.
  void setEntry(VPBlockBase *EntryBlock) {
    assert(EntryBlock->getPredecessors().empty() &&
           "Entry block cannot have predecessors.");
    Entry = EntryBlock;
    EntryBlock->setParent(this);
  }

  // FIXME: DominatorTreeBase is doing 'A->getParent()->front()'. 'front' is a
  // specific interface of llvm::Function, instead of using
  // GraphTraints::getEntryNode. We should add a new template parameter to
  // DominatorTreeBase representing the Graph type.
  VPBlockBase &front() const { return *Entry; }

  const VPBlockBase *getExiting() const { return Exiting; }
  VPBlockBase *getExiting() { return Exiting; }

  /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
  /// ExitingBlock must have no successors.
  void setExiting(VPBlockBase *ExitingBlock) {
    assert(ExitingBlock->getSuccessors().empty() &&
           "Exit block cannot have successors.");
    Exiting = ExitingBlock;
    ExitingBlock->setParent(this);
  }

  /// Returns the pre-header VPBasicBlock of the loop region.
  VPBasicBlock *getPreheaderVPBB() {
    assert(!isReplicator() && "should only get pre-header of loop regions");
    return getSinglePredecessor()->getExitingBasicBlock();
  }

  /// An indicator whether this region is to generate multiple replicated
  /// instances of output IR corresponding to its VPBlockBases.
  bool isReplicator() const { return IsReplicator; }

  /// The method which generates the output IR instructions that correspond to
  /// this VPRegionBlock, thereby "executing" the VPlan.
  void execute(struct VPTransformState *State) override;

  void dropAllReferences(VPValue *NewValue) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
  /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
  /// consecutive numbers.
  ///
  /// Note that the numbering is applied to the whole VPlan, so printing
  /// individual regions is consistent with the whole VPlan printing.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
  using VPBlockBase::print; // Get the print(raw_stream &O) version.
#endif
};
2159 
2160 //===----------------------------------------------------------------------===//
2161 // GraphTraits specializations for VPlan Hierarchical Control-Flow Graphs     //
2162 //===----------------------------------------------------------------------===//
2163 
// The following set of template specializations implement GraphTraits to treat
// any VPBlockBase as a node in a graph of VPBlockBases. It's important to note
// that VPBlockBase traits don't recurse into VPRegionBlocks, i.e., if the
// VPBlockBase is a VPRegionBlock, this specialization provides access to its
// successors/predecessors but not to the blocks inside the region.
2169 
2170 template <> struct GraphTraits<VPBlockBase *> {
2171   using NodeRef = VPBlockBase *;
2172   using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::iterator;
2173 
2174   static NodeRef getEntryNode(NodeRef N) { return N; }
2175 
2176   static inline ChildIteratorType child_begin(NodeRef N) {
2177     return N->getSuccessors().begin();
2178   }
2179 
2180   static inline ChildIteratorType child_end(NodeRef N) {
2181     return N->getSuccessors().end();
2182   }
2183 };
2184 
2185 template <> struct GraphTraits<const VPBlockBase *> {
2186   using NodeRef = const VPBlockBase *;
2187   using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::const_iterator;
2188 
2189   static NodeRef getEntryNode(NodeRef N) { return N; }
2190 
2191   static inline ChildIteratorType child_begin(NodeRef N) {
2192     return N->getSuccessors().begin();
2193   }
2194 
2195   static inline ChildIteratorType child_end(NodeRef N) {
2196     return N->getSuccessors().end();
2197   }
2198 };
2199 
// Inverse order specialization for VPBlockBases. Predecessors are used instead
// of successors for the inverse traversal.
2202 template <> struct GraphTraits<Inverse<VPBlockBase *>> {
2203   using NodeRef = VPBlockBase *;
2204   using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::iterator;
2205 
2206   static NodeRef getEntryNode(Inverse<NodeRef> B) { return B.Graph; }
2207 
2208   static inline ChildIteratorType child_begin(NodeRef N) {
2209     return N->getPredecessors().begin();
2210   }
2211 
2212   static inline ChildIteratorType child_end(NodeRef N) {
2213     return N->getPredecessors().end();
2214   }
2215 };
2216 
// The following set of template specializations implement GraphTraits to
// treat VPRegionBlock as a graph and recurse inside its nodes. It's important
// to note that the blocks inside the VPRegionBlock are treated as VPBlockBases
// (i.e., no dyn_cast is performed, VPBlockBases specialization is used), so
// there won't be automatic recursion into other VPBlockBases that turn out to
// be VPRegionBlocks.
2223 
2224 template <>
2225 struct GraphTraits<VPRegionBlock *> : public GraphTraits<VPBlockBase *> {
2226   using GraphRef = VPRegionBlock *;
2227   using nodes_iterator = df_iterator<NodeRef>;
2228 
2229   static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); }
2230 
2231   static nodes_iterator nodes_begin(GraphRef N) {
2232     return nodes_iterator::begin(N->getEntry());
2233   }
2234 
2235   static nodes_iterator nodes_end(GraphRef N) {
2236     // df_iterator::end() returns an empty iterator so the node used doesn't
2237     // matter.
2238     return nodes_iterator::end(N);
2239   }
2240 };
2241 
2242 template <>
2243 struct GraphTraits<const VPRegionBlock *>
2244     : public GraphTraits<const VPBlockBase *> {
2245   using GraphRef = const VPRegionBlock *;
2246   using nodes_iterator = df_iterator<NodeRef>;
2247 
2248   static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); }
2249 
2250   static nodes_iterator nodes_begin(GraphRef N) {
2251     return nodes_iterator::begin(N->getEntry());
2252   }
2253 
2254   static nodes_iterator nodes_end(GraphRef N) {
2255     // df_iterator::end() returns an empty iterator so the node used doesn't
2256     // matter.
2257     return nodes_iterator::end(N);
2258   }
2259 };
2260 
2261 template <>
2262 struct GraphTraits<Inverse<VPRegionBlock *>>
2263     : public GraphTraits<Inverse<VPBlockBase *>> {
2264   using GraphRef = VPRegionBlock *;
2265   using nodes_iterator = df_iterator<NodeRef>;
2266 
2267   static NodeRef getEntryNode(Inverse<GraphRef> N) {
2268     return N.Graph->getExiting();
2269   }
2270 
2271   static nodes_iterator nodes_begin(GraphRef N) {
2272     return nodes_iterator::begin(N->getExiting());
2273   }
2274 
2275   static nodes_iterator nodes_end(GraphRef N) {
2276     // df_iterator::end() returns an empty iterator so the node used doesn't
2277     // matter.
2278     return nodes_iterator::end(N);
2279   }
2280 };
2281 
/// Iterator to traverse all successors of a VPBlockBase node. This includes the
/// entry node of VPRegionBlocks. Exit blocks of a region implicitly have their
/// parent region's successors. This ensures all blocks in a region are visited
/// before any blocks in a successor region when doing a reverse post-order
/// traversal of the graph.
2287 template <typename BlockPtrTy>
2288 class VPAllSuccessorsIterator
2289     : public iterator_facade_base<VPAllSuccessorsIterator<BlockPtrTy>,
2290                                   std::forward_iterator_tag, VPBlockBase> {
2291   BlockPtrTy Block;
2292   /// Index of the current successor. For VPBasicBlock nodes, this simply is the
2293   /// index for the successor array. For VPRegionBlock, SuccessorIdx == 0 is
2294   /// used for the region's entry block, and SuccessorIdx - 1 are the indices
2295   /// for the successor array.
2296   size_t SuccessorIdx;
2297 
2298   static BlockPtrTy getBlockWithSuccs(BlockPtrTy Current) {
2299     while (Current && Current->getNumSuccessors() == 0)
2300       Current = Current->getParent();
2301     return Current;
2302   }
2303 
2304   /// Templated helper to dereference successor \p SuccIdx of \p Block. Used by
2305   /// both the const and non-const operator* implementations.
2306   template <typename T1> static T1 deref(T1 Block, unsigned SuccIdx) {
2307     if (auto *R = dyn_cast<VPRegionBlock>(Block)) {
2308       if (SuccIdx == 0)
2309         return R->getEntry();
2310       SuccIdx--;
2311     }
2312 
2313     // For exit blocks, use the next parent region with successors.
2314     return getBlockWithSuccs(Block)->getSuccessors()[SuccIdx];
2315   }
2316 
2317 public:
2318   VPAllSuccessorsIterator(BlockPtrTy Block, size_t Idx = 0)
2319       : Block(Block), SuccessorIdx(Idx) {}
2320   VPAllSuccessorsIterator(const VPAllSuccessorsIterator &Other)
2321       : Block(Other.Block), SuccessorIdx(Other.SuccessorIdx) {}
2322 
2323   VPAllSuccessorsIterator &operator=(const VPAllSuccessorsIterator &R) {
2324     Block = R.Block;
2325     SuccessorIdx = R.SuccessorIdx;
2326     return *this;
2327   }
2328 
2329   static VPAllSuccessorsIterator end(BlockPtrTy Block) {
2330     BlockPtrTy ParentWithSuccs = getBlockWithSuccs(Block);
2331     unsigned NumSuccessors = ParentWithSuccs
2332                                  ? ParentWithSuccs->getNumSuccessors()
2333                                  : Block->getNumSuccessors();
2334 
2335     if (auto *R = dyn_cast<VPRegionBlock>(Block))
2336       return {R, NumSuccessors + 1};
2337     return {Block, NumSuccessors};
2338   }
2339 
2340   bool operator==(const VPAllSuccessorsIterator &R) const {
2341     return Block == R.Block && SuccessorIdx == R.SuccessorIdx;
2342   }
2343 
2344   const VPBlockBase *operator*() const { return deref(Block, SuccessorIdx); }
2345 
2346   BlockPtrTy operator*() { return deref(Block, SuccessorIdx); }
2347 
2348   VPAllSuccessorsIterator &operator++() {
2349     SuccessorIdx++;
2350     return *this;
2351   }
2352 
2353   VPAllSuccessorsIterator operator++(int X) {
2354     VPAllSuccessorsIterator Orig = *this;
2355     SuccessorIdx++;
2356     return Orig;
2357   }
2358 };
2359 
2360 /// Helper for GraphTraits specialization that traverses through VPRegionBlocks.
template <typename BlockTy> class VPBlockRecursiveTraversalWrapper {
  /// Entry block of the traversal; stored by value.
  BlockTy Entry;

public:
  VPBlockRecursiveTraversalWrapper(BlockTy Entry) : Entry(Entry) {}

  /// Return the entry block of the traversal. Const-qualified, since the
  /// accessor does not modify the wrapper.
  BlockTy getEntry() const { return Entry; }
};
2368 
2369 /// GraphTraits specialization to recursively traverse VPBlockBase nodes,
2370 /// including traversing through VPRegionBlocks.  Exit blocks of a region
2371 /// implicitly have their parent region's successors. This ensures all blocks in
2372 /// a region are visited before any blocks in a successor region when doing a
2373 /// reverse post-order traversal of the graph.
template <>
struct GraphTraits<VPBlockRecursiveTraversalWrapper<VPBlockBase *>> {
  using NodeRef = VPBlockBase *;
  // Children are enumerated via VPAllSuccessorsIterator, which also descends
  // into the entry blocks of VPRegionBlocks.
  using ChildIteratorType = VPAllSuccessorsIterator<VPBlockBase *>;

  static NodeRef
  getEntryNode(VPBlockRecursiveTraversalWrapper<VPBlockBase *> N) {
    return N.getEntry();
  }

  static inline ChildIteratorType child_begin(NodeRef N) {
    return ChildIteratorType(N);
  }

  static inline ChildIteratorType child_end(NodeRef N) {
    return ChildIteratorType::end(N);
  }
};
2392 
/// Const variant of the recursive-traversal GraphTraits specialization.
template <>
struct GraphTraits<VPBlockRecursiveTraversalWrapper<const VPBlockBase *>> {
  using NodeRef = const VPBlockBase *;
  using ChildIteratorType = VPAllSuccessorsIterator<const VPBlockBase *>;

  static NodeRef
  getEntryNode(VPBlockRecursiveTraversalWrapper<const VPBlockBase *> N) {
    return N.getEntry();
  }

  static inline ChildIteratorType child_begin(NodeRef N) {
    return ChildIteratorType(N);
  }

  static inline ChildIteratorType child_end(NodeRef N) {
    return ChildIteratorType::end(N);
  }
};
2411 
/// VPlan models a candidate for vectorization, encoding various decisions
/// taken to produce efficient output IR, including which branches,
/// basic-blocks and output IR instructions to generate, and their cost. VPlan
/// holds a Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an
/// Entry VPBlock.
2417 class VPlan {
2418   friend class VPlanPrinter;
2419   friend class VPSlotTracker;
2420 
2421   /// Hold the single entry to the Hierarchical CFG of the VPlan.
2422   VPBlockBase *Entry;
2423 
2424   /// Holds the VFs applicable to this VPlan.
2425   SmallSetVector<ElementCount, 2> VFs;
2426 
2427   /// Holds the name of the VPlan, for printing.
2428   std::string Name;
2429 
2430   /// Holds all the external definitions created for this VPlan. External
2431   /// definitions must be immutable and hold a pointer to their underlying IR.
2432   DenseMap<Value *, VPValue *> VPExternalDefs;
2433 
2434   /// Represents the trip count of the original loop, for folding
2435   /// the tail.
2436   VPValue *TripCount = nullptr;
2437 
2438   /// Represents the backedge taken count of the original loop, for folding
2439   /// the tail. It equals TripCount - 1.
2440   VPValue *BackedgeTakenCount = nullptr;
2441 
2442   /// Represents the vector trip count.
2443   VPValue VectorTripCount;
2444 
2445   /// Holds a mapping between Values and their corresponding VPValue inside
2446   /// VPlan.
2447   Value2VPValueTy Value2VPValue;
2448 
2449   /// Contains all VPValues that been allocated by addVPValue directly and need
2450   /// to be free when the plan's destructor is called.
2451   SmallVector<VPValue *, 16> VPValuesToFree;
2452 
2453   /// Indicates whether it is safe use the Value2VPValue mapping or if the
2454   /// mapping cannot be used any longer, because it is stale.
2455   bool Value2VPValueEnabled = true;
2456 
2457   /// Values used outside the plan.
2458   MapVector<PHINode *, VPLiveOut *> LiveOuts;
2459 
2460 public:
2461   VPlan(VPBlockBase *Entry = nullptr) : Entry(Entry) {
2462     if (Entry)
2463       Entry->setPlan(this);
2464   }
2465 
2466   ~VPlan() {
2467     clearLiveOuts();
2468 
2469     if (Entry) {
2470       VPValue DummyValue;
2471       for (VPBlockBase *Block : depth_first(Entry))
2472         Block->dropAllReferences(&DummyValue);
2473 
2474       VPBlockBase::deleteCFG(Entry);
2475     }
2476     for (VPValue *VPV : VPValuesToFree)
2477       delete VPV;
2478     if (TripCount)
2479       delete TripCount;
2480     if (BackedgeTakenCount)
2481       delete BackedgeTakenCount;
2482     for (auto &P : VPExternalDefs)
2483       delete P.second;
2484   }
2485 
2486   /// Prepare the plan for execution, setting up the required live-in values.
2487   void prepareToExecute(Value *TripCount, Value *VectorTripCount,
2488                         Value *CanonicalIVStartValue, VPTransformState &State);
2489 
2490   /// Generate the IR code for this VPlan.
2491   void execute(struct VPTransformState *State);
2492 
2493   VPBlockBase *getEntry() { return Entry; }
2494   const VPBlockBase *getEntry() const { return Entry; }
2495 
2496   VPBlockBase *setEntry(VPBlockBase *Block) {
2497     Entry = Block;
2498     Block->setPlan(this);
2499     return Entry;
2500   }
2501 
2502   /// The trip count of the original loop.
2503   VPValue *getOrCreateTripCount() {
2504     if (!TripCount)
2505       TripCount = new VPValue();
2506     return TripCount;
2507   }
2508 
2509   /// The backedge taken count of the original loop.
2510   VPValue *getOrCreateBackedgeTakenCount() {
2511     if (!BackedgeTakenCount)
2512       BackedgeTakenCount = new VPValue();
2513     return BackedgeTakenCount;
2514   }
2515 
2516   /// The vector trip count.
2517   VPValue &getVectorTripCount() { return VectorTripCount; }
2518 
2519   /// Mark the plan to indicate that using Value2VPValue is not safe any
2520   /// longer, because it may be stale.
2521   void disableValue2VPValue() { Value2VPValueEnabled = false; }
2522 
2523   void addVF(ElementCount VF) { VFs.insert(VF); }
2524 
2525   bool hasVF(ElementCount VF) { return VFs.count(VF); }
2526 
2527   const std::string &getName() const { return Name; }
2528 
2529   void setName(const Twine &newName) { Name = newName.str(); }
2530 
2531   /// Get the existing or add a new external definition for \p V.
2532   VPValue *getOrAddExternalDef(Value *V) {
2533     auto I = VPExternalDefs.insert({V, nullptr});
2534     if (I.second)
2535       I.first->second = new VPValue(V);
2536     return I.first->second;
2537   }
2538 
2539   void addVPValue(Value *V) {
2540     assert(Value2VPValueEnabled &&
2541            "IR value to VPValue mapping may be out of date!");
2542     assert(V && "Trying to add a null Value to VPlan");
2543     assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
2544     VPValue *VPV = new VPValue(V);
2545     Value2VPValue[V] = VPV;
2546     VPValuesToFree.push_back(VPV);
2547   }
2548 
2549   void addVPValue(Value *V, VPValue *VPV) {
2550     assert(Value2VPValueEnabled && "Value2VPValue mapping may be out of date!");
2551     assert(V && "Trying to add a null Value to VPlan");
2552     assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
2553     Value2VPValue[V] = VPV;
2554   }
2555 
2556   /// Returns the VPValue for \p V. \p OverrideAllowed can be used to disable
2557   /// checking whether it is safe to query VPValues using IR Values.
2558   VPValue *getVPValue(Value *V, bool OverrideAllowed = false) {
2559     assert((OverrideAllowed || isa<Constant>(V) || Value2VPValueEnabled) &&
2560            "Value2VPValue mapping may be out of date!");
2561     assert(V && "Trying to get the VPValue of a null Value");
2562     assert(Value2VPValue.count(V) && "Value does not exist in VPlan");
2563     return Value2VPValue[V];
2564   }
2565 
2566   /// Gets the VPValue or adds a new one (if none exists yet) for \p V. \p
2567   /// OverrideAllowed can be used to disable checking whether it is safe to
2568   /// query VPValues using IR Values.
2569   VPValue *getOrAddVPValue(Value *V, bool OverrideAllowed = false) {
2570     assert((OverrideAllowed || isa<Constant>(V) || Value2VPValueEnabled) &&
2571            "Value2VPValue mapping may be out of date!");
2572     assert(V && "Trying to get or add the VPValue of a null Value");
2573     if (!Value2VPValue.count(V))
2574       addVPValue(V);
2575     return getVPValue(V);
2576   }
2577 
2578   void removeVPValueFor(Value *V) {
2579     assert(Value2VPValueEnabled &&
2580            "IR value to VPValue mapping may be out of date!");
2581     Value2VPValue.erase(V);
2582   }
2583 
2584 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2585   /// Print this VPlan to \p O.
2586   void print(raw_ostream &O) const;
2587 
2588   /// Print this VPlan in DOT format to \p O.
2589   void printDOT(raw_ostream &O) const;
2590 
2591   /// Dump the plan to stderr (for debugging).
2592   LLVM_DUMP_METHOD void dump() const;
2593 #endif
2594 
2595   /// Returns a range mapping the values the range \p Operands to their
2596   /// corresponding VPValues.
2597   iterator_range<mapped_iterator<Use *, std::function<VPValue *(Value *)>>>
2598   mapToVPValues(User::op_range Operands) {
2599     std::function<VPValue *(Value *)> Fn = [this](Value *Op) {
2600       return getOrAddVPValue(Op);
2601     };
2602     return map_range(Operands, Fn);
2603   }
2604 
2605   /// Returns true if \p VPV is uniform after vectorization.
2606   bool isUniformAfterVectorization(VPValue *VPV) const {
2607     auto RepR = dyn_cast_or_null<VPReplicateRecipe>(VPV->getDef());
2608     return !VPV->getDef() || (RepR && RepR->isUniform());
2609   }
2610 
2611   /// Returns the VPRegionBlock of the vector loop.
2612   VPRegionBlock *getVectorLoopRegion() {
2613     return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
2614   }
2615   const VPRegionBlock *getVectorLoopRegion() const {
2616     return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
2617   }
2618 
2619   /// Returns the canonical induction recipe of the vector loop.
2620   VPCanonicalIVPHIRecipe *getCanonicalIV() {
2621     VPBasicBlock *EntryVPBB = getVectorLoopRegion()->getEntryBasicBlock();
2622     if (EntryVPBB->empty()) {
2623       // VPlan native path.
2624       EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
2625     }
2626     return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
2627   }
2628 
2629   void addLiveOut(PHINode *PN, VPValue *V);
2630 
2631   void clearLiveOuts() {
2632     for (auto &KV : LiveOuts)
2633       delete KV.second;
2634     LiveOuts.clear();
2635   }
2636 
2637   void removeLiveOut(PHINode *PN) {
2638     delete LiveOuts[PN];
2639     LiveOuts.erase(PN);
2640   }
2641 
2642   const MapVector<PHINode *, VPLiveOut *> &getLiveOuts() const {
2643     return LiveOuts;
2644   }
2645 
2646 private:
2647   /// Add to the given dominator tree the header block and every new basic block
2648   /// that was created between it and the latch block, inclusive.
2649   static void updateDominatorTree(DominatorTree *DT, BasicBlock *LoopLatchBB,
2650                                   BasicBlock *LoopPreHeaderBB,
2651                                   BasicBlock *LoopExitBB);
2652 };
2653 
2654 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2655 /// VPlanPrinter prints a given VPlan to a given output stream. The printing is
2656 /// indented and follows the dot format.
2657 class VPlanPrinter {
2658   raw_ostream &OS;
2659   const VPlan &Plan;
2660   unsigned Depth = 0;
2661   unsigned TabWidth = 2;
2662   std::string Indent;
2663   unsigned BID = 0;
2664   SmallDenseMap<const VPBlockBase *, unsigned> BlockID;
2665 
2666   VPSlotTracker SlotTracker;
2667 
2668   /// Handle indentation.
2669   void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); }
2670 
2671   /// Print a given \p Block of the Plan.
2672   void dumpBlock(const VPBlockBase *Block);
2673 
2674   /// Print the information related to the CFG edges going out of a given
2675   /// \p Block, followed by printing the successor blocks themselves.
2676   void dumpEdges(const VPBlockBase *Block);
2677 
2678   /// Print a given \p BasicBlock, including its VPRecipes, followed by printing
2679   /// its successor blocks.
2680   void dumpBasicBlock(const VPBasicBlock *BasicBlock);
2681 
2682   /// Print a given \p Region of the Plan.
2683   void dumpRegion(const VPRegionBlock *Region);
2684 
2685   unsigned getOrCreateBID(const VPBlockBase *Block) {
2686     return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++;
2687   }
2688 
2689   Twine getOrCreateName(const VPBlockBase *Block);
2690 
2691   Twine getUID(const VPBlockBase *Block);
2692 
2693   /// Print the information related to a CFG edge between two VPBlockBases.
2694   void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden,
2695                 const Twine &Label);
2696 
2697 public:
2698   VPlanPrinter(raw_ostream &O, const VPlan &P)
2699       : OS(O), Plan(P), SlotTracker(&P) {}
2700 
2701   LLVM_DUMP_METHOD void dump();
2702 };
2703 
struct VPlanIngredient {
  /// The wrapped IR value; not owned.
  const Value *V;

  VPlanIngredient(const Value *V) : V(V) {}

  /// Print the wrapped value to \p O.
  void print(raw_ostream &O) const;
};
2711 
2712 inline raw_ostream &operator<<(raw_ostream &OS, const VPlanIngredient &I) {
2713   I.print(OS);
2714   return OS;
2715 }
2716 
2717 inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
2718   Plan.print(OS);
2719   return OS;
2720 }
2721 #endif
2722 
2723 //===----------------------------------------------------------------------===//
2724 // VPlan Utilities
2725 //===----------------------------------------------------------------------===//
2726 
2727 /// Class that provides utilities for VPBlockBases in VPlan.
2728 class VPBlockUtils {
2729 public:
2730   VPBlockUtils() = delete;
2731 
2732   /// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p
2733   /// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p
2734   /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. \p BlockPtr's
2735   /// successors are moved from \p BlockPtr to \p NewBlock. \p NewBlock must
2736   /// have neither successors nor predecessors.
2737   static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) {
2738     assert(NewBlock->getSuccessors().empty() &&
2739            NewBlock->getPredecessors().empty() &&
2740            "Can't insert new block with predecessors or successors.");
2741     NewBlock->setParent(BlockPtr->getParent());
2742     SmallVector<VPBlockBase *> Succs(BlockPtr->successors());
2743     for (VPBlockBase *Succ : Succs) {
2744       disconnectBlocks(BlockPtr, Succ);
2745       connectBlocks(NewBlock, Succ);
2746     }
2747     connectBlocks(BlockPtr, NewBlock);
2748   }
2749 
2750   /// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p
2751   /// BlockPtr. Add \p IfTrue and \p IfFalse as succesors of \p BlockPtr and \p
2752   /// BlockPtr as predecessor of \p IfTrue and \p IfFalse. Propagate \p BlockPtr
2753   /// parent to \p IfTrue and \p IfFalse. \p BlockPtr must have no successors
2754   /// and \p IfTrue and \p IfFalse must have neither successors nor
2755   /// predecessors.
2756   static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse,
2757                                    VPBlockBase *BlockPtr) {
2758     assert(IfTrue->getSuccessors().empty() &&
2759            "Can't insert IfTrue with successors.");
2760     assert(IfFalse->getSuccessors().empty() &&
2761            "Can't insert IfFalse with successors.");
2762     BlockPtr->setTwoSuccessors(IfTrue, IfFalse);
2763     IfTrue->setPredecessors({BlockPtr});
2764     IfFalse->setPredecessors({BlockPtr});
2765     IfTrue->setParent(BlockPtr->getParent());
2766     IfFalse->setParent(BlockPtr->getParent());
2767   }
2768 
2769   /// Connect VPBlockBases \p From and \p To bi-directionally. Append \p To to
2770   /// the successors of \p From and \p From to the predecessors of \p To. Both
2771   /// VPBlockBases must have the same parent, which can be null. Both
2772   /// VPBlockBases can be already connected to other VPBlockBases.
2773   static void connectBlocks(VPBlockBase *From, VPBlockBase *To) {
2774     assert((From->getParent() == To->getParent()) &&
2775            "Can't connect two block with different parents");
2776     assert(From->getNumSuccessors() < 2 &&
2777            "Blocks can't have more than two successors.");
2778     From->appendSuccessor(To);
2779     To->appendPredecessor(From);
2780   }
2781 
2782   /// Disconnect VPBlockBases \p From and \p To bi-directionally. Remove \p To
2783   /// from the successors of \p From and \p From from the predecessors of \p To.
2784   static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To) {
2785     assert(To && "Successor to disconnect is null.");
2786     From->removeSuccessor(To);
2787     To->removePredecessor(From);
2788   }
2789 
2790   /// Try to merge \p Block into its single predecessor, if \p Block is a
2791   /// VPBasicBlock and its predecessor has a single successor. Returns a pointer
2792   /// to the predecessor \p Block was merged into or nullptr otherwise.
2793   static VPBasicBlock *tryToMergeBlockIntoPredecessor(VPBlockBase *Block) {
2794     auto *VPBB = dyn_cast<VPBasicBlock>(Block);
2795     auto *PredVPBB =
2796         dyn_cast_or_null<VPBasicBlock>(Block->getSinglePredecessor());
2797     if (!VPBB || !PredVPBB || PredVPBB->getNumSuccessors() != 1)
2798       return nullptr;
2799 
2800     for (VPRecipeBase &R : make_early_inc_range(*VPBB))
2801       R.moveBefore(*PredVPBB, PredVPBB->end());
2802     VPBlockUtils::disconnectBlocks(PredVPBB, VPBB);
2803     auto *ParentRegion = cast<VPRegionBlock>(Block->getParent());
2804     if (ParentRegion->getExiting() == Block)
2805       ParentRegion->setExiting(PredVPBB);
2806     SmallVector<VPBlockBase *> Successors(Block->successors());
2807     for (auto *Succ : Successors) {
2808       VPBlockUtils::disconnectBlocks(Block, Succ);
2809       VPBlockUtils::connectBlocks(PredVPBB, Succ);
2810     }
2811     delete Block;
2812     return PredVPBB;
2813   }
2814 
2815   /// Return an iterator range over \p Range which only includes \p BlockTy
2816   /// blocks. The accesses are casted to \p BlockTy.
2817   template <typename BlockTy, typename T>
2818   static auto blocksOnly(const T &Range) {
2819     // Create BaseTy with correct const-ness based on BlockTy.
2820     using BaseTy =
2821         typename std::conditional<std::is_const<BlockTy>::value,
2822                                   const VPBlockBase, VPBlockBase>::type;
2823 
2824     // We need to first create an iterator range over (const) BlocktTy & instead
2825     // of (const) BlockTy * for filter_range to work properly.
2826     auto Mapped =
2827         map_range(Range, [](BaseTy *Block) -> BaseTy & { return *Block; });
2828     auto Filter = make_filter_range(
2829         Mapped, [](BaseTy &Block) { return isa<BlockTy>(&Block); });
2830     return map_range(Filter, [](BaseTy &Block) -> BlockTy * {
2831       return cast<BlockTy>(&Block);
2832     });
2833   }
2834 };
2835 
class VPInterleavedAccessInfo {
  /// Maps each VPInstruction to the interleave group it belongs to, if any.
  DenseMap<VPInstruction *, InterleaveGroup<VPInstruction> *>
      InterleaveGroupMap;

  /// Type for mapping of instruction based interleave groups to VPInstruction
  /// interleave groups
  using Old2NewTy = DenseMap<InterleaveGroup<Instruction> *,
                             InterleaveGroup<VPInstruction> *>;

  /// Recursively traverse \p Region and populate VPlan based interleave groups
  /// based on \p IAI.
  void visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New,
                   InterleavedAccessInfo &IAI);
  /// Recursively traverse \p Block and populate VPlan based interleave groups
  /// based on \p IAI.
  void visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
                  InterleavedAccessInfo &IAI);

public:
  VPInterleavedAccessInfo(VPlan &Plan, InterleavedAccessInfo &IAI);

  ~VPInterleavedAccessInfo() {
    // Multiple map entries may point at the same group, so collect the
    // groups into a set first.
    SmallPtrSet<InterleaveGroup<VPInstruction> *, 4> DelSet;
    // Avoid releasing a pointer twice.
    for (auto &I : InterleaveGroupMap)
      DelSet.insert(I.second);
    for (auto *Ptr : DelSet)
      delete Ptr;
  }

  /// Get the interleave group that \p Instr belongs to.
  ///
  /// \returns nullptr if doesn't have such group.
  InterleaveGroup<VPInstruction> *
  getInterleaveGroup(VPInstruction *Instr) const {
    return InterleaveGroupMap.lookup(Instr);
  }
};
2874 
2875 /// Class that maps (parts of) an existing VPlan to trees of combined
2876 /// VPInstructions.
class VPlanSlp {
  /// Classification of a candidate operand bundle: combining failed, all
  /// loads, or instructions sharing an opcode (see getBest below).
  enum class OpMode { Failed, Load, Opcode };

  /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as
  /// DenseMap keys.
  struct BundleDenseMapInfo {
    // Sentinel pointer values -1/-2 act as the empty/tombstone keys; they
    // cannot collide with real VPValue pointers.
    static SmallVector<VPValue *, 4> getEmptyKey() {
      return {reinterpret_cast<VPValue *>(-1)};
    }

    static SmallVector<VPValue *, 4> getTombstoneKey() {
      return {reinterpret_cast<VPValue *>(-2)};
    }

    static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) {
      return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
    }

    static bool isEqual(const SmallVector<VPValue *, 4> &LHS,
                        const SmallVector<VPValue *, 4> &RHS) {
      return LHS == RHS;
    }
  };

  /// Mapping of values in the original VPlan to a combined VPInstruction.
  DenseMap<SmallVector<VPValue *, 4>, VPInstruction *, BundleDenseMapInfo>
      BundleToCombined;

  /// Interleaved-access information for the plan being transformed; used by
  /// getBest to identify consecutive loads.
  VPInterleavedAccessInfo &IAI;

  /// Basic block to operate on. For now, only instructions in a single BB are
  /// considered.
  const VPBasicBlock &BB;

  /// Indicates whether we managed to combine all visited instructions or not.
  bool CompletelySLP = true;

  /// Width of the widest combined bundle in bits.
  unsigned WidestBundleBits = 0;

  /// Pair of a combined instruction and its operand bundle.
  using MultiNodeOpTy =
      typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;

  // Input operand bundles for the current multi node. Each multi node operand
  // bundle contains values not matching the multi node's opcode. They will
  // be reordered in reorderMultiNodeOps, once we completed building a
  // multi node.
  SmallVector<MultiNodeOpTy, 4> MultiNodeOps;

  /// Indicates whether we are building a multi node currently.
  bool MultiNodeActive = false;

  /// Check if we can vectorize Operands together.
  bool areVectorizable(ArrayRef<VPValue *> Operands) const;

  /// Add combined instruction \p New for the bundle \p Operands.
  void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);

  /// Indicate we hit a bundle we failed to combine. Returns nullptr for now.
  VPInstruction *markFailed();

  /// Reorder operands in the multi node to maximize sequential memory access
  /// and commutative operations.
  SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();

  /// Choose the best candidate to use for the lane after \p Last. The set of
  /// candidates to choose from are values with an opcode matching \p Last's
  /// or loads consecutive to \p Last.
  std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,
                                       SmallPtrSetImpl<VPValue *> &Candidates,
                                       VPInterleavedAccessInfo &IAI);

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print bundle \p Values to dbgs().
  void dumpBundle(ArrayRef<VPValue *> Values);
#endif

public:
  VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}

  ~VPlanSlp() = default;

  /// Tries to build an SLP tree rooted at \p Operands and returns a
  /// VPInstruction combining \p Operands, if they can be combined.
  VPInstruction *buildGraph(ArrayRef<VPValue *> Operands);

  /// Return the width of the widest combined bundle in bits.
  unsigned getWidestBundleBits() const { return WidestBundleBits; }

  /// Return true if all visited instruction can be combined.
  bool isCompletelySLP() const { return CompletelySLP; }
};
2969 
2970 namespace vputils {
2971 
2972 /// Returns true if only the first lane of \p Def is used.
2973 bool onlyFirstLaneUsed(VPValue *Def);
2974 
2975 /// Get or create a VPValue that corresponds to the expansion of \p Expr. If \p
2976 /// Expr is a SCEVConstant or SCEVUnknown, return a VPValue wrapping the live-in
2977 /// value. Otherwise return a VPExpandSCEVRecipe to expand \p Expr. If \p Plan's
2978 /// pre-header already contains a recipe expanding \p Expr, return it. If not,
2979 /// create a new one.
2980 VPValue *getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr,
2981                                        ScalarEvolution &SE);
2982 } // end namespace vputils
2983 
2984 } // end namespace llvm
2985 
2986 #endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
2987