1 //===- llvm/Transforms/Vectorize/LoopVectorizationLegality.h ----*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// This file defines the LoopVectorizationLegality class. Original code
12 /// in Loop Vectorizer has been moved out to its own file for modularity
13 /// and reusability.
14 ///
15 /// Currently, it works for innermost loop vectorization. Extending this to
16 /// outer loop vectorization is a TODO item.
17 ///
18 /// Also provides:
19 /// 1) LoopVectorizeHints class which keeps a number of loop annotations
20 /// locally for easy look up. It has the ability to write them back as
21 /// loop metadata, upon request.
22 /// 2) LoopVectorizationRequirements class for lazy bail out for the purpose
23 /// of reporting useful failure to vectorize message.
24 //
25 //===----------------------------------------------------------------------===//
26 
27 #ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
28 #define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
29 
30 #include "llvm/ADT/MapVector.h"
31 #include "llvm/Analysis/LoopAccessAnalysis.h"
32 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
33 #include "llvm/Transforms/Utils/LoopUtils.h"
34 
35 namespace llvm {
36 
37 /// Create an analysis remark that explains why vectorization failed
38 ///
39 /// \p PassName is the name of the pass (e.g. can be AlwaysPrint).  \p
40 /// RemarkName is the identifier for the remark.  If \p I is passed it is an
41 /// instruction that prevents vectorization.  Otherwise \p TheLoop is used for
42 /// the location of the remark.  \return the remark object that can be
43 /// streamed to.
44 OptimizationRemarkAnalysis createLVMissedAnalysis(const char *PassName,
45                                                   StringRef RemarkName,
46                                                   Loop *TheLoop,
47                                                   Instruction *I = nullptr);
48 
49 /// Utility class for getting and setting loop vectorizer hints in the form
50 /// of loop metadata.
51 /// This class keeps a number of loop annotations locally (as member variables)
52 /// and can, upon request, write them back as metadata on the loop. It will
53 /// initially scan the loop for existing metadata, and will update the local
54 /// values based on information in the loop.
55 /// We cannot write all values to metadata, as the mere presence of some info,
56 /// for example 'force', means a decision has been made. So, we need to be
57 /// careful NOT to add them if the user hasn't specifically asked so.
58 class LoopVectorizeHints {
59   enum HintKind { HK_WIDTH, HK_UNROLL, HK_FORCE, HK_ISVECTORIZED };
60 
61   /// Hint - associates name and validation with the hint value.
62   struct Hint {
63     const char *Name;
64     unsigned Value; // This may have to change for non-numeric values.
65     HintKind Kind;
66 
HintHint67     Hint(const char *Name, unsigned Value, HintKind Kind)
68         : Name(Name), Value(Value), Kind(Kind) {}
69 
70     bool validate(unsigned Val);
71   };
72 
73   /// Vectorization width.
74   Hint Width;
75 
76   /// Vectorization interleave factor.
77   Hint Interleave;
78 
79   /// Vectorization forced
80   Hint Force;
81 
82   /// Already Vectorized
83   Hint IsVectorized;
84 
85   /// Return the loop metadata prefix.
Prefix()86   static StringRef Prefix() { return "llvm.loop."; }
87 
88   /// True if there is any unsafe math in the loop.
89   bool PotentiallyUnsafe = false;
90 
91 public:
92   enum ForceKind {
93     FK_Undefined = -1, ///< Not selected.
94     FK_Disabled = 0,   ///< Forcing disabled.
95     FK_Enabled = 1,    ///< Forcing enabled.
96   };
97 
98   LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced,
99                      OptimizationRemarkEmitter &ORE);
100 
101   /// Mark the loop L as already vectorized by setting the width to 1.
setAlreadyVectorized()102   void setAlreadyVectorized() {
103     IsVectorized.Value = 1;
104     Hint Hints[] = {IsVectorized};
105     writeHintsToMetadata(Hints);
106   }
107 
108   bool allowVectorization(Function *F, Loop *L,
109                           bool VectorizeOnlyWhenForced) const;
110 
111   /// Dumps all the hint information.
112   void emitRemarkWithHints() const;
113 
getWidth()114   unsigned getWidth() const { return Width.Value; }
getInterleave()115   unsigned getInterleave() const { return Interleave.Value; }
getIsVectorized()116   unsigned getIsVectorized() const { return IsVectorized.Value; }
getForce()117   enum ForceKind getForce() const {
118     if ((ForceKind)Force.Value == FK_Undefined &&
119         hasDisableAllTransformsHint(TheLoop))
120       return FK_Disabled;
121     return (ForceKind)Force.Value;
122   }
123 
124   /// If hints are provided that force vectorization, use the AlwaysPrint
125   /// pass name to force the frontend to print the diagnostic.
126   const char *vectorizeAnalysisPassName() const;
127 
allowReordering()128   bool allowReordering() const {
129     // When enabling loop hints are provided we allow the vectorizer to change
130     // the order of operations that is given by the scalar loop. This is not
131     // enabled by default because can be unsafe or inefficient. For example,
132     // reordering floating-point operations will change the way round-off
133     // error accumulates in the loop.
134     return getForce() == LoopVectorizeHints::FK_Enabled || getWidth() > 1;
135   }
136 
isPotentiallyUnsafe()137   bool isPotentiallyUnsafe() const {
138     // Avoid FP vectorization if the target is unsure about proper support.
139     // This may be related to the SIMD unit in the target not handling
140     // IEEE 754 FP ops properly, or bad single-to-double promotions.
141     // Otherwise, a sequence of vectorized loops, even without reduction,
142     // could lead to different end results on the destination vectors.
143     return getForce() != LoopVectorizeHints::FK_Enabled && PotentiallyUnsafe;
144   }
145 
setPotentiallyUnsafe()146   void setPotentiallyUnsafe() { PotentiallyUnsafe = true; }
147 
148 private:
149   /// Find hints specified in the loop metadata and update local values.
150   void getHintsFromMetadata();
151 
152   /// Checks string hint with one operand and set value if valid.
153   void setHint(StringRef Name, Metadata *Arg);
154 
155   /// Create a new hint from name / value pair.
156   MDNode *createHintMetadata(StringRef Name, unsigned V) const;
157 
158   /// Matches metadata with hint name.
159   bool matchesHintMetadataName(MDNode *Node, ArrayRef<Hint> HintTypes);
160 
161   /// Sets current hints into loop metadata, keeping other values intact.
162   void writeHintsToMetadata(ArrayRef<Hint> HintTypes);
163 
164   /// The loop these hints belong to.
165   const Loop *TheLoop;
166 
167   /// Interface to emit optimization remarks.
168   OptimizationRemarkEmitter &ORE;
169 };
170 
171 /// This holds vectorization requirements that must be verified late in
172 /// the process. The requirements are set by legalize and costmodel. Once
173 /// vectorization has been determined to be possible and profitable the
174 /// requirements can be verified by looking for metadata or compiler options.
175 /// For example, some loops require FP commutativity which is only allowed if
176 /// vectorization is explicitly specified or if the fast-math compiler option
177 /// has been provided.
178 /// Late evaluation of these requirements allows helpful diagnostics to be
179 /// composed that tells the user what need to be done to vectorize the loop. For
180 /// example, by specifying #pragma clang loop vectorize or -ffast-math. Late
181 /// evaluation should be used only when diagnostics can generated that can be
182 /// followed by a non-expert user.
183 class LoopVectorizationRequirements {
184 public:
LoopVectorizationRequirements(OptimizationRemarkEmitter & ORE)185   LoopVectorizationRequirements(OptimizationRemarkEmitter &ORE) : ORE(ORE) {}
186 
addUnsafeAlgebraInst(Instruction * I)187   void addUnsafeAlgebraInst(Instruction *I) {
188     // First unsafe algebra instruction.
189     if (!UnsafeAlgebraInst)
190       UnsafeAlgebraInst = I;
191   }
192 
addRuntimePointerChecks(unsigned Num)193   void addRuntimePointerChecks(unsigned Num) { NumRuntimePointerChecks = Num; }
194 
195   bool doesNotMeet(Function *F, Loop *L, const LoopVectorizeHints &Hints);
196 
197 private:
198   unsigned NumRuntimePointerChecks = 0;
199   Instruction *UnsafeAlgebraInst = nullptr;
200 
201   /// Interface to emit optimization remarks.
202   OptimizationRemarkEmitter &ORE;
203 };
204 
205 /// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
206 /// to what vectorization factor.
207 /// This class does not look at the profitability of vectorization, only the
208 /// legality. This class has two main kinds of checks:
209 /// * Memory checks - The code in canVectorizeMemory checks if vectorization
210 ///   will change the order of memory accesses in a way that will change the
211 ///   correctness of the program.
212 /// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory
213 /// checks for a number of different conditions, such as the availability of a
214 /// single induction variable, that all types are supported and vectorize-able,
215 /// etc. This code reflects the capabilities of InnerLoopVectorizer.
216 /// This class is also used by InnerLoopVectorizer for identifying
217 /// induction variable and the different reduction variables.
218 class LoopVectorizationLegality {
219 public:
LoopVectorizationLegality(Loop * L,PredicatedScalarEvolution & PSE,DominatorTree * DT,TargetLibraryInfo * TLI,AliasAnalysis * AA,Function * F,std::function<const LoopAccessInfo & (Loop &)> * GetLAA,LoopInfo * LI,OptimizationRemarkEmitter * ORE,LoopVectorizationRequirements * R,LoopVectorizeHints * H,DemandedBits * DB,AssumptionCache * AC)220   LoopVectorizationLegality(
221       Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT,
222       TargetLibraryInfo *TLI, AliasAnalysis *AA, Function *F,
223       std::function<const LoopAccessInfo &(Loop &)> *GetLAA, LoopInfo *LI,
224       OptimizationRemarkEmitter *ORE, LoopVectorizationRequirements *R,
225       LoopVectorizeHints *H, DemandedBits *DB, AssumptionCache *AC)
226       : TheLoop(L), LI(LI), PSE(PSE), TLI(TLI), DT(DT), GetLAA(GetLAA),
227         ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC) {}
228 
229   /// ReductionList contains the reduction descriptors for all
230   /// of the reductions that were found in the loop.
231   using ReductionList = DenseMap<PHINode *, RecurrenceDescriptor>;
232 
233   /// InductionList saves induction variables and maps them to the
234   /// induction descriptor.
235   using InductionList = MapVector<PHINode *, InductionDescriptor>;
236 
237   /// RecurrenceSet contains the phi nodes that are recurrences other than
238   /// inductions and reductions.
239   using RecurrenceSet = SmallPtrSet<const PHINode *, 8>;
240 
241   /// Returns true if it is legal to vectorize this loop.
242   /// This does not mean that it is profitable to vectorize this
243   /// loop, only that it is legal to do so.
244   /// Temporarily taking UseVPlanNativePath parameter. If true, take
245   /// the new code path being implemented for outer loop vectorization
246   /// (should be functional for inner loop vectorization) based on VPlan.
247   /// If false, good old LV code.
248   bool canVectorize(bool UseVPlanNativePath);
249 
250   /// Return true if we can vectorize this loop while folding its tail by
251   /// masking.
252   bool canFoldTailByMasking();
253 
254   /// Returns the primary induction variable.
getPrimaryInduction()255   PHINode *getPrimaryInduction() { return PrimaryInduction; }
256 
257   /// Returns the reduction variables found in the loop.
getReductionVars()258   ReductionList *getReductionVars() { return &Reductions; }
259 
260   /// Returns the induction variables found in the loop.
getInductionVars()261   InductionList *getInductionVars() { return &Inductions; }
262 
263   /// Return the first-order recurrences found in the loop.
getFirstOrderRecurrences()264   RecurrenceSet *getFirstOrderRecurrences() { return &FirstOrderRecurrences; }
265 
266   /// Return the set of instructions to sink to handle first-order recurrences.
getSinkAfter()267   DenseMap<Instruction *, Instruction *> &getSinkAfter() { return SinkAfter; }
268 
269   /// Returns the widest induction type.
getWidestInductionType()270   Type *getWidestInductionType() { return WidestIndTy; }
271 
272   /// Returns True if V is a Phi node of an induction variable in this loop.
273   bool isInductionPhi(const Value *V);
274 
275   /// Returns True if V is a cast that is part of an induction def-use chain,
276   /// and had been proven to be redundant under a runtime guard (in other
277   /// words, the cast has the same SCEV expression as the induction phi).
278   bool isCastedInductionVariable(const Value *V);
279 
280   /// Returns True if V can be considered as an induction variable in this
281   /// loop. V can be the induction phi, or some redundant cast in the def-use
282   /// chain of the inducion phi.
283   bool isInductionVariable(const Value *V);
284 
285   /// Returns True if PN is a reduction variable in this loop.
isReductionVariable(PHINode * PN)286   bool isReductionVariable(PHINode *PN) { return Reductions.count(PN); }
287 
288   /// Returns True if Phi is a first-order recurrence in this loop.
289   bool isFirstOrderRecurrence(const PHINode *Phi);
290 
291   /// Return true if the block BB needs to be predicated in order for the loop
292   /// to be vectorized.
293   bool blockNeedsPredication(BasicBlock *BB);
294 
295   /// Check if this pointer is consecutive when vectorizing. This happens
296   /// when the last index of the GEP is the induction variable, or that the
297   /// pointer itself is an induction variable.
298   /// This check allows us to vectorize A[idx] into a wide load/store.
299   /// Returns:
300   /// 0 - Stride is unknown or non-consecutive.
301   /// 1 - Address is consecutive.
302   /// -1 - Address is consecutive, and decreasing.
303   /// NOTE: This method must only be used before modifying the original scalar
304   /// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965).
305   int isConsecutivePtr(Value *Ptr);
306 
307   /// Returns true if the value V is uniform within the loop.
308   bool isUniform(Value *V);
309 
310   /// Returns the information that we collected about runtime memory check.
getRuntimePointerChecking()311   const RuntimePointerChecking *getRuntimePointerChecking() const {
312     return LAI->getRuntimePointerChecking();
313   }
314 
getLAI()315   const LoopAccessInfo *getLAI() const { return LAI; }
316 
getMaxSafeDepDistBytes()317   unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); }
318 
getMaxSafeRegisterWidth()319   uint64_t getMaxSafeRegisterWidth() const {
320     return LAI->getDepChecker().getMaxSafeRegisterWidth();
321   }
322 
hasStride(Value * V)323   bool hasStride(Value *V) { return LAI->hasStride(V); }
324 
325   /// Returns true if vector representation of the instruction \p I
326   /// requires mask.
isMaskRequired(const Instruction * I)327   bool isMaskRequired(const Instruction *I) { return (MaskedOp.count(I) != 0); }
328 
getNumStores()329   unsigned getNumStores() const { return LAI->getNumStores(); }
getNumLoads()330   unsigned getNumLoads() const { return LAI->getNumLoads(); }
331 
332   // Returns true if the NoNaN attribute is set on the function.
hasFunNoNaNAttr()333   bool hasFunNoNaNAttr() const { return HasFunNoNaNAttr; }
334 
335 private:
336   /// Return true if the pre-header, exiting and latch blocks of \p Lp and all
337   /// its nested loops are considered legal for vectorization. These legal
338   /// checks are common for inner and outer loop vectorization.
339   /// Temporarily taking UseVPlanNativePath parameter. If true, take
340   /// the new code path being implemented for outer loop vectorization
341   /// (should be functional for inner loop vectorization) based on VPlan.
342   /// If false, good old LV code.
343   bool canVectorizeLoopNestCFG(Loop *Lp, bool UseVPlanNativePath);
344 
345   /// Set up outer loop inductions by checking Phis in outer loop header for
346   /// supported inductions (int inductions). Return false if any of these Phis
347   /// is not a supported induction or if we fail to find an induction.
348   bool setupOuterLoopInductions();
349 
350   /// Return true if the pre-header, exiting and latch blocks of \p Lp
351   /// (non-recursive) are considered legal for vectorization.
352   /// Temporarily taking UseVPlanNativePath parameter. If true, take
353   /// the new code path being implemented for outer loop vectorization
354   /// (should be functional for inner loop vectorization) based on VPlan.
355   /// If false, good old LV code.
356   bool canVectorizeLoopCFG(Loop *Lp, bool UseVPlanNativePath);
357 
358   /// Check if a single basic block loop is vectorizable.
359   /// At this point we know that this is a loop with a constant trip count
360   /// and we only need to check individual instructions.
361   bool canVectorizeInstrs();
362 
363   /// When we vectorize loops we may change the order in which
364   /// we read and write from memory. This method checks if it is
365   /// legal to vectorize the code, considering only memory constrains.
366   /// Returns true if the loop is vectorizable
367   bool canVectorizeMemory();
368 
369   /// Return true if we can vectorize this loop using the IF-conversion
370   /// transformation.
371   bool canVectorizeWithIfConvert();
372 
373   /// Return true if we can vectorize this outer loop. The method performs
374   /// specific checks for outer loop vectorization.
375   bool canVectorizeOuterLoop();
376 
377   /// Return true if all of the instructions in the block can be speculatively
378   /// executed. \p SafePtrs is a list of addresses that are known to be legal
379   /// and we know that we can read from them without segfault.
380   bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs);
381 
382   /// Updates the vectorization state by adding \p Phi to the inductions list.
383   /// This can set \p Phi as the main induction of the loop if \p Phi is a
384   /// better choice for the main induction than the existing one.
385   void addInductionPhi(PHINode *Phi, const InductionDescriptor &ID,
386                        SmallPtrSetImpl<Value *> &AllowedExit);
387 
388   /// Create an analysis remark that explains why vectorization failed
389   ///
390   /// \p RemarkName is the identifier for the remark.  If \p I is passed it is
391   /// an instruction that prevents vectorization.  Otherwise the loop is used
392   /// for the location of the remark.  \return the remark object that can be
393   /// streamed to.
394   OptimizationRemarkAnalysis
395   createMissedAnalysis(StringRef RemarkName, Instruction *I = nullptr) const {
396     return createLVMissedAnalysis(Hints->vectorizeAnalysisPassName(),
397                                   RemarkName, TheLoop, I);
398   }
399 
400   /// If an access has a symbolic strides, this maps the pointer value to
401   /// the stride symbol.
getSymbolicStrides()402   const ValueToValueMap *getSymbolicStrides() {
403     // FIXME: Currently, the set of symbolic strides is sometimes queried before
404     // it's collected.  This happens from canVectorizeWithIfConvert, when the
405     // pointer is checked to reference consecutive elements suitable for a
406     // masked access.
407     return LAI ? &LAI->getSymbolicStrides() : nullptr;
408   }
409 
410   /// The loop that we evaluate.
411   Loop *TheLoop;
412 
413   /// Loop Info analysis.
414   LoopInfo *LI;
415 
416   /// A wrapper around ScalarEvolution used to add runtime SCEV checks.
417   /// Applies dynamic knowledge to simplify SCEV expressions in the context
418   /// of existing SCEV assumptions. The analysis will also add a minimal set
419   /// of new predicates if this is required to enable vectorization and
420   /// unrolling.
421   PredicatedScalarEvolution &PSE;
422 
423   /// Target Library Info.
424   TargetLibraryInfo *TLI;
425 
426   /// Dominator Tree.
427   DominatorTree *DT;
428 
429   // LoopAccess analysis.
430   std::function<const LoopAccessInfo &(Loop &)> *GetLAA;
431 
432   // And the loop-accesses info corresponding to this loop.  This pointer is
433   // null until canVectorizeMemory sets it up.
434   const LoopAccessInfo *LAI = nullptr;
435 
436   /// Interface to emit optimization remarks.
437   OptimizationRemarkEmitter *ORE;
438 
439   //  ---  vectorization state --- //
440 
441   /// Holds the primary induction variable. This is the counter of the
442   /// loop.
443   PHINode *PrimaryInduction = nullptr;
444 
445   /// Holds the reduction variables.
446   ReductionList Reductions;
447 
448   /// Holds all of the induction variables that we found in the loop.
449   /// Notice that inductions don't need to start at zero and that induction
450   /// variables can be pointers.
451   InductionList Inductions;
452 
453   /// Holds all the casts that participate in the update chain of the induction
454   /// variables, and that have been proven to be redundant (possibly under a
455   /// runtime guard). These casts can be ignored when creating the vectorized
456   /// loop body.
457   SmallPtrSet<Instruction *, 4> InductionCastsToIgnore;
458 
459   /// Holds the phi nodes that are first-order recurrences.
460   RecurrenceSet FirstOrderRecurrences;
461 
462   /// Holds instructions that need to sink past other instructions to handle
463   /// first-order recurrences.
464   DenseMap<Instruction *, Instruction *> SinkAfter;
465 
466   /// Holds the widest induction type encountered.
467   Type *WidestIndTy = nullptr;
468 
469   /// Allowed outside users. This holds the induction and reduction
470   /// vars which can be accessed from outside the loop.
471   SmallPtrSet<Value *, 4> AllowedExit;
472 
473   /// Can we assume the absence of NaNs.
474   bool HasFunNoNaNAttr = false;
475 
476   /// Vectorization requirements that will go through late-evaluation.
477   LoopVectorizationRequirements *Requirements;
478 
479   /// Used to emit an analysis of any legality issues.
480   LoopVectorizeHints *Hints;
481 
482   /// The demanded bits analsyis is used to compute the minimum type size in
483   /// which a reduction can be computed.
484   DemandedBits *DB;
485 
486   /// The assumption cache analysis is used to compute the minimum type size in
487   /// which a reduction can be computed.
488   AssumptionCache *AC;
489 
490   /// While vectorizing these instructions we have to generate a
491   /// call to the appropriate masked intrinsic
492   SmallPtrSet<const Instruction *, 8> MaskedOp;
493 };
494 
495 } // namespace llvm
496 
497 #endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
498