1 //===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a function pass that inserts VSETVLI instructions where
10 // needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
11 // instructions.
12 //
13 // This pass consists of 3 phases:
14 //
15 // Phase 1 collects how each basic block affects VL/VTYPE.
16 //
17 // Phase 2 uses the information from phase 1 to do a data flow analysis to
18 // propagate the VL/VTYPE changes through the function. This gives us the
19 // VL/VTYPE at the start of each basic block.
20 //
21 // Phase 3 inserts VSETVLI instructions in each basic block. Information from
22 // phase 2 is used to prevent inserting a VSETVLI before the first vector
23 // instruction in the block if possible.
24 //
25 //===----------------------------------------------------------------------===//
26 
27 #include "RISCV.h"
28 #include "RISCVSubtarget.h"
29 #include "llvm/CodeGen/LiveIntervals.h"
30 #include "llvm/CodeGen/MachineFunctionPass.h"
31 #include <queue>
32 using namespace llvm;
33 
// DEBUG_TYPE is the short identifier of this pass; it is the name handed to
// INITIALIZE_PASS further down in this file. RISCV_INSERT_VSETVLI_NAME is the
// human-readable name handed to INITIALIZE_PASS and returned by getPassName().
#define DEBUG_TYPE "riscv-insert-vsetvli"
#define RISCV_INSERT_VSETVLI_NAME "RISCV Insert VSETVLI pass"
36 
37 static cl::opt<bool> DisableInsertVSETVLPHIOpt(
38     "riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
39     cl::desc("Disable looking through phis when inserting vsetvlis."));
40 
41 static cl::opt<bool> UseStrictAsserts(
42     "riscv-insert-vsetvl-strict-asserts", cl::init(true), cl::Hidden,
43     cl::desc("Enable strict assertion checking for the dataflow algorithm"));
44 
45 namespace {
46 
47 static unsigned getVLOpNum(const MachineInstr &MI) {
48   return RISCVII::getVLOpNum(MI.getDesc());
49 }
50 
51 static unsigned getSEWOpNum(const MachineInstr &MI) {
52   return RISCVII::getSEWOpNum(MI.getDesc());
53 }
54 
55 static bool isScalarMoveInstr(const MachineInstr &MI) {
56   switch (MI.getOpcode()) {
57   default:
58     return false;
59   case RISCV::PseudoVMV_S_X_M1:
60   case RISCV::PseudoVMV_S_X_M2:
61   case RISCV::PseudoVMV_S_X_M4:
62   case RISCV::PseudoVMV_S_X_M8:
63   case RISCV::PseudoVMV_S_X_MF2:
64   case RISCV::PseudoVMV_S_X_MF4:
65   case RISCV::PseudoVMV_S_X_MF8:
66   case RISCV::PseudoVFMV_S_F16_M1:
67   case RISCV::PseudoVFMV_S_F16_M2:
68   case RISCV::PseudoVFMV_S_F16_M4:
69   case RISCV::PseudoVFMV_S_F16_M8:
70   case RISCV::PseudoVFMV_S_F16_MF2:
71   case RISCV::PseudoVFMV_S_F16_MF4:
72   case RISCV::PseudoVFMV_S_F32_M1:
73   case RISCV::PseudoVFMV_S_F32_M2:
74   case RISCV::PseudoVFMV_S_F32_M4:
75   case RISCV::PseudoVFMV_S_F32_M8:
76   case RISCV::PseudoVFMV_S_F32_MF2:
77   case RISCV::PseudoVFMV_S_F64_M1:
78   case RISCV::PseudoVFMV_S_F64_M2:
79   case RISCV::PseudoVFMV_S_F64_M4:
80   case RISCV::PseudoVFMV_S_F64_M8:
81     return true;
82   }
83 }
84 
85 
86 class VSETVLIInfo {
87   union {
88     Register AVLReg;
89     unsigned AVLImm;
90   };
91 
92   enum : uint8_t {
93     Uninitialized,
94     AVLIsReg,
95     AVLIsImm,
96     Unknown,
97   } State = Uninitialized;
98 
99   // Fields from VTYPE.
100   RISCVII::VLMUL VLMul = RISCVII::LMUL_1;
101   uint8_t SEW = 0;
102   uint8_t TailAgnostic : 1;
103   uint8_t MaskAgnostic : 1;
104   uint8_t SEWLMULRatioOnly : 1;
105 
106 public:
107   VSETVLIInfo()
108       : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
109         SEWLMULRatioOnly(false) {}
110 
111   static VSETVLIInfo getUnknown() {
112     VSETVLIInfo Info;
113     Info.setUnknown();
114     return Info;
115   }
116 
117   bool isValid() const { return State != Uninitialized; }
118   void setUnknown() { State = Unknown; }
119   bool isUnknown() const { return State == Unknown; }
120 
121   void setAVLReg(Register Reg) {
122     AVLReg = Reg;
123     State = AVLIsReg;
124   }
125 
126   void setAVLImm(unsigned Imm) {
127     AVLImm = Imm;
128     State = AVLIsImm;
129   }
130 
131   bool hasAVLImm() const { return State == AVLIsImm; }
132   bool hasAVLReg() const { return State == AVLIsReg; }
133   Register getAVLReg() const {
134     assert(hasAVLReg());
135     return AVLReg;
136   }
137   unsigned getAVLImm() const {
138     assert(hasAVLImm());
139     return AVLImm;
140   }
141 
142   unsigned getSEW() const { return SEW; }
143   RISCVII::VLMUL getVLMUL() const { return VLMul; }
144 
145   bool hasZeroAVL() const {
146     if (hasAVLImm())
147       return getAVLImm() == 0;
148     return false;
149   }
150   bool hasNonZeroAVL() const {
151     if (hasAVLImm())
152       return getAVLImm() > 0;
153     if (hasAVLReg())
154       return getAVLReg() == RISCV::X0;
155     return false;
156   }
157 
158   bool hasSameAVL(const VSETVLIInfo &Other) const {
159     assert(isValid() && Other.isValid() &&
160            "Can't compare invalid VSETVLIInfos");
161     assert(!isUnknown() && !Other.isUnknown() &&
162            "Can't compare AVL in unknown state");
163     if (hasAVLReg() && Other.hasAVLReg())
164       return getAVLReg() == Other.getAVLReg();
165 
166     if (hasAVLImm() && Other.hasAVLImm())
167       return getAVLImm() == Other.getAVLImm();
168 
169     return false;
170   }
171 
172   void setVTYPE(unsigned VType) {
173     assert(isValid() && !isUnknown() &&
174            "Can't set VTYPE for uninitialized or unknown");
175     VLMul = RISCVVType::getVLMUL(VType);
176     SEW = RISCVVType::getSEW(VType);
177     TailAgnostic = RISCVVType::isTailAgnostic(VType);
178     MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
179   }
180   void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) {
181     assert(isValid() && !isUnknown() &&
182            "Can't set VTYPE for uninitialized or unknown");
183     VLMul = L;
184     SEW = S;
185     TailAgnostic = TA;
186     MaskAgnostic = MA;
187   }
188 
189   unsigned encodeVTYPE() const {
190     assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
191            "Can't encode VTYPE for uninitialized or unknown");
192     return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
193   }
194 
195   bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }
196 
197   bool hasSameSEW(const VSETVLIInfo &Other) const {
198     assert(isValid() && Other.isValid() &&
199            "Can't compare invalid VSETVLIInfos");
200     assert(!isUnknown() && !Other.isUnknown() &&
201            "Can't compare VTYPE in unknown state");
202     assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
203            "Can't compare when only LMUL/SEW ratio is valid.");
204     return SEW == Other.SEW;
205   }
206 
207   bool hasSameVTYPE(const VSETVLIInfo &Other) const {
208     assert(isValid() && Other.isValid() &&
209            "Can't compare invalid VSETVLIInfos");
210     assert(!isUnknown() && !Other.isUnknown() &&
211            "Can't compare VTYPE in unknown state");
212     assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
213            "Can't compare when only LMUL/SEW ratio is valid.");
214     return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
215            std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
216                     Other.MaskAgnostic);
217   }
218 
219   static unsigned getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul) {
220     unsigned LMul;
221     bool Fractional;
222     std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(VLMul);
223 
224     // Convert LMul to a fixed point value with 3 fractional bits.
225     LMul = Fractional ? (8 / LMul) : (LMul * 8);
226 
227     assert(SEW >= 8 && "Unexpected SEW value");
228     return (SEW * 8) / LMul;
229   }
230 
231   unsigned getSEWLMULRatio() const {
232     assert(isValid() && !isUnknown() &&
233            "Can't use VTYPE for uninitialized or unknown");
234     return getSEWLMULRatio(SEW, VLMul);
235   }
236 
237   // Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX.
238   // Note that having the same VLMAX ensures that both share the same
239   // function from AVL to VL; that is, they must produce the same VL value
240   // for any given AVL value.
241   bool hasSameVLMAX(const VSETVLIInfo &Other) const {
242     assert(isValid() && Other.isValid() &&
243            "Can't compare invalid VSETVLIInfos");
244     assert(!isUnknown() && !Other.isUnknown() &&
245            "Can't compare VTYPE in unknown state");
246     return getSEWLMULRatio() == Other.getSEWLMULRatio();
247   }
248 
249   bool hasSamePolicy(const VSETVLIInfo &Other) const {
250     assert(isValid() && Other.isValid() &&
251            "Can't compare invalid VSETVLIInfos");
252     assert(!isUnknown() && !Other.isUnknown() &&
253            "Can't compare VTYPE in unknown state");
254     return TailAgnostic == Other.TailAgnostic &&
255            MaskAgnostic == Other.MaskAgnostic;
256   }
257 
258   bool hasCompatibleVTYPE(const MachineInstr &MI,
259                           const VSETVLIInfo &Require) const {
260     // Simple case, see if full VTYPE matches.
261     if (hasSameVTYPE(Require))
262       return true;
263 
264     // If this is a mask reg operation, it only cares about VLMAX.
265     // FIXME: Mask reg operations are probably ok if "this" VLMAX is larger
266     // than "Require".
267     // FIXME: The policy bits can probably be ignored for mask reg operations.
268     const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
269     // A Log2SEW of 0 is an operation on mask registers only.
270     const bool MaskRegOp = Log2SEW == 0;
271     if (MaskRegOp && hasSameVLMAX(Require) &&
272         TailAgnostic == Require.TailAgnostic &&
273         MaskAgnostic == Require.MaskAgnostic)
274       return true;
275 
276     return false;
277   }
278 
279   // Determine whether the vector instructions requirements represented by
280   // Require are compatible with the previous vsetvli instruction represented
281   // by this.  MI is the instruction whose requirements we're considering.
282   bool isCompatible(const MachineInstr &MI, const VSETVLIInfo &Require) const {
283     assert(isValid() && Require.isValid() &&
284            "Can't compare invalid VSETVLIInfos");
285     assert(!Require.SEWLMULRatioOnly &&
286            "Expected a valid VTYPE for instruction!");
287     // Nothing is compatible with Unknown.
288     if (isUnknown() || Require.isUnknown())
289       return false;
290 
291     // If only our VLMAX ratio is valid, then this isn't compatible.
292     if (SEWLMULRatioOnly)
293       return false;
294 
295     // If the instruction doesn't need an AVLReg and the SEW matches, consider
296     // it compatible.
297     if (Require.hasAVLReg() && Require.AVLReg == RISCV::NoRegister)
298       if (SEW == Require.SEW)
299         return true;
300 
301     // For vmv.s.x and vfmv.s.f, there is only two behaviors, VL = 0 and VL > 0.
302     // So it's compatible when we could make sure that both VL be the same
303     // situation.
304     if (isScalarMoveInstr(MI) && Require.hasAVLImm() &&
305         ((hasNonZeroAVL() && Require.hasNonZeroAVL()) ||
306          (hasZeroAVL() && Require.hasZeroAVL())) &&
307         hasSameSEW(Require) && hasSamePolicy(Require))
308       return true;
309 
310     // The AVL must match.
311     if (!hasSameAVL(Require))
312       return false;
313 
314     if (hasCompatibleVTYPE(MI, Require))
315       return true;
316 
317     // Store instructions don't use the policy fields.
318     const bool StoreOp = MI.getNumExplicitDefs() == 0;
319     if (StoreOp && VLMul == Require.VLMul && SEW == Require.SEW)
320       return true;
321 
322     // Anything else is not compatible.
323     return false;
324   }
325 
326   bool isCompatibleWithLoadStoreEEW(unsigned EEW,
327                                     const VSETVLIInfo &Require) const {
328     assert(isValid() && Require.isValid() &&
329            "Can't compare invalid VSETVLIInfos");
330     assert(!Require.SEWLMULRatioOnly &&
331            "Expected a valid VTYPE for instruction!");
332     assert(EEW == Require.SEW && "Mismatched EEW/SEW for store");
333 
334     if (isUnknown() || hasSEWLMULRatioOnly())
335       return false;
336 
337     if (!hasSameAVL(Require))
338       return false;
339 
340     return getSEWLMULRatio() == getSEWLMULRatio(EEW, Require.VLMul);
341   }
342 
343   bool operator==(const VSETVLIInfo &Other) const {
344     // Uninitialized is only equal to another Uninitialized.
345     if (!isValid())
346       return !Other.isValid();
347     if (!Other.isValid())
348       return !isValid();
349 
350     // Unknown is only equal to another Unknown.
351     if (isUnknown())
352       return Other.isUnknown();
353     if (Other.isUnknown())
354       return isUnknown();
355 
356     if (!hasSameAVL(Other))
357       return false;
358 
359     // If the SEWLMULRatioOnly bits are different, then they aren't equal.
360     if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
361       return false;
362 
363     // If only the VLMAX is valid, check that it is the same.
364     if (SEWLMULRatioOnly)
365       return hasSameVLMAX(Other);
366 
367     // If the full VTYPE is valid, check that it is the same.
368     return hasSameVTYPE(Other);
369   }
370 
371   bool operator!=(const VSETVLIInfo &Other) const {
372     return !(*this == Other);
373   }
374 
375   // Calculate the VSETVLIInfo visible to a block assuming this and Other are
376   // both predecessors.
377   VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
378     // If the new value isn't valid, ignore it.
379     if (!Other.isValid())
380       return *this;
381 
382     // If this value isn't valid, this must be the first predecessor, use it.
383     if (!isValid())
384       return Other;
385 
386     // If either is unknown, the result is unknown.
387     if (isUnknown() || Other.isUnknown())
388       return VSETVLIInfo::getUnknown();
389 
390     // If we have an exact, match return this.
391     if (*this == Other)
392       return *this;
393 
394     // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
395     // return an SEW/LMUL ratio only value.
396     if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
397       VSETVLIInfo MergeInfo = *this;
398       MergeInfo.SEWLMULRatioOnly = true;
399       return MergeInfo;
400     }
401 
402     // Otherwise the result is unknown.
403     return VSETVLIInfo::getUnknown();
404   }
405 
406 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
407   /// Support for debugging, callable in GDB: V->dump()
408   LLVM_DUMP_METHOD void dump() const {
409     print(dbgs());
410     dbgs() << "\n";
411   }
412 
413   /// Implement operator<<.
414   /// @{
415   void print(raw_ostream &OS) const {
416     OS << "{";
417     if (!isValid())
418       OS << "Uninitialized";
419     if (isUnknown())
420       OS << "unknown";;
421     if (hasAVLReg())
422       OS << "AVLReg=" << (unsigned)AVLReg;
423     if (hasAVLImm())
424       OS << "AVLImm=" << (unsigned)AVLImm;
425     OS << ", "
426        << "VLMul=" << (unsigned)VLMul << ", "
427        << "SEW=" << (unsigned)SEW << ", "
428        << "TailAgnostic=" << (bool)TailAgnostic << ", "
429        << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
430        << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
431   }
432 #endif
433 };
434 
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Stream insertion for VSETVLIInfo; forwards to VSETVLIInfo::print and returns
// the stream so calls can be chained. Only compiled under the same
// preprocessor condition that guards print().
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
  V.print(OS);
  return OS;
}
#endif
442 
// Per-basic-block bookkeeping for the pass. All three VSETVLIInfo members
// start out in VSETVLIInfo's default (Uninitialized) state.
struct BlockData {
  // The VSETVLIInfo that represents the net changes to the VL/VTYPE registers
  // made by this block. Calculated in Phase 1.
  VSETVLIInfo Change;

  // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
  // block. Calculated in Phase 2.
  VSETVLIInfo Exit;

  // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
  // blocks. Calculated in Phase 2, and used by Phase 3.
  VSETVLIInfo Pred;

  // Keeps track of whether the block is already in the queue.
  bool InQueue = false;

  BlockData() = default;
};
461 
// The machine-function pass itself. Only the declaration is shown here; the
// member functions are defined out of line.
class RISCVInsertVSETVLI : public MachineFunctionPass {
  // Target instruction info, used to look up opcode descriptors when building
  // instructions, and the function's register info. Neither is initialized
  // here; presumably both are set at the start of runOnMachineFunction.
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;

  // One BlockData entry per basic block (NOTE(review): presumably indexed by
  // block number; confirm against the definitions).
  std::vector<BlockData> BlockInfo;
  // Queue of blocks still to be processed; BlockData::InQueue tracks
  // membership.
  std::queue<const MachineBasicBlock *> WorkList;

public:
  static char ID;

  // Registers the pass with the global PassRegistry on construction.
  RISCVInsertVSETVLI() : MachineFunctionPass(ID) {
    initializeRISCVInsertVSETVLIPass(*PassRegistry::getPassRegistry());
  }
  bool runOnMachineFunction(MachineFunction &MF) override;

  // Declares that the pass preserves the CFG, then defers to the base class.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }

private:
  // Queries deciding whether a new vsetvli is required.
  bool needVSETVLI(const MachineInstr &MI, const VSETVLIInfo &Require,
                   const VSETVLIInfo &CurInfo) const;
  bool needVSETVLIPHI(const VSETVLIInfo &Require,
                      const MachineBasicBlock &MBB) const;
  // Emit a vsetvli-family instruction establishing Info, given the previously
  // known state PrevInfo. The first overload inserts before MI using MI's
  // debug location; the second takes an explicit insertion point and DebugLoc.
  void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
  void insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);

  // Per-block steps of the pass. NOTE(review): presumably these map onto the
  // three phases described in the file header plus local/PRE cleanups; the
  // definitions are not visible from this declaration.
  bool computeVLVTYPEChanges(const MachineBasicBlock &MBB);
  void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
  void emitVSETVLIs(MachineBasicBlock &MBB);
  void doLocalPrepass(MachineBasicBlock &MBB);
  void doLocalPostpass(MachineBasicBlock &MBB);
  void doPRE(MachineBasicBlock &MBB);
  void insertReadVL(MachineBasicBlock &MBB);
};
503 
504 } // end anonymous namespace
505 
// Definition of the pass's static ID member, which the constructor hands to
// MachineFunctionPass.
char RISCVInsertVSETVLI::ID = 0;

// Pass registration: command-line name DEBUG_TYPE, description
// RISCV_INSERT_VSETVLI_NAME; the two trailing flags are both false.
INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
                false, false)
510 
511 static bool isVectorConfigInstr(const MachineInstr &MI) {
512   return MI.getOpcode() == RISCV::PseudoVSETVLI ||
513          MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
514          MI.getOpcode() == RISCV::PseudoVSETIVLI;
515 }
516 
517 /// Return true if this is 'vsetvli x0, x0, vtype' which preserves
518 /// VL and only sets VTYPE.
519 static bool isVLPreservingConfig(const MachineInstr &MI) {
520   if (MI.getOpcode() != RISCV::PseudoVSETVLIX0)
521     return false;
522   assert(RISCV::X0 == MI.getOperand(1).getReg());
523   return RISCV::X0 == MI.getOperand(0).getReg();
524 }
525 
526 static MachineInstr *elideCopies(MachineInstr *MI,
527                                  const MachineRegisterInfo *MRI) {
528   while (true) {
529     if (!MI->isFullCopy())
530       return MI;
531     if (!Register::isVirtualRegister(MI->getOperand(1).getReg()))
532       return nullptr;
533     MI = MRI->getVRegDef(MI->getOperand(1).getReg());
534     if (!MI)
535       return nullptr;
536   }
537 }
538 
539 static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
540                                        const MachineRegisterInfo *MRI) {
541   VSETVLIInfo InstrInfo;
542 
543   // If the instruction has policy argument, use the argument.
544   // If there is no policy argument, default to tail agnostic unless the
545   // destination is tied to a source. Unless the source is undef. In that case
546   // the user would have some control over the policy values.
547   bool TailAgnostic = true;
548   bool UsesMaskPolicy = RISCVII::usesMaskPolicy(TSFlags);
549   // FIXME: Could we look at the above or below instructions to choose the
550   // matched mask policy to reduce vsetvli instructions? Default mask policy is
551   // agnostic if instructions use mask policy, otherwise is undisturbed. Because
552   // most mask operations are mask undisturbed, so we could possibly reduce the
553   // vsetvli between mask and nomasked instruction sequence.
554   bool MaskAgnostic = UsesMaskPolicy;
555   unsigned UseOpIdx;
556   if (RISCVII::hasVecPolicyOp(TSFlags)) {
557     const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
558     uint64_t Policy = Op.getImm();
559     assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
560            "Invalid Policy Value");
561     // Although in some cases, mismatched passthru/maskedoff with policy value
562     // does not make sense (ex. tied operand is IMPLICIT_DEF with non-TAMA
563     // policy, or tied operand is not IMPLICIT_DEF with TAMA policy), but users
564     // have set the policy value explicitly, so compiler would not fix it.
565     TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC;
566     MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
567   } else if (MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
568     TailAgnostic = false;
569     if (UsesMaskPolicy)
570       MaskAgnostic = false;
571     // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic.
572     const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
573     MachineInstr *UseMI = MRI->getVRegDef(UseMO.getReg());
574     if (UseMI) {
575       UseMI = elideCopies(UseMI, MRI);
576       if (UseMI && UseMI->isImplicitDef()) {
577         TailAgnostic = true;
578         if (UsesMaskPolicy)
579           MaskAgnostic = true;
580       }
581     }
582     // Some pseudo instructions force a tail agnostic policy despite having a
583     // tied def.
584     if (RISCVII::doesForceTailAgnostic(TSFlags))
585       TailAgnostic = true;
586   }
587 
588   RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);
589 
590   unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
591   // A Log2SEW of 0 is an operation on mask registers only.
592   unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
593   assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
594 
595   if (RISCVII::hasVLOp(TSFlags)) {
596     const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
597     if (VLOp.isImm()) {
598       int64_t Imm = VLOp.getImm();
599       // Conver the VLMax sentintel to X0 register.
600       if (Imm == RISCV::VLMaxSentinel)
601         InstrInfo.setAVLReg(RISCV::X0);
602       else
603         InstrInfo.setAVLImm(Imm);
604     } else {
605       InstrInfo.setAVLReg(VLOp.getReg());
606     }
607   } else {
608     InstrInfo.setAVLReg(RISCV::NoRegister);
609   }
610   InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
611 
612   return InstrInfo;
613 }
614 
615 void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
616                                        const VSETVLIInfo &Info,
617                                        const VSETVLIInfo &PrevInfo) {
618   DebugLoc DL = MI.getDebugLoc();
619   insertVSETVLI(MBB, MachineBasicBlock::iterator(&MI), DL, Info, PrevInfo);
620 }
621 
622 void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
623                      MachineBasicBlock::iterator InsertPt, DebugLoc DL,
624                      const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo) {
625 
626   // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
627   // VLMAX.
628   if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
629       Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
630     BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
631         .addReg(RISCV::X0, RegState::Define | RegState::Dead)
632         .addReg(RISCV::X0, RegState::Kill)
633         .addImm(Info.encodeVTYPE())
634         .addReg(RISCV::VL, RegState::Implicit);
635     return;
636   }
637 
638   if (Info.hasAVLImm()) {
639     BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
640         .addReg(RISCV::X0, RegState::Define | RegState::Dead)
641         .addImm(Info.getAVLImm())
642         .addImm(Info.encodeVTYPE());
643     return;
644   }
645 
646   Register AVLReg = Info.getAVLReg();
647   if (AVLReg == RISCV::NoRegister) {
648     // We can only use x0, x0 if there's no chance of the vtype change causing
649     // the previous vl to become invalid.
650     if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
651         Info.hasSameVLMAX(PrevInfo)) {
652       BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
653           .addReg(RISCV::X0, RegState::Define | RegState::Dead)
654           .addReg(RISCV::X0, RegState::Kill)
655           .addImm(Info.encodeVTYPE())
656           .addReg(RISCV::VL, RegState::Implicit);
657       return;
658     }
659     // Otherwise use an AVL of 0 to avoid depending on previous vl.
660     BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
661         .addReg(RISCV::X0, RegState::Define | RegState::Dead)
662         .addImm(0)
663         .addImm(Info.encodeVTYPE());
664     return;
665   }
666 
667   if (AVLReg.isVirtual())
668     MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);
669 
670   // Use X0 as the DestReg unless AVLReg is X0. We also need to change the
671   // opcode if the AVLReg is X0 as they have different register classes for
672   // the AVL operand.
673   Register DestReg = RISCV::X0;
674   unsigned Opcode = RISCV::PseudoVSETVLI;
675   if (AVLReg == RISCV::X0) {
676     DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
677     Opcode = RISCV::PseudoVSETVLIX0;
678   }
679   BuildMI(MBB, InsertPt, DL, TII->get(Opcode))
680       .addReg(DestReg, RegState::Define | RegState::Dead)
681       .addReg(AVLReg)
682       .addImm(Info.encodeVTYPE());
683 }
684 
685 // Return a VSETVLIInfo representing the changes made by this VSETVLI or
686 // VSETIVLI instruction.
687 static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
688   VSETVLIInfo NewInfo;
689   if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
690     NewInfo.setAVLImm(MI.getOperand(1).getImm());
691   } else {
692     assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
693            MI.getOpcode() == RISCV::PseudoVSETVLIX0);
694     Register AVLReg = MI.getOperand(1).getReg();
695     assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
696            "Can't handle X0, X0 vsetvli yet");
697     NewInfo.setAVLReg(AVLReg);
698   }
699   NewInfo.setVTYPE(MI.getOperand(2).getImm());
700 
701   return NewInfo;
702 }
703 
704 bool canSkipVSETVLIForLoadStore(const MachineInstr &MI,
705                                 const VSETVLIInfo &Require,
706                                 const VSETVLIInfo &CurInfo) {
707   unsigned EEW;
708   switch (MI.getOpcode()) {
709   default:
710     return false;
711   case RISCV::PseudoVLE8_V_M1:
712   case RISCV::PseudoVLE8_V_M1_MASK:
713   case RISCV::PseudoVLE8_V_M2:
714   case RISCV::PseudoVLE8_V_M2_MASK:
715   case RISCV::PseudoVLE8_V_M4:
716   case RISCV::PseudoVLE8_V_M4_MASK:
717   case RISCV::PseudoVLE8_V_M8:
718   case RISCV::PseudoVLE8_V_M8_MASK:
719   case RISCV::PseudoVLE8_V_MF2:
720   case RISCV::PseudoVLE8_V_MF2_MASK:
721   case RISCV::PseudoVLE8_V_MF4:
722   case RISCV::PseudoVLE8_V_MF4_MASK:
723   case RISCV::PseudoVLE8_V_MF8:
724   case RISCV::PseudoVLE8_V_MF8_MASK:
725   case RISCV::PseudoVLSE8_V_M1:
726   case RISCV::PseudoVLSE8_V_M1_MASK:
727   case RISCV::PseudoVLSE8_V_M2:
728   case RISCV::PseudoVLSE8_V_M2_MASK:
729   case RISCV::PseudoVLSE8_V_M4:
730   case RISCV::PseudoVLSE8_V_M4_MASK:
731   case RISCV::PseudoVLSE8_V_M8:
732   case RISCV::PseudoVLSE8_V_M8_MASK:
733   case RISCV::PseudoVLSE8_V_MF2:
734   case RISCV::PseudoVLSE8_V_MF2_MASK:
735   case RISCV::PseudoVLSE8_V_MF4:
736   case RISCV::PseudoVLSE8_V_MF4_MASK:
737   case RISCV::PseudoVLSE8_V_MF8:
738   case RISCV::PseudoVLSE8_V_MF8_MASK:
739   case RISCV::PseudoVSE8_V_M1:
740   case RISCV::PseudoVSE8_V_M1_MASK:
741   case RISCV::PseudoVSE8_V_M2:
742   case RISCV::PseudoVSE8_V_M2_MASK:
743   case RISCV::PseudoVSE8_V_M4:
744   case RISCV::PseudoVSE8_V_M4_MASK:
745   case RISCV::PseudoVSE8_V_M8:
746   case RISCV::PseudoVSE8_V_M8_MASK:
747   case RISCV::PseudoVSE8_V_MF2:
748   case RISCV::PseudoVSE8_V_MF2_MASK:
749   case RISCV::PseudoVSE8_V_MF4:
750   case RISCV::PseudoVSE8_V_MF4_MASK:
751   case RISCV::PseudoVSE8_V_MF8:
752   case RISCV::PseudoVSE8_V_MF8_MASK:
753   case RISCV::PseudoVSSE8_V_M1:
754   case RISCV::PseudoVSSE8_V_M1_MASK:
755   case RISCV::PseudoVSSE8_V_M2:
756   case RISCV::PseudoVSSE8_V_M2_MASK:
757   case RISCV::PseudoVSSE8_V_M4:
758   case RISCV::PseudoVSSE8_V_M4_MASK:
759   case RISCV::PseudoVSSE8_V_M8:
760   case RISCV::PseudoVSSE8_V_M8_MASK:
761   case RISCV::PseudoVSSE8_V_MF2:
762   case RISCV::PseudoVSSE8_V_MF2_MASK:
763   case RISCV::PseudoVSSE8_V_MF4:
764   case RISCV::PseudoVSSE8_V_MF4_MASK:
765   case RISCV::PseudoVSSE8_V_MF8:
766   case RISCV::PseudoVSSE8_V_MF8_MASK:
767     EEW = 8;
768     break;
769   case RISCV::PseudoVLE16_V_M1:
770   case RISCV::PseudoVLE16_V_M1_MASK:
771   case RISCV::PseudoVLE16_V_M2:
772   case RISCV::PseudoVLE16_V_M2_MASK:
773   case RISCV::PseudoVLE16_V_M4:
774   case RISCV::PseudoVLE16_V_M4_MASK:
775   case RISCV::PseudoVLE16_V_M8:
776   case RISCV::PseudoVLE16_V_M8_MASK:
777   case RISCV::PseudoVLE16_V_MF2:
778   case RISCV::PseudoVLE16_V_MF2_MASK:
779   case RISCV::PseudoVLE16_V_MF4:
780   case RISCV::PseudoVLE16_V_MF4_MASK:
781   case RISCV::PseudoVLSE16_V_M1:
782   case RISCV::PseudoVLSE16_V_M1_MASK:
783   case RISCV::PseudoVLSE16_V_M2:
784   case RISCV::PseudoVLSE16_V_M2_MASK:
785   case RISCV::PseudoVLSE16_V_M4:
786   case RISCV::PseudoVLSE16_V_M4_MASK:
787   case RISCV::PseudoVLSE16_V_M8:
788   case RISCV::PseudoVLSE16_V_M8_MASK:
789   case RISCV::PseudoVLSE16_V_MF2:
790   case RISCV::PseudoVLSE16_V_MF2_MASK:
791   case RISCV::PseudoVLSE16_V_MF4:
792   case RISCV::PseudoVLSE16_V_MF4_MASK:
793   case RISCV::PseudoVSE16_V_M1:
794   case RISCV::PseudoVSE16_V_M1_MASK:
795   case RISCV::PseudoVSE16_V_M2:
796   case RISCV::PseudoVSE16_V_M2_MASK:
797   case RISCV::PseudoVSE16_V_M4:
798   case RISCV::PseudoVSE16_V_M4_MASK:
799   case RISCV::PseudoVSE16_V_M8:
800   case RISCV::PseudoVSE16_V_M8_MASK:
801   case RISCV::PseudoVSE16_V_MF2:
802   case RISCV::PseudoVSE16_V_MF2_MASK:
803   case RISCV::PseudoVSE16_V_MF4:
804   case RISCV::PseudoVSE16_V_MF4_MASK:
805   case RISCV::PseudoVSSE16_V_M1:
806   case RISCV::PseudoVSSE16_V_M1_MASK:
807   case RISCV::PseudoVSSE16_V_M2:
808   case RISCV::PseudoVSSE16_V_M2_MASK:
809   case RISCV::PseudoVSSE16_V_M4:
810   case RISCV::PseudoVSSE16_V_M4_MASK:
811   case RISCV::PseudoVSSE16_V_M8:
812   case RISCV::PseudoVSSE16_V_M8_MASK:
813   case RISCV::PseudoVSSE16_V_MF2:
814   case RISCV::PseudoVSSE16_V_MF2_MASK:
815   case RISCV::PseudoVSSE16_V_MF4:
816   case RISCV::PseudoVSSE16_V_MF4_MASK:
817     EEW = 16;
818     break;
819   case RISCV::PseudoVLE32_V_M1:
820   case RISCV::PseudoVLE32_V_M1_MASK:
821   case RISCV::PseudoVLE32_V_M2:
822   case RISCV::PseudoVLE32_V_M2_MASK:
823   case RISCV::PseudoVLE32_V_M4:
824   case RISCV::PseudoVLE32_V_M4_MASK:
825   case RISCV::PseudoVLE32_V_M8:
826   case RISCV::PseudoVLE32_V_M8_MASK:
827   case RISCV::PseudoVLE32_V_MF2:
828   case RISCV::PseudoVLE32_V_MF2_MASK:
829   case RISCV::PseudoVLSE32_V_M1:
830   case RISCV::PseudoVLSE32_V_M1_MASK:
831   case RISCV::PseudoVLSE32_V_M2:
832   case RISCV::PseudoVLSE32_V_M2_MASK:
833   case RISCV::PseudoVLSE32_V_M4:
834   case RISCV::PseudoVLSE32_V_M4_MASK:
835   case RISCV::PseudoVLSE32_V_M8:
836   case RISCV::PseudoVLSE32_V_M8_MASK:
837   case RISCV::PseudoVLSE32_V_MF2:
838   case RISCV::PseudoVLSE32_V_MF2_MASK:
839   case RISCV::PseudoVSE32_V_M1:
840   case RISCV::PseudoVSE32_V_M1_MASK:
841   case RISCV::PseudoVSE32_V_M2:
842   case RISCV::PseudoVSE32_V_M2_MASK:
843   case RISCV::PseudoVSE32_V_M4:
844   case RISCV::PseudoVSE32_V_M4_MASK:
845   case RISCV::PseudoVSE32_V_M8:
846   case RISCV::PseudoVSE32_V_M8_MASK:
847   case RISCV::PseudoVSE32_V_MF2:
848   case RISCV::PseudoVSE32_V_MF2_MASK:
849   case RISCV::PseudoVSSE32_V_M1:
850   case RISCV::PseudoVSSE32_V_M1_MASK:
851   case RISCV::PseudoVSSE32_V_M2:
852   case RISCV::PseudoVSSE32_V_M2_MASK:
853   case RISCV::PseudoVSSE32_V_M4:
854   case RISCV::PseudoVSSE32_V_M4_MASK:
855   case RISCV::PseudoVSSE32_V_M8:
856   case RISCV::PseudoVSSE32_V_M8_MASK:
857   case RISCV::PseudoVSSE32_V_MF2:
858   case RISCV::PseudoVSSE32_V_MF2_MASK:
859     EEW = 32;
860     break;
861   case RISCV::PseudoVLE64_V_M1:
862   case RISCV::PseudoVLE64_V_M1_MASK:
863   case RISCV::PseudoVLE64_V_M2:
864   case RISCV::PseudoVLE64_V_M2_MASK:
865   case RISCV::PseudoVLE64_V_M4:
866   case RISCV::PseudoVLE64_V_M4_MASK:
867   case RISCV::PseudoVLE64_V_M8:
868   case RISCV::PseudoVLE64_V_M8_MASK:
869   case RISCV::PseudoVLSE64_V_M1:
870   case RISCV::PseudoVLSE64_V_M1_MASK:
871   case RISCV::PseudoVLSE64_V_M2:
872   case RISCV::PseudoVLSE64_V_M2_MASK:
873   case RISCV::PseudoVLSE64_V_M4:
874   case RISCV::PseudoVLSE64_V_M4_MASK:
875   case RISCV::PseudoVLSE64_V_M8:
876   case RISCV::PseudoVLSE64_V_M8_MASK:
877   case RISCV::PseudoVSE64_V_M1:
878   case RISCV::PseudoVSE64_V_M1_MASK:
879   case RISCV::PseudoVSE64_V_M2:
880   case RISCV::PseudoVSE64_V_M2_MASK:
881   case RISCV::PseudoVSE64_V_M4:
882   case RISCV::PseudoVSE64_V_M4_MASK:
883   case RISCV::PseudoVSE64_V_M8:
884   case RISCV::PseudoVSE64_V_M8_MASK:
885   case RISCV::PseudoVSSE64_V_M1:
886   case RISCV::PseudoVSSE64_V_M1_MASK:
887   case RISCV::PseudoVSSE64_V_M2:
888   case RISCV::PseudoVSSE64_V_M2_MASK:
889   case RISCV::PseudoVSSE64_V_M4:
890   case RISCV::PseudoVSSE64_V_M4_MASK:
891   case RISCV::PseudoVSSE64_V_M8:
892   case RISCV::PseudoVSSE64_V_M8_MASK:
893     EEW = 64;
894     break;
895   }
896 
897   // Stores can ignore the tail and mask policies.
898   const bool StoreOp = MI.getNumExplicitDefs() == 0;
899   if (!StoreOp && !CurInfo.hasSamePolicy(Require))
900     return false;
901 
902   return CurInfo.isCompatibleWithLoadStoreEEW(EEW, Require);
903 }
904 
905 /// Return true if a VSETVLI is required to transition from CurInfo to Require
906 /// before MI.  Require corresponds to the result of computeInfoForInstr(MI...)
907 /// *before* we clear VLOp in phase3.  We can't recompute and assert it here due
908 /// to that muation.
909 bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
910                                      const VSETVLIInfo &Require,
911                                      const VSETVLIInfo &CurInfo) const {
912   if (CurInfo.isCompatible(MI, Require))
913     return false;
914 
915   // We didn't find a compatible value. If our AVL is a virtual register,
916   // it might be defined by a VSET(I)VLI. If it has the same VLMAX we need
917   // and the last VL/VTYPE we observed is the same, we don't need a
918   // VSETVLI here.
919   if (!CurInfo.isUnknown() && Require.hasAVLReg() &&
920       Require.getAVLReg().isVirtual() && !CurInfo.hasSEWLMULRatioOnly() &&
921       CurInfo.hasCompatibleVTYPE(MI, Require)) {
922     if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) {
923       if (isVectorConfigInstr(*DefMI)) {
924         VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
925         if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVLMAX(CurInfo))
926           return false;
927       }
928     }
929   }
930 
931   // If this is a unit-stride or strided load/store, we may be able to use the
932   // EMUL=(EEW/SEW)*LMUL relationship to avoid changing VTYPE.
933   return CurInfo.isUnknown() || !canSkipVSETVLIForLoadStore(MI, Require, CurInfo);
934 }
935 
936 bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB) {
937   bool HadVectorOp = false;
938 
939   BlockData &BBInfo = BlockInfo[MBB.getNumber()];
940   BBInfo.Change = BBInfo.Pred;
941   for (const MachineInstr &MI : MBB) {
942     // If this is an explicit VSETVLI or VSETIVLI, update our state.
943     if (isVectorConfigInstr(MI)) {
944       HadVectorOp = true;
945       BBInfo.Change = getInfoForVSETVLI(MI);
946       continue;
947     }
948 
949     uint64_t TSFlags = MI.getDesc().TSFlags;
950     if (RISCVII::hasSEWOp(TSFlags)) {
951       HadVectorOp = true;
952 
953       VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
954 
955       if (!BBInfo.Change.isValid()) {
956         BBInfo.Change = NewInfo;
957       } else {
958         // If this instruction isn't compatible with the previous VL/VTYPE
959         // we need to insert a VSETVLI.
960         // NOTE: We only do this if the vtype we're comparing against was
961         // created in this block. We need the first and third phase to treat
962         // the store the same way.
963         if (needVSETVLI(MI, NewInfo, BBInfo.Change))
964           BBInfo.Change = NewInfo;
965       }
966     }
967 
968     // If this is something that updates VL/VTYPE that we don't know about, set
969     // the state to unknown.
970     if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
971         MI.modifiesRegister(RISCV::VTYPE))
972       BBInfo.Change = VSETVLIInfo::getUnknown();
973   }
974 
975   return HadVectorOp;
976 }
977 
978 void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {
979 
980   BlockData &BBInfo = BlockInfo[MBB.getNumber()];
981 
982   BBInfo.InQueue = false;
983 
984   VSETVLIInfo InInfo;
985   if (MBB.pred_empty()) {
986     // There are no predecessors, so use the default starting status.
987     InInfo.setUnknown();
988   } else {
989     for (MachineBasicBlock *P : MBB.predecessors())
990       InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
991   }
992 
993   // If we don't have any valid predecessor value, wait until we do.
994   if (!InInfo.isValid())
995     return;
996 
997   // If no change, no need to rerun block
998   if (InInfo == BBInfo.Pred)
999     return;
1000 
1001   BBInfo.Pred = InInfo;
1002   LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
1003                     << " changed to " << BBInfo.Pred << "\n");
1004 
1005   // Note: It's tempting to cache the state changes here, but due to the
1006   // compatibility checks performed a blocks output state can change based on
1007   // the input state.  To cache, we'd have to add logic for finding
1008   // never-compatible state changes.
1009   computeVLVTYPEChanges(MBB);
1010   VSETVLIInfo TmpStatus = BBInfo.Change;
1011 
1012   // If the new exit value matches the old exit value, we don't need to revisit
1013   // any blocks.
1014   if (BBInfo.Exit == TmpStatus)
1015     return;
1016 
1017   BBInfo.Exit = TmpStatus;
1018   LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
1019                     << " changed to " << BBInfo.Exit << "\n");
1020 
1021   // Add the successors to the work list so we can propagate the changed exit
1022   // status.
1023   for (MachineBasicBlock *S : MBB.successors())
1024     if (!BlockInfo[S->getNumber()].InQueue)
1025       WorkList.push(S);
1026 }
1027 
1028 // If we weren't able to prove a vsetvli was directly unneeded, it might still
1029 // be unneeded if the AVL is a phi node where all incoming values are VL
1030 // outputs from the last VSETVLI in their respective basic blocks.
1031 bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
1032                                         const MachineBasicBlock &MBB) const {
1033   if (DisableInsertVSETVLPHIOpt)
1034     return true;
1035 
1036   if (!Require.hasAVLReg())
1037     return true;
1038 
1039   Register AVLReg = Require.getAVLReg();
1040   if (!AVLReg.isVirtual())
1041     return true;
1042 
1043   // We need the AVL to be produce by a PHI node in this basic block.
1044   MachineInstr *PHI = MRI->getVRegDef(AVLReg);
1045   if (!PHI || PHI->getOpcode() != RISCV::PHI || PHI->getParent() != &MBB)
1046     return true;
1047 
1048   for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
1049        PHIOp += 2) {
1050     Register InReg = PHI->getOperand(PHIOp).getReg();
1051     MachineBasicBlock *PBB = PHI->getOperand(PHIOp + 1).getMBB();
1052     const BlockData &PBBInfo = BlockInfo[PBB->getNumber()];
1053     // If the exit from the predecessor has the VTYPE we are looking for
1054     // we might be able to avoid a VSETVLI.
1055     if (PBBInfo.Exit.isUnknown() || !PBBInfo.Exit.hasSameVTYPE(Require))
1056       return true;
1057 
1058     // We need the PHI input to the be the output of a VSET(I)VLI.
1059     MachineInstr *DefMI = MRI->getVRegDef(InReg);
1060     if (!DefMI || !isVectorConfigInstr(*DefMI))
1061       return true;
1062 
1063     // We found a VSET(I)VLI make sure it matches the output of the
1064     // predecessor block.
1065     VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
1066     if (!DefInfo.hasSameAVL(PBBInfo.Exit) ||
1067         !DefInfo.hasSameVTYPE(PBBInfo.Exit))
1068       return true;
1069   }
1070 
1071   // If all the incoming values to the PHI checked out, we don't need
1072   // to insert a VSETVLI.
1073   return false;
1074 }
1075 
1076 void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
1077   VSETVLIInfo CurInfo;
1078   for (MachineInstr &MI : MBB) {
1079     // If this is an explicit VSETVLI or VSETIVLI, update our state.
1080     if (isVectorConfigInstr(MI)) {
1081       // Conservatively, mark the VL and VTYPE as live.
1082       assert(MI.getOperand(3).getReg() == RISCV::VL &&
1083              MI.getOperand(4).getReg() == RISCV::VTYPE &&
1084              "Unexpected operands where VL and VTYPE should be");
1085       MI.getOperand(3).setIsDead(false);
1086       MI.getOperand(4).setIsDead(false);
1087       CurInfo = getInfoForVSETVLI(MI);
1088       continue;
1089     }
1090 
1091     uint64_t TSFlags = MI.getDesc().TSFlags;
1092     if (RISCVII::hasSEWOp(TSFlags)) {
1093       VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
1094       if (RISCVII::hasVLOp(TSFlags)) {
1095         MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
1096         if (VLOp.isReg()) {
1097           // Erase the AVL operand from the instruction.
1098           VLOp.setReg(RISCV::NoRegister);
1099           VLOp.setIsKill(false);
1100         }
1101         MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
1102                                                 /*isImp*/ true));
1103       }
1104       MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
1105                                               /*isImp*/ true));
1106 
1107       if (!CurInfo.isValid()) {
1108         // We haven't found any vector instructions or VL/VTYPE changes yet,
1109         // use the predecessor information.
1110         CurInfo = BlockInfo[MBB.getNumber()].Pred;
1111         assert(CurInfo.isValid() && "Expected a valid predecessor state.");
1112         if (needVSETVLI(MI, NewInfo, CurInfo)) {
1113           // If this is the first implicit state change, and the state change
1114           // requested can be proven to produce the same register contents, we
1115           // can skip emitting the actual state change and continue as if we
1116           // had since we know the GPR result of the implicit state change
1117           // wouldn't be used and VL/VTYPE registers are correct.  Note that
1118           // we *do* need to model the state as if it changed as while the
1119           // register contents are unchanged, the abstract model can change.
1120           if (needVSETVLIPHI(NewInfo, MBB))
1121             insertVSETVLI(MBB, MI, NewInfo, CurInfo);
1122           CurInfo = NewInfo;
1123         }
1124       } else {
1125         // If this instruction isn't compatible with the previous VL/VTYPE
1126         // we need to insert a VSETVLI.
1127         // NOTE: We can't use predecessor information for the store. We must
1128         // treat it the same as the first phase so that we produce the correct
1129         // vl/vtype for succesor blocks.
1130         if (needVSETVLI(MI, NewInfo, CurInfo)) {
1131           insertVSETVLI(MBB, MI, NewInfo, CurInfo);
1132           CurInfo = NewInfo;
1133         }
1134       }
1135     }
1136 
1137     // If this is something that updates VL/VTYPE that we don't know about, set
1138     // the state to unknown.
1139     if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
1140         MI.modifiesRegister(RISCV::VTYPE)) {
1141       CurInfo = VSETVLIInfo::getUnknown();
1142     }
1143   }
1144 
1145   // If we reach the end of the block and our current info doesn't match the
1146   // expected info, insert a vsetvli to correct.
1147   if (!UseStrictAsserts) {
1148     const VSETVLIInfo &ExitInfo = BlockInfo[MBB.getNumber()].Exit;
1149     if (CurInfo.isValid() && ExitInfo.isValid() && !ExitInfo.isUnknown() &&
1150         CurInfo != ExitInfo) {
1151       // Note there's an implicit assumption here that terminators never use
1152       // or modify VL or VTYPE.  Also, fallthrough will return end().
1153       auto InsertPt = MBB.getFirstInstrTerminator();
1154       insertVSETVLI(MBB, InsertPt, MBB.findDebugLoc(InsertPt), ExitInfo,
1155                     CurInfo);
1156       CurInfo = ExitInfo;
1157     }
1158   }
1159 
1160   if (UseStrictAsserts && CurInfo.isValid()) {
1161     const auto &Info = BlockInfo[MBB.getNumber()];
1162     if (CurInfo != Info.Exit) {
1163       LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
1164       LLVM_DEBUG(dbgs() << "  begin        state: " << Info.Pred << "\n");
1165       LLVM_DEBUG(dbgs() << "  expected end state: " << Info.Exit << "\n");
1166       LLVM_DEBUG(dbgs() << "  actual   end state: " << CurInfo << "\n");
1167     }
1168     assert(CurInfo == Info.Exit &&
1169            "InsertVSETVLI dataflow invariant violated");
1170   }
1171 }
1172 
1173 void RISCVInsertVSETVLI::doLocalPrepass(MachineBasicBlock &MBB) {
1174   VSETVLIInfo CurInfo = VSETVLIInfo::getUnknown();
1175   for (MachineInstr &MI : MBB) {
1176     // If this is an explicit VSETVLI or VSETIVLI, update our state.
1177     if (isVectorConfigInstr(MI)) {
1178       CurInfo = getInfoForVSETVLI(MI);
1179       continue;
1180     }
1181 
1182     const uint64_t TSFlags = MI.getDesc().TSFlags;
1183     if (isScalarMoveInstr(MI)) {
1184       assert(RISCVII::hasSEWOp(TSFlags) && RISCVII::hasVLOp(TSFlags));
1185       const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
1186 
1187       // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and
1188       // VL > 0. We can discard the user requested AVL and just use the last
1189       // one if we can prove it equally zero.  This removes a vsetvli entirely
1190       // if the types match or allows use of cheaper avl preserving variant
1191       // if VLMAX doesn't change.  If VLMAX might change, we couldn't use
1192       // the 'vsetvli x0, x0, vtype" variant, so we avoid the transform to
1193       // prevent extending live range of an avl register operand.
1194       // TODO: We can probably relax this for immediates.
1195       if (((CurInfo.hasNonZeroAVL() && NewInfo.hasNonZeroAVL()) ||
1196            (CurInfo.hasZeroAVL() && NewInfo.hasZeroAVL())) &&
1197           NewInfo.hasSameVLMAX(CurInfo)) {
1198         MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
1199         if (CurInfo.hasAVLImm())
1200           VLOp.ChangeToImmediate(CurInfo.getAVLImm());
1201         else
1202           VLOp.ChangeToRegister(CurInfo.getAVLReg(), /*IsDef*/ false);
1203         CurInfo = computeInfoForInstr(MI, TSFlags, MRI);
1204         continue;
1205       }
1206     }
1207 
1208     if (RISCVII::hasSEWOp(TSFlags)) {
1209       if (RISCVII::hasVLOp(TSFlags)) {
1210         const auto Require = computeInfoForInstr(MI, TSFlags, MRI);
1211         // If the AVL is the result of a previous vsetvli which has the
1212         // same AVL and VLMAX as our current state, we can reuse the AVL
1213         // from the current state for the new one.  This allows us to
1214         // generate 'vsetvli x0, x0, vtype" or possible skip the transition
1215         // entirely.
1216         if (!CurInfo.isUnknown() && Require.hasAVLReg() &&
1217             Require.getAVLReg().isVirtual()) {
1218           if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) {
1219             if (isVectorConfigInstr(*DefMI)) {
1220               VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
1221               if (DefInfo.hasSameAVL(CurInfo) &&
1222                   DefInfo.hasSameVLMAX(CurInfo)) {
1223                 MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
1224                 if (CurInfo.hasAVLImm())
1225                   VLOp.ChangeToImmediate(CurInfo.getAVLImm());
1226                 else {
1227                   MRI->clearKillFlags(CurInfo.getAVLReg());
1228                   VLOp.ChangeToRegister(CurInfo.getAVLReg(), /*IsDef*/ false);
1229                 }
1230                 CurInfo = computeInfoForInstr(MI, TSFlags, MRI);
1231                 continue;
1232               }
1233             }
1234           }
1235         }
1236 
1237         // If AVL is defined by a vsetvli with the same VLMAX, we can
1238         // replace the AVL operand with the AVL of the defining vsetvli.
1239         // We avoid general register AVLs to avoid extending live ranges
1240         // without being sure we can kill the original source reg entirely.
1241         // TODO: We can ignore policy bits here, we only need VL to be the same.
1242         if (Require.hasAVLReg() && Require.getAVLReg().isVirtual()) {
1243           if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) {
1244             if (isVectorConfigInstr(*DefMI)) {
1245               VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
1246               if (DefInfo.hasSameVLMAX(Require) &&
1247                   (DefInfo.hasAVLImm() || DefInfo.getAVLReg() == RISCV::X0)) {
1248                 MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
1249                 if (DefInfo.hasAVLImm())
1250                   VLOp.ChangeToImmediate(DefInfo.getAVLImm());
1251                 else
1252                   VLOp.ChangeToRegister(DefInfo.getAVLReg(), /*IsDef*/ false);
1253                 CurInfo = computeInfoForInstr(MI, TSFlags, MRI);
1254                 continue;
1255               }
1256             }
1257           }
1258         }
1259       }
1260       CurInfo = computeInfoForInstr(MI, TSFlags, MRI);
1261       continue;
1262     }
1263 
1264     // If this is something that updates VL/VTYPE that we don't know about,
1265     // set the state to unknown.
1266     if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
1267         MI.modifiesRegister(RISCV::VTYPE))
1268       CurInfo = VSETVLIInfo::getUnknown();
1269   }
1270 }
1271 
1272 /// Return true if the VL value configured must be equal to the requested one.
1273 static bool hasFixedResult(const VSETVLIInfo &Info, const RISCVSubtarget &ST) {
1274   if (!Info.hasAVLImm())
1275     // VLMAX is always the same value.
1276     // TODO: Could extend to other registers by looking at the associated vreg
1277     // def placement.
1278     return RISCV::X0 == Info.getAVLReg();
1279 
1280   unsigned AVL = Info.getAVLImm();
1281   unsigned SEW = Info.getSEW();
1282   unsigned AVLInBits = AVL * SEW;
1283 
1284   unsigned LMul;
1285   bool Fractional;
1286   std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(Info.getVLMUL());
1287 
1288   if (Fractional)
1289     return ST.getRealMinVLen() / LMul >= AVLInBits;
1290   return ST.getRealMinVLen() * LMul >= AVLInBits;
1291 }
1292 
1293 /// Perform simple partial redundancy elimination of the VSETVLI instructions
1294 /// we're about to insert by looking for cases where we can PRE from the
1295 /// beginning of one block to the end of one of its predecessors.  Specifically,
1296 /// this is geared to catch the common case of a fixed length vsetvl in a single
1297 /// block loop when it could execute once in the preheader instead.
1298 void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
1299   const MachineFunction &MF = *MBB.getParent();
1300   const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
1301 
1302   if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
1303     return;
1304 
1305   MachineBasicBlock *UnavailablePred = nullptr;
1306   VSETVLIInfo AvailableInfo;
1307   for (MachineBasicBlock *P : MBB.predecessors()) {
1308     const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
1309     if (PredInfo.isUnknown()) {
1310       if (UnavailablePred)
1311         return;
1312       UnavailablePred = P;
1313     } else if (!AvailableInfo.isValid()) {
1314       AvailableInfo = PredInfo;
1315     } else if (AvailableInfo != PredInfo) {
1316       return;
1317     }
1318   }
1319 
1320   // Unreachable, single pred, or full redundancy. Note that FRE is handled by
1321   // phase 3.
1322   if (!UnavailablePred || !AvailableInfo.isValid())
1323     return;
1324 
1325   // Critical edge - TODO: consider splitting?
1326   if (UnavailablePred->succ_size() != 1)
1327     return;
1328 
1329   // If VL can be less than AVL, then we can't reduce the frequency of exec.
1330   if (!hasFixedResult(AvailableInfo, ST))
1331     return;
1332 
1333   // Does it actually let us remove an implicit transition in MBB?
1334   bool Found = false;
1335   for (auto &MI : MBB) {
1336     if (isVectorConfigInstr(MI))
1337       return;
1338 
1339     const uint64_t TSFlags = MI.getDesc().TSFlags;
1340     if (RISCVII::hasSEWOp(TSFlags)) {
1341       if (AvailableInfo != computeInfoForInstr(MI, TSFlags, MRI))
1342         return;
1343       Found = true;
1344       break;
1345     }
1346   }
1347   if (!Found)
1348     return;
1349 
1350   // Finally, update both data flow state and insert the actual vsetvli.
1351   // Doing both keeps the code in sync with the dataflow results, which
1352   // is critical for correctness of phase 3.
1353   auto OldInfo = BlockInfo[UnavailablePred->getNumber()].Exit;
1354   LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
1355                     << UnavailablePred->getName() << " with state "
1356                     << AvailableInfo << "\n");
1357   BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
1358   BlockInfo[MBB.getNumber()].Pred = AvailableInfo;
1359 
1360   // Note there's an implicit assumption here that terminators never use
1361   // or modify VL or VTYPE.  Also, fallthrough will return end().
1362   auto InsertPt = UnavailablePred->getFirstInstrTerminator();
1363   insertVSETVLI(*UnavailablePred, InsertPt,
1364                 UnavailablePred->findDebugLoc(InsertPt),
1365                 AvailableInfo, OldInfo);
1366 }
1367 
1368 void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
1369   MachineInstr *PrevMI = nullptr;
1370   bool UsedVL = false, UsedVTYPE = false;
1371   SmallVector<MachineInstr*> ToDelete;
1372   for (MachineInstr &MI : MBB) {
1373     // Note: Must be *before* vsetvli handling to account for config cases
1374     // which only change some subfields.
1375     if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VL))
1376       UsedVL = true;
1377     if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VTYPE))
1378       UsedVTYPE = true;
1379 
1380     if (!isVectorConfigInstr(MI))
1381       continue;
1382 
1383     if (PrevMI) {
1384       if (!UsedVL && !UsedVTYPE) {
1385         ToDelete.push_back(PrevMI);
1386         // fallthrough
1387       } else if (!UsedVTYPE && isVLPreservingConfig(MI)) {
1388         // Note: `vsetvli x0, x0, vtype' is the canonical instruction
1389         // for this case.  If you find yourself wanting to add other forms
1390         // to this "unused VTYPE" case, we're probably missing a
1391         // canonicalization earlier.
1392         // Note: We don't need to explicitly check vtype compatibility
1393         // here because this form is only legal (per ISA) when not
1394         // changing VL.
1395         PrevMI->getOperand(2).setImm(MI.getOperand(2).getImm());
1396         ToDelete.push_back(&MI);
1397         // Leave PrevMI unchanged
1398         continue;
1399       }
1400     }
1401     PrevMI = &MI;
1402     UsedVL = false;
1403     UsedVTYPE = false;
1404     Register VRegDef = MI.getOperand(0).getReg();
1405     if (VRegDef != RISCV::X0 &&
1406         !(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef)))
1407       UsedVL = true;
1408   }
1409 
1410   for (auto *MI : ToDelete)
1411     MI->eraseFromParent();
1412 }
1413 
1414 void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
1415   const MachineFunction *MF = MBB.getParent();
1416   const RISCVInstrInfo *TII = MF->getSubtarget<RISCVSubtarget>().getInstrInfo();
1417 
1418   for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
1419     MachineInstr &MI = *I++;
1420     if (TII->isFaultFirstLoad(MI)) {
1421       Register VLOutput = MI.getOperand(1).getReg();
1422       if (!MRI->use_nodbg_empty(VLOutput))
1423         BuildMI(MBB, I, MI.getDebugLoc(), TII->get(RISCV::PseudoReadVL),
1424                 VLOutput);
1425       // We don't use the vl output of the VLEFF/VLSEGFF anymore.
1426       MI.getOperand(1).setReg(RISCV::X0);
1427     }
1428   }
1429 }
1430 
1431 bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
1432   // Skip if the vector extension is not enabled.
1433   const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
1434   if (!ST.hasVInstructions())
1435     return false;
1436 
1437   LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");
1438 
1439   TII = ST.getInstrInfo();
1440   MRI = &MF.getRegInfo();
1441 
1442   assert(BlockInfo.empty() && "Expect empty block infos");
1443   BlockInfo.resize(MF.getNumBlockIDs());
1444 
1445   // Scan the block locally for cases where we can mutate the operands
1446   // of the instructions to reduce state transitions.  Critically, this
1447   // must be done before we start propagating data flow states as these
1448   // transforms are allowed to change the contents of VTYPE and VL so
1449   // long as the semantics of the program stays the same.
1450   for (MachineBasicBlock &MBB : MF)
1451     doLocalPrepass(MBB);
1452 
1453   bool HaveVectorOp = false;
1454 
1455   // Phase 1 - determine how VL/VTYPE are affected by the each block.
1456   for (const MachineBasicBlock &MBB : MF) {
1457     HaveVectorOp |= computeVLVTYPEChanges(MBB);
1458     // Initial exit state is whatever change we found in the block.
1459     BlockData &BBInfo = BlockInfo[MBB.getNumber()];
1460     BBInfo.Exit = BBInfo.Change;
1461     LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
1462                       << " is " << BBInfo.Exit << "\n");
1463 
1464   }
1465 
1466   // If we didn't find any instructions that need VSETVLI, we're done.
1467   if (!HaveVectorOp) {
1468     BlockInfo.clear();
1469     return false;
1470   }
1471 
1472   // Phase 2 - determine the exit VL/VTYPE from each block. We add all
1473   // blocks to the list here, but will also add any that need to be revisited
1474   // during Phase 2 processing.
1475   for (const MachineBasicBlock &MBB : MF) {
1476     WorkList.push(&MBB);
1477     BlockInfo[MBB.getNumber()].InQueue = true;
1478   }
1479   while (!WorkList.empty()) {
1480     const MachineBasicBlock &MBB = *WorkList.front();
1481     WorkList.pop();
1482     computeIncomingVLVTYPE(MBB);
1483   }
1484 
1485   // Perform partial redundancy elimination of vsetvli transitions.
1486   for (MachineBasicBlock &MBB : MF)
1487     doPRE(MBB);
1488 
1489   // Phase 3 - add any vsetvli instructions needed in the block. Use the
1490   // Phase 2 information to avoid adding vsetvlis before the first vector
1491   // instruction in the block if the VL/VTYPE is satisfied by its
1492   // predecessors.
1493   for (MachineBasicBlock &MBB : MF)
1494     emitVSETVLIs(MBB);
1495 
1496   // Now that all vsetvlis are explicit, go through and do block local
1497   // DSE and peephole based demanded fields based transforms.  Note that
1498   // this *must* be done outside the main dataflow so long as we allow
1499   // any cross block analysis within the dataflow.  We can't have both
1500   // demanded fields based mutation and non-local analysis in the
1501   // dataflow at the same time without introducing inconsistencies.
1502   for (MachineBasicBlock &MBB : MF)
1503     doLocalPostpass(MBB);
1504 
1505   // Once we're fully done rewriting all the instructions, do a final pass
1506   // through to check for VSETVLIs which write to an unused destination.
1507   // For the non X0, X0 variant, we can replace the destination register
1508   // with X0 to reduce register pressure.  This is really a generic
1509   // optimization which can be applied to any dead def (TODO: generalize).
1510   for (MachineBasicBlock &MBB : MF) {
1511     for (MachineInstr &MI : MBB) {
1512       if (MI.getOpcode() == RISCV::PseudoVSETVLI ||
1513           MI.getOpcode() == RISCV::PseudoVSETIVLI) {
1514         Register VRegDef = MI.getOperand(0).getReg();
1515         if (VRegDef != RISCV::X0 && MRI->use_nodbg_empty(VRegDef))
1516           MI.getOperand(0).setReg(RISCV::X0);
1517       }
1518     }
1519   }
1520 
1521   // Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
1522   // of VLEFF/VLSEGFF.
1523   for (MachineBasicBlock &MBB : MF)
1524     insertReadVL(MBB);
1525 
1526   BlockInfo.clear();
1527   return HaveVectorOp;
1528 }
1529 
/// Factory for the Insert VSETVLI pass: allocates a new RISCVInsertVSETVLI
/// with `new` and returns it as a FunctionPass pointer.
FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
  return new RISCVInsertVSETVLI();
}
1534