1 //===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a function pass that inserts VSETVLI instructions where
10 // needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
11 // instructions.
12 //
13 // This pass consists of 3 phases:
14 //
15 // Phase 1 collects how each basic block affects VL/VTYPE.
16 //
17 // Phase 2 uses the information from phase 1 to do a data flow analysis to
18 // propagate the VL/VTYPE changes through the function. This gives us the
19 // VL/VTYPE at the start of each basic block.
20 //
21 // Phase 3 inserts VSETVLI instructions in each basic block. Information from
22 // phase 2 is used to prevent inserting a VSETVLI before the first vector
23 // instruction in the block if possible.
24 //
25 //===----------------------------------------------------------------------===//
26 
27 #include "RISCV.h"
28 #include "RISCVSubtarget.h"
29 #include "llvm/CodeGen/LiveIntervals.h"
30 #include "llvm/CodeGen/MachineFunctionPass.h"
31 #include <queue>
32 using namespace llvm;
33 
34 #define DEBUG_TYPE "riscv-insert-vsetvli"
35 #define RISCV_INSERT_VSETVLI_NAME "RISCV Insert VSETVLI pass"
36 
// Hidden command-line flag, off by default.  Per its description, setting it
// disables looking through phis when inserting vsetvlis.
static cl::opt<bool> DisableInsertVSETVLPHIOpt(
    "riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
    cl::desc("Disable looking through phis when inserting vsetvlis."));
40 
// Hidden command-line flag, on by default.  Per its description, it enables
// strict assertion checking for the dataflow algorithm.
// NOTE(review): the code consulting this flag is not in this part of the
// file; confirm exactly which checks it gates.
static cl::opt<bool> UseStrictAsserts(
    "riscv-insert-vsetvl-strict-asserts", cl::init(true), cl::Hidden,
    cl::desc("Enable strict assertion checking for the dataflow algorithm"));
44 
45 namespace {
46 
47 static unsigned getVLOpNum(const MachineInstr &MI) {
48   return RISCVII::getVLOpNum(MI.getDesc());
49 }
50 
51 static unsigned getSEWOpNum(const MachineInstr &MI) {
52   return RISCVII::getSEWOpNum(MI.getDesc());
53 }
54 
55 static bool isScalarMoveInstr(const MachineInstr &MI) {
56   switch (MI.getOpcode()) {
57   default:
58     return false;
59   case RISCV::PseudoVMV_S_X_M1:
60   case RISCV::PseudoVMV_S_X_M2:
61   case RISCV::PseudoVMV_S_X_M4:
62   case RISCV::PseudoVMV_S_X_M8:
63   case RISCV::PseudoVMV_S_X_MF2:
64   case RISCV::PseudoVMV_S_X_MF4:
65   case RISCV::PseudoVMV_S_X_MF8:
66   case RISCV::PseudoVFMV_S_F16_M1:
67   case RISCV::PseudoVFMV_S_F16_M2:
68   case RISCV::PseudoVFMV_S_F16_M4:
69   case RISCV::PseudoVFMV_S_F16_M8:
70   case RISCV::PseudoVFMV_S_F16_MF2:
71   case RISCV::PseudoVFMV_S_F16_MF4:
72   case RISCV::PseudoVFMV_S_F32_M1:
73   case RISCV::PseudoVFMV_S_F32_M2:
74   case RISCV::PseudoVFMV_S_F32_M4:
75   case RISCV::PseudoVFMV_S_F32_M8:
76   case RISCV::PseudoVFMV_S_F32_MF2:
77   case RISCV::PseudoVFMV_S_F64_M1:
78   case RISCV::PseudoVFMV_S_F64_M2:
79   case RISCV::PseudoVFMV_S_F64_M4:
80   case RISCV::PseudoVFMV_S_F64_M8:
81     return true;
82   }
83 }
84 
85 static bool isSplatMoveInstr(const MachineInstr &MI) {
86   switch (MI.getOpcode()) {
87   default:
88     return false;
89   case RISCV::PseudoVMV_V_X_M1:
90   case RISCV::PseudoVMV_V_X_M2:
91   case RISCV::PseudoVMV_V_X_M4:
92   case RISCV::PseudoVMV_V_X_M8:
93   case RISCV::PseudoVMV_V_X_MF2:
94   case RISCV::PseudoVMV_V_X_MF4:
95   case RISCV::PseudoVMV_V_X_MF8:
96   case RISCV::PseudoVMV_V_I_M1:
97   case RISCV::PseudoVMV_V_I_M2:
98   case RISCV::PseudoVMV_V_I_M4:
99   case RISCV::PseudoVMV_V_I_M8:
100   case RISCV::PseudoVMV_V_I_MF2:
101   case RISCV::PseudoVMV_V_I_MF4:
102   case RISCV::PseudoVMV_V_I_MF8:
103     return true;
104   }
105 }
106 
107 static bool isSplatOfZeroOrMinusOne(const MachineInstr &MI) {
108   if (!isSplatMoveInstr(MI))
109     return false;
110 
111   const MachineOperand &SrcMO = MI.getOperand(1);
112   if (SrcMO.isImm())
113     return SrcMO.getImm() == 0 || SrcMO.getImm() == -1;
114   return SrcMO.isReg() && SrcMO.getReg() == RISCV::X0;
115 }
116 
/// Get the EEW for a load or store instruction.  Return None if MI is not
/// a load or store which ignores SEW.
///
/// The opcodes recognized are the PseudoVLE<EEW>, PseudoVLSE<EEW>,
/// PseudoVSE<EEW> and PseudoVSSE<EEW> pseudos, in both their plain and _MASK
/// forms.  The returned value is the <EEW> encoded in the opcode name
/// (8, 16, 32 or 64).  Any opcode not listed here yields None.
static Optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return None;
  // EEW = 8: LMUL variants M1..M8 and MF2..MF8.
  case RISCV::PseudoVLE8_V_M1:
  case RISCV::PseudoVLE8_V_M1_MASK:
  case RISCV::PseudoVLE8_V_M2:
  case RISCV::PseudoVLE8_V_M2_MASK:
  case RISCV::PseudoVLE8_V_M4:
  case RISCV::PseudoVLE8_V_M4_MASK:
  case RISCV::PseudoVLE8_V_M8:
  case RISCV::PseudoVLE8_V_M8_MASK:
  case RISCV::PseudoVLE8_V_MF2:
  case RISCV::PseudoVLE8_V_MF2_MASK:
  case RISCV::PseudoVLE8_V_MF4:
  case RISCV::PseudoVLE8_V_MF4_MASK:
  case RISCV::PseudoVLE8_V_MF8:
  case RISCV::PseudoVLE8_V_MF8_MASK:
  case RISCV::PseudoVLSE8_V_M1:
  case RISCV::PseudoVLSE8_V_M1_MASK:
  case RISCV::PseudoVLSE8_V_M2:
  case RISCV::PseudoVLSE8_V_M2_MASK:
  case RISCV::PseudoVLSE8_V_M4:
  case RISCV::PseudoVLSE8_V_M4_MASK:
  case RISCV::PseudoVLSE8_V_M8:
  case RISCV::PseudoVLSE8_V_M8_MASK:
  case RISCV::PseudoVLSE8_V_MF2:
  case RISCV::PseudoVLSE8_V_MF2_MASK:
  case RISCV::PseudoVLSE8_V_MF4:
  case RISCV::PseudoVLSE8_V_MF4_MASK:
  case RISCV::PseudoVLSE8_V_MF8:
  case RISCV::PseudoVLSE8_V_MF8_MASK:
  case RISCV::PseudoVSE8_V_M1:
  case RISCV::PseudoVSE8_V_M1_MASK:
  case RISCV::PseudoVSE8_V_M2:
  case RISCV::PseudoVSE8_V_M2_MASK:
  case RISCV::PseudoVSE8_V_M4:
  case RISCV::PseudoVSE8_V_M4_MASK:
  case RISCV::PseudoVSE8_V_M8:
  case RISCV::PseudoVSE8_V_M8_MASK:
  case RISCV::PseudoVSE8_V_MF2:
  case RISCV::PseudoVSE8_V_MF2_MASK:
  case RISCV::PseudoVSE8_V_MF4:
  case RISCV::PseudoVSE8_V_MF4_MASK:
  case RISCV::PseudoVSE8_V_MF8:
  case RISCV::PseudoVSE8_V_MF8_MASK:
  case RISCV::PseudoVSSE8_V_M1:
  case RISCV::PseudoVSSE8_V_M1_MASK:
  case RISCV::PseudoVSSE8_V_M2:
  case RISCV::PseudoVSSE8_V_M2_MASK:
  case RISCV::PseudoVSSE8_V_M4:
  case RISCV::PseudoVSSE8_V_M4_MASK:
  case RISCV::PseudoVSSE8_V_M8:
  case RISCV::PseudoVSSE8_V_M8_MASK:
  case RISCV::PseudoVSSE8_V_MF2:
  case RISCV::PseudoVSSE8_V_MF2_MASK:
  case RISCV::PseudoVSSE8_V_MF4:
  case RISCV::PseudoVSSE8_V_MF4_MASK:
  case RISCV::PseudoVSSE8_V_MF8:
  case RISCV::PseudoVSSE8_V_MF8_MASK:
    return 8;
  // EEW = 16: LMUL variants M1..M8, MF2 and MF4 (no MF8 entries).
  case RISCV::PseudoVLE16_V_M1:
  case RISCV::PseudoVLE16_V_M1_MASK:
  case RISCV::PseudoVLE16_V_M2:
  case RISCV::PseudoVLE16_V_M2_MASK:
  case RISCV::PseudoVLE16_V_M4:
  case RISCV::PseudoVLE16_V_M4_MASK:
  case RISCV::PseudoVLE16_V_M8:
  case RISCV::PseudoVLE16_V_M8_MASK:
  case RISCV::PseudoVLE16_V_MF2:
  case RISCV::PseudoVLE16_V_MF2_MASK:
  case RISCV::PseudoVLE16_V_MF4:
  case RISCV::PseudoVLE16_V_MF4_MASK:
  case RISCV::PseudoVLSE16_V_M1:
  case RISCV::PseudoVLSE16_V_M1_MASK:
  case RISCV::PseudoVLSE16_V_M2:
  case RISCV::PseudoVLSE16_V_M2_MASK:
  case RISCV::PseudoVLSE16_V_M4:
  case RISCV::PseudoVLSE16_V_M4_MASK:
  case RISCV::PseudoVLSE16_V_M8:
  case RISCV::PseudoVLSE16_V_M8_MASK:
  case RISCV::PseudoVLSE16_V_MF2:
  case RISCV::PseudoVLSE16_V_MF2_MASK:
  case RISCV::PseudoVLSE16_V_MF4:
  case RISCV::PseudoVLSE16_V_MF4_MASK:
  case RISCV::PseudoVSE16_V_M1:
  case RISCV::PseudoVSE16_V_M1_MASK:
  case RISCV::PseudoVSE16_V_M2:
  case RISCV::PseudoVSE16_V_M2_MASK:
  case RISCV::PseudoVSE16_V_M4:
  case RISCV::PseudoVSE16_V_M4_MASK:
  case RISCV::PseudoVSE16_V_M8:
  case RISCV::PseudoVSE16_V_M8_MASK:
  case RISCV::PseudoVSE16_V_MF2:
  case RISCV::PseudoVSE16_V_MF2_MASK:
  case RISCV::PseudoVSE16_V_MF4:
  case RISCV::PseudoVSE16_V_MF4_MASK:
  case RISCV::PseudoVSSE16_V_M1:
  case RISCV::PseudoVSSE16_V_M1_MASK:
  case RISCV::PseudoVSSE16_V_M2:
  case RISCV::PseudoVSSE16_V_M2_MASK:
  case RISCV::PseudoVSSE16_V_M4:
  case RISCV::PseudoVSSE16_V_M4_MASK:
  case RISCV::PseudoVSSE16_V_M8:
  case RISCV::PseudoVSSE16_V_M8_MASK:
  case RISCV::PseudoVSSE16_V_MF2:
  case RISCV::PseudoVSSE16_V_MF2_MASK:
  case RISCV::PseudoVSSE16_V_MF4:
  case RISCV::PseudoVSSE16_V_MF4_MASK:
    return 16;
  // EEW = 32: LMUL variants M1..M8 and MF2 only.
  case RISCV::PseudoVLE32_V_M1:
  case RISCV::PseudoVLE32_V_M1_MASK:
  case RISCV::PseudoVLE32_V_M2:
  case RISCV::PseudoVLE32_V_M2_MASK:
  case RISCV::PseudoVLE32_V_M4:
  case RISCV::PseudoVLE32_V_M4_MASK:
  case RISCV::PseudoVLE32_V_M8:
  case RISCV::PseudoVLE32_V_M8_MASK:
  case RISCV::PseudoVLE32_V_MF2:
  case RISCV::PseudoVLE32_V_MF2_MASK:
  case RISCV::PseudoVLSE32_V_M1:
  case RISCV::PseudoVLSE32_V_M1_MASK:
  case RISCV::PseudoVLSE32_V_M2:
  case RISCV::PseudoVLSE32_V_M2_MASK:
  case RISCV::PseudoVLSE32_V_M4:
  case RISCV::PseudoVLSE32_V_M4_MASK:
  case RISCV::PseudoVLSE32_V_M8:
  case RISCV::PseudoVLSE32_V_M8_MASK:
  case RISCV::PseudoVLSE32_V_MF2:
  case RISCV::PseudoVLSE32_V_MF2_MASK:
  case RISCV::PseudoVSE32_V_M1:
  case RISCV::PseudoVSE32_V_M1_MASK:
  case RISCV::PseudoVSE32_V_M2:
  case RISCV::PseudoVSE32_V_M2_MASK:
  case RISCV::PseudoVSE32_V_M4:
  case RISCV::PseudoVSE32_V_M4_MASK:
  case RISCV::PseudoVSE32_V_M8:
  case RISCV::PseudoVSE32_V_M8_MASK:
  case RISCV::PseudoVSE32_V_MF2:
  case RISCV::PseudoVSE32_V_MF2_MASK:
  case RISCV::PseudoVSSE32_V_M1:
  case RISCV::PseudoVSSE32_V_M1_MASK:
  case RISCV::PseudoVSSE32_V_M2:
  case RISCV::PseudoVSSE32_V_M2_MASK:
  case RISCV::PseudoVSSE32_V_M4:
  case RISCV::PseudoVSSE32_V_M4_MASK:
  case RISCV::PseudoVSSE32_V_M8:
  case RISCV::PseudoVSSE32_V_M8_MASK:
  case RISCV::PseudoVSSE32_V_MF2:
  case RISCV::PseudoVSSE32_V_MF2_MASK:
    return 32;
  // EEW = 64: LMUL variants M1..M8 only (no fractional entries).
  case RISCV::PseudoVLE64_V_M1:
  case RISCV::PseudoVLE64_V_M1_MASK:
  case RISCV::PseudoVLE64_V_M2:
  case RISCV::PseudoVLE64_V_M2_MASK:
  case RISCV::PseudoVLE64_V_M4:
  case RISCV::PseudoVLE64_V_M4_MASK:
  case RISCV::PseudoVLE64_V_M8:
  case RISCV::PseudoVLE64_V_M8_MASK:
  case RISCV::PseudoVLSE64_V_M1:
  case RISCV::PseudoVLSE64_V_M1_MASK:
  case RISCV::PseudoVLSE64_V_M2:
  case RISCV::PseudoVLSE64_V_M2_MASK:
  case RISCV::PseudoVLSE64_V_M4:
  case RISCV::PseudoVLSE64_V_M4_MASK:
  case RISCV::PseudoVLSE64_V_M8:
  case RISCV::PseudoVLSE64_V_M8_MASK:
  case RISCV::PseudoVSE64_V_M1:
  case RISCV::PseudoVSE64_V_M1_MASK:
  case RISCV::PseudoVSE64_V_M2:
  case RISCV::PseudoVSE64_V_M2_MASK:
  case RISCV::PseudoVSE64_V_M4:
  case RISCV::PseudoVSE64_V_M4_MASK:
  case RISCV::PseudoVSE64_V_M8:
  case RISCV::PseudoVSE64_V_M8_MASK:
  case RISCV::PseudoVSSE64_V_M1:
  case RISCV::PseudoVSSE64_V_M1_MASK:
  case RISCV::PseudoVSSE64_V_M2:
  case RISCV::PseudoVSSE64_V_M2_MASK:
  case RISCV::PseudoVSSE64_V_M4:
  case RISCV::PseudoVSSE64_V_M4_MASK:
  case RISCV::PseudoVSSE64_V_M8:
  case RISCV::PseudoVSSE64_V_M8_MASK:
    return 64;
  }
}
305 
306 /// Return true if this is an operation on mask registers.  Note that
307 /// this includes both arithmetic/logical ops and load/store (vlm/vsm).
308 static bool isMaskRegOp(const MachineInstr &MI) {
309   if (RISCVII::hasSEWOp(MI.getDesc().TSFlags)) {
310     const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
311     // A Log2SEW of 0 is an operation on mask registers only.
312     return Log2SEW == 0;
313   }
314   return false;
315 }
316 
317 static unsigned getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul) {
318   unsigned LMul;
319   bool Fractional;
320   std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(VLMul);
321 
322   // Convert LMul to a fixed point value with 3 fractional bits.
323   LMul = Fractional ? (8 / LMul) : (LMul * 8);
324 
325   assert(SEW >= 8 && "Unexpected SEW value");
326   return (SEW * 8) / LMul;
327 }
328 
/// Which subfields of VL or VTYPE have values we need to preserve?
///
/// Every flag defaults to false, i.e. nothing is demanded.  VL covers the VL
/// register; the remaining flags are the VTYPE subfields and properties.
struct DemandedFields {
  bool VL = false;
  bool SEW = false;
  bool LMUL = false;
  bool SEWLMULRatio = false;
  bool TailPolicy = false;
  bool MaskPolicy = false;

  // Return true if any part of VTYPE was used.  This is a pure query, so it
  // is const-qualified and can be called on const DemandedFields values.
  bool usedVTYPE() const {
    return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
  }

  // Mark all VTYPE subfields and properties as demanded.  VL is left
  // untouched.
  void demandVTYPE() {
    SEW = true;
    LMUL = true;
    SEWLMULRatio = true;
    TailPolicy = true;
    MaskPolicy = true;
  }
};
352 
353 /// Return true if the two values of the VTYPE register provided are
354 /// indistinguishable from the perspective of an instruction (or set of
355 /// instructions) which use only the Used subfields and properties.
356 static bool areCompatibleVTYPEs(uint64_t VType1,
357                                 uint64_t VType2,
358                                 const DemandedFields &Used) {
359   if (Used.SEW &&
360       RISCVVType::getSEW(VType1) != RISCVVType::getSEW(VType2))
361     return false;
362 
363   if (Used.LMUL &&
364       RISCVVType::getVLMUL(VType1) != RISCVVType::getVLMUL(VType2))
365     return false;
366 
367   if (Used.SEWLMULRatio) {
368     auto Ratio1 = getSEWLMULRatio(RISCVVType::getSEW(VType1),
369                                   RISCVVType::getVLMUL(VType1));
370     auto Ratio2 = getSEWLMULRatio(RISCVVType::getSEW(VType2),
371                                   RISCVVType::getVLMUL(VType2));
372     if (Ratio1 != Ratio2)
373       return false;
374   }
375 
376   if (Used.TailPolicy &&
377       RISCVVType::isTailAgnostic(VType1) != RISCVVType::isTailAgnostic(VType2))
378     return false;
379   if (Used.MaskPolicy &&
380       RISCVVType::isMaskAgnostic(VType1) != RISCVVType::isMaskAgnostic(VType2))
381     return false;
382   return true;
383 }
384 
385 /// Return the fields and properties demanded by the provided instruction.
386 static DemandedFields getDemanded(const MachineInstr &MI) {
387   // Warning: This function has to work on both the lowered (i.e. post
388   // emitVSETVLIs) and pre-lowering forms.  The main implication of this is
389   // that it can't use the value of a SEW, VL, or Policy operand as they might
390   // be stale after lowering.
391 
392   // Most instructions don't use any of these subfeilds.
393   DemandedFields Res;
394   // Start conservative if registers are used
395   if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VL))
396     Res.VL = true;
397   if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VTYPE))
398     Res.demandVTYPE();
399   // Start conservative on the unlowered form too
400   uint64_t TSFlags = MI.getDesc().TSFlags;
401   if (RISCVII::hasSEWOp(TSFlags)) {
402     Res.demandVTYPE();
403     if (RISCVII::hasVLOp(TSFlags))
404       Res.VL = true;
405   }
406 
407   // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
408   // They instead demand the ratio of the two which is used in computing
409   // EMUL, but which allows us the flexibility to change SEW and LMUL
410   // provided we don't change the ratio.
411   // Note: We assume that the instructions initial SEW is the EEW encoded
412   // in the opcode.  This is asserted when constructing the VSETVLIInfo.
413   if (getEEWForLoadStore(MI)) {
414     Res.SEW = false;
415     Res.LMUL = false;
416   }
417 
418   // Store instructions don't use the policy fields.
419   if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
420     Res.TailPolicy = false;
421     Res.MaskPolicy = false;
422   }
423 
424   // A splat of 0/-1 is always a splat of 0/-1, regardless of etype.
425   // TODO: We're currently demanding VL + SEWLMULRatio which is sufficient
426   // but not neccessary.  What we really need is VLInBytes.
427   if (isSplatOfZeroOrMinusOne(MI)) {
428     Res.SEW = false;
429     Res.LMUL = false;
430   }
431 
432   // If this is a mask reg operation, it only cares about VLMAX.
433   // TODO: Possible extensions to this logic
434   // * Probably ok if available VLMax is larger than demanded
435   // * The policy bits can probably be ignored..
436   if (isMaskRegOp(MI)) {
437     Res.SEW = false;
438     Res.LMUL = false;
439   }
440 
441   return Res;
442 }
443 
444 /// Defines the abstract state with which the forward dataflow models the
445 /// values of the VL and VTYPE registers after insertion.
446 class VSETVLIInfo {
447   union {
448     Register AVLReg;
449     unsigned AVLImm;
450   };
451 
452   enum : uint8_t {
453     Uninitialized,
454     AVLIsReg,
455     AVLIsImm,
456     Unknown,
457   } State = Uninitialized;
458 
459   // Fields from VTYPE.
460   RISCVII::VLMUL VLMul = RISCVII::LMUL_1;
461   uint8_t SEW = 0;
462   uint8_t TailAgnostic : 1;
463   uint8_t MaskAgnostic : 1;
464   uint8_t SEWLMULRatioOnly : 1;
465 
466 public:
467   VSETVLIInfo()
468       : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
469         SEWLMULRatioOnly(false) {}
470 
471   static VSETVLIInfo getUnknown() {
472     VSETVLIInfo Info;
473     Info.setUnknown();
474     return Info;
475   }
476 
477   bool isValid() const { return State != Uninitialized; }
478   void setUnknown() { State = Unknown; }
479   bool isUnknown() const { return State == Unknown; }
480 
481   void setAVLReg(Register Reg) {
482     AVLReg = Reg;
483     State = AVLIsReg;
484   }
485 
486   void setAVLImm(unsigned Imm) {
487     AVLImm = Imm;
488     State = AVLIsImm;
489   }
490 
491   bool hasAVLImm() const { return State == AVLIsImm; }
492   bool hasAVLReg() const { return State == AVLIsReg; }
493   Register getAVLReg() const {
494     assert(hasAVLReg());
495     return AVLReg;
496   }
497   unsigned getAVLImm() const {
498     assert(hasAVLImm());
499     return AVLImm;
500   }
501 
502   unsigned getSEW() const { return SEW; }
503   RISCVII::VLMUL getVLMUL() const { return VLMul; }
504 
505   bool hasNonZeroAVL() const {
506     if (hasAVLImm())
507       return getAVLImm() > 0;
508     if (hasAVLReg())
509       return getAVLReg() == RISCV::X0;
510     return false;
511   }
512 
513   bool hasSameAVL(const VSETVLIInfo &Other) const {
514     assert(isValid() && Other.isValid() &&
515            "Can't compare invalid VSETVLIInfos");
516     assert(!isUnknown() && !Other.isUnknown() &&
517            "Can't compare AVL in unknown state");
518     if (hasAVLReg() && Other.hasAVLReg())
519       return getAVLReg() == Other.getAVLReg();
520 
521     if (hasAVLImm() && Other.hasAVLImm())
522       return getAVLImm() == Other.getAVLImm();
523 
524     return false;
525   }
526 
527   void setVTYPE(unsigned VType) {
528     assert(isValid() && !isUnknown() &&
529            "Can't set VTYPE for uninitialized or unknown");
530     VLMul = RISCVVType::getVLMUL(VType);
531     SEW = RISCVVType::getSEW(VType);
532     TailAgnostic = RISCVVType::isTailAgnostic(VType);
533     MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
534   }
535   void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) {
536     assert(isValid() && !isUnknown() &&
537            "Can't set VTYPE for uninitialized or unknown");
538     VLMul = L;
539     SEW = S;
540     TailAgnostic = TA;
541     MaskAgnostic = MA;
542   }
543 
544   unsigned encodeVTYPE() const {
545     assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
546            "Can't encode VTYPE for uninitialized or unknown");
547     return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
548   }
549 
550   bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }
551 
552   bool hasSameSEW(const VSETVLIInfo &Other) const {
553     assert(isValid() && Other.isValid() &&
554            "Can't compare invalid VSETVLIInfos");
555     assert(!isUnknown() && !Other.isUnknown() &&
556            "Can't compare VTYPE in unknown state");
557     assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
558            "Can't compare when only LMUL/SEW ratio is valid.");
559     return SEW == Other.SEW;
560   }
561 
562   bool hasSameVTYPE(const VSETVLIInfo &Other) const {
563     assert(isValid() && Other.isValid() &&
564            "Can't compare invalid VSETVLIInfos");
565     assert(!isUnknown() && !Other.isUnknown() &&
566            "Can't compare VTYPE in unknown state");
567     assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
568            "Can't compare when only LMUL/SEW ratio is valid.");
569     return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
570            std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
571                     Other.MaskAgnostic);
572   }
573 
574   unsigned getSEWLMULRatio() const {
575     assert(isValid() && !isUnknown() &&
576            "Can't use VTYPE for uninitialized or unknown");
577     return ::getSEWLMULRatio(SEW, VLMul);
578   }
579 
580   // Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX.
581   // Note that having the same VLMAX ensures that both share the same
582   // function from AVL to VL; that is, they must produce the same VL value
583   // for any given AVL value.
584   bool hasSameVLMAX(const VSETVLIInfo &Other) const {
585     assert(isValid() && Other.isValid() &&
586            "Can't compare invalid VSETVLIInfos");
587     assert(!isUnknown() && !Other.isUnknown() &&
588            "Can't compare VTYPE in unknown state");
589     return getSEWLMULRatio() == Other.getSEWLMULRatio();
590   }
591 
592   bool hasSamePolicy(const VSETVLIInfo &Other) const {
593     assert(isValid() && Other.isValid() &&
594            "Can't compare invalid VSETVLIInfos");
595     assert(!isUnknown() && !Other.isUnknown() &&
596            "Can't compare VTYPE in unknown state");
597     return TailAgnostic == Other.TailAgnostic &&
598            MaskAgnostic == Other.MaskAgnostic;
599   }
600 
601   bool hasCompatibleVTYPE(const MachineInstr &MI,
602                           const VSETVLIInfo &Require) const {
603     const DemandedFields Used = getDemanded(MI);
604     return areCompatibleVTYPEs(encodeVTYPE(), Require.encodeVTYPE(), Used);
605   }
606 
607   // Determine whether the vector instructions requirements represented by
608   // Require are compatible with the previous vsetvli instruction represented
609   // by this.  MI is the instruction whose requirements we're considering.
610   bool isCompatible(const MachineInstr &MI, const VSETVLIInfo &Require) const {
611     assert(isValid() && Require.isValid() &&
612            "Can't compare invalid VSETVLIInfos");
613     assert(!Require.SEWLMULRatioOnly &&
614            "Expected a valid VTYPE for instruction!");
615     // Nothing is compatible with Unknown.
616     if (isUnknown() || Require.isUnknown())
617       return false;
618 
619     // If only our VLMAX ratio is valid, then this isn't compatible.
620     if (SEWLMULRatioOnly)
621       return false;
622 
623     // If the instruction doesn't need an AVLReg and the SEW matches, consider
624     // it compatible.
625     if (Require.hasAVLReg() && Require.AVLReg == RISCV::NoRegister)
626       if (SEW == Require.SEW)
627         return true;
628 
629     return hasSameAVL(Require) && hasCompatibleVTYPE(MI, Require);
630   }
631 
632   bool operator==(const VSETVLIInfo &Other) const {
633     // Uninitialized is only equal to another Uninitialized.
634     if (!isValid())
635       return !Other.isValid();
636     if (!Other.isValid())
637       return !isValid();
638 
639     // Unknown is only equal to another Unknown.
640     if (isUnknown())
641       return Other.isUnknown();
642     if (Other.isUnknown())
643       return isUnknown();
644 
645     if (!hasSameAVL(Other))
646       return false;
647 
648     // If the SEWLMULRatioOnly bits are different, then they aren't equal.
649     if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
650       return false;
651 
652     // If only the VLMAX is valid, check that it is the same.
653     if (SEWLMULRatioOnly)
654       return hasSameVLMAX(Other);
655 
656     // If the full VTYPE is valid, check that it is the same.
657     return hasSameVTYPE(Other);
658   }
659 
660   bool operator!=(const VSETVLIInfo &Other) const {
661     return !(*this == Other);
662   }
663 
664   // Calculate the VSETVLIInfo visible to a block assuming this and Other are
665   // both predecessors.
666   VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
667     // If the new value isn't valid, ignore it.
668     if (!Other.isValid())
669       return *this;
670 
671     // If this value isn't valid, this must be the first predecessor, use it.
672     if (!isValid())
673       return Other;
674 
675     // If either is unknown, the result is unknown.
676     if (isUnknown() || Other.isUnknown())
677       return VSETVLIInfo::getUnknown();
678 
679     // If we have an exact, match return this.
680     if (*this == Other)
681       return *this;
682 
683     // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
684     // return an SEW/LMUL ratio only value.
685     if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
686       VSETVLIInfo MergeInfo = *this;
687       MergeInfo.SEWLMULRatioOnly = true;
688       return MergeInfo;
689     }
690 
691     // Otherwise the result is unknown.
692     return VSETVLIInfo::getUnknown();
693   }
694 
695 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
696   /// Support for debugging, callable in GDB: V->dump()
697   LLVM_DUMP_METHOD void dump() const {
698     print(dbgs());
699     dbgs() << "\n";
700   }
701 
702   /// Implement operator<<.
703   /// @{
704   void print(raw_ostream &OS) const {
705     OS << "{";
706     if (!isValid())
707       OS << "Uninitialized";
708     if (isUnknown())
709       OS << "unknown";;
710     if (hasAVLReg())
711       OS << "AVLReg=" << (unsigned)AVLReg;
712     if (hasAVLImm())
713       OS << "AVLImm=" << (unsigned)AVLImm;
714     OS << ", "
715        << "VLMul=" << (unsigned)VLMul << ", "
716        << "SEW=" << (unsigned)SEW << ", "
717        << "TailAgnostic=" << (bool)TailAgnostic << ", "
718        << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
719        << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
720   }
721 #endif
722 };
723 
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Stream insertion for VSETVLIInfo: forwards to VSETVLIInfo::print and
/// returns the stream so calls can be chained.  Only compiled when NDEBUG is
/// not defined or LLVM_ENABLE_DUMP is defined, matching the guard on print().
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
  V.print(OS);
  return OS;
}
#endif
731 
/// Per-basic-block bookkeeping for the pass.  All three VSETVLIInfo members
/// start out default-constructed and InQueue starts out false.
struct BlockData {
  // The VSETVLIInfo that represents the net changes to the VL/VTYPE registers
  // made by this block. Calculated in Phase 1.
  VSETVLIInfo Change;

  // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
  // block. Calculated in Phase 2.
  VSETVLIInfo Exit;

  // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
  // blocks. Calculated in Phase 2, and used by Phase 3.
  VSETVLIInfo Pred;

  // Keeps track of whether the block is already in the queue.
  bool InQueue = false;

  BlockData() = default;
};
750 
751 class RISCVInsertVSETVLI : public MachineFunctionPass {
752   const TargetInstrInfo *TII;
753   MachineRegisterInfo *MRI;
754 
755   std::vector<BlockData> BlockInfo;
756   std::queue<const MachineBasicBlock *> WorkList;
757 
758 public:
759   static char ID;
760 
761   RISCVInsertVSETVLI() : MachineFunctionPass(ID) {
762     initializeRISCVInsertVSETVLIPass(*PassRegistry::getPassRegistry());
763   }
764   bool runOnMachineFunction(MachineFunction &MF) override;
765 
766   void getAnalysisUsage(AnalysisUsage &AU) const override {
767     AU.setPreservesCFG();
768     MachineFunctionPass::getAnalysisUsage(AU);
769   }
770 
771   StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }
772 
773 private:
774   bool needVSETVLI(const MachineInstr &MI, const VSETVLIInfo &Require,
775                    const VSETVLIInfo &CurInfo) const;
776   bool needVSETVLIPHI(const VSETVLIInfo &Require,
777                       const MachineBasicBlock &MBB) const;
778   void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
779                      const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
780   void insertVSETVLI(MachineBasicBlock &MBB,
781                      MachineBasicBlock::iterator InsertPt, DebugLoc DL,
782                      const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
783 
784   void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI);
785   void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI);
786   bool computeVLVTYPEChanges(const MachineBasicBlock &MBB);
787   void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
788   void emitVSETVLIs(MachineBasicBlock &MBB);
789   void doLocalPostpass(MachineBasicBlock &MBB);
790   void doPRE(MachineBasicBlock &MBB);
791   void insertReadVL(MachineBasicBlock &MBB);
792 };
793 
794 } // end anonymous namespace
795 
// Storage for the pass ID that the constructor hands to MachineFunctionPass.
char RISCVInsertVSETVLI::ID = 0;

// Registers the pass under the DEBUG_TYPE argument string with
// RISCV_INSERT_VSETVLI_NAME as its display name.
// NOTE(review): the two trailing `false` flags are presumably the usual
// "CFG-only" and "is analysis" flags of INITIALIZE_PASS; confirm against
// the macro definition.
INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
                false, false)
800 
801 static bool isVectorConfigInstr(const MachineInstr &MI) {
802   return MI.getOpcode() == RISCV::PseudoVSETVLI ||
803          MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
804          MI.getOpcode() == RISCV::PseudoVSETIVLI;
805 }
806 
807 /// Return true if this is 'vsetvli x0, x0, vtype' which preserves
808 /// VL and only sets VTYPE.
809 static bool isVLPreservingConfig(const MachineInstr &MI) {
810   if (MI.getOpcode() != RISCV::PseudoVSETVLIX0)
811     return false;
812   assert(RISCV::X0 == MI.getOperand(1).getReg());
813   return RISCV::X0 == MI.getOperand(0).getReg();
814 }
815 
816 static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
817                                        const MachineRegisterInfo *MRI) {
818   VSETVLIInfo InstrInfo;
819 
820   // If the instruction has policy argument, use the argument.
821   // If there is no policy argument, default to tail agnostic unless the
822   // destination is tied to a source. Unless the source is undef. In that case
823   // the user would have some control over the policy values.
824   bool TailAgnostic = true;
825   bool UsesMaskPolicy = RISCVII::usesMaskPolicy(TSFlags);
826   // FIXME: Could we look at the above or below instructions to choose the
827   // matched mask policy to reduce vsetvli instructions? Default mask policy is
828   // agnostic if instructions use mask policy, otherwise is undisturbed. Because
829   // most mask operations are mask undisturbed, so we could possibly reduce the
830   // vsetvli between mask and nomasked instruction sequence.
831   bool MaskAgnostic = UsesMaskPolicy;
832   unsigned UseOpIdx;
833   if (RISCVII::hasVecPolicyOp(TSFlags)) {
834     const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
835     uint64_t Policy = Op.getImm();
836     assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
837            "Invalid Policy Value");
838     // Although in some cases, mismatched passthru/maskedoff with policy value
839     // does not make sense (ex. tied operand is IMPLICIT_DEF with non-TAMA
840     // policy, or tied operand is not IMPLICIT_DEF with TAMA policy), but users
841     // have set the policy value explicitly, so compiler would not fix it.
842     TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC;
843     MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
844   } else if (MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
845     TailAgnostic = false;
846     if (UsesMaskPolicy)
847       MaskAgnostic = false;
848     // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic.
849     const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
850     MachineInstr *UseMI = MRI->getVRegDef(UseMO.getReg());
851     if (UseMI && UseMI->isImplicitDef()) {
852       TailAgnostic = true;
853       if (UsesMaskPolicy)
854         MaskAgnostic = true;
855     }
856     // Some pseudo instructions force a tail agnostic policy despite having a
857     // tied def.
858     if (RISCVII::doesForceTailAgnostic(TSFlags))
859       TailAgnostic = true;
860   }
861 
862   RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);
863 
864   unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
865   // A Log2SEW of 0 is an operation on mask registers only.
866   unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
867   assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
868 
869   if (RISCVII::hasVLOp(TSFlags)) {
870     const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
871     if (VLOp.isImm()) {
872       int64_t Imm = VLOp.getImm();
873       // Conver the VLMax sentintel to X0 register.
874       if (Imm == RISCV::VLMaxSentinel)
875         InstrInfo.setAVLReg(RISCV::X0);
876       else
877         InstrInfo.setAVLImm(Imm);
878     } else {
879       InstrInfo.setAVLReg(VLOp.getReg());
880     }
881   } else {
882     InstrInfo.setAVLReg(RISCV::NoRegister);
883   }
884 #ifndef NDEBUG
885   if (Optional<unsigned> EEW = getEEWForLoadStore(MI)) {
886     assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
887   }
888 #endif
889   InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
890 
891   return InstrInfo;
892 }
893 
894 void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
895                                        const VSETVLIInfo &Info,
896                                        const VSETVLIInfo &PrevInfo) {
897   DebugLoc DL = MI.getDebugLoc();
898   insertVSETVLI(MBB, MachineBasicBlock::iterator(&MI), DL, Info, PrevInfo);
899 }
900 
901 void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
902                      MachineBasicBlock::iterator InsertPt, DebugLoc DL,
903                      const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo) {
904 
905   // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
906   // VLMAX.
907   if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
908       Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
909     BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
910         .addReg(RISCV::X0, RegState::Define | RegState::Dead)
911         .addReg(RISCV::X0, RegState::Kill)
912         .addImm(Info.encodeVTYPE())
913         .addReg(RISCV::VL, RegState::Implicit);
914     return;
915   }
916 
917   if (Info.hasAVLImm()) {
918     BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
919         .addReg(RISCV::X0, RegState::Define | RegState::Dead)
920         .addImm(Info.getAVLImm())
921         .addImm(Info.encodeVTYPE());
922     return;
923   }
924 
925   Register AVLReg = Info.getAVLReg();
926   if (AVLReg == RISCV::NoRegister) {
927     // We can only use x0, x0 if there's no chance of the vtype change causing
928     // the previous vl to become invalid.
929     if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
930         Info.hasSameVLMAX(PrevInfo)) {
931       BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
932           .addReg(RISCV::X0, RegState::Define | RegState::Dead)
933           .addReg(RISCV::X0, RegState::Kill)
934           .addImm(Info.encodeVTYPE())
935           .addReg(RISCV::VL, RegState::Implicit);
936       return;
937     }
938     // Otherwise use an AVL of 0 to avoid depending on previous vl.
939     BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
940         .addReg(RISCV::X0, RegState::Define | RegState::Dead)
941         .addImm(0)
942         .addImm(Info.encodeVTYPE());
943     return;
944   }
945 
946   if (AVLReg.isVirtual())
947     MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);
948 
949   // Use X0 as the DestReg unless AVLReg is X0. We also need to change the
950   // opcode if the AVLReg is X0 as they have different register classes for
951   // the AVL operand.
952   Register DestReg = RISCV::X0;
953   unsigned Opcode = RISCV::PseudoVSETVLI;
954   if (AVLReg == RISCV::X0) {
955     DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
956     Opcode = RISCV::PseudoVSETVLIX0;
957   }
958   BuildMI(MBB, InsertPt, DL, TII->get(Opcode))
959       .addReg(DestReg, RegState::Define | RegState::Dead)
960       .addReg(AVLReg)
961       .addImm(Info.encodeVTYPE());
962 }
963 
964 // Return a VSETVLIInfo representing the changes made by this VSETVLI or
965 // VSETIVLI instruction.
966 static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
967   VSETVLIInfo NewInfo;
968   if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
969     NewInfo.setAVLImm(MI.getOperand(1).getImm());
970   } else {
971     assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
972            MI.getOpcode() == RISCV::PseudoVSETVLIX0);
973     Register AVLReg = MI.getOperand(1).getReg();
974     assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
975            "Can't handle X0, X0 vsetvli yet");
976     NewInfo.setAVLReg(AVLReg);
977   }
978   NewInfo.setVTYPE(MI.getOperand(2).getImm());
979 
980   return NewInfo;
981 }
982 
983 /// Return true if a VSETVLI is required to transition from CurInfo to Require
984 /// before MI.
985 bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
986                                      const VSETVLIInfo &Require,
987                                      const VSETVLIInfo &CurInfo) const {
988   assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, MRI));
989 
990   if (CurInfo.isCompatible(MI, Require))
991     return false;
992 
993   if (!CurInfo.isValid() || CurInfo.isUnknown() || CurInfo.hasSEWLMULRatioOnly())
994     return true;
995 
996   // For vmv.s.x and vfmv.s.f, there is only two behaviors, VL = 0 and VL > 0.
997   // VL=0 is uninteresting (as it should have been deleted already), so it is
998   // compatible if we can prove both are non-zero.  Additionally, if writing
999   // to an implicit_def operand, we don't need to preserve any other bits and
1000   // are thus compatible with any larger etype, and can disregard policy bits.
1001   if (isScalarMoveInstr(MI) &&
1002       CurInfo.hasNonZeroAVL() && Require.hasNonZeroAVL()) {
1003     auto *VRegDef = MRI->getVRegDef(MI.getOperand(1).getReg());
1004     if (VRegDef && VRegDef->isImplicitDef() &&
1005         CurInfo.getSEW() >= Require.getSEW())
1006       return false;
1007     if (CurInfo.hasSameSEW(Require) && CurInfo.hasSamePolicy(Require))
1008       return false;
1009   }
1010 
1011   // We didn't find a compatible value. If our AVL is a virtual register,
1012   // it might be defined by a VSET(I)VLI. If it has the same VLMAX we need
1013   // and the last VL/VTYPE we observed is the same, we don't need a
1014   // VSETVLI here.
1015   if (Require.hasAVLReg() && Require.getAVLReg().isVirtual() &&
1016       CurInfo.hasCompatibleVTYPE(MI, Require)) {
1017     if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) {
1018       if (isVectorConfigInstr(*DefMI)) {
1019         VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
1020         if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVLMAX(CurInfo))
1021           return false;
1022       }
1023     }
1024   }
1025 
1026   return true;
1027 }
1028 
1029 // Given an incoming state reaching MI, modifies that state so that it is minimally
1030 // compatible with MI.  The resulting state is guaranteed to be semantically legal
1031 // for MI, but may not be the state requested by MI.
1032 void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) {
1033   uint64_t TSFlags = MI.getDesc().TSFlags;
1034   if (!RISCVII::hasSEWOp(TSFlags))
1035     return;
1036 
1037   const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
1038   if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info))
1039     return;
1040 
1041   const VSETVLIInfo PrevInfo = Info;
1042   Info = NewInfo;
1043 
1044   if (!RISCVII::hasVLOp(TSFlags))
1045     return;
1046 
1047   // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and
1048   // VL > 0. We can discard the user requested AVL and just use the last
1049   // one if we can prove it equally zero.  This removes a vsetvli entirely
1050   // if the types match or allows use of cheaper avl preserving variant
1051   // if VLMAX doesn't change.  If VLMAX might change, we couldn't use
1052   // the 'vsetvli x0, x0, vtype" variant, so we avoid the transform to
1053   // prevent extending live range of an avl register operand.
1054   // TODO: We can probably relax this for immediates.
1055   if (isScalarMoveInstr(MI) && PrevInfo.isValid() &&
1056       PrevInfo.hasNonZeroAVL() && Info.hasNonZeroAVL() &&
1057       Info.hasSameVLMAX(PrevInfo)) {
1058     if (PrevInfo.hasAVLImm())
1059       Info.setAVLImm(PrevInfo.getAVLImm());
1060     else
1061       Info.setAVLReg(PrevInfo.getAVLReg());
1062     return;
1063   }
1064 
1065   // Two cases involving an AVL resulting from a previous vsetvli.
1066   // 1) If the AVL is the result of a previous vsetvli which has the
1067   //    same AVL and VLMAX as our current state, we can reuse the AVL
1068   //    from the current state for the new one.  This allows us to
1069   //    generate 'vsetvli x0, x0, vtype" or possible skip the transition
1070   //    entirely.
1071   // 2) If AVL is defined by a vsetvli with the same VLMAX, we can
1072   //    replace the AVL operand with the AVL of the defining vsetvli.
1073   //    We avoid general register AVLs to avoid extending live ranges
1074   //    without being sure we can kill the original source reg entirely.
1075   if (!Info.hasAVLReg() || !Info.getAVLReg().isVirtual())
1076     return;
1077   MachineInstr *DefMI = MRI->getVRegDef(Info.getAVLReg());
1078   if (!DefMI || !isVectorConfigInstr(*DefMI))
1079     return;
1080 
1081   VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
1082   // case 1
1083   if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
1084       DefInfo.hasSameAVL(PrevInfo) &&
1085       DefInfo.hasSameVLMAX(PrevInfo)) {
1086     if (PrevInfo.hasAVLImm())
1087       Info.setAVLImm(PrevInfo.getAVLImm());
1088     else
1089       Info.setAVLReg(PrevInfo.getAVLReg());
1090     return;
1091   }
1092   // case 2
1093   if (DefInfo.hasSameVLMAX(Info) &&
1094       (DefInfo.hasAVLImm() || DefInfo.getAVLReg() == RISCV::X0)) {
1095     if (DefInfo.hasAVLImm())
1096       Info.setAVLImm(DefInfo.getAVLImm());
1097     else
1098       Info.setAVLReg(DefInfo.getAVLReg());
1099     return;
1100   }
1101 }
1102 
1103 // Given a state with which we evaluated MI (see transferBefore above for why
1104 // this might be different that the state MI requested), modify the state to
1105 // reflect the changes MI might make.
1106 void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) {
1107   if (isVectorConfigInstr(MI)) {
1108     Info = getInfoForVSETVLI(MI);
1109     return;
1110   }
1111 
1112   if (RISCV::isFaultFirstLoad(MI)) {
1113     // Update AVL to vl-output of the fault first load.
1114     Info.setAVLReg(MI.getOperand(1).getReg());
1115     return;
1116   }
1117 
1118   // If this is something that updates VL/VTYPE that we don't know about, set
1119   // the state to unknown.
1120   if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
1121       MI.modifiesRegister(RISCV::VTYPE))
1122     Info = VSETVLIInfo::getUnknown();
1123 }
1124 
1125 bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB) {
1126   bool HadVectorOp = false;
1127 
1128   BlockData &BBInfo = BlockInfo[MBB.getNumber()];
1129   BBInfo.Change = BBInfo.Pred;
1130   for (const MachineInstr &MI : MBB) {
1131     transferBefore(BBInfo.Change, MI);
1132 
1133     if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags))
1134       HadVectorOp = true;
1135 
1136     transferAfter(BBInfo.Change, MI);
1137   }
1138 
1139   return HadVectorOp;
1140 }
1141 
1142 void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {
1143 
1144   BlockData &BBInfo = BlockInfo[MBB.getNumber()];
1145 
1146   BBInfo.InQueue = false;
1147 
1148   VSETVLIInfo InInfo;
1149   if (MBB.pred_empty()) {
1150     // There are no predecessors, so use the default starting status.
1151     InInfo.setUnknown();
1152   } else {
1153     for (MachineBasicBlock *P : MBB.predecessors())
1154       InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
1155   }
1156 
1157   // If we don't have any valid predecessor value, wait until we do.
1158   if (!InInfo.isValid())
1159     return;
1160 
1161   // If no change, no need to rerun block
1162   if (InInfo == BBInfo.Pred)
1163     return;
1164 
1165   BBInfo.Pred = InInfo;
1166   LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
1167                     << " changed to " << BBInfo.Pred << "\n");
1168 
1169   // Note: It's tempting to cache the state changes here, but due to the
1170   // compatibility checks performed a blocks output state can change based on
1171   // the input state.  To cache, we'd have to add logic for finding
1172   // never-compatible state changes.
1173   computeVLVTYPEChanges(MBB);
1174   VSETVLIInfo TmpStatus = BBInfo.Change;
1175 
1176   // If the new exit value matches the old exit value, we don't need to revisit
1177   // any blocks.
1178   if (BBInfo.Exit == TmpStatus)
1179     return;
1180 
1181   BBInfo.Exit = TmpStatus;
1182   LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
1183                     << " changed to " << BBInfo.Exit << "\n");
1184 
1185   // Add the successors to the work list so we can propagate the changed exit
1186   // status.
1187   for (MachineBasicBlock *S : MBB.successors())
1188     if (!BlockInfo[S->getNumber()].InQueue)
1189       WorkList.push(S);
1190 }
1191 
1192 // If we weren't able to prove a vsetvli was directly unneeded, it might still
1193 // be unneeded if the AVL is a phi node where all incoming values are VL
1194 // outputs from the last VSETVLI in their respective basic blocks.
1195 bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
1196                                         const MachineBasicBlock &MBB) const {
1197   if (DisableInsertVSETVLPHIOpt)
1198     return true;
1199 
1200   if (!Require.hasAVLReg())
1201     return true;
1202 
1203   Register AVLReg = Require.getAVLReg();
1204   if (!AVLReg.isVirtual())
1205     return true;
1206 
1207   // We need the AVL to be produce by a PHI node in this basic block.
1208   MachineInstr *PHI = MRI->getVRegDef(AVLReg);
1209   if (!PHI || PHI->getOpcode() != RISCV::PHI || PHI->getParent() != &MBB)
1210     return true;
1211 
1212   for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
1213        PHIOp += 2) {
1214     Register InReg = PHI->getOperand(PHIOp).getReg();
1215     MachineBasicBlock *PBB = PHI->getOperand(PHIOp + 1).getMBB();
1216     const BlockData &PBBInfo = BlockInfo[PBB->getNumber()];
1217     // If the exit from the predecessor has the VTYPE we are looking for
1218     // we might be able to avoid a VSETVLI.
1219     if (PBBInfo.Exit.isUnknown() || !PBBInfo.Exit.hasSameVTYPE(Require))
1220       return true;
1221 
1222     // We need the PHI input to the be the output of a VSET(I)VLI.
1223     MachineInstr *DefMI = MRI->getVRegDef(InReg);
1224     if (!DefMI || !isVectorConfigInstr(*DefMI))
1225       return true;
1226 
1227     // We found a VSET(I)VLI make sure it matches the output of the
1228     // predecessor block.
1229     VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
1230     if (!DefInfo.hasSameAVL(PBBInfo.Exit) ||
1231         !DefInfo.hasSameVTYPE(PBBInfo.Exit))
1232       return true;
1233   }
1234 
1235   // If all the incoming values to the PHI checked out, we don't need
1236   // to insert a VSETVLI.
1237   return false;
1238 }
1239 
1240 void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
1241   VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred;
1242   // Track whether the prefix of the block we've scanned is transparent
1243   // (meaning has not yet changed the abstract state).
1244   bool PrefixTransparent = true;
1245   for (MachineInstr &MI : MBB) {
1246     const VSETVLIInfo PrevInfo = CurInfo;
1247     transferBefore(CurInfo, MI);
1248 
1249     // If this is an explicit VSETVLI or VSETIVLI, update our state.
1250     if (isVectorConfigInstr(MI)) {
1251       // Conservatively, mark the VL and VTYPE as live.
1252       assert(MI.getOperand(3).getReg() == RISCV::VL &&
1253              MI.getOperand(4).getReg() == RISCV::VTYPE &&
1254              "Unexpected operands where VL and VTYPE should be");
1255       MI.getOperand(3).setIsDead(false);
1256       MI.getOperand(4).setIsDead(false);
1257       PrefixTransparent = false;
1258     }
1259 
1260     uint64_t TSFlags = MI.getDesc().TSFlags;
1261     if (RISCVII::hasSEWOp(TSFlags)) {
1262       if (PrevInfo != CurInfo) {
1263         // If this is the first implicit state change, and the state change
1264         // requested can be proven to produce the same register contents, we
1265         // can skip emitting the actual state change and continue as if we
1266         // had since we know the GPR result of the implicit state change
1267         // wouldn't be used and VL/VTYPE registers are correct.  Note that
1268         // we *do* need to model the state as if it changed as while the
1269         // register contents are unchanged, the abstract model can change.
1270         if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB))
1271           insertVSETVLI(MBB, MI, CurInfo, PrevInfo);
1272         PrefixTransparent = false;
1273       }
1274 
1275       if (RISCVII::hasVLOp(TSFlags)) {
1276         MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
1277         if (VLOp.isReg()) {
1278           // Erase the AVL operand from the instruction.
1279           VLOp.setReg(RISCV::NoRegister);
1280           VLOp.setIsKill(false);
1281         }
1282         MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
1283                                                 /*isImp*/ true));
1284       }
1285       MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
1286                                               /*isImp*/ true));
1287     }
1288 
1289     if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
1290         MI.modifiesRegister(RISCV::VTYPE))
1291       PrefixTransparent = false;
1292 
1293     transferAfter(CurInfo, MI);
1294   }
1295 
1296   // If we reach the end of the block and our current info doesn't match the
1297   // expected info, insert a vsetvli to correct.
1298   if (!UseStrictAsserts) {
1299     const VSETVLIInfo &ExitInfo = BlockInfo[MBB.getNumber()].Exit;
1300     if (CurInfo.isValid() && ExitInfo.isValid() && !ExitInfo.isUnknown() &&
1301         CurInfo != ExitInfo) {
1302       // Note there's an implicit assumption here that terminators never use
1303       // or modify VL or VTYPE.  Also, fallthrough will return end().
1304       auto InsertPt = MBB.getFirstInstrTerminator();
1305       insertVSETVLI(MBB, InsertPt, MBB.findDebugLoc(InsertPt), ExitInfo,
1306                     CurInfo);
1307       CurInfo = ExitInfo;
1308     }
1309   }
1310 
1311   if (UseStrictAsserts && CurInfo.isValid()) {
1312     const auto &Info = BlockInfo[MBB.getNumber()];
1313     if (CurInfo != Info.Exit) {
1314       LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
1315       LLVM_DEBUG(dbgs() << "  begin        state: " << Info.Pred << "\n");
1316       LLVM_DEBUG(dbgs() << "  expected end state: " << Info.Exit << "\n");
1317       LLVM_DEBUG(dbgs() << "  actual   end state: " << CurInfo << "\n");
1318     }
1319     assert(CurInfo == Info.Exit &&
1320            "InsertVSETVLI dataflow invariant violated");
1321   }
1322 }
1323 
1324 /// Return true if the VL value configured must be equal to the requested one.
1325 static bool hasFixedResult(const VSETVLIInfo &Info, const RISCVSubtarget &ST) {
1326   if (!Info.hasAVLImm())
1327     // VLMAX is always the same value.
1328     // TODO: Could extend to other registers by looking at the associated vreg
1329     // def placement.
1330     return RISCV::X0 == Info.getAVLReg();
1331 
1332   unsigned AVL = Info.getAVLImm();
1333   unsigned SEW = Info.getSEW();
1334   unsigned AVLInBits = AVL * SEW;
1335 
1336   unsigned LMul;
1337   bool Fractional;
1338   std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(Info.getVLMUL());
1339 
1340   if (Fractional)
1341     return ST.getRealMinVLen() / LMul >= AVLInBits;
1342   return ST.getRealMinVLen() * LMul >= AVLInBits;
1343 }
1344 
1345 /// Perform simple partial redundancy elimination of the VSETVLI instructions
1346 /// we're about to insert by looking for cases where we can PRE from the
1347 /// beginning of one block to the end of one of its predecessors.  Specifically,
1348 /// this is geared to catch the common case of a fixed length vsetvl in a single
1349 /// block loop when it could execute once in the preheader instead.
1350 void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
1351   const MachineFunction &MF = *MBB.getParent();
1352   const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
1353 
1354   if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
1355     return;
1356 
1357   MachineBasicBlock *UnavailablePred = nullptr;
1358   VSETVLIInfo AvailableInfo;
1359   for (MachineBasicBlock *P : MBB.predecessors()) {
1360     const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
1361     if (PredInfo.isUnknown()) {
1362       if (UnavailablePred)
1363         return;
1364       UnavailablePred = P;
1365     } else if (!AvailableInfo.isValid()) {
1366       AvailableInfo = PredInfo;
1367     } else if (AvailableInfo != PredInfo) {
1368       return;
1369     }
1370   }
1371 
1372   // Unreachable, single pred, or full redundancy. Note that FRE is handled by
1373   // phase 3.
1374   if (!UnavailablePred || !AvailableInfo.isValid())
1375     return;
1376 
1377   // Critical edge - TODO: consider splitting?
1378   if (UnavailablePred->succ_size() != 1)
1379     return;
1380 
1381   // If VL can be less than AVL, then we can't reduce the frequency of exec.
1382   if (!hasFixedResult(AvailableInfo, ST))
1383     return;
1384 
1385   // Does it actually let us remove an implicit transition in MBB?
1386   bool Found = false;
1387   for (auto &MI : MBB) {
1388     if (isVectorConfigInstr(MI))
1389       return;
1390 
1391     const uint64_t TSFlags = MI.getDesc().TSFlags;
1392     if (RISCVII::hasSEWOp(TSFlags)) {
1393       if (AvailableInfo != computeInfoForInstr(MI, TSFlags, MRI))
1394         return;
1395       Found = true;
1396       break;
1397     }
1398   }
1399   if (!Found)
1400     return;
1401 
1402   // Finally, update both data flow state and insert the actual vsetvli.
1403   // Doing both keeps the code in sync with the dataflow results, which
1404   // is critical for correctness of phase 3.
1405   auto OldInfo = BlockInfo[UnavailablePred->getNumber()].Exit;
1406   LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
1407                     << UnavailablePred->getName() << " with state "
1408                     << AvailableInfo << "\n");
1409   BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
1410   BlockInfo[MBB.getNumber()].Pred = AvailableInfo;
1411 
1412   // Note there's an implicit assumption here that terminators never use
1413   // or modify VL or VTYPE.  Also, fallthrough will return end().
1414   auto InsertPt = UnavailablePred->getFirstInstrTerminator();
1415   insertVSETVLI(*UnavailablePred, InsertPt,
1416                 UnavailablePred->findDebugLoc(InsertPt),
1417                 AvailableInfo, OldInfo);
1418 }
1419 
1420 static void doUnion(DemandedFields &A, DemandedFields B) {
1421   A.VL |= B.VL;
1422   A.SEW |= B.SEW;
1423   A.LMUL |= B.LMUL;
1424   A.SEWLMULRatio |= B.SEWLMULRatio;
1425   A.TailPolicy |= B.TailPolicy;
1426   A.MaskPolicy |= B.MaskPolicy;
1427 }
1428 
1429 // Return true if we can mutate PrevMI's VTYPE to match MI's
1430 // without changing any the fields which have been used.
1431 // TODO: Restructure code to allow code reuse between this and isCompatible
1432 // above.
1433 static bool canMutatePriorConfig(const MachineInstr &PrevMI,
1434                                  const MachineInstr &MI,
1435                                  const DemandedFields &Used) {
1436   // TODO: Extend this to handle cases where VL does change, but VL
1437   // has not been used.  (e.g. over a vmv.x.s)
1438   if (!isVLPreservingConfig(MI))
1439     // Note: `vsetvli x0, x0, vtype' is the canonical instruction
1440     // for this case.  If you find yourself wanting to add other forms
1441     // to this "unused VTYPE" case, we're probably missing a
1442     // canonicalization earlier.
1443     return false;
1444 
1445   if (!PrevMI.getOperand(2).isImm() || !MI.getOperand(2).isImm())
1446     return false;
1447 
1448   auto PriorVType = PrevMI.getOperand(2).getImm();
1449   auto VType = MI.getOperand(2).getImm();
1450   return areCompatibleVTYPEs(PriorVType, VType, Used);
1451 }
1452 
1453 void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
1454   MachineInstr *PrevMI = nullptr;
1455   DemandedFields Used;
1456   SmallVector<MachineInstr*> ToDelete;
1457   for (MachineInstr &MI : MBB) {
1458     // Note: Must be *before* vsetvli handling to account for config cases
1459     // which only change some subfields.
1460     doUnion(Used, getDemanded(MI));
1461 
1462     if (!isVectorConfigInstr(MI))
1463       continue;
1464 
1465     if (PrevMI) {
1466       if (!Used.VL && !Used.usedVTYPE()) {
1467         ToDelete.push_back(PrevMI);
1468         // fallthrough
1469       } else if (canMutatePriorConfig(*PrevMI, MI, Used)) {
1470         PrevMI->getOperand(2).setImm(MI.getOperand(2).getImm());
1471         ToDelete.push_back(&MI);
1472         // Leave PrevMI unchanged
1473         continue;
1474       }
1475     }
1476     PrevMI = &MI;
1477     Used = getDemanded(MI);
1478     Register VRegDef = MI.getOperand(0).getReg();
1479     if (VRegDef != RISCV::X0 &&
1480         !(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef)))
1481       Used.VL = true;
1482   }
1483 
1484   for (auto *MI : ToDelete)
1485     MI->eraseFromParent();
1486 }
1487 
1488 void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
1489   for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
1490     MachineInstr &MI = *I++;
1491     if (RISCV::isFaultFirstLoad(MI)) {
1492       Register VLOutput = MI.getOperand(1).getReg();
1493       if (!MRI->use_nodbg_empty(VLOutput))
1494         BuildMI(MBB, I, MI.getDebugLoc(), TII->get(RISCV::PseudoReadVL),
1495                 VLOutput);
1496       // We don't use the vl output of the VLEFF/VLSEGFF anymore.
1497       MI.getOperand(1).setReg(RISCV::X0);
1498     }
1499   }
1500 }
1501 
1502 bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
1503   // Skip if the vector extension is not enabled.
1504   const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
1505   if (!ST.hasVInstructions())
1506     return false;
1507 
1508   LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");
1509 
1510   TII = ST.getInstrInfo();
1511   MRI = &MF.getRegInfo();
1512 
1513   assert(BlockInfo.empty() && "Expect empty block infos");
1514   BlockInfo.resize(MF.getNumBlockIDs());
1515 
1516   bool HaveVectorOp = false;
1517 
1518   // Phase 1 - determine how VL/VTYPE are affected by the each block.
1519   for (const MachineBasicBlock &MBB : MF) {
1520     HaveVectorOp |= computeVLVTYPEChanges(MBB);
1521     // Initial exit state is whatever change we found in the block.
1522     BlockData &BBInfo = BlockInfo[MBB.getNumber()];
1523     BBInfo.Exit = BBInfo.Change;
1524     LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
1525                       << " is " << BBInfo.Exit << "\n");
1526 
1527   }
1528 
1529   // If we didn't find any instructions that need VSETVLI, we're done.
1530   if (!HaveVectorOp) {
1531     BlockInfo.clear();
1532     return false;
1533   }
1534 
1535   // Phase 2 - determine the exit VL/VTYPE from each block. We add all
1536   // blocks to the list here, but will also add any that need to be revisited
1537   // during Phase 2 processing.
1538   for (const MachineBasicBlock &MBB : MF) {
1539     WorkList.push(&MBB);
1540     BlockInfo[MBB.getNumber()].InQueue = true;
1541   }
1542   while (!WorkList.empty()) {
1543     const MachineBasicBlock &MBB = *WorkList.front();
1544     WorkList.pop();
1545     computeIncomingVLVTYPE(MBB);
1546   }
1547 
1548   // Perform partial redundancy elimination of vsetvli transitions.
1549   for (MachineBasicBlock &MBB : MF)
1550     doPRE(MBB);
1551 
1552   // Phase 3 - add any vsetvli instructions needed in the block. Use the
1553   // Phase 2 information to avoid adding vsetvlis before the first vector
1554   // instruction in the block if the VL/VTYPE is satisfied by its
1555   // predecessors.
1556   for (MachineBasicBlock &MBB : MF)
1557     emitVSETVLIs(MBB);
1558 
1559   // Now that all vsetvlis are explicit, go through and do block local
1560   // DSE and peephole based demanded fields based transforms.  Note that
1561   // this *must* be done outside the main dataflow so long as we allow
1562   // any cross block analysis within the dataflow.  We can't have both
1563   // demanded fields based mutation and non-local analysis in the
1564   // dataflow at the same time without introducing inconsistencies.
1565   for (MachineBasicBlock &MBB : MF)
1566     doLocalPostpass(MBB);
1567 
1568   // Once we're fully done rewriting all the instructions, do a final pass
1569   // through to check for VSETVLIs which write to an unused destination.
1570   // For the non X0, X0 variant, we can replace the destination register
1571   // with X0 to reduce register pressure.  This is really a generic
1572   // optimization which can be applied to any dead def (TODO: generalize).
1573   for (MachineBasicBlock &MBB : MF) {
1574     for (MachineInstr &MI : MBB) {
1575       if (MI.getOpcode() == RISCV::PseudoVSETVLI ||
1576           MI.getOpcode() == RISCV::PseudoVSETIVLI) {
1577         Register VRegDef = MI.getOperand(0).getReg();
1578         if (VRegDef != RISCV::X0 && MRI->use_nodbg_empty(VRegDef))
1579           MI.getOperand(0).setReg(RISCV::X0);
1580       }
1581     }
1582   }
1583 
1584   // Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
1585   // of VLEFF/VLSEGFF.
1586   for (MachineBasicBlock &MBB : MF)
1587     insertReadVL(MBB);
1588 
1589   BlockInfo.clear();
1590   return HaveVectorOp;
1591 }
1592 
1593 /// Returns an instance of the Insert VSETVLI pass.
1594 FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
1595   return new RISCVInsertVSETVLI();
1596 }
1597