//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a function pass that inserts VSETVLI instructions where
// needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
// instructions.
//
// This pass consists of 3 phases:
//
// Phase 1 collects how each basic block affects VL/VTYPE.
//
// Phase 2 uses the information from phase 1 to do a data flow analysis to
// propagate the VL/VTYPE changes through the function. This gives us the
// VL/VTYPE at the start of each basic block.
//
// Phase 3 inserts VSETVLI instructions in each basic block. Information from
// phase 2 is used to prevent inserting a VSETVLI before the first vector
// instruction in the block if possible.
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>
using namespace llvm;

#define DEBUG_TYPE "riscv-insert-vsetvli"
#define RISCV_INSERT_VSETVLI_NAME "RISCV Insert VSETVLI pass"

static cl::opt<bool> DisableInsertVSETVLPHIOpt(
    "riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
    cl::desc("Disable looking through phis when inserting vsetvlis."));

static cl::opt<bool> UseStrictAsserts(
    "riscv-insert-vsetvl-strict-asserts", cl::init(true), cl::Hidden,
    cl::desc("Enable strict assertion checking for the dataflow algorithm"));

namespace {

static unsigned getVLOpNum(const MachineInstr &MI) {
  return RISCVII::getVLOpNum(MI.getDesc());
}

static unsigned getSEWOpNum(const MachineInstr &MI) {
  return RISCVII::getSEWOpNum(MI.getDesc());
}

static bool isScalarMoveInstr(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case RISCV::PseudoVMV_S_X_M1:
  case RISCV::PseudoVMV_S_X_M2:
  case RISCV::PseudoVMV_S_X_M4:
  case RISCV::PseudoVMV_S_X_M8:
  case RISCV::PseudoVMV_S_X_MF2:
  case RISCV::PseudoVMV_S_X_MF4:
  case RISCV::PseudoVMV_S_X_MF8:
  case RISCV::PseudoVFMV_S_F16_M1:
  case RISCV::PseudoVFMV_S_F16_M2:
  case RISCV::PseudoVFMV_S_F16_M4:
  case RISCV::PseudoVFMV_S_F16_M8:
  case RISCV::PseudoVFMV_S_F16_MF2:
  case RISCV::PseudoVFMV_S_F16_MF4:
  case RISCV::PseudoVFMV_S_F32_M1:
  case RISCV::PseudoVFMV_S_F32_M2:
  case RISCV::PseudoVFMV_S_F32_M4:
  case RISCV::PseudoVFMV_S_F32_M8:
  case RISCV::PseudoVFMV_S_F32_MF2:
  case RISCV::PseudoVFMV_S_F64_M1:
  case RISCV::PseudoVFMV_S_F64_M2:
  case RISCV::PseudoVFMV_S_F64_M4:
  case RISCV::PseudoVFMV_S_F64_M8:
    return true;
  }
}

/// Get the EEW for a load or store instruction. Return None if MI is not
/// a load or store which ignores SEW.
static Optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return None;
  case RISCV::PseudoVLE8_V_M1:
  case RISCV::PseudoVLE8_V_M1_MASK:
  case RISCV::PseudoVLE8_V_M2:
  case RISCV::PseudoVLE8_V_M2_MASK:
  case RISCV::PseudoVLE8_V_M4:
  case RISCV::PseudoVLE8_V_M4_MASK:
  case RISCV::PseudoVLE8_V_M8:
  case RISCV::PseudoVLE8_V_M8_MASK:
  case RISCV::PseudoVLE8_V_MF2:
  case RISCV::PseudoVLE8_V_MF2_MASK:
  case RISCV::PseudoVLE8_V_MF4:
  case RISCV::PseudoVLE8_V_MF4_MASK:
  case RISCV::PseudoVLE8_V_MF8:
  case RISCV::PseudoVLE8_V_MF8_MASK:
  case RISCV::PseudoVLSE8_V_M1:
  case RISCV::PseudoVLSE8_V_M1_MASK:
  case RISCV::PseudoVLSE8_V_M2:
  case RISCV::PseudoVLSE8_V_M2_MASK:
  case RISCV::PseudoVLSE8_V_M4:
  case RISCV::PseudoVLSE8_V_M4_MASK:
  case RISCV::PseudoVLSE8_V_M8:
  case RISCV::PseudoVLSE8_V_M8_MASK:
  case RISCV::PseudoVLSE8_V_MF2:
  case RISCV::PseudoVLSE8_V_MF2_MASK:
  case RISCV::PseudoVLSE8_V_MF4:
  case RISCV::PseudoVLSE8_V_MF4_MASK:
  case RISCV::PseudoVLSE8_V_MF8:
  case RISCV::PseudoVLSE8_V_MF8_MASK:
  case RISCV::PseudoVSE8_V_M1:
  case RISCV::PseudoVSE8_V_M1_MASK:
  case RISCV::PseudoVSE8_V_M2:
  case RISCV::PseudoVSE8_V_M2_MASK:
  case RISCV::PseudoVSE8_V_M4:
  case RISCV::PseudoVSE8_V_M4_MASK:
  case RISCV::PseudoVSE8_V_M8:
  case RISCV::PseudoVSE8_V_M8_MASK:
  case RISCV::PseudoVSE8_V_MF2:
  case RISCV::PseudoVSE8_V_MF2_MASK:
  case RISCV::PseudoVSE8_V_MF4:
  case RISCV::PseudoVSE8_V_MF4_MASK:
  case RISCV::PseudoVSE8_V_MF8:
  case RISCV::PseudoVSE8_V_MF8_MASK:
  case RISCV::PseudoVSSE8_V_M1:
  case RISCV::PseudoVSSE8_V_M1_MASK:
  case RISCV::PseudoVSSE8_V_M2:
  case RISCV::PseudoVSSE8_V_M2_MASK:
  case RISCV::PseudoVSSE8_V_M4:
  case RISCV::PseudoVSSE8_V_M4_MASK:
  case RISCV::PseudoVSSE8_V_M8:
  case RISCV::PseudoVSSE8_V_M8_MASK:
  case RISCV::PseudoVSSE8_V_MF2:
  case RISCV::PseudoVSSE8_V_MF2_MASK:
  case RISCV::PseudoVSSE8_V_MF4:
  case RISCV::PseudoVSSE8_V_MF4_MASK:
  case RISCV::PseudoVSSE8_V_MF8:
  case RISCV::PseudoVSSE8_V_MF8_MASK:
    return 8;
  case RISCV::PseudoVLE16_V_M1:
  case RISCV::PseudoVLE16_V_M1_MASK:
  case RISCV::PseudoVLE16_V_M2:
  case RISCV::PseudoVLE16_V_M2_MASK:
  case RISCV::PseudoVLE16_V_M4:
  case RISCV::PseudoVLE16_V_M4_MASK:
  case RISCV::PseudoVLE16_V_M8:
  case RISCV::PseudoVLE16_V_M8_MASK:
  case RISCV::PseudoVLE16_V_MF2:
  case RISCV::PseudoVLE16_V_MF2_MASK:
  case RISCV::PseudoVLE16_V_MF4:
  case RISCV::PseudoVLE16_V_MF4_MASK:
  case RISCV::PseudoVLSE16_V_M1:
  case RISCV::PseudoVLSE16_V_M1_MASK:
  case RISCV::PseudoVLSE16_V_M2:
  case RISCV::PseudoVLSE16_V_M2_MASK:
  case RISCV::PseudoVLSE16_V_M4:
  case RISCV::PseudoVLSE16_V_M4_MASK:
  case RISCV::PseudoVLSE16_V_M8:
  case RISCV::PseudoVLSE16_V_M8_MASK:
  case RISCV::PseudoVLSE16_V_MF2:
  case RISCV::PseudoVLSE16_V_MF2_MASK:
  case RISCV::PseudoVLSE16_V_MF4:
  case RISCV::PseudoVLSE16_V_MF4_MASK:
  case RISCV::PseudoVSE16_V_M1:
  case RISCV::PseudoVSE16_V_M1_MASK:
  case RISCV::PseudoVSE16_V_M2:
  case RISCV::PseudoVSE16_V_M2_MASK:
  case RISCV::PseudoVSE16_V_M4:
  case RISCV::PseudoVSE16_V_M4_MASK:
  case RISCV::PseudoVSE16_V_M8:
  case RISCV::PseudoVSE16_V_M8_MASK:
  case RISCV::PseudoVSE16_V_MF2:
  case RISCV::PseudoVSE16_V_MF2_MASK:
  case RISCV::PseudoVSE16_V_MF4:
  case RISCV::PseudoVSE16_V_MF4_MASK:
  case RISCV::PseudoVSSE16_V_M1:
  case RISCV::PseudoVSSE16_V_M1_MASK:
  case RISCV::PseudoVSSE16_V_M2:
  case RISCV::PseudoVSSE16_V_M2_MASK:
  case RISCV::PseudoVSSE16_V_M4:
  case RISCV::PseudoVSSE16_V_M4_MASK:
  case RISCV::PseudoVSSE16_V_M8:
  case RISCV::PseudoVSSE16_V_M8_MASK:
  case RISCV::PseudoVSSE16_V_MF2:
  case RISCV::PseudoVSSE16_V_MF2_MASK:
  case RISCV::PseudoVSSE16_V_MF4:
  case RISCV::PseudoVSSE16_V_MF4_MASK:
    return 16;
  case RISCV::PseudoVLE32_V_M1:
  case RISCV::PseudoVLE32_V_M1_MASK:
  case RISCV::PseudoVLE32_V_M2:
  case RISCV::PseudoVLE32_V_M2_MASK:
  case RISCV::PseudoVLE32_V_M4:
  case RISCV::PseudoVLE32_V_M4_MASK:
  case RISCV::PseudoVLE32_V_M8:
  case RISCV::PseudoVLE32_V_M8_MASK:
  case RISCV::PseudoVLE32_V_MF2:
  case RISCV::PseudoVLE32_V_MF2_MASK:
  case RISCV::PseudoVLSE32_V_M1:
  case RISCV::PseudoVLSE32_V_M1_MASK:
  case RISCV::PseudoVLSE32_V_M2:
  case RISCV::PseudoVLSE32_V_M2_MASK:
  case RISCV::PseudoVLSE32_V_M4:
  case RISCV::PseudoVLSE32_V_M4_MASK:
  case RISCV::PseudoVLSE32_V_M8:
  case RISCV::PseudoVLSE32_V_M8_MASK:
  case RISCV::PseudoVLSE32_V_MF2:
  case RISCV::PseudoVLSE32_V_MF2_MASK:
  case RISCV::PseudoVSE32_V_M1:
  case RISCV::PseudoVSE32_V_M1_MASK:
  case RISCV::PseudoVSE32_V_M2:
  case RISCV::PseudoVSE32_V_M2_MASK:
  case RISCV::PseudoVSE32_V_M4:
  case RISCV::PseudoVSE32_V_M4_MASK:
  case RISCV::PseudoVSE32_V_M8:
  case RISCV::PseudoVSE32_V_M8_MASK:
  case RISCV::PseudoVSE32_V_MF2:
  case RISCV::PseudoVSE32_V_MF2_MASK:
  case RISCV::PseudoVSSE32_V_M1:
  case RISCV::PseudoVSSE32_V_M1_MASK:
  case RISCV::PseudoVSSE32_V_M2:
  case RISCV::PseudoVSSE32_V_M2_MASK:
  case RISCV::PseudoVSSE32_V_M4:
  case RISCV::PseudoVSSE32_V_M4_MASK:
  case RISCV::PseudoVSSE32_V_M8:
  case RISCV::PseudoVSSE32_V_M8_MASK:
  case RISCV::PseudoVSSE32_V_MF2:
  case RISCV::PseudoVSSE32_V_MF2_MASK:
    return 32;
  case RISCV::PseudoVLE64_V_M1:
  case RISCV::PseudoVLE64_V_M1_MASK:
  case RISCV::PseudoVLE64_V_M2:
  case RISCV::PseudoVLE64_V_M2_MASK:
  case RISCV::PseudoVLE64_V_M4:
  case RISCV::PseudoVLE64_V_M4_MASK:
  case RISCV::PseudoVLE64_V_M8:
  case RISCV::PseudoVLE64_V_M8_MASK:
  case RISCV::PseudoVLSE64_V_M1:
  case RISCV::PseudoVLSE64_V_M1_MASK:
  case RISCV::PseudoVLSE64_V_M2:
  case RISCV::PseudoVLSE64_V_M2_MASK:
  case RISCV::PseudoVLSE64_V_M4:
  case RISCV::PseudoVLSE64_V_M4_MASK:
  case RISCV::PseudoVLSE64_V_M8:
  case RISCV::PseudoVLSE64_V_M8_MASK:
  case RISCV::PseudoVSE64_V_M1:
  case RISCV::PseudoVSE64_V_M1_MASK:
  case RISCV::PseudoVSE64_V_M2:
  case RISCV::PseudoVSE64_V_M2_MASK:
  case RISCV::PseudoVSE64_V_M4:
  case RISCV::PseudoVSE64_V_M4_MASK:
  case RISCV::PseudoVSE64_V_M8:
  case RISCV::PseudoVSE64_V_M8_MASK:
  case RISCV::PseudoVSSE64_V_M1:
  case RISCV::PseudoVSSE64_V_M1_MASK:
  case RISCV::PseudoVSSE64_V_M2:
  case RISCV::PseudoVSSE64_V_M2_MASK:
  case RISCV::PseudoVSSE64_V_M4:
  case RISCV::PseudoVSSE64_V_M4_MASK:
  case RISCV::PseudoVSSE64_V_M8:
  case RISCV::PseudoVSSE64_V_M8_MASK:
    return 64;
  }
}

/// Return true if this is an operation on mask registers. Note that
/// this includes both arithmetic/logical ops and load/store (vlm/vsm).
static bool isMaskRegOp(const MachineInstr &MI) {
  if (RISCVII::hasSEWOp(MI.getDesc().TSFlags)) {
    const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
    // A Log2SEW of 0 is an operation on mask registers only.
    return Log2SEW == 0;
  }
  return false;
}

static unsigned getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul) {
  unsigned LMul;
  bool Fractional;
  std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(VLMul);

  // Convert LMul to a fixed point value with 3 fractional bits.
  LMul = Fractional ? (8 / LMul) : (LMul * 8);

  assert(SEW >= 8 && "Unexpected SEW value");
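  // For example, SEW=16 with LMUL=1/2 gives a fixed point LMul of 4, so the
  // ratio below is (16 * 8) / 4 = 32, matching SEW/LMUL = 16 / (1/2) = 32.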
  return (SEW * 8) / LMul;
}

/// Which subfields of VL or VTYPE have values we need to preserve?
struct DemandedFields {
  bool VL = false;
  bool SEW = false;
  bool LMUL = false;
  bool SEWLMULRatio = false;
  bool TailPolicy = false;
  bool MaskPolicy = false;

  // Return true if any part of VTYPE was used
  bool usedVTYPE() {
    return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
  }

  // Mark all VTYPE subfields and properties as demanded
  void demandVTYPE() {
    SEW = true;
    LMUL = true;
    SEWLMULRatio = true;
    TailPolicy = true;
    MaskPolicy = true;
  }
};

/// Return true if the two values of the VTYPE register provided are
/// indistinguishable from the perspective of an instruction (or set of
/// instructions) which use only the Used subfields and properties.
static bool areCompatibleVTYPEs(uint64_t VType1,
                                uint64_t VType2,
                                const DemandedFields &Used) {
  if (Used.SEW &&
      RISCVVType::getSEW(VType1) != RISCVVType::getSEW(VType2))
    return false;

  if (Used.LMUL &&
      RISCVVType::getVLMUL(VType1) != RISCVVType::getVLMUL(VType2))
    return false;

  if (Used.SEWLMULRatio) {
    auto Ratio1 = getSEWLMULRatio(RISCVVType::getSEW(VType1),
                                  RISCVVType::getVLMUL(VType1));
    auto Ratio2 = getSEWLMULRatio(RISCVVType::getSEW(VType2),
                                  RISCVVType::getVLMUL(VType2));
    if (Ratio1 != Ratio2)
      return false;
  }

  if (Used.TailPolicy &&
      RISCVVType::isTailAgnostic(VType1) != RISCVVType::isTailAgnostic(VType2))
    return false;
  if (Used.MaskPolicy &&
      RISCVVType::isMaskAgnostic(VType1) != RISCVVType::isMaskAgnostic(VType2))
    return false;
  return true;
}

/// Return the fields and properties demanded by the provided instruction.
static DemandedFields getDemanded(const MachineInstr &MI) {
  // Warning: This function has to work on both the lowered (i.e. post
  // emitVSETVLIs) and pre-lowering forms. The main implication of this is
  // that it can't use the value of a SEW, VL, or Policy operand as they might
  // be stale after lowering.

  // Most instructions don't use any of these subfields.
  DemandedFields Res;
  // Start conservative if registers are used
  if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VL))
    Res.VL = true;
  if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VTYPE))
    Res.demandVTYPE();
  // Start conservative on the unlowered form too
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (RISCVII::hasSEWOp(TSFlags)) {
    Res.demandVTYPE();
    if (RISCVII::hasVLOp(TSFlags))
      Res.VL = true;
  }

  // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
  // They instead demand the ratio of the two which is used in computing
  // EMUL, but which allows us the flexibility to change SEW and LMUL
  // provided we don't change the ratio.
  // Note: We assume that the instruction's initial SEW is the EEW encoded
  // in the opcode. This is asserted when constructing the VSETVLIInfo.
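  // For example, a vle32 executed under e32/m1 or under e16/mf2 reads the
  // same elements, since both configurations have a SEW/LMUL ratio of 32 and
  // the EEW of 32 then yields the same EMUL.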
  if (getEEWForLoadStore(MI)) {
    Res.SEW = false;
    Res.LMUL = false;
  }

  // Store instructions don't use the policy fields.
  if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  // If this is a mask reg operation, it only cares about VLMAX.
  // TODO: Possible extensions to this logic
  // * Probably ok if available VLMax is larger than demanded
  // * The policy bits can probably be ignored.
  if (isMaskRegOp(MI)) {
    Res.SEW = false;
    Res.LMUL = false;
  }

  return Res;
}

/// Defines the abstract state with which the forward dataflow models the
/// values of the VL and VTYPE registers after insertion.
class VSETVLIInfo {
  union {
    Register AVLReg;
    unsigned AVLImm;
  };

  enum : uint8_t {
    Uninitialized,
    AVLIsReg,
    AVLIsImm,
    Unknown,
  } State = Uninitialized;

  // Fields from VTYPE.
  RISCVII::VLMUL VLMul = RISCVII::LMUL_1;
  uint8_t SEW = 0;
  uint8_t TailAgnostic : 1;
  uint8_t MaskAgnostic : 1;
  uint8_t SEWLMULRatioOnly : 1;

public:
  VSETVLIInfo()
      : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
        SEWLMULRatioOnly(false) {}

  static VSETVLIInfo getUnknown() {
    VSETVLIInfo Info;
    Info.setUnknown();
    return Info;
  }

  bool isValid() const { return State != Uninitialized; }
  void setUnknown() { State = Unknown; }
  bool isUnknown() const { return State == Unknown; }

  void setAVLReg(Register Reg) {
    AVLReg = Reg;
    State = AVLIsReg;
  }

  void setAVLImm(unsigned Imm) {
    AVLImm = Imm;
    State = AVLIsImm;
  }

  bool hasAVLImm() const { return State == AVLIsImm; }
  bool hasAVLReg() const { return State == AVLIsReg; }
  Register getAVLReg() const {
    assert(hasAVLReg());
    return AVLReg;
  }
  unsigned getAVLImm() const {
    assert(hasAVLImm());
    return AVLImm;
  }

  unsigned getSEW() const { return SEW; }
  RISCVII::VLMUL getVLMUL() const { return VLMul; }

  bool hasNonZeroAVL() const {
    if (hasAVLImm())
      return getAVLImm() > 0;
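    // An AVL register of X0 selects VLMAX, which is always non-zero.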
    if (hasAVLReg())
      return getAVLReg() == RISCV::X0;
    return false;
  }

  bool hasSameAVL(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare AVL in unknown state");
    if (hasAVLReg() && Other.hasAVLReg())
      return getAVLReg() == Other.getAVLReg();

    if (hasAVLImm() && Other.hasAVLImm())
      return getAVLImm() == Other.getAVLImm();

    return false;
  }

  void setVTYPE(unsigned VType) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = RISCVVType::getVLMUL(VType);
    SEW = RISCVVType::getSEW(VType);
    TailAgnostic = RISCVVType::isTailAgnostic(VType);
    MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
  }
  void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = L;
    SEW = S;
    TailAgnostic = TA;
    MaskAgnostic = MA;
  }

  unsigned encodeVTYPE() const {
    assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
           "Can't encode VTYPE for uninitialized or unknown");
    return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
  }

  bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }

  bool hasSameSEW(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return SEW == Other.SEW;
  }

  bool hasSameVTYPE(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
           std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
                    Other.MaskAgnostic);
  }

  unsigned getSEWLMULRatio() const {
    assert(isValid() && !isUnknown() &&
           "Can't use VTYPE for uninitialized or unknown");
    return ::getSEWLMULRatio(SEW, VLMul);
  }

  // Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX.
  // Note that having the same VLMAX ensures that both share the same
  // function from AVL to VL; that is, they must produce the same VL value
  // for any given AVL value.
  bool hasSameVLMAX(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return getSEWLMULRatio() == Other.getSEWLMULRatio();
  }

  bool hasSamePolicy(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return TailAgnostic == Other.TailAgnostic &&
           MaskAgnostic == Other.MaskAgnostic;
  }

  bool hasCompatibleVTYPE(const MachineInstr &MI,
                          const VSETVLIInfo &Require) const {
    const DemandedFields Used = getDemanded(MI);
    return areCompatibleVTYPEs(encodeVTYPE(), Require.encodeVTYPE(), Used);
  }

  // Determine whether the vector instruction's requirements represented by
  // Require are compatible with the previous vsetvli instruction represented
  // by this. MI is the instruction whose requirements we're considering.
  bool isCompatible(const MachineInstr &MI, const VSETVLIInfo &Require) const {
    assert(isValid() && Require.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!Require.SEWLMULRatioOnly &&
           "Expected a valid VTYPE for instruction!");
    // Nothing is compatible with Unknown.
    if (isUnknown() || Require.isUnknown())
      return false;

    // If only our SEW/LMUL ratio is valid, then this isn't compatible.
    if (SEWLMULRatioOnly)
      return false;

    // If the instruction doesn't need an AVLReg and the SEW matches, consider
    // it compatible.
    if (Require.hasAVLReg() && Require.AVLReg == RISCV::NoRegister)
      if (SEW == Require.SEW)
        return true;

    return hasSameAVL(Require) && hasCompatibleVTYPE(MI, Require);
  }

  bool operator==(const VSETVLIInfo &Other) const {
    // Uninitialized is only equal to another Uninitialized.
    if (!isValid())
      return !Other.isValid();
    if (!Other.isValid())
      return !isValid();

    // Unknown is only equal to another Unknown.
    if (isUnknown())
      return Other.isUnknown();
    if (Other.isUnknown())
      return isUnknown();

    if (!hasSameAVL(Other))
      return false;

    // If the SEWLMULRatioOnly bits are different, then they aren't equal.
    if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
      return false;

    // If only the VLMAX is valid, check that it is the same.
    if (SEWLMULRatioOnly)
      return hasSameVLMAX(Other);

    // If the full VTYPE is valid, check that it is the same.
    return hasSameVTYPE(Other);
  }

  bool operator!=(const VSETVLIInfo &Other) const {
    return !(*this == Other);
  }

  // Calculate the VSETVLIInfo visible to a block assuming this and Other are
  // both predecessors.
  VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
    // If the new value isn't valid, ignore it.
    if (!Other.isValid())
      return *this;

    // If this value isn't valid, this must be the first predecessor, use it.
    if (!isValid())
      return Other;

    // If either is unknown, the result is unknown.
    if (isUnknown() || Other.isUnknown())
      return VSETVLIInfo::getUnknown();

    // If we have an exact match, return this.
    if (*this == Other)
      return *this;

    // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
    // return an SEW/LMUL ratio only value.
    if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
      VSETVLIInfo MergeInfo = *this;
      MergeInfo.SEWLMULRatioOnly = true;
      return MergeInfo;
    }

    // Otherwise the result is unknown.
    return VSETVLIInfo::getUnknown();
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  /// @{
  void print(raw_ostream &OS) const {
    OS << "{";
    if (!isValid())
      OS << "Uninitialized";
    if (isUnknown())
      OS << "unknown";
    if (hasAVLReg())
      OS << "AVLReg=" << (unsigned)AVLReg;
    if (hasAVLImm())
      OS << "AVLImm=" << (unsigned)AVLImm;
    OS << ", "
       << "VLMul=" << (unsigned)VLMul << ", "
       << "SEW=" << (unsigned)SEW << ", "
       << "TailAgnostic=" << (bool)TailAgnostic << ", "
       << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
       << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
  V.print(OS);
  return OS;
}
#endif

struct BlockData {
  // The VSETVLIInfo that represents the net changes to the VL/VTYPE registers
  // made by this block. Calculated in Phase 1.
  VSETVLIInfo Change;

  // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
  // block. Calculated in Phase 2.
  VSETVLIInfo Exit;

  // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
  // blocks. Calculated in Phase 2, and used by Phase 3.
  VSETVLIInfo Pred;

  // Keeps track of whether the block is already in the queue.
  bool InQueue = false;

  BlockData() = default;
};

class RISCVInsertVSETVLI : public MachineFunctionPass {
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;

  std::vector<BlockData> BlockInfo;
  std::queue<const MachineBasicBlock *> WorkList;

public:
  static char ID;

  RISCVInsertVSETVLI() : MachineFunctionPass(ID) {
    initializeRISCVInsertVSETVLIPass(*PassRegistry::getPassRegistry());
  }
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }

private:
  bool needVSETVLI(const MachineInstr &MI, const VSETVLIInfo &Require,
                   const VSETVLIInfo &CurInfo) const;
  bool needVSETVLIPHI(const VSETVLIInfo &Require,
                      const MachineBasicBlock &MBB) const;
  void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
  void insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);

  void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI);
  void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI);
  bool computeVLVTYPEChanges(const MachineBasicBlock &MBB);
  void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
  void emitVSETVLIs(MachineBasicBlock &MBB);
  void doLocalPostpass(MachineBasicBlock &MBB);
  void doPRE(MachineBasicBlock &MBB);
  void insertReadVL(MachineBasicBlock &MBB);
};

} // end anonymous namespace

char RISCVInsertVSETVLI::ID = 0;

INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
                false, false)

static bool isVectorConfigInstr(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::PseudoVSETVLI ||
         MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
         MI.getOpcode() == RISCV::PseudoVSETIVLI;
}

/// Return true if this is 'vsetvli x0, x0, vtype' which preserves
/// VL and only sets VTYPE.
static bool isVLPreservingConfig(const MachineInstr &MI) {
  if (MI.getOpcode() != RISCV::PseudoVSETVLIX0)
    return false;
  assert(RISCV::X0 == MI.getOperand(1).getReg());
  return RISCV::X0 == MI.getOperand(0).getReg();
}

static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
                                       const MachineRegisterInfo *MRI) {
  VSETVLIInfo InstrInfo;

  // If the instruction has a policy argument, use the argument.
  // If there is no policy argument, default to tail agnostic unless the
  // destination is tied to a source (and that source is not undef), in which
  // case the user has some control over the tail values and we default to
  // tail undisturbed.
  bool TailAgnostic = true;
  bool UsesMaskPolicy = RISCVII::usesMaskPolicy(TSFlags);
  // FIXME: Could we look at the surrounding instructions to choose a matching
  // mask policy and reduce the number of vsetvli instructions? The default
  // mask policy is agnostic if the instruction uses a mask policy, and
  // undisturbed otherwise. Because most masked operations are mask
  // undisturbed, we could possibly avoid a vsetvli between a masked and an
  // unmasked instruction sequence.
  bool MaskAgnostic = UsesMaskPolicy;
  unsigned UseOpIdx;
  if (RISCVII::hasVecPolicyOp(TSFlags)) {
    const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
    uint64_t Policy = Op.getImm();
    assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
           "Invalid Policy Value");
    // Although a mismatched passthru/maskedoff and policy value sometimes
    // does not make sense (e.g. a tied operand that is IMPLICIT_DEF with a
    // non-TAMA policy, or a tied operand that is not IMPLICIT_DEF with a
    // TAMA policy), the user set the policy value explicitly, so the
    // compiler does not change it.
    TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC;
    MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
  } else if (MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
    TailAgnostic = false;
    if (UsesMaskPolicy)
      MaskAgnostic = false;
    // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic.
    const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
    MachineInstr *UseMI = MRI->getVRegDef(UseMO.getReg());
    if (UseMI && UseMI->isImplicitDef()) {
      TailAgnostic = true;
      if (UsesMaskPolicy)
        MaskAgnostic = true;
    }
    // Some pseudo instructions force a tail agnostic policy despite having a
    // tied def.
    if (RISCVII::doesForceTailAgnostic(TSFlags))
      TailAgnostic = true;
  }

  RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);

  unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");

  if (RISCVII::hasVLOp(TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
    if (VLOp.isImm()) {
      int64_t Imm = VLOp.getImm();
      // Convert the VLMax sentinel to the X0 register.
      if (Imm == RISCV::VLMaxSentinel)
        InstrInfo.setAVLReg(RISCV::X0);
      else
        InstrInfo.setAVLImm(Imm);
    } else {
      InstrInfo.setAVLReg(VLOp.getReg());
    }
  } else {
    InstrInfo.setAVLReg(RISCV::NoRegister);
  }
#ifndef NDEBUG
  if (Optional<unsigned> EEW = getEEWForLoadStore(MI)) {
    assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
  }
#endif
  InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);

  return InstrInfo;
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                                       const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {
  DebugLoc DL = MI.getDebugLoc();
  insertVSETVLI(MBB, MachineBasicBlock::iterator(&MI), DL, Info, PrevInfo);
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo) {

  // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
  // VLMAX.
  if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
      Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addReg(RISCV::X0, RegState::Kill)
        .addImm(Info.encodeVTYPE())
        .addReg(RISCV::VL, RegState::Implicit);
    return;
  }

  if (Info.hasAVLImm()) {
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(Info.getAVLImm())
        .addImm(Info.encodeVTYPE());
    return;
  }

  Register AVLReg = Info.getAVLReg();
  if (AVLReg == RISCV::NoRegister) {
    // We can only use x0, x0 if there's no chance of the vtype change causing
    // the previous vl to become invalid.
    if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
        Info.hasSameVLMAX(PrevInfo)) {
      BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
          .addReg(RISCV::X0, RegState::Define | RegState::Dead)
          .addReg(RISCV::X0, RegState::Kill)
          .addImm(Info.encodeVTYPE())
          .addReg(RISCV::VL, RegState::Implicit);
      return;
    }
    // Otherwise use an AVL of 0 to avoid depending on previous vl.
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(0)
        .addImm(Info.encodeVTYPE());
    return;
  }

  if (AVLReg.isVirtual())
    MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);

  // Use X0 as the DestReg unless AVLReg is X0. We also need to change the
  // opcode if the AVLReg is X0 as they have different register classes for
  // the AVL operand.
  Register DestReg = RISCV::X0;
  unsigned Opcode = RISCV::PseudoVSETVLI;
  if (AVLReg == RISCV::X0) {
    DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
    Opcode = RISCV::PseudoVSETVLIX0;
  }
  BuildMI(MBB, InsertPt, DL, TII->get(Opcode))
      .addReg(DestReg, RegState::Define | RegState::Dead)
      .addReg(AVLReg)
      .addImm(Info.encodeVTYPE());
}

// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
  VSETVLIInfo NewInfo;
  if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
    NewInfo.setAVLImm(MI.getOperand(1).getImm());
  } else {
    assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
           MI.getOpcode() == RISCV::PseudoVSETVLIX0);
    Register AVLReg = MI.getOperand(1).getReg();
    assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
           "Can't handle X0, X0 vsetvli yet");
    NewInfo.setAVLReg(AVLReg);
  }
  NewInfo.setVTYPE(MI.getOperand(2).getImm());

  return NewInfo;
}

/// Return true if a VSETVLI is required to transition from CurInfo to Require
/// before MI.
bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
                                     const VSETVLIInfo &Require,
                                     const VSETVLIInfo &CurInfo) const {
  assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, MRI));

  if (CurInfo.isCompatible(MI, Require))
    return false;

  if (!CurInfo.isValid() || CurInfo.isUnknown() || CurInfo.hasSEWLMULRatioOnly())
    return true;

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
  // VL=0 is uninteresting (as it should have been deleted already), so it is
  // compatible if we can prove both are non-zero. Additionally, if writing
  // to an implicit_def operand, we don't need to preserve any other bits and
  // are thus compatible with any larger etype, and can disregard policy bits.
  if (isScalarMoveInstr(MI) &&
      CurInfo.hasNonZeroAVL() && Require.hasNonZeroAVL()) {
    auto *VRegDef = MRI->getVRegDef(MI.getOperand(1).getReg());
    if (VRegDef && VRegDef->isImplicitDef() &&
        CurInfo.getSEW() >= Require.getSEW())
      return false;
    if (CurInfo.hasSameSEW(Require) && CurInfo.hasSamePolicy(Require))
      return false;
  }

  // We didn't find a compatible value. If our AVL is a virtual register,
  // it might be defined by a VSET(I)VLI. If it has the same VLMAX we need
  // and the last VL/VTYPE we observed is the same, we don't need a
  // VSETVLI here.
  if (Require.hasAVLReg() && Require.getAVLReg().isVirtual() &&
      CurInfo.hasCompatibleVTYPE(MI, Require)) {
    if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) {
      if (isVectorConfigInstr(*DefMI)) {
        VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
        if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVLMAX(CurInfo))
          return false;
      }
    }
  }

  return true;
}

// Given an incoming state reaching MI, modifies that state so that it is minimally
// compatible with MI. The resulting state is guaranteed to be semantically legal
// for MI, but may not be the state requested by MI.
void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) {
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (!RISCVII::hasSEWOp(TSFlags))
    return;

  const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
  if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info))
    return;

  const VSETVLIInfo PrevInfo = Info;
  Info = NewInfo;

  if (!RISCVII::hasVLOp(TSFlags))
    return;

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and
  // VL > 0. We can discard the user requested AVL and just use the last
  // one if we can prove both are non-zero. This removes a vsetvli entirely
  // if the types match or allows use of the cheaper avl preserving variant
  // if VLMAX doesn't change. If VLMAX might change, we couldn't use
  // the 'vsetvli x0, x0, vtype' variant, so we avoid the transform to
  // prevent extending the live range of an avl register operand.
  // TODO: We can probably relax this for immediates.
  if (isScalarMoveInstr(MI) && PrevInfo.isValid() &&
      PrevInfo.hasNonZeroAVL() && Info.hasNonZeroAVL() &&
      Info.hasSameVLMAX(PrevInfo)) {
    if (PrevInfo.hasAVLImm())
      Info.setAVLImm(PrevInfo.getAVLImm());
    else
      Info.setAVLReg(PrevInfo.getAVLReg());
    return;
  }

  // If AVL is defined by a vsetvli with the same VLMAX, we can
  // replace the AVL operand with the AVL of the defining vsetvli.
  // We avoid general register AVLs to avoid extending live ranges
  // without being sure we can kill the original source reg entirely.
  if (!Info.hasAVLReg() || !Info.getAVLReg().isVirtual())
    return;
  MachineInstr *DefMI = MRI->getVRegDef(Info.getAVLReg());
  if (!DefMI || !isVectorConfigInstr(*DefMI))
    return;

  VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
  if (DefInfo.hasSameVLMAX(Info) &&
      (DefInfo.hasAVLImm() || DefInfo.getAVLReg() == RISCV::X0)) {
    if (DefInfo.hasAVLImm())
      Info.setAVLImm(DefInfo.getAVLImm());
    else
      Info.setAVLReg(DefInfo.getAVLReg());
    return;
  }
}

// Given a state with which we evaluated MI (see transferBefore above for why
// this might be different than the state MI requested), modify the state to
// reflect the changes MI might make.
void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) {
  if (isVectorConfigInstr(MI)) {
    Info = getInfoForVSETVLI(MI);
    return;
  }

  if (RISCV::isFaultFirstLoad(MI)) {
    // Update AVL to vl-output of the fault first load.
    Info.setAVLReg(MI.getOperand(1).getReg());
    return;
  }

  // If this is something that updates VL/VTYPE that we don't know about, set
  // the state to unknown.
  if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
      MI.modifiesRegister(RISCV::VTYPE))
    Info = VSETVLIInfo::getUnknown();
}

bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB) {
  bool HadVectorOp = false;

  BlockData &BBInfo = BlockInfo[MBB.getNumber()];
  BBInfo.Change = BBInfo.Pred;
  for (const MachineInstr &MI : MBB) {
    transferBefore(BBInfo.Change, MI);

    if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags))
      HadVectorOp = true;

    transferAfter(BBInfo.Change, MI);
  }

  return HadVectorOp;
}

void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {

  BlockData &BBInfo = BlockInfo[MBB.getNumber()];

  BBInfo.InQueue = false;

  VSETVLIInfo InInfo;
  if (MBB.pred_empty()) {
    // There are no predecessors, so use the default starting status.
    InInfo.setUnknown();
  } else {
    for (MachineBasicBlock *P : MBB.predecessors())
      InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
  }

  // If we don't have any valid predecessor value, wait until we do.
  if (!InInfo.isValid())
    return;

  // If no change, no need to rerun block
  if (InInfo == BBInfo.Pred)
    return;

  BBInfo.Pred = InInfo;
  LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Pred << "\n");

  // Note: It's tempting to cache the state changes here, but due to the
  // compatibility checks performed a block's output state can change based on
  // the input state. To cache, we'd have to add logic for finding
  // never-compatible state changes.
  computeVLVTYPEChanges(MBB);
  VSETVLIInfo TmpStatus = BBInfo.Change;

  // If the new exit value matches the old exit value, we don't need to revisit
  // any blocks.
  if (BBInfo.Exit == TmpStatus)
    return;

  BBInfo.Exit = TmpStatus;
  LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Exit << "\n");

  // Add the successors to the work list so we can propagate the changed exit
  // status.
  for (MachineBasicBlock *S : MBB.successors())
    if (!BlockInfo[S->getNumber()].InQueue)
      WorkList.push(S);
}

// If we weren't able to prove a vsetvli was directly unneeded, it might still
// be unneeded if the AVL is a phi node where all incoming values are VL
// outputs from the last VSETVLI in their respective basic blocks.
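// For example, if the AVL is %avl = PHI [%vl1, %bb1], [%vl2, %bb2] and each
// incoming %vlN is the GPR result of the vsetvli that also established the
// corresponding predecessor's exit VL/VTYPE state, then VL/VTYPE already hold
// the required values on entry and no new vsetvli is needed.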
bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
                                        const MachineBasicBlock &MBB) const {
  if (DisableInsertVSETVLPHIOpt)
    return true;

  if (!Require.hasAVLReg())
    return true;

  Register AVLReg = Require.getAVLReg();
  if (!AVLReg.isVirtual())
    return true;

  // We need the AVL to be produced by a PHI node in this basic block.
  MachineInstr *PHI = MRI->getVRegDef(AVLReg);
  if (!PHI || PHI->getOpcode() != RISCV::PHI || PHI->getParent() != &MBB)
    return true;

  for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
       PHIOp += 2) {
    Register InReg = PHI->getOperand(PHIOp).getReg();
    MachineBasicBlock *PBB = PHI->getOperand(PHIOp + 1).getMBB();
    const BlockData &PBBInfo = BlockInfo[PBB->getNumber()];
    // If the exit from the predecessor has the VTYPE we are looking for
    // we might be able to avoid a VSETVLI.
    if (PBBInfo.Exit.isUnknown() || !PBBInfo.Exit.hasSameVTYPE(Require))
      return true;

    // We need the PHI input to be the output of a VSET(I)VLI.
    MachineInstr *DefMI = MRI->getVRegDef(InReg);
    if (!DefMI || !isVectorConfigInstr(*DefMI))
      return true;

    // We found a VSET(I)VLI; make sure it matches the output of the
    // predecessor block.
    VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
    if (!DefInfo.hasSameAVL(PBBInfo.Exit) ||
        !DefInfo.hasSameVTYPE(PBBInfo.Exit))
      return true;
  }

  // If all the incoming values to the PHI checked out, we don't need
  // to insert a VSETVLI.
  return false;
}

void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
  VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred;
  // Track whether the prefix of the block we've scanned is transparent
  // (meaning has not yet changed the abstract state).
  bool PrefixTransparent = true;
  for (MachineInstr &MI : MBB) {
    const VSETVLIInfo PrevInfo = CurInfo;
    transferBefore(CurInfo, MI);

    // If this is an explicit VSETVLI or VSETIVLI, update our state.
    if (isVectorConfigInstr(MI)) {
      // Conservatively, mark the VL and VTYPE as live.
      assert(MI.getOperand(3).getReg() == RISCV::VL &&
             MI.getOperand(4).getReg() == RISCV::VTYPE &&
             "Unexpected operands where VL and VTYPE should be");
      MI.getOperand(3).setIsDead(false);
      MI.getOperand(4).setIsDead(false);
      PrefixTransparent = false;
    }

    uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      if (PrevInfo != CurInfo) {
        // If this is the first implicit state change, and the state change
        // requested can be proven to produce the same register contents, we
        // can skip emitting the actual state change and continue as if we
        // had since we know the GPR result of the implicit state change
        // wouldn't be used and VL/VTYPE registers are correct. Note that
        // we *do* need to model the state as if it changed as while the
        // register contents are unchanged, the abstract model can change.
        if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB))
          insertVSETVLI(MBB, MI, CurInfo, PrevInfo);
        PrefixTransparent = false;
      }

      if (RISCVII::hasVLOp(TSFlags)) {
        MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
        if (VLOp.isReg()) {
          // Erase the AVL operand from the instruction.
          VLOp.setReg(RISCV::NoRegister);
          VLOp.setIsKill(false);
        }
        MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
                                                /*isImp*/ true));
      }
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
                                              /*isImp*/ true));
    }

    if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
        MI.modifiesRegister(RISCV::VTYPE))
      PrefixTransparent = false;

    transferAfter(CurInfo, MI);
  }

  // If we reach the end of the block and our current info doesn't match the
  // expected info, insert a vsetvli to correct.
  if (!UseStrictAsserts) {
    const VSETVLIInfo &ExitInfo = BlockInfo[MBB.getNumber()].Exit;
    if (CurInfo.isValid() && ExitInfo.isValid() && !ExitInfo.isUnknown() &&
        CurInfo != ExitInfo) {
      // Note there's an implicit assumption here that terminators never use
      // or modify VL or VTYPE.  Also, fallthrough will return end().
      auto InsertPt = MBB.getFirstInstrTerminator();
      insertVSETVLI(MBB, InsertPt, MBB.findDebugLoc(InsertPt), ExitInfo,
                    CurInfo);
      CurInfo = ExitInfo;
    }
  }

  if (UseStrictAsserts && CurInfo.isValid()) {
    const auto &Info = BlockInfo[MBB.getNumber()];
    if (CurInfo != Info.Exit) {
      LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
      LLVM_DEBUG(dbgs() << "  begin state: " << Info.Pred << "\n");
      LLVM_DEBUG(dbgs() << "  expected end state: " << Info.Exit << "\n");
      LLVM_DEBUG(dbgs() << "  actual end state: " << CurInfo << "\n");
    }
    assert(CurInfo == Info.Exit &&
           "InsertVSETVLI dataflow invariant violated");
  }
}

/// Return true if the VL value configured must be equal to the requested one.
static bool hasFixedResult(const VSETVLIInfo &Info, const RISCVSubtarget &ST) {
  if (!Info.hasAVLImm())
    // VLMAX is always the same value.
    // TODO: Could extend to other registers by looking at the associated vreg
    // def placement.
    return RISCV::X0 == Info.getAVLReg();

  unsigned AVL = Info.getAVLImm();
  unsigned SEW = Info.getSEW();
  unsigned AVLInBits = AVL * SEW;

  unsigned LMul;
  bool Fractional;
  std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(Info.getVLMUL());

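  // For example, with a minimum VLEN of 128, SEW=32, and LMUL=2, an AVL of 8
  // needs 8*32 = 256 bits, and 128*2 = 256 >= 256, so VL is guaranteed to
  // equal the requested AVL.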
  if (Fractional)
    return ST.getRealMinVLen() / LMul >= AVLInBits;
  return ST.getRealMinVLen() * LMul >= AVLInBits;
}

/// Perform simple partial redundancy elimination of the VSETVLI instructions
/// we're about to insert by looking for cases where we can PRE from the
/// beginning of one block to the end of one of its predecessors. Specifically,
/// this is geared to catch the common case of a fixed length vsetvl in a single
/// block loop when it could execute once in the preheader instead.
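/// For example, a single block loop whose body starts with
/// "vsetivli zero, 4, e32, m1, ta, mu" followed by vector ops executes that
/// vsetivli on every iteration; when the preheader's exit state is otherwise
/// unknown, the transition can instead be made once at the end of the
/// preheader.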
void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
  const MachineFunction &MF = *MBB.getParent();
  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();

  if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
    return;

  MachineBasicBlock *UnavailablePred = nullptr;
  VSETVLIInfo AvailableInfo;
  for (MachineBasicBlock *P : MBB.predecessors()) {
    const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
    if (PredInfo.isUnknown()) {
      if (UnavailablePred)
        return;
      UnavailablePred = P;
    } else if (!AvailableInfo.isValid()) {
      AvailableInfo = PredInfo;
    } else if (AvailableInfo != PredInfo) {
      return;
    }
  }

  // Unreachable, single pred, or full redundancy. Note that FRE is handled by
  // phase 3.
  if (!UnavailablePred || !AvailableInfo.isValid())
    return;

  // Critical edge - TODO: consider splitting?
  if (UnavailablePred->succ_size() != 1)
    return;

  // If VL can be less than AVL, then we can't reduce the frequency of
  // execution.
  if (!hasFixedResult(AvailableInfo, ST))
    return;

  // Does it actually let us remove an implicit transition in MBB?
  bool Found = false;
  for (auto &MI : MBB) {
    if (isVectorConfigInstr(MI))
      return;

    const uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      if (AvailableInfo != computeInfoForInstr(MI, TSFlags, MRI))
        return;
      Found = true;
      break;
    }
  }
  if (!Found)
    return;

  // Finally, update both data flow state and insert the actual vsetvli.
  // Doing both keeps the code in sync with the dataflow results, which
  // is critical for correctness of phase 3.
  auto OldInfo = BlockInfo[UnavailablePred->getNumber()].Exit;
  LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
                    << UnavailablePred->getName() << " with state "
                    << AvailableInfo << "\n");
  BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
  BlockInfo[MBB.getNumber()].Pred = AvailableInfo;

  // Note there's an implicit assumption here that terminators never use
  // or modify VL or VTYPE.  Also, fallthrough will return end().
  auto InsertPt = UnavailablePred->getFirstInstrTerminator();
  insertVSETVLI(*UnavailablePred, InsertPt,
                UnavailablePred->findDebugLoc(InsertPt),
                AvailableInfo, OldInfo);
}

static void doUnion(DemandedFields &A, DemandedFields B) {
  A.VL |= B.VL;
  A.SEW |= B.SEW;
  A.LMUL |= B.LMUL;
  A.SEWLMULRatio |= B.SEWLMULRatio;
  A.TailPolicy |= B.TailPolicy;
  A.MaskPolicy |= B.MaskPolicy;
}

// Return true if we can mutate PrevMI's VTYPE to match MI's
// without changing any of the fields which have been used.
// TODO: Restructure code to allow code reuse between this and isCompatible
// above.
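// For example, a prior "vsetvli x0, a0, e32, m1, ta, mu" followed by the
// VL-preserving "vsetvli x0, x0, e8, mf4, ta, mu" can have its VTYPE rewritten
// to e8/mf4 (the SEW/LMUL ratio of 32 is unchanged) when the instructions in
// between only demanded that ratio, allowing the second vsetvli to be deleted.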
static bool canMutatePriorConfig(const MachineInstr &PrevMI,
                                 const MachineInstr &MI,
                                 const DemandedFields &Used) {
  // TODO: Extend this to handle cases where VL does change, but VL
  // has not been used. (e.g. over a vmv.x.s)
  if (!isVLPreservingConfig(MI))
    // Note: `vsetvli x0, x0, vtype' is the canonical instruction
    // for this case. If you find yourself wanting to add other forms
    // to this "unused VTYPE" case, we're probably missing a
    // canonicalization earlier.
    return false;

  if (!PrevMI.getOperand(2).isImm() || !MI.getOperand(2).isImm())
    return false;

  auto PriorVType = PrevMI.getOperand(2).getImm();
  auto VType = MI.getOperand(2).getImm();
  return areCompatibleVTYPEs(PriorVType, VType, Used);
}

void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
  MachineInstr *PrevMI = nullptr;
  DemandedFields Used;
  SmallVector<MachineInstr*> ToDelete;
  for (MachineInstr &MI : MBB) {
    // Note: Must be *before* vsetvli handling to account for config cases
    // which only change some subfields.
    doUnion(Used, getDemanded(MI));

    if (!isVectorConfigInstr(MI))
      continue;

    if (PrevMI) {
      if (!Used.VL && !Used.usedVTYPE()) {
        ToDelete.push_back(PrevMI);
        // fallthrough
      } else if (canMutatePriorConfig(*PrevMI, MI, Used)) {
        PrevMI->getOperand(2).setImm(MI.getOperand(2).getImm());
        ToDelete.push_back(&MI);
        // Leave PrevMI unchanged
        continue;
      }
    }
    PrevMI = &MI;
    Used = getDemanded(MI);
    Register VRegDef = MI.getOperand(0).getReg();
    if (VRegDef != RISCV::X0 &&
        !(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef)))
      Used.VL = true;
  }

  for (auto *MI : ToDelete)
    MI->eraseFromParent();
}

void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
  for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
    MachineInstr &MI = *I++;
    if (RISCV::isFaultFirstLoad(MI)) {
      Register VLOutput = MI.getOperand(1).getReg();
      if (!MRI->use_nodbg_empty(VLOutput))
        BuildMI(MBB, I, MI.getDebugLoc(), TII->get(RISCV::PseudoReadVL),
                VLOutput);
      // We don't use the vl output of the VLEFF/VLSEGFF anymore.
      MI.getOperand(1).setReg(RISCV::X0);
    }
  }
}

bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
  // Skip if the vector extension is not enabled.
  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
  if (!ST.hasVInstructions())
    return false;

  LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");

  TII = ST.getInstrInfo();
  MRI = &MF.getRegInfo();

  assert(BlockInfo.empty() && "Expect empty block infos");
  BlockInfo.resize(MF.getNumBlockIDs());

  bool HaveVectorOp = false;

  // Phase 1 - determine how VL/VTYPE are affected by each block.
  for (const MachineBasicBlock &MBB : MF) {
    HaveVectorOp |= computeVLVTYPEChanges(MBB);
    // Initial exit state is whatever change we found in the block.
    BlockData &BBInfo = BlockInfo[MBB.getNumber()];
    BBInfo.Exit = BBInfo.Change;
    LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
                      << " is " << BBInfo.Exit << "\n");

  }

  // If we didn't find any instructions that need VSETVLI, we're done.
  if (!HaveVectorOp) {
    BlockInfo.clear();
    return false;
  }

  // Phase 2 - determine the exit VL/VTYPE from each block. We add all
  // blocks to the list here, but will also add any that need to be revisited
  // during Phase 2 processing.
  for (const MachineBasicBlock &MBB : MF) {
    WorkList.push(&MBB);
    BlockInfo[MBB.getNumber()].InQueue = true;
  }
  while (!WorkList.empty()) {
    const MachineBasicBlock &MBB = *WorkList.front();
    WorkList.pop();
    computeIncomingVLVTYPE(MBB);
  }

  // Perform partial redundancy elimination of vsetvli transitions.
  for (MachineBasicBlock &MBB : MF)
    doPRE(MBB);

  // Phase 3 - add any vsetvli instructions needed in the block. Use the
  // Phase 2 information to avoid adding vsetvlis before the first vector
  // instruction in the block if the VL/VTYPE is satisfied by its
  // predecessors.
  for (MachineBasicBlock &MBB : MF)
    emitVSETVLIs(MBB);

  // Now that all vsetvlis are explicit, go through and do block local
  // DSE and peephole based demanded fields based transforms. Note that
  // this *must* be done outside the main dataflow so long as we allow
  // any cross block analysis within the dataflow. We can't have both
  // demanded fields based mutation and non-local analysis in the
  // dataflow at the same time without introducing inconsistencies.
  for (MachineBasicBlock &MBB : MF)
    doLocalPostpass(MBB);

  // Once we're fully done rewriting all the instructions, do a final pass
  // through to check for VSETVLIs which write to an unused destination.
  // For the non X0, X0 variant, we can replace the destination register
  // with X0 to reduce register pressure. This is really a generic
  // optimization which can be applied to any dead def (TODO: generalize).
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      if (MI.getOpcode() == RISCV::PseudoVSETVLI ||
          MI.getOpcode() == RISCV::PseudoVSETIVLI) {
        Register VRegDef = MI.getOperand(0).getReg();
        if (VRegDef != RISCV::X0 && MRI->use_nodbg_empty(VRegDef))
          MI.getOperand(0).setReg(RISCV::X0);
      }
    }
  }

  // Insert PseudoReadVL after VLEFF/VLSEGFF and replace the uses of the vl
  // output of the VLEFF/VLSEGFF.
  for (MachineBasicBlock &MBB : MF)
    insertReadVL(MBB);

  BlockInfo.clear();
  return HaveVectorOp;
}

/// Returns an instance of the Insert VSETVLI pass.
FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
  return new RISCVInsertVSETVLI();
}