1 //===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a function pass that inserts VSETVLI instructions where 10 // needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL 11 // instructions. 12 // 13 // This pass consists of 3 phases: 14 // 15 // Phase 1 collects how each basic block affects VL/VTYPE. 16 // 17 // Phase 2 uses the information from phase 1 to do a data flow analysis to 18 // propagate the VL/VTYPE changes through the function. This gives us the 19 // VL/VTYPE at the start of each basic block. 20 // 21 // Phase 3 inserts VSETVLI instructions in each basic block. Information from 22 // phase 2 is used to prevent inserting a VSETVLI before the first vector 23 // instruction in the block if possible. 
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>
using namespace llvm;

#define DEBUG_TYPE "riscv-insert-vsetvli"
#define RISCV_INSERT_VSETVLI_NAME "RISCV Insert VSETVLI pass"

// Hidden option: when set, needVSETVLIPHI() conservatively reports that a
// vsetvli is needed instead of looking through PHI nodes.
static cl::opt<bool> DisableInsertVSETVLPHIOpt(
    "riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
    cl::desc("Disable looking through phis when inserting vsetvlis."));

// Hidden option (default on) controlling strict assertion checking of the
// dataflow algorithm.
static cl::opt<bool> UseStrictAsserts(
    "riscv-insert-vsetvl-strict-asserts", cl::init(true), cl::Hidden,
    cl::desc("Enable strict assertion checking for the dataflow algorithm"));

namespace {

// Return the index of MI's VL operand, as reported by RISCVII for MI's
// instruction descriptor.
static unsigned getVLOpNum(const MachineInstr &MI) {
  return RISCVII::getVLOpNum(MI.getDesc());
}

// Return the index of MI's SEW operand, as reported by RISCVII for MI's
// instruction descriptor.
static unsigned getSEWOpNum(const MachineInstr &MI) {
  return RISCVII::getSEWOpNum(MI.getDesc());
}

// Return true if MI is one of the PseudoVMV_S_X_* / PseudoVFMV_S_F*_* scalar
// move pseudos enumerated below.
static bool isScalarMoveInstr(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case RISCV::PseudoVMV_S_X_M1:
  case RISCV::PseudoVMV_S_X_M2:
  case RISCV::PseudoVMV_S_X_M4:
  case RISCV::PseudoVMV_S_X_M8:
  case RISCV::PseudoVMV_S_X_MF2:
  case RISCV::PseudoVMV_S_X_MF4:
  case RISCV::PseudoVMV_S_X_MF8:
  case RISCV::PseudoVFMV_S_F16_M1:
  case RISCV::PseudoVFMV_S_F16_M2:
  case RISCV::PseudoVFMV_S_F16_M4:
  case RISCV::PseudoVFMV_S_F16_M8:
  case RISCV::PseudoVFMV_S_F16_MF2:
  case RISCV::PseudoVFMV_S_F16_MF4:
  case RISCV::PseudoVFMV_S_F32_M1:
  case RISCV::PseudoVFMV_S_F32_M2:
  case RISCV::PseudoVFMV_S_F32_M4:
  case RISCV::PseudoVFMV_S_F32_M8:
  case RISCV::PseudoVFMV_S_F32_MF2:
  case RISCV::PseudoVFMV_S_F64_M1:
  case RISCV::PseudoVFMV_S_F64_M2:
  case RISCV::PseudoVFMV_S_F64_M4:
  case RISCV::PseudoVFMV_S_F64_M8:
    return true;
  }
}


/// Describes a VL/VTYPE configuration: an AVL (either a register or an
/// immediate) plus the VTYPE fields (LMUL, SEW, tail/mask policy). The State
/// member records which union member is active, or that the value is still
/// uninitialized or is unknown.
class VSETVLIInfo {
  // Storage for the AVL; State says which member (if any) is meaningful.
  union {
    Register AVLReg;
    unsigned AVLImm;
  };

  enum : uint8_t {
    Uninitialized,
    AVLIsReg,
    AVLIsImm,
    Unknown,
  } State = Uninitialized;

  // Fields from VTYPE.
  RISCVII::VLMUL VLMul = RISCVII::LMUL_1;
  uint8_t SEW = 0;
  uint8_t TailAgnostic : 1;
  uint8_t MaskAgnostic : 1;
  // Set by intersect() when two states agree on AVL and VLMAX but are not
  // equal; most VTYPE queries assert that this bit is clear.
  uint8_t SEWLMULRatioOnly : 1;

public:
  // Bit-fields cannot use default member initializers here, so they are
  // cleared explicitly. State starts out as Uninitialized.
  VSETVLIInfo()
      : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
        SEWLMULRatioOnly(false) {}

  // Convenience factory for a value in the Unknown state.
  static VSETVLIInfo getUnknown() {
    VSETVLIInfo Info;
    Info.setUnknown();
    return Info;
  }

  // "Valid" means anything other than Uninitialized; note that Unknown is
  // considered valid.
  bool isValid() const { return State != Uninitialized; }
  void setUnknown() { State = Unknown; }
  bool isUnknown() const { return State == Unknown; }

  void setAVLReg(Register Reg) {
    AVLReg = Reg;
    State = AVLIsReg;
  }

  void setAVLImm(unsigned Imm) {
    AVLImm = Imm;
    State = AVLIsImm;
  }

  bool hasAVLImm() const { return State == AVLIsImm; }
  bool hasAVLReg() const { return State == AVLIsReg; }
  Register getAVLReg() const {
    assert(hasAVLReg());
    return AVLReg;
  }
  unsigned getAVLImm() const {
    assert(hasAVLImm());
    return AVLImm;
  }

  unsigned getSEW() const { return SEW; }
  RISCVII::VLMUL getVLMUL() const { return VLMul; }

  // True only when the AVL is an immediate equal to zero; a register AVL is
  // never reported as known-zero.
  bool hasZeroAVL() const {
    if (hasAVLImm())
      return getAVLImm() == 0;
    return false;
  }
  // True when the AVL is a non-zero immediate, or the X0 register (which is
  // what computeInfoForInstr uses for the VLMax sentinel). Any other register
  // AVL is not known to be non-zero.
  bool hasNonZeroAVL() const {
    if (hasAVLImm())
      return getAVLImm() > 0;
    if (hasAVLReg())
      return getAVLReg() == RISCV::X0;
    return false;
  }

  // Compare AVLs. A register AVL never matches an immediate AVL. Both sides
  // must be valid and not Unknown.
  bool hasSameAVL(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare AVL in unknown state");
    if (hasAVLReg() && Other.hasAVLReg())
      return getAVLReg() == Other.getAVLReg();

    if (hasAVLImm() && Other.hasAVLImm())
      return getAVLImm() == Other.getAVLImm();

    return false;
  }

  // Populate the VTYPE fields by decoding an encoded VTYPE immediate. The AVL
  // must already have been set (state is neither Uninitialized nor Unknown).
  void setVTYPE(unsigned VType) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = RISCVVType::getVLMUL(VType);
    SEW = RISCVVType::getSEW(VType);
    TailAgnostic = RISCVVType::isTailAgnostic(VType);
    MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
  }
  // Populate the VTYPE fields from already-decoded components. Same
  // precondition as the overload above.
  void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = L;
    SEW = S;
    TailAgnostic = TA;
    MaskAgnostic = MA;
  }

  // Re-encode the VTYPE fields as an immediate. Not allowed when only the
  // SEW/LMUL ratio is known.
  unsigned encodeVTYPE() const {
    assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
           "Can't encode VTYPE for uninitialized or unknown");
    return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
  }

  bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }

  bool hasSameSEW(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return SEW == Other.SEW;
  }

  // Full VTYPE equality: LMUL, SEW and both policy bits must match.
  bool hasSameVTYPE(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
           std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
                    Other.MaskAgnostic);
  }

  // Compute SEW/LMUL as an integer, using a fixed-point LMUL so that
  // fractional LMULs are handled without floating point.
  static unsigned getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul) {
    unsigned LMul;
    bool Fractional;
    std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(VLMul);

    // Convert LMul to a fixed point value with 3 fractional bits.
    LMul = Fractional ? (8 / LMul) : (LMul * 8);

    assert(SEW >= 8 && "Unexpected SEW value");
    return (SEW * 8) / LMul;
  }

  unsigned getSEWLMULRatio() const {
    assert(isValid() && !isUnknown() &&
           "Can't use VTYPE for uninitialized or unknown");
    return getSEWLMULRatio(SEW, VLMul);
  }

  // Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX.
  // Note that having the same VLMAX ensures that both share the same
  // function from AVL to VL; that is, they must produce the same VL value
  // for any given AVL value.
  bool hasSameVLMAX(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return getSEWLMULRatio() == Other.getSEWLMULRatio();
  }

  // True if both the tail and the mask policy bits match.
  bool hasSamePolicy(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return TailAgnostic == Other.TailAgnostic &&
           MaskAgnostic == Other.MaskAgnostic;
  }

  // Return true if this VTYPE satisfies what MI requires (Require): either an
  // exact VTYPE match, or, for mask-register-only operations, the same VLMAX
  // and policy bits. Callers must ensure neither side is ratio-only, since
  // hasSameVTYPE asserts on that.
  bool hasCompatibleVTYPE(const MachineInstr &MI,
                          const VSETVLIInfo &Require) const {
    // Simple case, see if full VTYPE matches.
    if (hasSameVTYPE(Require))
      return true;

    // If this is a mask reg operation, it only cares about VLMAX.
    // FIXME: Mask reg operations are probably ok if "this" VLMAX is larger
    // than "Require".
    // FIXME: The policy bits can probably be ignored for mask reg operations.
    const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
    // A Log2SEW of 0 is an operation on mask registers only.
    const bool MaskRegOp = Log2SEW == 0;
    if (MaskRegOp && hasSameVLMAX(Require) &&
        TailAgnostic == Require.TailAgnostic &&
        MaskAgnostic == Require.MaskAgnostic)
      return true;

    return false;
  }

  // Determine whether the vector instructions requirements represented by
  // Require are compatible with the previous vsetvli instruction represented
  // by this.  MI is the instruction whose requirements we're considering.
  bool isCompatible(const MachineInstr &MI, const VSETVLIInfo &Require) const {
    assert(isValid() && Require.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!Require.SEWLMULRatioOnly &&
           "Expected a valid VTYPE for instruction!");
    // Nothing is compatible with Unknown.
    if (isUnknown() || Require.isUnknown())
      return false;

    // If only our VLMAX ratio is valid, then this isn't compatible.
    if (SEWLMULRatioOnly)
      return false;

    // If the instruction doesn't need an AVLReg and the SEW matches, consider
    // it compatible.
    if (Require.hasAVLReg() && Require.AVLReg == RISCV::NoRegister)
      if (SEW == Require.SEW)
        return true;

    // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and
    // VL > 0. So it's compatible when we can be sure that both VLs fall into
    // the same case.
    if (isScalarMoveInstr(MI) && Require.hasAVLImm() &&
        ((hasNonZeroAVL() && Require.hasNonZeroAVL()) ||
         (hasZeroAVL() && Require.hasZeroAVL())) &&
        hasSameSEW(Require) && hasSamePolicy(Require))
      return true;

    // The AVL must match.
    if (!hasSameAVL(Require))
      return false;

    if (hasCompatibleVTYPE(MI, Require))
      return true;

    // Store instructions don't use the policy fields.
    // An instruction with no explicit defs is treated as a store here.
    const bool StoreOp = MI.getNumExplicitDefs() == 0;
    if (StoreOp && VLMul == Require.VLMul && SEW == Require.SEW)
      return true;

    // Anything else is not compatible.
    return false;
  }

  // Return true if a load/store with element width EEW (which must equal
  // Require.SEW) can execute under this state: the AVL must match and the
  // SEW/LMUL ratio of this state must equal the ratio of (EEW, Require.VLMul).
  bool isCompatibleWithLoadStoreEEW(unsigned EEW,
                                    const VSETVLIInfo &Require) const {
    assert(isValid() && Require.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!Require.SEWLMULRatioOnly &&
           "Expected a valid VTYPE for instruction!");
    assert(EEW == Require.SEW && "Mismatched EEW/SEW for store");

    if (isUnknown() || hasSEWLMULRatioOnly())
      return false;

    if (!hasSameAVL(Require))
      return false;

    return getSEWLMULRatio() == getSEWLMULRatio(EEW, Require.VLMul);
  }

  bool operator==(const VSETVLIInfo &Other) const {
    // Uninitialized is only equal to another Uninitialized.
    if (!isValid())
      return !Other.isValid();
    if (!Other.isValid())
      return !isValid();

    // Unknown is only equal to another Unknown.
    if (isUnknown())
      return Other.isUnknown();
    if (Other.isUnknown())
      return isUnknown();

    if (!hasSameAVL(Other))
      return false;

    // If the SEWLMULRatioOnly bits are different, then they aren't equal.
    if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
      return false;

    // If only the VLMAX is valid, check that it is the same.
    if (SEWLMULRatioOnly)
      return hasSameVLMAX(Other);

    // If the full VTYPE is valid, check that it is the same.
    return hasSameVTYPE(Other);
  }

  bool operator!=(const VSETVLIInfo &Other) const {
    return !(*this == Other);
  }

  // Calculate the VSETVLIInfo visible to a block assuming this and Other are
  // both predecessors.
  VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
    // If the new value isn't valid, ignore it.
    if (!Other.isValid())
      return *this;

    // If this value isn't valid, this must be the first predecessor, use it.
    if (!isValid())
      return Other;

    // If either is unknown, the result is unknown.
    if (isUnknown() || Other.isUnknown())
      return VSETVLIInfo::getUnknown();

    // If we have an exact match, return this.
    if (*this == Other)
      return *this;

    // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
    // return an SEW/LMUL ratio only value.
    if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
      VSETVLIInfo MergeInfo = *this;
      MergeInfo.SEWLMULRatioOnly = true;
      return MergeInfo;
    }

    // Otherwise the result is unknown.
    return VSETVLIInfo::getUnknown();
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  /// @{
  // Note: the VTYPE fields are always printed, even when the state is
  // Uninitialized or unknown.
  void print(raw_ostream &OS) const {
    OS << "{";
    if (!isValid())
      OS << "Uninitialized";
    if (isUnknown())
      OS << "unknown";;
    if (hasAVLReg())
      OS << "AVLReg=" << (unsigned)AVLReg;
    if (hasAVLImm())
      OS << "AVLImm=" << (unsigned)AVLImm;
    OS << ", "
       << "VLMul=" << (unsigned)VLMul << ", "
       << "SEW=" << (unsigned)SEW << ", "
       << "TailAgnostic=" << (bool)TailAgnostic << ", "
       << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
       << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Stream a VSETVLIInfo using its print() method (debug/dump builds only).
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
  V.print(OS);
  return OS;
}
#endif

// Per-basic-block dataflow state, indexed by block number in BlockInfo.
struct BlockData {
  // The VSETVLIInfo that represents the net changes to the VL/VTYPE registers
  // made by this block. Calculated in Phase 1.
  VSETVLIInfo Change;

  // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
  // block. Calculated in Phase 2.
  VSETVLIInfo Exit;

  // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
  // blocks. Calculated in Phase 2, and used by Phase 3.
  VSETVLIInfo Pred;

  // Keeps track of whether the block is already in the queue.
  bool InQueue = false;

  BlockData() = default;
};

// The machine function pass itself. It owns the per-block dataflow state
// (BlockInfo) and the work list used to propagate it.
class RISCVInsertVSETVLI : public MachineFunctionPass {
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;

  std::vector<BlockData> BlockInfo;
  std::queue<const MachineBasicBlock *> WorkList;

public:
  static char ID;

  RISCVInsertVSETVLI() : MachineFunctionPass(ID) {
    initializeRISCVInsertVSETVLIPass(*PassRegistry::getPassRegistry());
  }
  bool runOnMachineFunction(MachineFunction &MF) override;

  // The pass declares that it preserves the CFG.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }

private:
  bool needVSETVLI(const MachineInstr &MI, const VSETVLIInfo &Require,
                   const VSETVLIInfo &CurInfo) const;
  bool needVSETVLIPHI(const VSETVLIInfo &Require,
                      const MachineBasicBlock &MBB) const;
  void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
  void insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);

  bool computeVLVTYPEChanges(const MachineBasicBlock &MBB);
  void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
  void emitVSETVLIs(MachineBasicBlock &MBB);
  void doLocalPrepass(MachineBasicBlock &MBB);
  void doLocalPostpass(MachineBasicBlock &MBB);
  void doPRE(MachineBasicBlock &MBB);
  void insertReadVL(MachineBasicBlock &MBB);
};

} // end anonymous namespace

char RISCVInsertVSETVLI::ID = 0;

INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
                false, false)

// Return true if MI is one of the three vector configuration pseudos
// (PseudoVSETVLI, PseudoVSETVLIX0, PseudoVSETIVLI).
static bool isVectorConfigInstr(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::PseudoVSETVLI ||
         MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
         MI.getOpcode() == RISCV::PseudoVSETIVLI;
}

/// Return true if this is 'vsetvli x0, x0, vtype' which preserves
/// VL and only sets VTYPE.
static bool isVLPreservingConfig(const MachineInstr &MI) {
  if (MI.getOpcode() != RISCV::PseudoVSETVLIX0)
    return false;
  assert(RISCV::X0 == MI.getOperand(1).getReg());
  return RISCV::X0 == MI.getOperand(0).getReg();
}

// Walk backwards through a chain of full copies starting at MI and return the
// first instruction that is not a full copy. Returns nullptr if the chain
// reaches a copy whose source is not a virtual register, or a virtual
// register for which MRI has no defining instruction.
static MachineInstr *elideCopies(MachineInstr *MI,
                                 const MachineRegisterInfo *MRI) {
  while (true) {
    if (!MI->isFullCopy())
      return MI;
    if (!Register::isVirtualRegister(MI->getOperand(1).getReg()))
      return nullptr;
    MI = MRI->getVRegDef(MI->getOperand(1).getReg());
    if (!MI)
      return nullptr;
  }
}

// Build the VSETVLIInfo (AVL and VTYPE) that the vector pseudo MI requires,
// derived from its TSFlags and its policy/SEW/VL operands.
static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
                                       const MachineRegisterInfo *MRI) {
  VSETVLIInfo InstrInfo;

  // If the instruction has policy argument, use the argument.
  // If there is no policy argument, default to tail agnostic unless the
  // destination is tied to a source. Unless the source is undef. In that case
  // the user would have some control over the policy values.
  bool TailAgnostic = true;
  bool UsesMaskPolicy = RISCVII::usesMaskPolicy(TSFlags);
  // FIXME: Could we look at the above or below instructions to choose the
  // matched mask policy to reduce vsetvli instructions? Default mask policy is
  // agnostic if instructions use mask policy, otherwise is undisturbed. Because
  // most mask operations are mask undisturbed, so we could possibly reduce the
  // vsetvli between mask and nomasked instruction sequence.
  bool MaskAgnostic = UsesMaskPolicy;
  unsigned UseOpIdx;
  if (RISCVII::hasVecPolicyOp(TSFlags)) {
    // The policy operand is read from the last explicit operand.
    const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
    uint64_t Policy = Op.getImm();
    assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
           "Invalid Policy Value");
    // Although in some cases, mismatched passthru/maskedoff with policy value
    // does not make sense (ex. tied operand is IMPLICIT_DEF with non-TAMA
    // policy, or tied operand is not IMPLICIT_DEF with TAMA policy), but users
    // have set the policy value explicitly, so compiler would not fix it.
    TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC;
    MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
  } else if (MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
    TailAgnostic = false;
    if (UsesMaskPolicy)
      MaskAgnostic = false;
    // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic.
    const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
    MachineInstr *UseMI = MRI->getVRegDef(UseMO.getReg());
    if (UseMI) {
      UseMI = elideCopies(UseMI, MRI);
      if (UseMI && UseMI->isImplicitDef()) {
        TailAgnostic = true;
        if (UsesMaskPolicy)
          MaskAgnostic = true;
      }
    }
    // Some pseudo instructions force a tail agnostic policy despite having a
    // tied def.
    if (RISCVII::doesForceTailAgnostic(TSFlags))
      TailAgnostic = true;
  }

  RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);

  unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");

  if (RISCVII::hasVLOp(TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
    if (VLOp.isImm()) {
      int64_t Imm = VLOp.getImm();
      // Convert the VLMax sentinel to the X0 register.
      if (Imm == RISCV::VLMaxSentinel)
        InstrInfo.setAVLReg(RISCV::X0);
      else
        InstrInfo.setAVLImm(Imm);
    } else {
      InstrInfo.setAVLReg(VLOp.getReg());
    }
  } else {
    // No VL operand: record NoRegister as the AVL. isCompatible() and
    // insertVSETVLI() special-case this value.
    InstrInfo.setAVLReg(RISCV::NoRegister);
  }
  InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);

  return InstrInfo;
}

// Convenience overload: insert the vsetvli immediately before MI, using MI's
// debug location.
void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                                       const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {
  DebugLoc DL = MI.getDebugLoc();
  insertVSETVLI(MBB, MachineBasicBlock::iterator(&MI), DL, Info, PrevInfo);
}

// Emit the vector configuration pseudo that establishes Info at InsertPt.
// PrevInfo is the state in effect before the insertion point and is used to
// pick the VL-preserving "x0, x0" form when that is safe.
void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo) {

  // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
  // VLMAX.
  if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
      Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addReg(RISCV::X0, RegState::Kill)
        .addImm(Info.encodeVTYPE())
        .addReg(RISCV::VL, RegState::Implicit);
    return;
  }

  // Immediate AVL: emit the vsetivli pseudo.
  if (Info.hasAVLImm()) {
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(Info.getAVLImm())
        .addImm(Info.encodeVTYPE());
    return;
  }

  Register AVLReg = Info.getAVLReg();
  if (AVLReg == RISCV::NoRegister) {
    // We can only use x0, x0 if there's no chance of the vtype change causing
    // the previous vl to become invalid.
    if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
        Info.hasSameVLMAX(PrevInfo)) {
      BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
          .addReg(RISCV::X0, RegState::Define | RegState::Dead)
          .addReg(RISCV::X0, RegState::Kill)
          .addImm(Info.encodeVTYPE())
          .addReg(RISCV::VL, RegState::Implicit);
      return;
    }
    // Otherwise use an AVL of 0 to avoid depending on previous vl.
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(0)
        .addImm(Info.encodeVTYPE());
    return;
  }

  if (AVLReg.isVirtual())
    MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);

  // Use X0 as the DestReg unless AVLReg is X0. We also need to change the
  // opcode if the AVLReg is X0 as they have different register classes for
  // the AVL operand.
  Register DestReg = RISCV::X0;
  unsigned Opcode = RISCV::PseudoVSETVLI;
  if (AVLReg == RISCV::X0) {
    DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
    Opcode = RISCV::PseudoVSETVLIX0;
  }
  BuildMI(MBB, InsertPt, DL, TII->get(Opcode))
      .addReg(DestReg, RegState::Define | RegState::Dead)
      .addReg(AVLReg)
      .addImm(Info.encodeVTYPE());
}

// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
  VSETVLIInfo NewInfo;
  if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
    // vsetivli: operand 1 is the immediate AVL.
    NewInfo.setAVLImm(MI.getOperand(1).getImm());
  } else {
    assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
           MI.getOpcode() == RISCV::PseudoVSETVLIX0);
    // vsetvli: operand 1 is the AVL register.
    Register AVLReg = MI.getOperand(1).getReg();
    assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
           "Can't handle X0, X0 vsetvli yet");
    NewInfo.setAVLReg(AVLReg);
  }
  // Operand 2 is the encoded VTYPE immediate for all three pseudos.
  NewInfo.setVTYPE(MI.getOperand(2).getImm());

  return NewInfo;
}

// Return true if MI is one of the unit-stride or strided load/store pseudos
// listed below and it can execute under CurInfo without a new vsetvli. The
// switch maps the opcode to its element width (EEW); any other opcode yields
// false. Loads additionally require matching tail/mask policy.
// NOTE(review): this helper has external linkage although it appears to be
// used only in this file -- consider making it static; confirm no other
// translation unit declares it.
bool canSkipVSETVLIForLoadStore(const MachineInstr &MI,
                                const VSETVLIInfo &Require,
                                const VSETVLIInfo &CurInfo) {
  // Assigned by every switch arm that does not return.
  unsigned EEW;
  switch (MI.getOpcode()) {
  default:
    return false;
  case RISCV::PseudoVLE8_V_M1:
  case RISCV::PseudoVLE8_V_M1_MASK:
  case RISCV::PseudoVLE8_V_M2:
  case RISCV::PseudoVLE8_V_M2_MASK:
  case RISCV::PseudoVLE8_V_M4:
  case RISCV::PseudoVLE8_V_M4_MASK:
  case RISCV::PseudoVLE8_V_M8:
  case RISCV::PseudoVLE8_V_M8_MASK:
  case RISCV::PseudoVLE8_V_MF2:
  case RISCV::PseudoVLE8_V_MF2_MASK:
  case RISCV::PseudoVLE8_V_MF4:
  case RISCV::PseudoVLE8_V_MF4_MASK:
  case RISCV::PseudoVLE8_V_MF8:
  case RISCV::PseudoVLE8_V_MF8_MASK:
  case RISCV::PseudoVLSE8_V_M1:
  case RISCV::PseudoVLSE8_V_M1_MASK:
  case RISCV::PseudoVLSE8_V_M2:
  case RISCV::PseudoVLSE8_V_M2_MASK:
  case RISCV::PseudoVLSE8_V_M4:
  case RISCV::PseudoVLSE8_V_M4_MASK:
  case RISCV::PseudoVLSE8_V_M8:
  case RISCV::PseudoVLSE8_V_M8_MASK:
  case RISCV::PseudoVLSE8_V_MF2:
  case RISCV::PseudoVLSE8_V_MF2_MASK:
  case RISCV::PseudoVLSE8_V_MF4:
  case RISCV::PseudoVLSE8_V_MF4_MASK:
  case RISCV::PseudoVLSE8_V_MF8:
  case RISCV::PseudoVLSE8_V_MF8_MASK:
  case RISCV::PseudoVSE8_V_M1:
  case RISCV::PseudoVSE8_V_M1_MASK:
  case RISCV::PseudoVSE8_V_M2:
  case RISCV::PseudoVSE8_V_M2_MASK:
  case RISCV::PseudoVSE8_V_M4:
  case RISCV::PseudoVSE8_V_M4_MASK:
  case RISCV::PseudoVSE8_V_M8:
  case RISCV::PseudoVSE8_V_M8_MASK:
  case RISCV::PseudoVSE8_V_MF2:
  case RISCV::PseudoVSE8_V_MF2_MASK:
  case RISCV::PseudoVSE8_V_MF4:
  case RISCV::PseudoVSE8_V_MF4_MASK:
  case RISCV::PseudoVSE8_V_MF8:
  case RISCV::PseudoVSE8_V_MF8_MASK:
  case RISCV::PseudoVSSE8_V_M1:
  case RISCV::PseudoVSSE8_V_M1_MASK:
  case RISCV::PseudoVSSE8_V_M2:
  case RISCV::PseudoVSSE8_V_M2_MASK:
  case RISCV::PseudoVSSE8_V_M4:
  case RISCV::PseudoVSSE8_V_M4_MASK:
  case RISCV::PseudoVSSE8_V_M8:
  case RISCV::PseudoVSSE8_V_M8_MASK:
  case RISCV::PseudoVSSE8_V_MF2:
  case RISCV::PseudoVSSE8_V_MF2_MASK:
  case RISCV::PseudoVSSE8_V_MF4:
  case RISCV::PseudoVSSE8_V_MF4_MASK:
  case RISCV::PseudoVSSE8_V_MF8:
  case RISCV::PseudoVSSE8_V_MF8_MASK:
    EEW = 8;
    break;
  case RISCV::PseudoVLE16_V_M1:
  case RISCV::PseudoVLE16_V_M1_MASK:
  case RISCV::PseudoVLE16_V_M2:
  case RISCV::PseudoVLE16_V_M2_MASK:
  case RISCV::PseudoVLE16_V_M4:
  case RISCV::PseudoVLE16_V_M4_MASK:
  case RISCV::PseudoVLE16_V_M8:
  case RISCV::PseudoVLE16_V_M8_MASK:
  case RISCV::PseudoVLE16_V_MF2:
  case RISCV::PseudoVLE16_V_MF2_MASK:
  case RISCV::PseudoVLE16_V_MF4:
  case RISCV::PseudoVLE16_V_MF4_MASK:
  case RISCV::PseudoVLSE16_V_M1:
  case RISCV::PseudoVLSE16_V_M1_MASK:
  case RISCV::PseudoVLSE16_V_M2:
  case RISCV::PseudoVLSE16_V_M2_MASK:
  case RISCV::PseudoVLSE16_V_M4:
  case RISCV::PseudoVLSE16_V_M4_MASK:
  case RISCV::PseudoVLSE16_V_M8:
  case RISCV::PseudoVLSE16_V_M8_MASK:
  case RISCV::PseudoVLSE16_V_MF2:
  case RISCV::PseudoVLSE16_V_MF2_MASK:
  case RISCV::PseudoVLSE16_V_MF4:
  case RISCV::PseudoVLSE16_V_MF4_MASK:
  case RISCV::PseudoVSE16_V_M1:
  case RISCV::PseudoVSE16_V_M1_MASK:
  case RISCV::PseudoVSE16_V_M2:
  case RISCV::PseudoVSE16_V_M2_MASK:
  case RISCV::PseudoVSE16_V_M4:
  case RISCV::PseudoVSE16_V_M4_MASK:
  case RISCV::PseudoVSE16_V_M8:
  case RISCV::PseudoVSE16_V_M8_MASK:
  case RISCV::PseudoVSE16_V_MF2:
  case RISCV::PseudoVSE16_V_MF2_MASK:
  case RISCV::PseudoVSE16_V_MF4:
  case RISCV::PseudoVSE16_V_MF4_MASK:
  case RISCV::PseudoVSSE16_V_M1:
  case RISCV::PseudoVSSE16_V_M1_MASK:
  case RISCV::PseudoVSSE16_V_M2:
  case RISCV::PseudoVSSE16_V_M2_MASK:
  case RISCV::PseudoVSSE16_V_M4:
  case RISCV::PseudoVSSE16_V_M4_MASK:
  case RISCV::PseudoVSSE16_V_M8:
  case RISCV::PseudoVSSE16_V_M8_MASK:
  case RISCV::PseudoVSSE16_V_MF2:
  case RISCV::PseudoVSSE16_V_MF2_MASK:
  case RISCV::PseudoVSSE16_V_MF4:
  case RISCV::PseudoVSSE16_V_MF4_MASK:
    EEW = 16;
    break;
  case RISCV::PseudoVLE32_V_M1:
  case RISCV::PseudoVLE32_V_M1_MASK:
  case RISCV::PseudoVLE32_V_M2:
  case RISCV::PseudoVLE32_V_M2_MASK:
  case RISCV::PseudoVLE32_V_M4:
  case RISCV::PseudoVLE32_V_M4_MASK:
  case RISCV::PseudoVLE32_V_M8:
  case RISCV::PseudoVLE32_V_M8_MASK:
  case RISCV::PseudoVLE32_V_MF2:
  case RISCV::PseudoVLE32_V_MF2_MASK:
  case RISCV::PseudoVLSE32_V_M1:
  case RISCV::PseudoVLSE32_V_M1_MASK:
  case RISCV::PseudoVLSE32_V_M2:
  case RISCV::PseudoVLSE32_V_M2_MASK:
  case RISCV::PseudoVLSE32_V_M4:
  case RISCV::PseudoVLSE32_V_M4_MASK:
  case RISCV::PseudoVLSE32_V_M8:
  case RISCV::PseudoVLSE32_V_M8_MASK:
  case RISCV::PseudoVLSE32_V_MF2:
  case RISCV::PseudoVLSE32_V_MF2_MASK:
  case RISCV::PseudoVSE32_V_M1:
  case RISCV::PseudoVSE32_V_M1_MASK:
  case RISCV::PseudoVSE32_V_M2:
  case RISCV::PseudoVSE32_V_M2_MASK:
  case RISCV::PseudoVSE32_V_M4:
  case RISCV::PseudoVSE32_V_M4_MASK:
  case RISCV::PseudoVSE32_V_M8:
  case RISCV::PseudoVSE32_V_M8_MASK:
  case RISCV::PseudoVSE32_V_MF2:
  case RISCV::PseudoVSE32_V_MF2_MASK:
  case RISCV::PseudoVSSE32_V_M1:
  case RISCV::PseudoVSSE32_V_M1_MASK:
  case RISCV::PseudoVSSE32_V_M2:
  case RISCV::PseudoVSSE32_V_M2_MASK:
  case RISCV::PseudoVSSE32_V_M4:
  case RISCV::PseudoVSSE32_V_M4_MASK:
  case RISCV::PseudoVSSE32_V_M8:
  case RISCV::PseudoVSSE32_V_M8_MASK:
  case RISCV::PseudoVSSE32_V_MF2:
  case RISCV::PseudoVSSE32_V_MF2_MASK:
    EEW = 32;
    break;
  case RISCV::PseudoVLE64_V_M1:
  case RISCV::PseudoVLE64_V_M1_MASK:
  case RISCV::PseudoVLE64_V_M2:
  case RISCV::PseudoVLE64_V_M2_MASK:
  case RISCV::PseudoVLE64_V_M4:
  case RISCV::PseudoVLE64_V_M4_MASK:
  case RISCV::PseudoVLE64_V_M8:
  case RISCV::PseudoVLE64_V_M8_MASK:
  case RISCV::PseudoVLSE64_V_M1:
  case RISCV::PseudoVLSE64_V_M1_MASK:
  case RISCV::PseudoVLSE64_V_M2:
  case RISCV::PseudoVLSE64_V_M2_MASK:
  case RISCV::PseudoVLSE64_V_M4:
  case RISCV::PseudoVLSE64_V_M4_MASK:
  case RISCV::PseudoVLSE64_V_M8:
  case RISCV::PseudoVLSE64_V_M8_MASK:
  case RISCV::PseudoVSE64_V_M1:
  case RISCV::PseudoVSE64_V_M1_MASK:
  case RISCV::PseudoVSE64_V_M2:
  case RISCV::PseudoVSE64_V_M2_MASK:
  case RISCV::PseudoVSE64_V_M4:
  case RISCV::PseudoVSE64_V_M4_MASK:
  case RISCV::PseudoVSE64_V_M8:
  case RISCV::PseudoVSE64_V_M8_MASK:
  case RISCV::PseudoVSSE64_V_M1:
  case RISCV::PseudoVSSE64_V_M1_MASK:
  case RISCV::PseudoVSSE64_V_M2:
  case RISCV::PseudoVSSE64_V_M2_MASK:
  case RISCV::PseudoVSSE64_V_M4:
  case RISCV::PseudoVSSE64_V_M4_MASK:
  case RISCV::PseudoVSSE64_V_M8:
  case RISCV::PseudoVSSE64_V_M8_MASK:
    EEW = 64;
    break;
  }

  // Stores can ignore the tail and mask policies.
  // An instruction with no explicit defs is treated as a store here.
  const bool StoreOp = MI.getNumExplicitDefs() == 0;
  if (!StoreOp && !CurInfo.hasSamePolicy(Require))
    return false;

  return CurInfo.isCompatibleWithLoadStoreEEW(EEW, Require);
}

/// Return true if a VSETVLI is required to transition from CurInfo to Require
/// before MI. Require corresponds to the result of computeInfoForInstr(MI...)
/// *before* we clear VLOp in phase3. We can't recompute and assert it here due
/// to that mutation.
909 bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI, 910 const VSETVLIInfo &Require, 911 const VSETVLIInfo &CurInfo) const { 912 if (CurInfo.isCompatible(MI, Require)) 913 return false; 914 915 // We didn't find a compatible value. If our AVL is a virtual register, 916 // it might be defined by a VSET(I)VLI. If it has the same VLMAX we need 917 // and the last VL/VTYPE we observed is the same, we don't need a 918 // VSETVLI here. 919 if (!CurInfo.isUnknown() && Require.hasAVLReg() && 920 Require.getAVLReg().isVirtual() && !CurInfo.hasSEWLMULRatioOnly() && 921 CurInfo.hasCompatibleVTYPE(MI, Require)) { 922 if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) { 923 if (isVectorConfigInstr(*DefMI)) { 924 VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI); 925 if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVLMAX(CurInfo)) 926 return false; 927 } 928 } 929 } 930 931 // If this is a unit-stride or strided load/store, we may be able to use the 932 // EMUL=(EEW/SEW)*LMUL relationship to avoid changing VTYPE. 933 return CurInfo.isUnknown() || !canSkipVSETVLIForLoadStore(MI, Require, CurInfo); 934 } 935 936 bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB) { 937 bool HadVectorOp = false; 938 939 BlockData &BBInfo = BlockInfo[MBB.getNumber()]; 940 BBInfo.Change = BBInfo.Pred; 941 for (const MachineInstr &MI : MBB) { 942 // If this is an explicit VSETVLI or VSETIVLI, update our state. 943 if (isVectorConfigInstr(MI)) { 944 HadVectorOp = true; 945 BBInfo.Change = getInfoForVSETVLI(MI); 946 continue; 947 } 948 949 uint64_t TSFlags = MI.getDesc().TSFlags; 950 if (RISCVII::hasSEWOp(TSFlags)) { 951 HadVectorOp = true; 952 953 VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI); 954 955 if (!BBInfo.Change.isValid()) { 956 BBInfo.Change = NewInfo; 957 } else { 958 // If this instruction isn't compatible with the previous VL/VTYPE 959 // we need to insert a VSETVLI. 
960 // NOTE: We only do this if the vtype we're comparing against was 961 // created in this block. We need the first and third phase to treat 962 // the store the same way. 963 if (needVSETVLI(MI, NewInfo, BBInfo.Change)) 964 BBInfo.Change = NewInfo; 965 } 966 } 967 968 // If this is something that updates VL/VTYPE that we don't know about, set 969 // the state to unknown. 970 if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) || 971 MI.modifiesRegister(RISCV::VTYPE)) 972 BBInfo.Change = VSETVLIInfo::getUnknown(); 973 } 974 975 return HadVectorOp; 976 } 977 978 void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) { 979 980 BlockData &BBInfo = BlockInfo[MBB.getNumber()]; 981 982 BBInfo.InQueue = false; 983 984 VSETVLIInfo InInfo; 985 if (MBB.pred_empty()) { 986 // There are no predecessors, so use the default starting status. 987 InInfo.setUnknown(); 988 } else { 989 for (MachineBasicBlock *P : MBB.predecessors()) 990 InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit); 991 } 992 993 // If we don't have any valid predecessor value, wait until we do. 994 if (!InInfo.isValid()) 995 return; 996 997 // If no change, no need to rerun block 998 if (InInfo == BBInfo.Pred) 999 return; 1000 1001 BBInfo.Pred = InInfo; 1002 LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB) 1003 << " changed to " << BBInfo.Pred << "\n"); 1004 1005 // Note: It's tempting to cache the state changes here, but due to the 1006 // compatibility checks performed a blocks output state can change based on 1007 // the input state. To cache, we'd have to add logic for finding 1008 // never-compatible state changes. 1009 computeVLVTYPEChanges(MBB); 1010 VSETVLIInfo TmpStatus = BBInfo.Change; 1011 1012 // If the new exit value matches the old exit value, we don't need to revisit 1013 // any blocks. 
1014 if (BBInfo.Exit == TmpStatus) 1015 return; 1016 1017 BBInfo.Exit = TmpStatus; 1018 LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB) 1019 << " changed to " << BBInfo.Exit << "\n"); 1020 1021 // Add the successors to the work list so we can propagate the changed exit 1022 // status. 1023 for (MachineBasicBlock *S : MBB.successors()) 1024 if (!BlockInfo[S->getNumber()].InQueue) 1025 WorkList.push(S); 1026 } 1027 1028 // If we weren't able to prove a vsetvli was directly unneeded, it might still 1029 // be unneeded if the AVL is a phi node where all incoming values are VL 1030 // outputs from the last VSETVLI in their respective basic blocks. 1031 bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require, 1032 const MachineBasicBlock &MBB) const { 1033 if (DisableInsertVSETVLPHIOpt) 1034 return true; 1035 1036 if (!Require.hasAVLReg()) 1037 return true; 1038 1039 Register AVLReg = Require.getAVLReg(); 1040 if (!AVLReg.isVirtual()) 1041 return true; 1042 1043 // We need the AVL to be produce by a PHI node in this basic block. 1044 MachineInstr *PHI = MRI->getVRegDef(AVLReg); 1045 if (!PHI || PHI->getOpcode() != RISCV::PHI || PHI->getParent() != &MBB) 1046 return true; 1047 1048 for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps; 1049 PHIOp += 2) { 1050 Register InReg = PHI->getOperand(PHIOp).getReg(); 1051 MachineBasicBlock *PBB = PHI->getOperand(PHIOp + 1).getMBB(); 1052 const BlockData &PBBInfo = BlockInfo[PBB->getNumber()]; 1053 // If the exit from the predecessor has the VTYPE we are looking for 1054 // we might be able to avoid a VSETVLI. 1055 if (PBBInfo.Exit.isUnknown() || !PBBInfo.Exit.hasSameVTYPE(Require)) 1056 return true; 1057 1058 // We need the PHI input to the be the output of a VSET(I)VLI. 1059 MachineInstr *DefMI = MRI->getVRegDef(InReg); 1060 if (!DefMI || !isVectorConfigInstr(*DefMI)) 1061 return true; 1062 1063 // We found a VSET(I)VLI make sure it matches the output of the 1064 // predecessor block. 
1065 VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI); 1066 if (!DefInfo.hasSameAVL(PBBInfo.Exit) || 1067 !DefInfo.hasSameVTYPE(PBBInfo.Exit)) 1068 return true; 1069 } 1070 1071 // If all the incoming values to the PHI checked out, we don't need 1072 // to insert a VSETVLI. 1073 return false; 1074 } 1075 1076 void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) { 1077 VSETVLIInfo CurInfo; 1078 for (MachineInstr &MI : MBB) { 1079 // If this is an explicit VSETVLI or VSETIVLI, update our state. 1080 if (isVectorConfigInstr(MI)) { 1081 // Conservatively, mark the VL and VTYPE as live. 1082 assert(MI.getOperand(3).getReg() == RISCV::VL && 1083 MI.getOperand(4).getReg() == RISCV::VTYPE && 1084 "Unexpected operands where VL and VTYPE should be"); 1085 MI.getOperand(3).setIsDead(false); 1086 MI.getOperand(4).setIsDead(false); 1087 CurInfo = getInfoForVSETVLI(MI); 1088 continue; 1089 } 1090 1091 uint64_t TSFlags = MI.getDesc().TSFlags; 1092 if (RISCVII::hasSEWOp(TSFlags)) { 1093 VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI); 1094 if (RISCVII::hasVLOp(TSFlags)) { 1095 MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI)); 1096 if (VLOp.isReg()) { 1097 // Erase the AVL operand from the instruction. 1098 VLOp.setReg(RISCV::NoRegister); 1099 VLOp.setIsKill(false); 1100 } 1101 MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false, 1102 /*isImp*/ true)); 1103 } 1104 MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false, 1105 /*isImp*/ true)); 1106 1107 if (!CurInfo.isValid()) { 1108 // We haven't found any vector instructions or VL/VTYPE changes yet, 1109 // use the predecessor information. 
1110 CurInfo = BlockInfo[MBB.getNumber()].Pred; 1111 assert(CurInfo.isValid() && "Expected a valid predecessor state."); 1112 if (needVSETVLI(MI, NewInfo, CurInfo)) { 1113 // If this is the first implicit state change, and the state change 1114 // requested can be proven to produce the same register contents, we 1115 // can skip emitting the actual state change and continue as if we 1116 // had since we know the GPR result of the implicit state change 1117 // wouldn't be used and VL/VTYPE registers are correct. Note that 1118 // we *do* need to model the state as if it changed as while the 1119 // register contents are unchanged, the abstract model can change. 1120 if (needVSETVLIPHI(NewInfo, MBB)) 1121 insertVSETVLI(MBB, MI, NewInfo, CurInfo); 1122 CurInfo = NewInfo; 1123 } 1124 } else { 1125 // If this instruction isn't compatible with the previous VL/VTYPE 1126 // we need to insert a VSETVLI. 1127 // NOTE: We can't use predecessor information for the store. We must 1128 // treat it the same as the first phase so that we produce the correct 1129 // vl/vtype for succesor blocks. 1130 if (needVSETVLI(MI, NewInfo, CurInfo)) { 1131 insertVSETVLI(MBB, MI, NewInfo, CurInfo); 1132 CurInfo = NewInfo; 1133 } 1134 } 1135 } 1136 1137 // If this is something that updates VL/VTYPE that we don't know about, set 1138 // the state to unknown. 1139 if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) || 1140 MI.modifiesRegister(RISCV::VTYPE)) { 1141 CurInfo = VSETVLIInfo::getUnknown(); 1142 } 1143 } 1144 1145 // If we reach the end of the block and our current info doesn't match the 1146 // expected info, insert a vsetvli to correct. 1147 if (!UseStrictAsserts) { 1148 const VSETVLIInfo &ExitInfo = BlockInfo[MBB.getNumber()].Exit; 1149 if (CurInfo.isValid() && ExitInfo.isValid() && !ExitInfo.isUnknown() && 1150 CurInfo != ExitInfo) { 1151 // Note there's an implicit assumption here that terminators never use 1152 // or modify VL or VTYPE. 
Also, fallthrough will return end(). 1153 auto InsertPt = MBB.getFirstInstrTerminator(); 1154 insertVSETVLI(MBB, InsertPt, MBB.findDebugLoc(InsertPt), ExitInfo, 1155 CurInfo); 1156 CurInfo = ExitInfo; 1157 } 1158 } 1159 1160 if (UseStrictAsserts && CurInfo.isValid()) { 1161 const auto &Info = BlockInfo[MBB.getNumber()]; 1162 if (CurInfo != Info.Exit) { 1163 LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n"); 1164 LLVM_DEBUG(dbgs() << " begin state: " << Info.Pred << "\n"); 1165 LLVM_DEBUG(dbgs() << " expected end state: " << Info.Exit << "\n"); 1166 LLVM_DEBUG(dbgs() << " actual end state: " << CurInfo << "\n"); 1167 } 1168 assert(CurInfo == Info.Exit && 1169 "InsertVSETVLI dataflow invariant violated"); 1170 } 1171 } 1172 1173 void RISCVInsertVSETVLI::doLocalPrepass(MachineBasicBlock &MBB) { 1174 VSETVLIInfo CurInfo = VSETVLIInfo::getUnknown(); 1175 for (MachineInstr &MI : MBB) { 1176 // If this is an explicit VSETVLI or VSETIVLI, update our state. 1177 if (isVectorConfigInstr(MI)) { 1178 CurInfo = getInfoForVSETVLI(MI); 1179 continue; 1180 } 1181 1182 const uint64_t TSFlags = MI.getDesc().TSFlags; 1183 if (isScalarMoveInstr(MI)) { 1184 assert(RISCVII::hasSEWOp(TSFlags) && RISCVII::hasVLOp(TSFlags)); 1185 const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI); 1186 1187 // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and 1188 // VL > 0. We can discard the user requested AVL and just use the last 1189 // one if we can prove it equally zero. This removes a vsetvli entirely 1190 // if the types match or allows use of cheaper avl preserving variant 1191 // if VLMAX doesn't change. If VLMAX might change, we couldn't use 1192 // the 'vsetvli x0, x0, vtype" variant, so we avoid the transform to 1193 // prevent extending live range of an avl register operand. 1194 // TODO: We can probably relax this for immediates. 
1195 if (((CurInfo.hasNonZeroAVL() && NewInfo.hasNonZeroAVL()) || 1196 (CurInfo.hasZeroAVL() && NewInfo.hasZeroAVL())) && 1197 NewInfo.hasSameVLMAX(CurInfo)) { 1198 MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI)); 1199 if (CurInfo.hasAVLImm()) 1200 VLOp.ChangeToImmediate(CurInfo.getAVLImm()); 1201 else 1202 VLOp.ChangeToRegister(CurInfo.getAVLReg(), /*IsDef*/ false); 1203 CurInfo = computeInfoForInstr(MI, TSFlags, MRI); 1204 continue; 1205 } 1206 } 1207 1208 if (RISCVII::hasSEWOp(TSFlags)) { 1209 if (RISCVII::hasVLOp(TSFlags)) { 1210 const auto Require = computeInfoForInstr(MI, TSFlags, MRI); 1211 // If the AVL is the result of a previous vsetvli which has the 1212 // same AVL and VLMAX as our current state, we can reuse the AVL 1213 // from the current state for the new one. This allows us to 1214 // generate 'vsetvli x0, x0, vtype" or possible skip the transition 1215 // entirely. 1216 if (!CurInfo.isUnknown() && Require.hasAVLReg() && 1217 Require.getAVLReg().isVirtual()) { 1218 if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) { 1219 if (isVectorConfigInstr(*DefMI)) { 1220 VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI); 1221 if (DefInfo.hasSameAVL(CurInfo) && 1222 DefInfo.hasSameVLMAX(CurInfo)) { 1223 MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI)); 1224 if (CurInfo.hasAVLImm()) 1225 VLOp.ChangeToImmediate(CurInfo.getAVLImm()); 1226 else { 1227 MRI->clearKillFlags(CurInfo.getAVLReg()); 1228 VLOp.ChangeToRegister(CurInfo.getAVLReg(), /*IsDef*/ false); 1229 } 1230 CurInfo = computeInfoForInstr(MI, TSFlags, MRI); 1231 continue; 1232 } 1233 } 1234 } 1235 } 1236 1237 // If AVL is defined by a vsetvli with the same VLMAX, we can 1238 // replace the AVL operand with the AVL of the defining vsetvli. 1239 // We avoid general register AVLs to avoid extending live ranges 1240 // without being sure we can kill the original source reg entirely. 1241 // TODO: We can ignore policy bits here, we only need VL to be the same. 
1242 if (Require.hasAVLReg() && Require.getAVLReg().isVirtual()) { 1243 if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) { 1244 if (isVectorConfigInstr(*DefMI)) { 1245 VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI); 1246 if (DefInfo.hasSameVLMAX(Require) && 1247 (DefInfo.hasAVLImm() || DefInfo.getAVLReg() == RISCV::X0)) { 1248 MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI)); 1249 if (DefInfo.hasAVLImm()) 1250 VLOp.ChangeToImmediate(DefInfo.getAVLImm()); 1251 else 1252 VLOp.ChangeToRegister(DefInfo.getAVLReg(), /*IsDef*/ false); 1253 CurInfo = computeInfoForInstr(MI, TSFlags, MRI); 1254 continue; 1255 } 1256 } 1257 } 1258 } 1259 } 1260 CurInfo = computeInfoForInstr(MI, TSFlags, MRI); 1261 continue; 1262 } 1263 1264 // If this is something that updates VL/VTYPE that we don't know about, 1265 // set the state to unknown. 1266 if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) || 1267 MI.modifiesRegister(RISCV::VTYPE)) 1268 CurInfo = VSETVLIInfo::getUnknown(); 1269 } 1270 } 1271 1272 /// Return true if the VL value configured must be equal to the requested one. 1273 static bool hasFixedResult(const VSETVLIInfo &Info, const RISCVSubtarget &ST) { 1274 if (!Info.hasAVLImm()) 1275 // VLMAX is always the same value. 1276 // TODO: Could extend to other registers by looking at the associated vreg 1277 // def placement. 
1278 return RISCV::X0 == Info.getAVLReg(); 1279 1280 unsigned AVL = Info.getAVLImm(); 1281 unsigned SEW = Info.getSEW(); 1282 unsigned AVLInBits = AVL * SEW; 1283 1284 unsigned LMul; 1285 bool Fractional; 1286 std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(Info.getVLMUL()); 1287 1288 if (Fractional) 1289 return ST.getRealMinVLen() / LMul >= AVLInBits; 1290 return ST.getRealMinVLen() * LMul >= AVLInBits; 1291 } 1292 1293 /// Perform simple partial redundancy elimination of the VSETVLI instructions 1294 /// we're about to insert by looking for cases where we can PRE from the 1295 /// beginning of one block to the end of one of its predecessors. Specifically, 1296 /// this is geared to catch the common case of a fixed length vsetvl in a single 1297 /// block loop when it could execute once in the preheader instead. 1298 void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) { 1299 const MachineFunction &MF = *MBB.getParent(); 1300 const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>(); 1301 1302 if (!BlockInfo[MBB.getNumber()].Pred.isUnknown()) 1303 return; 1304 1305 MachineBasicBlock *UnavailablePred = nullptr; 1306 VSETVLIInfo AvailableInfo; 1307 for (MachineBasicBlock *P : MBB.predecessors()) { 1308 const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit; 1309 if (PredInfo.isUnknown()) { 1310 if (UnavailablePred) 1311 return; 1312 UnavailablePred = P; 1313 } else if (!AvailableInfo.isValid()) { 1314 AvailableInfo = PredInfo; 1315 } else if (AvailableInfo != PredInfo) { 1316 return; 1317 } 1318 } 1319 1320 // Unreachable, single pred, or full redundancy. Note that FRE is handled by 1321 // phase 3. 1322 if (!UnavailablePred || !AvailableInfo.isValid()) 1323 return; 1324 1325 // Critical edge - TODO: consider splitting? 1326 if (UnavailablePred->succ_size() != 1) 1327 return; 1328 1329 // If VL can be less than AVL, then we can't reduce the frequency of exec. 
1330 if (!hasFixedResult(AvailableInfo, ST)) 1331 return; 1332 1333 // Does it actually let us remove an implicit transition in MBB? 1334 bool Found = false; 1335 for (auto &MI : MBB) { 1336 if (isVectorConfigInstr(MI)) 1337 return; 1338 1339 const uint64_t TSFlags = MI.getDesc().TSFlags; 1340 if (RISCVII::hasSEWOp(TSFlags)) { 1341 if (AvailableInfo != computeInfoForInstr(MI, TSFlags, MRI)) 1342 return; 1343 Found = true; 1344 break; 1345 } 1346 } 1347 if (!Found) 1348 return; 1349 1350 // Finally, update both data flow state and insert the actual vsetvli. 1351 // Doing both keeps the code in sync with the dataflow results, which 1352 // is critical for correctness of phase 3. 1353 auto OldInfo = BlockInfo[UnavailablePred->getNumber()].Exit; 1354 LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to " 1355 << UnavailablePred->getName() << " with state " 1356 << AvailableInfo << "\n"); 1357 BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo; 1358 BlockInfo[MBB.getNumber()].Pred = AvailableInfo; 1359 1360 // Note there's an implicit assumption here that terminators never use 1361 // or modify VL or VTYPE. Also, fallthrough will return end(). 1362 auto InsertPt = UnavailablePred->getFirstInstrTerminator(); 1363 insertVSETVLI(*UnavailablePred, InsertPt, 1364 UnavailablePred->findDebugLoc(InsertPt), 1365 AvailableInfo, OldInfo); 1366 } 1367 1368 void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) { 1369 MachineInstr *PrevMI = nullptr; 1370 bool UsedVL = false, UsedVTYPE = false; 1371 SmallVector<MachineInstr*> ToDelete; 1372 for (MachineInstr &MI : MBB) { 1373 // Note: Must be *before* vsetvli handling to account for config cases 1374 // which only change some subfields. 
1375 if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VL)) 1376 UsedVL = true; 1377 if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VTYPE)) 1378 UsedVTYPE = true; 1379 1380 if (!isVectorConfigInstr(MI)) 1381 continue; 1382 1383 if (PrevMI) { 1384 if (!UsedVL && !UsedVTYPE) { 1385 ToDelete.push_back(PrevMI); 1386 // fallthrough 1387 } else if (!UsedVTYPE && isVLPreservingConfig(MI)) { 1388 // Note: `vsetvli x0, x0, vtype' is the canonical instruction 1389 // for this case. If you find yourself wanting to add other forms 1390 // to this "unused VTYPE" case, we're probably missing a 1391 // canonicalization earlier. 1392 // Note: We don't need to explicitly check vtype compatibility 1393 // here because this form is only legal (per ISA) when not 1394 // changing VL. 1395 PrevMI->getOperand(2).setImm(MI.getOperand(2).getImm()); 1396 ToDelete.push_back(&MI); 1397 // Leave PrevMI unchanged 1398 continue; 1399 } 1400 } 1401 PrevMI = &MI; 1402 UsedVL = false; 1403 UsedVTYPE = false; 1404 Register VRegDef = MI.getOperand(0).getReg(); 1405 if (VRegDef != RISCV::X0 && 1406 !(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef))) 1407 UsedVL = true; 1408 } 1409 1410 for (auto *MI : ToDelete) 1411 MI->eraseFromParent(); 1412 } 1413 1414 void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) { 1415 const MachineFunction *MF = MBB.getParent(); 1416 const RISCVInstrInfo *TII = MF->getSubtarget<RISCVSubtarget>().getInstrInfo(); 1417 1418 for (auto I = MBB.begin(), E = MBB.end(); I != E;) { 1419 MachineInstr &MI = *I++; 1420 if (TII->isFaultFirstLoad(MI)) { 1421 Register VLOutput = MI.getOperand(1).getReg(); 1422 if (!MRI->use_nodbg_empty(VLOutput)) 1423 BuildMI(MBB, I, MI.getDebugLoc(), TII->get(RISCV::PseudoReadVL), 1424 VLOutput); 1425 // We don't use the vl output of the VLEFF/VLSEGFF anymore. 
1426 MI.getOperand(1).setReg(RISCV::X0); 1427 } 1428 } 1429 } 1430 1431 bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) { 1432 // Skip if the vector extension is not enabled. 1433 const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>(); 1434 if (!ST.hasVInstructions()) 1435 return false; 1436 1437 LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n"); 1438 1439 TII = ST.getInstrInfo(); 1440 MRI = &MF.getRegInfo(); 1441 1442 assert(BlockInfo.empty() && "Expect empty block infos"); 1443 BlockInfo.resize(MF.getNumBlockIDs()); 1444 1445 // Scan the block locally for cases where we can mutate the operands 1446 // of the instructions to reduce state transitions. Critically, this 1447 // must be done before we start propagating data flow states as these 1448 // transforms are allowed to change the contents of VTYPE and VL so 1449 // long as the semantics of the program stays the same. 1450 for (MachineBasicBlock &MBB : MF) 1451 doLocalPrepass(MBB); 1452 1453 bool HaveVectorOp = false; 1454 1455 // Phase 1 - determine how VL/VTYPE are affected by the each block. 1456 for (const MachineBasicBlock &MBB : MF) { 1457 HaveVectorOp |= computeVLVTYPEChanges(MBB); 1458 // Initial exit state is whatever change we found in the block. 1459 BlockData &BBInfo = BlockInfo[MBB.getNumber()]; 1460 BBInfo.Exit = BBInfo.Change; 1461 LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB) 1462 << " is " << BBInfo.Exit << "\n"); 1463 1464 } 1465 1466 // If we didn't find any instructions that need VSETVLI, we're done. 1467 if (!HaveVectorOp) { 1468 BlockInfo.clear(); 1469 return false; 1470 } 1471 1472 // Phase 2 - determine the exit VL/VTYPE from each block. We add all 1473 // blocks to the list here, but will also add any that need to be revisited 1474 // during Phase 2 processing. 
1475 for (const MachineBasicBlock &MBB : MF) { 1476 WorkList.push(&MBB); 1477 BlockInfo[MBB.getNumber()].InQueue = true; 1478 } 1479 while (!WorkList.empty()) { 1480 const MachineBasicBlock &MBB = *WorkList.front(); 1481 WorkList.pop(); 1482 computeIncomingVLVTYPE(MBB); 1483 } 1484 1485 // Perform partial redundancy elimination of vsetvli transitions. 1486 for (MachineBasicBlock &MBB : MF) 1487 doPRE(MBB); 1488 1489 // Phase 3 - add any vsetvli instructions needed in the block. Use the 1490 // Phase 2 information to avoid adding vsetvlis before the first vector 1491 // instruction in the block if the VL/VTYPE is satisfied by its 1492 // predecessors. 1493 for (MachineBasicBlock &MBB : MF) 1494 emitVSETVLIs(MBB); 1495 1496 // Now that all vsetvlis are explicit, go through and do block local 1497 // DSE and peephole based demanded fields based transforms. Note that 1498 // this *must* be done outside the main dataflow so long as we allow 1499 // any cross block analysis within the dataflow. We can't have both 1500 // demanded fields based mutation and non-local analysis in the 1501 // dataflow at the same time without introducing inconsistencies. 1502 for (MachineBasicBlock &MBB : MF) 1503 doLocalPostpass(MBB); 1504 1505 // Once we're fully done rewriting all the instructions, do a final pass 1506 // through to check for VSETVLIs which write to an unused destination. 1507 // For the non X0, X0 variant, we can replace the destination register 1508 // with X0 to reduce register pressure. This is really a generic 1509 // optimization which can be applied to any dead def (TODO: generalize). 
1510 for (MachineBasicBlock &MBB : MF) { 1511 for (MachineInstr &MI : MBB) { 1512 if (MI.getOpcode() == RISCV::PseudoVSETVLI || 1513 MI.getOpcode() == RISCV::PseudoVSETIVLI) { 1514 Register VRegDef = MI.getOperand(0).getReg(); 1515 if (VRegDef != RISCV::X0 && MRI->use_nodbg_empty(VRegDef)) 1516 MI.getOperand(0).setReg(RISCV::X0); 1517 } 1518 } 1519 } 1520 1521 // Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output 1522 // of VLEFF/VLSEGFF. 1523 for (MachineBasicBlock &MBB : MF) 1524 insertReadVL(MBB); 1525 1526 BlockInfo.clear(); 1527 return HaveVectorOp; 1528 } 1529 1530 /// Returns an instance of the Insert VSETVLI pass. 1531 FunctionPass *llvm::createRISCVInsertVSETVLIPass() { 1532 return new RISCVInsertVSETVLI(); 1533 } 1534