1 //===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISCV ------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines an instruction selector for the RISCV target. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "RISCVISelDAGToDAG.h" 14 #include "MCTargetDesc/RISCVMCTargetDesc.h" 15 #include "MCTargetDesc/RISCVMatInt.h" 16 #include "RISCVISelLowering.h" 17 #include "RISCVMachineFunctionInfo.h" 18 #include "llvm/CodeGen/MachineFrameInfo.h" 19 #include "llvm/IR/IntrinsicsRISCV.h" 20 #include "llvm/Support/Alignment.h" 21 #include "llvm/Support/Debug.h" 22 #include "llvm/Support/KnownBits.h" 23 #include "llvm/Support/MathExtras.h" 24 #include "llvm/Support/raw_ostream.h" 25 26 using namespace llvm; 27 28 #define DEBUG_TYPE "riscv-isel" 29 30 namespace llvm { 31 namespace RISCV { 32 #define GET_RISCVVSSEGTable_IMPL 33 #define GET_RISCVVLSEGTable_IMPL 34 #define GET_RISCVVLXSEGTable_IMPL 35 #define GET_RISCVVSXSEGTable_IMPL 36 #define GET_RISCVVLETable_IMPL 37 #define GET_RISCVVSETable_IMPL 38 #define GET_RISCVVLXTable_IMPL 39 #define GET_RISCVVSXTable_IMPL 40 #define GET_RISCVMaskedPseudosTable_IMPL 41 #include "RISCVGenSearchableTables.inc" 42 } // namespace RISCV 43 } // namespace llvm 44 45 void RISCVDAGToDAGISel::PreprocessISelDAG() { 46 for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), 47 E = CurDAG->allnodes_end(); 48 I != E;) { 49 SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues. 50 51 // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point 52 // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden. 53 if (N->getOpcode() == ISD::SPLAT_VECTOR) { 54 MVT VT = N->getSimpleValueType(0); 55 unsigned Opc = 56 VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL; 57 SDLoc DL(N); 58 SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT()); 59 SDValue Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), 60 N->getOperand(0), VL); 61 62 --I; 63 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); 64 ++I; 65 CurDAG->DeleteNode(N); 66 continue; 67 } 68 69 // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector 70 // load. Done after lowering and combining so that we have a chance to 71 // optimize this to VMV_V_X_VL when the upper bits aren't needed. 72 if (N->getOpcode() != RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) 73 continue; 74 75 assert(N->getNumOperands() == 4 && "Unexpected number of operands"); 76 MVT VT = N->getSimpleValueType(0); 77 SDValue Passthru = N->getOperand(0); 78 SDValue Lo = N->getOperand(1); 79 SDValue Hi = N->getOperand(2); 80 SDValue VL = N->getOperand(3); 81 assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() && 82 Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 && 83 "Unexpected VTs!"); 84 MachineFunction &MF = CurDAG->getMachineFunction(); 85 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>(); 86 SDLoc DL(N); 87 88 // We use the same frame index we use for moving two i32s into 64-bit FPR. 89 // This is an analogous operation. 90 int FI = FuncInfo->getMoveF64FrameIndex(MF); 91 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 92 const TargetLowering &TLI = CurDAG->getTargetLoweringInfo(); 93 SDValue StackSlot = 94 CurDAG->getFrameIndex(FI, TLI.getPointerTy(CurDAG->getDataLayout())); 95 96 SDValue Chain = CurDAG->getEntryNode(); 97 Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8)); 98 99 SDValue OffsetSlot = 100 CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), DL); 101 Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4), 102 Align(8)); 103 104 Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); 105 106 SDVTList VTs = CurDAG->getVTList({VT, MVT::Other}); 107 SDValue IntID = 108 CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64); 109 SDValue Ops[] = {Chain, 110 IntID, 111 Passthru, 112 StackSlot, 113 CurDAG->getRegister(RISCV::X0, MVT::i64), 114 VL}; 115 116 SDValue Result = CurDAG->getMemIntrinsicNode( 117 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MVT::i64, MPI, Align(8), 118 MachineMemOperand::MOLoad); 119 120 // We're about to replace all uses of the SPLAT_VECTOR_SPLIT_I64 with the 121 // vlse we created. This will cause general havok on the dag because 122 // anything below the conversion could be folded into other existing nodes. 123 // To avoid invalidating 'I', back it up to the convert node. 124 --I; 125 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); 126 127 // Now that we did that, the node is dead. Increment the iterator to the 128 // next node to process, then delete N. 129 ++I; 130 CurDAG->DeleteNode(N); 131 } 132 } 133 134 void RISCVDAGToDAGISel::PostprocessISelDAG() { 135 HandleSDNode Dummy(CurDAG->getRoot()); 136 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); 137 138 bool MadeChange = false; 139 while (Position != CurDAG->allnodes_begin()) { 140 SDNode *N = &*--Position; 141 // Skip dead nodes and any non-machine opcodes. 142 if (N->use_empty() || !N->isMachineOpcode()) 143 continue; 144 145 MadeChange |= doPeepholeSExtW(N); 146 MadeChange |= doPeepholeLoadStoreADDI(N); 147 MadeChange |= doPeepholeMaskedRVV(N); 148 } 149 150 CurDAG->setRoot(Dummy.getValue()); 151 152 if (MadeChange) 153 CurDAG->RemoveDeadNodes(); 154 } 155 156 // Returns true if N is a MachineSDNode that has a reg and simm12 memory 157 // operand. The indices of the base pointer and offset are returned in BaseOpIdx 158 // and OffsetOpIdx. 159 static bool hasMemOffset(SDNode *N, unsigned &BaseOpIdx, 160 unsigned &OffsetOpIdx) { 161 switch (N->getMachineOpcode()) { 162 case RISCV::LB: 163 case RISCV::LH: 164 case RISCV::LW: 165 case RISCV::LBU: 166 case RISCV::LHU: 167 case RISCV::LWU: 168 case RISCV::LD: 169 case RISCV::FLH: 170 case RISCV::FLW: 171 case RISCV::FLD: 172 BaseOpIdx = 0; 173 OffsetOpIdx = 1; 174 return true; 175 case RISCV::SB: 176 case RISCV::SH: 177 case RISCV::SW: 178 case RISCV::SD: 179 case RISCV::FSH: 180 case RISCV::FSW: 181 case RISCV::FSD: 182 BaseOpIdx = 1; 183 OffsetOpIdx = 2; 184 return true; 185 } 186 187 return false; 188 } 189 190 static SDNode *selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, 191 RISCVMatInt::InstSeq &Seq) { 192 SDNode *Result = nullptr; 193 SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT); 194 for (RISCVMatInt::Inst &Inst : Seq) { 195 SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, VT); 196 switch (Inst.getOpndKind()) { 197 case RISCVMatInt::Imm: 198 Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SDImm); 199 break; 200 case RISCVMatInt::RegX0: 201 Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SrcReg, 202 CurDAG->getRegister(RISCV::X0, VT)); 203 break; 204 case RISCVMatInt::RegReg: 205 Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SrcReg, SrcReg); 206 break; 207 case RISCVMatInt::RegImm: 208 Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SrcReg, SDImm); 209 break; 210 } 211 212 // Only the first instruction has X0 as its source. 213 SrcReg = SDValue(Result, 0); 214 } 215 216 return Result; 217 } 218 219 static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, 220 int64_t Imm, const RISCVSubtarget &Subtarget) { 221 RISCVMatInt::InstSeq Seq = 222 RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits()); 223 224 return selectImmSeq(CurDAG, DL, VT, Seq); 225 } 226 227 static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, 228 unsigned NF, RISCVII::VLMUL LMUL) { 229 static const unsigned M1TupleRegClassIDs[] = { 230 RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID, 231 RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID, 232 RISCV::VRN8M1RegClassID}; 233 static const unsigned M2TupleRegClassIDs[] = {RISCV::VRN2M2RegClassID, 234 RISCV::VRN3M2RegClassID, 235 RISCV::VRN4M2RegClassID}; 236 237 assert(Regs.size() >= 2 && Regs.size() <= 8); 238 239 unsigned RegClassID; 240 unsigned SubReg0; 241 switch (LMUL) { 242 default: 243 llvm_unreachable("Invalid LMUL."); 244 case RISCVII::VLMUL::LMUL_F8: 245 case RISCVII::VLMUL::LMUL_F4: 246 case RISCVII::VLMUL::LMUL_F2: 247 case RISCVII::VLMUL::LMUL_1: 248 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7, 249 "Unexpected subreg numbering"); 250 SubReg0 = RISCV::sub_vrm1_0; 251 RegClassID = M1TupleRegClassIDs[NF - 2]; 252 break; 253 case RISCVII::VLMUL::LMUL_2: 254 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3, 255 "Unexpected subreg numbering"); 256 SubReg0 = RISCV::sub_vrm2_0; 257 RegClassID = M2TupleRegClassIDs[NF - 2]; 258 break; 259 case RISCVII::VLMUL::LMUL_4: 260 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1, 261 "Unexpected subreg numbering"); 262 SubReg0 = RISCV::sub_vrm4_0; 263 RegClassID = RISCV::VRN2M4RegClassID; 264 break; 265 } 266 267 SDLoc DL(Regs[0]); 268 SmallVector<SDValue, 8> Ops; 269 270 Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32)); 271 272 for (unsigned I = 0; I < Regs.size(); ++I) { 273 Ops.push_back(Regs[I]); 274 Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32)); 275 } 276 SDNode *N = 277 CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops); 278 return SDValue(N, 0); 279 } 280 281 void RISCVDAGToDAGISel::addVectorLoadStoreOperands( 282 SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp, 283 bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands, 284 bool IsLoad, MVT *IndexVT) { 285 SDValue Chain = Node->getOperand(0); 286 SDValue Glue; 287 288 SDValue Base; 289 SelectBaseAddr(Node->getOperand(CurOp++), Base); 290 Operands.push_back(Base); // Base pointer. 291 292 if (IsStridedOrIndexed) { 293 Operands.push_back(Node->getOperand(CurOp++)); // Index. 294 if (IndexVT) 295 *IndexVT = Operands.back()->getSimpleValueType(0); 296 } 297 298 if (IsMasked) { 299 // Mask needs to be copied to V0. 300 SDValue Mask = Node->getOperand(CurOp++); 301 Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue()); 302 Glue = Chain.getValue(1); 303 Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType())); 304 } 305 SDValue VL; 306 selectVLOp(Node->getOperand(CurOp++), VL); 307 Operands.push_back(VL); 308 309 MVT XLenVT = Subtarget->getXLenVT(); 310 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT); 311 Operands.push_back(SEWOp); 312 313 // Masked load has the tail policy argument. 314 if (IsMasked && IsLoad) { 315 // Policy must be a constant. 316 uint64_t Policy = Node->getConstantOperandVal(CurOp++); 317 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT); 318 Operands.push_back(PolicyOp); 319 } 320 321 Operands.push_back(Chain); // Chain. 322 if (Glue) 323 Operands.push_back(Glue); 324 } 325 326 static bool isAllUndef(ArrayRef<SDValue> Values) { 327 return llvm::all_of(Values, [](SDValue V) { return V->isUndef(); }); 328 } 329 330 void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked, 331 bool IsStrided) { 332 SDLoc DL(Node); 333 unsigned NF = Node->getNumValues() - 1; 334 MVT VT = Node->getSimpleValueType(0); 335 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 336 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 337 338 unsigned CurOp = 2; 339 SmallVector<SDValue, 8> Operands; 340 341 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp, 342 Node->op_begin() + CurOp + NF); 343 bool IsTU = IsMasked || !isAllUndef(Regs); 344 if (IsTU) { 345 SDValue Merge = createTuple(*CurDAG, Regs, NF, LMUL); 346 Operands.push_back(Merge); 347 } 348 CurOp += NF; 349 350 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 351 Operands, /*IsLoad=*/true); 352 353 const RISCV::VLSEGPseudo *P = 354 RISCV::getVLSEGPseudo(NF, IsMasked, IsTU, IsStrided, /*FF*/ false, Log2SEW, 355 static_cast<unsigned>(LMUL)); 356 MachineSDNode *Load = 357 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands); 358 359 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 360 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 361 362 SDValue SuperReg = SDValue(Load, 0); 363 for (unsigned I = 0; I < NF; ++I) { 364 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I); 365 ReplaceUses(SDValue(Node, I), 366 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg)); 367 } 368 369 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); 370 CurDAG->RemoveDeadNode(Node); 371 } 372 373 void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) { 374 SDLoc DL(Node); 375 unsigned NF = Node->getNumValues() - 2; // Do not count VL and Chain. 376 MVT VT = Node->getSimpleValueType(0); 377 MVT XLenVT = Subtarget->getXLenVT(); 378 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 379 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 380 381 unsigned CurOp = 2; 382 SmallVector<SDValue, 7> Operands; 383 384 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp, 385 Node->op_begin() + CurOp + NF); 386 bool IsTU = IsMasked || !isAllUndef(Regs); 387 if (IsTU) { 388 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL); 389 Operands.push_back(MaskedOff); 390 } 391 CurOp += NF; 392 393 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 394 /*IsStridedOrIndexed*/ false, Operands, 395 /*IsLoad=*/true); 396 397 const RISCV::VLSEGPseudo *P = 398 RISCV::getVLSEGPseudo(NF, IsMasked, IsTU, /*Strided*/ false, /*FF*/ true, 399 Log2SEW, static_cast<unsigned>(LMUL)); 400 MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, 401 XLenVT, MVT::Other, Operands); 402 403 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 404 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 405 406 SDValue SuperReg = SDValue(Load, 0); 407 for (unsigned I = 0; I < NF; ++I) { 408 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I); 409 ReplaceUses(SDValue(Node, I), 410 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg)); 411 } 412 413 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); // VL 414 ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 2)); // Chain 415 CurDAG->RemoveDeadNode(Node); 416 } 417 418 void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked, 419 bool IsOrdered) { 420 SDLoc DL(Node); 421 unsigned NF = Node->getNumValues() - 1; 422 MVT VT = Node->getSimpleValueType(0); 423 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 424 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 425 426 unsigned CurOp = 2; 427 SmallVector<SDValue, 8> Operands; 428 429 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp, 430 Node->op_begin() + CurOp + NF); 431 bool IsTU = IsMasked || !isAllUndef(Regs); 432 if (IsTU) { 433 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL); 434 Operands.push_back(MaskedOff); 435 } 436 CurOp += NF; 437 438 MVT IndexVT; 439 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 440 /*IsStridedOrIndexed*/ true, Operands, 441 /*IsLoad=*/true, &IndexVT); 442 443 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 444 "Element count mismatch"); 445 446 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 447 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 448 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 449 report_fatal_error("The V extension does not support EEW=64 for index " 450 "values when XLEN=32"); 451 } 452 const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo( 453 NF, IsMasked, IsTU, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), 454 static_cast<unsigned>(IndexLMUL)); 455 MachineSDNode *Load = 456 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands); 457 458 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 459 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 460 461 SDValue SuperReg = SDValue(Load, 0); 462 for (unsigned I = 0; I < NF; ++I) { 463 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I); 464 ReplaceUses(SDValue(Node, I), 465 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg)); 466 } 467 468 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); 469 CurDAG->RemoveDeadNode(Node); 470 } 471 472 void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, bool IsMasked, 473 bool IsStrided) { 474 SDLoc DL(Node); 475 unsigned NF = Node->getNumOperands() - 4; 476 if (IsStrided) 477 NF--; 478 if (IsMasked) 479 NF--; 480 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 481 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 482 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 483 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF); 484 SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL); 485 486 SmallVector<SDValue, 8> Operands; 487 Operands.push_back(StoreVal); 488 unsigned CurOp = 2 + NF; 489 490 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 491 Operands); 492 493 const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo( 494 NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL)); 495 MachineSDNode *Store = 496 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands); 497 498 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 499 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 500 501 ReplaceNode(Node, Store); 502 } 503 504 void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked, 505 bool IsOrdered) { 506 SDLoc DL(Node); 507 unsigned NF = Node->getNumOperands() - 5; 508 if (IsMasked) 509 --NF; 510 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 511 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 512 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 513 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF); 514 SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL); 515 516 SmallVector<SDValue, 8> Operands; 517 Operands.push_back(StoreVal); 518 unsigned CurOp = 2 + NF; 519 520 MVT IndexVT; 521 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 522 /*IsStridedOrIndexed*/ true, Operands, 523 /*IsLoad=*/false, &IndexVT); 524 525 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 526 "Element count mismatch"); 527 528 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 529 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 530 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 531 report_fatal_error("The V extension does not support EEW=64 for index " 532 "values when XLEN=32"); 533 } 534 const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo( 535 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), 536 static_cast<unsigned>(IndexLMUL)); 537 MachineSDNode *Store = 538 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands); 539 540 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 541 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 542 543 ReplaceNode(Node, Store); 544 } 545 546 void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) { 547 if (!Subtarget->hasVInstructions()) 548 return; 549 550 assert((Node->getOpcode() == ISD::INTRINSIC_W_CHAIN || 551 Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN) && 552 "Unexpected opcode"); 553 554 SDLoc DL(Node); 555 MVT XLenVT = Subtarget->getXLenVT(); 556 557 bool HasChain = Node->getOpcode() == ISD::INTRINSIC_W_CHAIN; 558 unsigned IntNoOffset = HasChain ? 1 : 0; 559 unsigned IntNo = Node->getConstantOperandVal(IntNoOffset); 560 561 assert((IntNo == Intrinsic::riscv_vsetvli || 562 IntNo == Intrinsic::riscv_vsetvlimax || 563 IntNo == Intrinsic::riscv_vsetvli_opt || 564 IntNo == Intrinsic::riscv_vsetvlimax_opt) && 565 "Unexpected vsetvli intrinsic"); 566 567 bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax || 568 IntNo == Intrinsic::riscv_vsetvlimax_opt; 569 unsigned Offset = IntNoOffset + (VLMax ? 1 : 2); 570 571 assert(Node->getNumOperands() == Offset + 2 && 572 "Unexpected number of operands"); 573 574 unsigned SEW = 575 RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7); 576 RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>( 577 Node->getConstantOperandVal(Offset + 1) & 0x7); 578 579 unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true, 580 /*MaskAgnostic*/ false); 581 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT); 582 583 SmallVector<EVT, 2> VTs = {XLenVT}; 584 if (HasChain) 585 VTs.push_back(MVT::Other); 586 587 SDValue VLOperand; 588 unsigned Opcode = RISCV::PseudoVSETVLI; 589 if (VLMax) { 590 VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT); 591 Opcode = RISCV::PseudoVSETVLIX0; 592 } else { 593 VLOperand = Node->getOperand(IntNoOffset + 1); 594 595 if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) { 596 uint64_t AVL = C->getZExtValue(); 597 if (isUInt<5>(AVL)) { 598 SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT); 599 SmallVector<SDValue, 3> Ops = {VLImm, VTypeIOp}; 600 if (HasChain) 601 Ops.push_back(Node->getOperand(0)); 602 ReplaceNode( 603 Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL, VTs, Ops)); 604 return; 605 } 606 } 607 } 608 609 SmallVector<SDValue, 3> Ops = {VLOperand, VTypeIOp}; 610 if (HasChain) 611 Ops.push_back(Node->getOperand(0)); 612 613 ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, VTs, Ops)); 614 } 615 616 void RISCVDAGToDAGISel::Select(SDNode *Node) { 617 // If we have a custom node, we have already selected. 618 if (Node->isMachineOpcode()) { 619 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n"); 620 Node->setNodeId(-1); 621 return; 622 } 623 624 // Instruction Selection not handled by the auto-generated tablegen selection 625 // should be handled here. 626 unsigned Opcode = Node->getOpcode(); 627 MVT XLenVT = Subtarget->getXLenVT(); 628 SDLoc DL(Node); 629 MVT VT = Node->getSimpleValueType(0); 630 631 switch (Opcode) { 632 case ISD::Constant: { 633 auto *ConstNode = cast<ConstantSDNode>(Node); 634 if (VT == XLenVT && ConstNode->isZero()) { 635 SDValue New = 636 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, XLenVT); 637 ReplaceNode(Node, New.getNode()); 638 return; 639 } 640 int64_t Imm = ConstNode->getSExtValue(); 641 // If the upper XLen-16 bits are not used, try to convert this to a simm12 642 // by sign extending bit 15. 643 if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) && 644 hasAllHUsers(Node)) 645 Imm = SignExtend64<16>(Imm); 646 // If the upper 32-bits are not used try to convert this into a simm32 by 647 // sign extending bit 32. 648 if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node)) 649 Imm = SignExtend64<32>(Imm); 650 651 ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget)); 652 return; 653 } 654 case ISD::ADD: { 655 // Try to select ADD + immediate used as memory addresses to 656 // (ADDI (ADD X, Imm-Lo12), Lo12) if it will allow the ADDI to be removed by 657 // doPeepholeLoadStoreADDI. 658 659 // LHS should be an immediate. 660 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 661 if (!N1C) 662 break; 663 664 int64_t Offset = N1C->getSExtValue(); 665 int64_t Lo12 = SignExtend64<12>(Offset); 666 667 // Don't do this if the lower 12 bits are 0 or we could use ADDI directly. 668 if (Lo12 == 0 || isInt<12>(Offset)) 669 break; 670 671 // Don't do this if we can use a pair of ADDIs. 672 if (isInt<12>(Offset / 2) && isInt<12>(Offset - Offset / 2)) 673 break; 674 675 RISCVMatInt::InstSeq Seq = 676 RISCVMatInt::generateInstSeq(Offset, Subtarget->getFeatureBits()); 677 678 Offset -= Lo12; 679 // Restore sign bits for RV32. 680 if (!Subtarget->is64Bit()) 681 Offset = SignExtend64<32>(Offset); 682 683 // We can fold if the last operation is an ADDI or its an ADDIW that could 684 // be treated as an ADDI. 685 if (Seq.back().Opc != RISCV::ADDI && 686 !(Seq.back().Opc == RISCV::ADDIW && isInt<32>(Offset))) 687 break; 688 assert(Seq.back().Imm == Lo12 && "Expected immediate to match Lo12"); 689 // Drop the last operation. 690 Seq.pop_back(); 691 assert(!Seq.empty() && "Expected more instructions in sequence"); 692 693 bool AllPointerUses = true; 694 for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) { 695 SDNode *User = *UI; 696 697 // Is this user a memory instruction that uses a register and immediate 698 // that has this ADD as its pointer. 699 unsigned BaseOpIdx, OffsetOpIdx; 700 if (!User->isMachineOpcode() || 701 !hasMemOffset(User, BaseOpIdx, OffsetOpIdx) || 702 UI.getOperandNo() != BaseOpIdx) { 703 AllPointerUses = false; 704 break; 705 } 706 707 // If the memory instruction already has an offset, make sure the combined 708 // offset is foldable. 709 int64_t MemOffs = 710 cast<ConstantSDNode>(User->getOperand(OffsetOpIdx))->getSExtValue(); 711 MemOffs += Lo12; 712 if (!isInt<12>(MemOffs)) { 713 AllPointerUses = false; 714 break; 715 } 716 } 717 718 if (!AllPointerUses) 719 break; 720 721 // Emit (ADDI (ADD X, Hi), Lo) 722 SDNode *Imm = selectImmSeq(CurDAG, DL, VT, Seq); 723 SDNode *ADD = CurDAG->getMachineNode(RISCV::ADD, DL, VT, 724 Node->getOperand(0), SDValue(Imm, 0)); 725 SDNode *ADDI = 726 CurDAG->getMachineNode(RISCV::ADDI, DL, VT, SDValue(ADD, 0), 727 CurDAG->getTargetConstant(Lo12, DL, VT)); 728 ReplaceNode(Node, ADDI); 729 return; 730 } 731 case ISD::SRL: { 732 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 733 if (!N1C) 734 break; 735 SDValue N0 = Node->getOperand(0); 736 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() || 737 !isa<ConstantSDNode>(N0.getOperand(1))) 738 break; 739 unsigned ShAmt = N1C->getZExtValue(); 740 uint64_t Mask = N0.getConstantOperandVal(1); 741 742 // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has 743 // 32 leading zeros and C3 trailing zeros. 744 if (isShiftedMask_64(Mask)) { 745 unsigned XLen = Subtarget->getXLen(); 746 unsigned LeadingZeros = XLen - (64 - countLeadingZeros(Mask)); 747 unsigned TrailingZeros = countTrailingZeros(Mask); 748 if (LeadingZeros == 32 && TrailingZeros > ShAmt) { 749 SDNode *SRLIW = CurDAG->getMachineNode( 750 RISCV::SRLIW, DL, VT, N0->getOperand(0), 751 CurDAG->getTargetConstant(TrailingZeros, DL, VT)); 752 SDNode *SLLI = CurDAG->getMachineNode( 753 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0), 754 CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT)); 755 ReplaceNode(Node, SLLI); 756 return; 757 } 758 } 759 760 // Optimize (srl (and X, C2), C) -> 761 // (srli (slli X, (XLen-C3), (XLen-C3) + C) 762 // Where C2 is a mask with C3 trailing ones. 763 // Taking into account that the C2 may have had lower bits unset by 764 // SimplifyDemandedBits. This avoids materializing the C2 immediate. 765 // This pattern occurs when type legalizing right shifts for types with 766 // less than XLen bits. 767 Mask |= maskTrailingOnes<uint64_t>(ShAmt); 768 if (!isMask_64(Mask)) 769 break; 770 unsigned TrailingOnes = countTrailingOnes(Mask); 771 // 32 trailing ones should use srliw via tablegen pattern. 772 if (TrailingOnes == 32 || ShAmt >= TrailingOnes) 773 break; 774 unsigned LShAmt = Subtarget->getXLen() - TrailingOnes; 775 SDNode *SLLI = 776 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0), 777 CurDAG->getTargetConstant(LShAmt, DL, VT)); 778 SDNode *SRLI = CurDAG->getMachineNode( 779 RISCV::SRLI, DL, VT, SDValue(SLLI, 0), 780 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT)); 781 ReplaceNode(Node, SRLI); 782 return; 783 } 784 case ISD::SRA: { 785 // Optimize (sra (sext_inreg X, i16), C) -> 786 // (srai (slli X, (XLen-16), (XLen-16) + C) 787 // And (sra (sext_inreg X, i8), C) -> 788 // (srai (slli X, (XLen-8), (XLen-8) + C) 789 // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal. 790 // This transform matches the code we get without Zbb. The shifts are more 791 // compressible, and this can help expose CSE opportunities in the sdiv by 792 // constant optimization. 793 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 794 if (!N1C) 795 break; 796 SDValue N0 = Node->getOperand(0); 797 if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse()) 798 break; 799 unsigned ShAmt = N1C->getZExtValue(); 800 unsigned ExtSize = 801 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits(); 802 // ExtSize of 32 should use sraiw via tablegen pattern. 803 if (ExtSize >= 32 || ShAmt >= ExtSize) 804 break; 805 unsigned LShAmt = Subtarget->getXLen() - ExtSize; 806 SDNode *SLLI = 807 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0), 808 CurDAG->getTargetConstant(LShAmt, DL, VT)); 809 SDNode *SRAI = CurDAG->getMachineNode( 810 RISCV::SRAI, DL, VT, SDValue(SLLI, 0), 811 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT)); 812 ReplaceNode(Node, SRAI); 813 return; 814 } 815 case ISD::AND: { 816 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 817 if (!N1C) 818 break; 819 820 SDValue N0 = Node->getOperand(0); 821 822 bool LeftShift = N0.getOpcode() == ISD::SHL; 823 if (!LeftShift && N0.getOpcode() != ISD::SRL) 824 break; 825 826 auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 827 if (!C) 828 break; 829 uint64_t C2 = C->getZExtValue(); 830 unsigned XLen = Subtarget->getXLen(); 831 if (!C2 || C2 >= XLen) 832 break; 833 834 uint64_t C1 = N1C->getZExtValue(); 835 836 // Keep track of whether this is a c.andi. If we can't use c.andi, the 837 // shift pair might offer more compression opportunities. 838 // TODO: We could check for C extension here, but we don't have many lit 839 // tests with the C extension enabled so not checking gets better coverage. 840 // TODO: What if ANDI faster than shift? 841 bool IsCANDI = isInt<6>(N1C->getSExtValue()); 842 843 // Clear irrelevant bits in the mask. 844 if (LeftShift) 845 C1 &= maskTrailingZeros<uint64_t>(C2); 846 else 847 C1 &= maskTrailingOnes<uint64_t>(XLen - C2); 848 849 // Some transforms should only be done if the shift has a single use or 850 // the AND would become (srli (slli X, 32), 32) 851 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF); 852 853 SDValue X = N0.getOperand(0); 854 855 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask 856 // with c3 leading zeros. 857 if (!LeftShift && isMask_64(C1)) { 858 uint64_t C3 = XLen - (64 - countLeadingZeros(C1)); 859 if (C2 < C3) { 860 // If the number of leading zeros is C2+32 this can be SRLIW. 861 if (C2 + 32 == C3) { 862 SDNode *SRLIW = CurDAG->getMachineNode( 863 RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT)); 864 ReplaceNode(Node, SRLIW); 865 return; 866 } 867 868 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32) if 869 // c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1. 870 // 871 // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type 872 // legalized and goes through DAG combine. 873 if (C2 >= 32 && (C3 - C2) == 1 && N0.hasOneUse() && 874 X.getOpcode() == ISD::SIGN_EXTEND_INREG && 875 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) { 876 SDNode *SRAIW = 877 CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0), 878 CurDAG->getTargetConstant(31, DL, VT)); 879 SDNode *SRLIW = CurDAG->getMachineNode( 880 RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0), 881 CurDAG->getTargetConstant(C3 - 32, DL, VT)); 882 ReplaceNode(Node, SRLIW); 883 return; 884 } 885 886 // (srli (slli x, c3-c2), c3). 887 // Skip if we could use (zext.w (sraiw X, C2)). 888 bool Skip = Subtarget->hasStdExtZba() && C3 == 32 && 889 X.getOpcode() == ISD::SIGN_EXTEND_INREG && 890 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32; 891 // Also Skip if we can use bexti. 892 Skip |= Subtarget->hasStdExtZbs() && C3 == XLen - 1; 893 if (OneUseOrZExtW && !Skip) { 894 SDNode *SLLI = CurDAG->getMachineNode( 895 RISCV::SLLI, DL, VT, X, 896 CurDAG->getTargetConstant(C3 - C2, DL, VT)); 897 SDNode *SRLI = 898 CurDAG->getMachineNode(RISCV::SRLI, DL, VT, SDValue(SLLI, 0), 899 CurDAG->getTargetConstant(C3, DL, VT)); 900 ReplaceNode(Node, SRLI); 901 return; 902 } 903 } 904 } 905 906 // Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask 907 // shifted by c2 bits with c3 leading zeros. 908 if (LeftShift && isShiftedMask_64(C1)) { 909 uint64_t C3 = XLen - (64 - countLeadingZeros(C1)); 910 911 if (C2 + C3 < XLen && 912 C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + C3)) << C2)) { 913 // Use slli.uw when possible. 914 if ((XLen - (C2 + C3)) == 32 && Subtarget->hasStdExtZba()) { 915 SDNode *SLLI_UW = CurDAG->getMachineNode( 916 RISCV::SLLI_UW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT)); 917 ReplaceNode(Node, SLLI_UW); 918 return; 919 } 920 921 // (srli (slli c2+c3), c3) 922 if (OneUseOrZExtW && !IsCANDI) { 923 SDNode *SLLI = CurDAG->getMachineNode( 924 RISCV::SLLI, DL, VT, X, 925 CurDAG->getTargetConstant(C2 + C3, DL, VT)); 926 SDNode *SRLI = 927 CurDAG->getMachineNode(RISCV::SRLI, DL, VT, SDValue(SLLI, 0), 928 CurDAG->getTargetConstant(C3, DL, VT)); 929 ReplaceNode(Node, SRLI); 930 return; 931 } 932 } 933 } 934 935 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a 936 // shifted mask with c2 leading zeros and c3 trailing zeros. 937 if (!LeftShift && isShiftedMask_64(C1)) { 938 uint64_t Leading = XLen - (64 - countLeadingZeros(C1)); 939 uint64_t C3 = countTrailingZeros(C1); 940 if (Leading == C2 && C2 + C3 < XLen && OneUseOrZExtW && !IsCANDI) { 941 unsigned SrliOpc = RISCV::SRLI; 942 // If the input is zexti32 we should use SRLIW. 943 if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) && 944 X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) { 945 SrliOpc = RISCV::SRLIW; 946 X = X.getOperand(0); 947 } 948 SDNode *SRLI = CurDAG->getMachineNode( 949 SrliOpc, DL, VT, X, CurDAG->getTargetConstant(C2 + C3, DL, VT)); 950 SDNode *SLLI = 951 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLI, 0), 952 CurDAG->getTargetConstant(C3, DL, VT)); 953 ReplaceNode(Node, SLLI); 954 return; 955 } 956 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI. 957 if (Leading > 32 && (Leading - 32) == C2 && C2 + C3 < 32 && 958 OneUseOrZExtW && !IsCANDI) { 959 SDNode *SRLIW = 960 CurDAG->getMachineNode(RISCV::SRLIW, DL, VT, X, 961 CurDAG->getTargetConstant(C2 + C3, DL, VT)); 962 SDNode *SLLI = 963 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLIW, 0), 964 CurDAG->getTargetConstant(C3, DL, VT)); 965 ReplaceNode(Node, SLLI); 966 return; 967 } 968 } 969 970 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a 971 // shifted mask with no leading zeros and c3 trailing zeros. 972 if (LeftShift && isShiftedMask_64(C1)) { 973 uint64_t Leading = XLen - (64 - countLeadingZeros(C1)); 974 uint64_t C3 = countTrailingZeros(C1); 975 if (Leading == 0 && C2 < C3 && OneUseOrZExtW && !IsCANDI) { 976 SDNode *SRLI = CurDAG->getMachineNode( 977 RISCV::SRLI, DL, VT, X, CurDAG->getTargetConstant(C3 - C2, DL, VT)); 978 SDNode *SLLI = 979 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLI, 0), 980 CurDAG->getTargetConstant(C3, DL, VT)); 981 ReplaceNode(Node, SLLI); 982 return; 983 } 984 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI. 985 if (C2 < C3 && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) { 986 SDNode *SRLIW = 987 CurDAG->getMachineNode(RISCV::SRLIW, DL, VT, X, 988 CurDAG->getTargetConstant(C3 - C2, DL, VT)); 989 SDNode *SLLI = 990 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLIW, 0), 991 CurDAG->getTargetConstant(C3, DL, VT)); 992 ReplaceNode(Node, SLLI); 993 return; 994 } 995 } 996 997 break; 998 } 999 case ISD::MUL: { 1000 // Special case for calculating (mul (and X, C2), C1) where the full product 1001 // fits in XLen bits. We can shift X left by the number of leading zeros in 1002 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final 1003 // product has XLen trailing zeros, putting it in the output of MULHU. This 1004 // can avoid materializing a constant in a register for C2. 1005 1006 // RHS should be a constant. 1007 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 1008 if (!N1C || !N1C->hasOneUse()) 1009 break; 1010 1011 // LHS should be an AND with constant. 1012 SDValue N0 = Node->getOperand(0); 1013 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1))) 1014 break; 1015 1016 uint64_t C2 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); 1017 1018 // Constant should be a mask. 1019 if (!isMask_64(C2)) 1020 break; 1021 1022 // This should be the only use of the AND unless we will use 1023 // (SRLI (SLLI X, 32), 32). We don't use a shift pair for other AND 1024 // constants. 1025 if (!N0.hasOneUse() && C2 != UINT64_C(0xFFFFFFFF)) 1026 break; 1027 1028 // If this can be an ANDI, ZEXT.H or ZEXT.W we don't need to do this 1029 // optimization. 1030 if (isInt<12>(C2) || 1031 (C2 == UINT64_C(0xFFFF) && 1032 (Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp())) || 1033 (C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba())) 1034 break; 1035 1036 // We need to shift left the AND input and C1 by a total of XLen bits. 1037 1038 // How far left do we need to shift the AND input? 1039 unsigned XLen = Subtarget->getXLen(); 1040 unsigned LeadingZeros = XLen - (64 - countLeadingZeros(C2)); 1041 1042 // The constant gets shifted by the remaining amount unless that would 1043 // shift bits out. 1044 uint64_t C1 = N1C->getZExtValue(); 1045 unsigned ConstantShift = XLen - LeadingZeros; 1046 if (ConstantShift > (XLen - (64 - countLeadingZeros(C1)))) 1047 break; 1048 1049 uint64_t ShiftedC1 = C1 << ConstantShift; 1050 // If this RV32, we need to sign extend the constant. 1051 if (XLen == 32) 1052 ShiftedC1 = SignExtend64<32>(ShiftedC1); 1053 1054 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))). 1055 SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget); 1056 SDNode *SLLI = 1057 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0), 1058 CurDAG->getTargetConstant(LeadingZeros, DL, VT)); 1059 SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT, 1060 SDValue(SLLI, 0), SDValue(Imm, 0)); 1061 ReplaceNode(Node, MULHU); 1062 return; 1063 } 1064 case ISD::INTRINSIC_WO_CHAIN: { 1065 unsigned IntNo = Node->getConstantOperandVal(0); 1066 switch (IntNo) { 1067 // By default we do not custom select any intrinsic. 1068 default: 1069 break; 1070 case Intrinsic::riscv_vmsgeu: 1071 case Intrinsic::riscv_vmsge: { 1072 SDValue Src1 = Node->getOperand(1); 1073 SDValue Src2 = Node->getOperand(2); 1074 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu; 1075 bool IsCmpUnsignedZero = false; 1076 // Only custom select scalar second operand. 1077 if (Src2.getValueType() != XLenVT) 1078 break; 1079 // Small constants are handled with patterns. 1080 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) { 1081 int64_t CVal = C->getSExtValue(); 1082 if (CVal >= -15 && CVal <= 16) { 1083 if (!IsUnsigned || CVal != 0) 1084 break; 1085 IsCmpUnsignedZero = true; 1086 } 1087 } 1088 MVT Src1VT = Src1.getSimpleValueType(); 1089 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode; 1090 switch (RISCVTargetLowering::getLMUL(Src1VT)) { 1091 default: 1092 llvm_unreachable("Unexpected LMUL!"); 1093 #define CASE_VMSLT_VMNAND_VMSET_OPCODES(lmulenum, suffix, suffix_b) \ 1094 case RISCVII::VLMUL::lmulenum: \ 1095 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \ 1096 : RISCV::PseudoVMSLT_VX_##suffix; \ 1097 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \ 1098 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b; \ 1099 break; 1100 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F8, MF8, B1) 1101 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F4, MF4, B2) 1102 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F2, MF2, B4) 1103 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_1, M1, B8) 1104 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_2, M2, B16) 1105 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_4, M4, B32) 1106 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_8, M8, B64) 1107 #undef CASE_VMSLT_VMNAND_VMSET_OPCODES 1108 } 1109 SDValue SEW = CurDAG->getTargetConstant( 1110 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT); 1111 SDValue VL; 1112 selectVLOp(Node->getOperand(3), VL); 1113 1114 // If vmsgeu with 0 immediate, expand it to vmset. 1115 if (IsCmpUnsignedZero) { 1116 ReplaceNode(Node, CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW)); 1117 return; 1118 } 1119 1120 // Expand to 1121 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd 1122 SDValue Cmp = SDValue( 1123 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}), 1124 0); 1125 ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT, 1126 {Cmp, Cmp, VL, SEW})); 1127 return; 1128 } 1129 case Intrinsic::riscv_vmsgeu_mask: 1130 case Intrinsic::riscv_vmsge_mask: { 1131 SDValue Src1 = Node->getOperand(2); 1132 SDValue Src2 = Node->getOperand(3); 1133 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask; 1134 bool IsCmpUnsignedZero = false; 1135 // Only custom select scalar second operand. 1136 if (Src2.getValueType() != XLenVT) 1137 break; 1138 // Small constants are handled with patterns. 1139 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) { 1140 int64_t CVal = C->getSExtValue(); 1141 if (CVal >= -15 && CVal <= 16) { 1142 if (!IsUnsigned || CVal != 0) 1143 break; 1144 IsCmpUnsignedZero = true; 1145 } 1146 } 1147 MVT Src1VT = Src1.getSimpleValueType(); 1148 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode, 1149 VMOROpcode; 1150 switch (RISCVTargetLowering::getLMUL(Src1VT)) { 1151 default: 1152 llvm_unreachable("Unexpected LMUL!"); 1153 #define CASE_VMSLT_OPCODES(lmulenum, suffix, suffix_b) \ 1154 case RISCVII::VLMUL::lmulenum: \ 1155 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \ 1156 : RISCV::PseudoVMSLT_VX_##suffix; \ 1157 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \ 1158 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \ 1159 break; 1160 CASE_VMSLT_OPCODES(LMUL_F8, MF8, B1) 1161 CASE_VMSLT_OPCODES(LMUL_F4, MF4, B2) 1162 CASE_VMSLT_OPCODES(LMUL_F2, MF2, B4) 1163 CASE_VMSLT_OPCODES(LMUL_1, M1, B8) 1164 CASE_VMSLT_OPCODES(LMUL_2, M2, B16) 1165 CASE_VMSLT_OPCODES(LMUL_4, M4, B32) 1166 CASE_VMSLT_OPCODES(LMUL_8, M8, B64) 1167 #undef CASE_VMSLT_OPCODES 1168 } 1169 // Mask operations use the LMUL from the mask type. 1170 switch (RISCVTargetLowering::getLMUL(VT)) { 1171 default: 1172 llvm_unreachable("Unexpected LMUL!"); 1173 #define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \ 1174 case RISCVII::VLMUL::lmulenum: \ 1175 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \ 1176 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \ 1177 VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \ 1178 break; 1179 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, MF8) 1180 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, MF4) 1181 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, MF2) 1182 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, M1) 1183 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, M2) 1184 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, M4) 1185 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, M8) 1186 #undef CASE_VMXOR_VMANDN_VMOR_OPCODES 1187 } 1188 SDValue SEW = CurDAG->getTargetConstant( 1189 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT); 1190 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT); 1191 SDValue VL; 1192 selectVLOp(Node->getOperand(5), VL); 1193 SDValue MaskedOff = Node->getOperand(1); 1194 SDValue Mask = Node->getOperand(4); 1195 1196 // If vmsgeu_mask with 0 immediate, expand it to vmor mask, maskedoff. 1197 if (IsCmpUnsignedZero) { 1198 // We don't need vmor if the MaskedOff and the Mask are the same 1199 // value. 1200 if (Mask == MaskedOff) { 1201 ReplaceUses(Node, Mask.getNode()); 1202 return; 1203 } 1204 ReplaceNode(Node, 1205 CurDAG->getMachineNode(VMOROpcode, DL, VT, 1206 {Mask, MaskedOff, VL, MaskSEW})); 1207 return; 1208 } 1209 1210 // If the MaskedOff value and the Mask are the same value use 1211 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt 1212 // This avoids needing to copy v0 to vd before starting the next sequence. 1213 if (Mask == MaskedOff) { 1214 SDValue Cmp = SDValue( 1215 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}), 1216 0); 1217 ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT, 1218 {Mask, Cmp, VL, MaskSEW})); 1219 return; 1220 } 1221 1222 // Mask needs to be copied to V0. 1223 SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, 1224 RISCV::V0, Mask, SDValue()); 1225 SDValue Glue = Chain.getValue(1); 1226 SDValue V0 = CurDAG->getRegister(RISCV::V0, VT); 1227 1228 // Otherwise use 1229 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0 1230 // The result is mask undisturbed. 1231 // We use the same instructions to emulate mask agnostic behavior, because 1232 // the agnostic result can be either undisturbed or all 1. 1233 SDValue Cmp = SDValue( 1234 CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT, 1235 {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}), 1236 0); 1237 // vmxor.mm vd, vd, v0 is used to update active value. 1238 ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT, 1239 {Cmp, Mask, VL, MaskSEW})); 1240 return; 1241 } 1242 case Intrinsic::riscv_vsetvli_opt: 1243 case Intrinsic::riscv_vsetvlimax_opt: 1244 return selectVSETVLI(Node); 1245 } 1246 break; 1247 } 1248 case ISD::INTRINSIC_W_CHAIN: { 1249 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 1250 switch (IntNo) { 1251 // By default we do not custom select any intrinsic. 1252 default: 1253 break; 1254 case Intrinsic::riscv_vsetvli: 1255 case Intrinsic::riscv_vsetvlimax: 1256 return selectVSETVLI(Node); 1257 case Intrinsic::riscv_vlseg2: 1258 case Intrinsic::riscv_vlseg3: 1259 case Intrinsic::riscv_vlseg4: 1260 case Intrinsic::riscv_vlseg5: 1261 case Intrinsic::riscv_vlseg6: 1262 case Intrinsic::riscv_vlseg7: 1263 case Intrinsic::riscv_vlseg8: { 1264 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false); 1265 return; 1266 } 1267 case Intrinsic::riscv_vlseg2_mask: 1268 case Intrinsic::riscv_vlseg3_mask: 1269 case Intrinsic::riscv_vlseg4_mask: 1270 case Intrinsic::riscv_vlseg5_mask: 1271 case Intrinsic::riscv_vlseg6_mask: 1272 case Intrinsic::riscv_vlseg7_mask: 1273 case Intrinsic::riscv_vlseg8_mask: { 1274 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false); 1275 return; 1276 } 1277 case Intrinsic::riscv_vlsseg2: 1278 case Intrinsic::riscv_vlsseg3: 1279 case Intrinsic::riscv_vlsseg4: 1280 case Intrinsic::riscv_vlsseg5: 1281 case Intrinsic::riscv_vlsseg6: 1282 case Intrinsic::riscv_vlsseg7: 1283 case Intrinsic::riscv_vlsseg8: { 1284 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true); 1285 return; 1286 } 1287 case Intrinsic::riscv_vlsseg2_mask: 1288 case Intrinsic::riscv_vlsseg3_mask: 1289 case Intrinsic::riscv_vlsseg4_mask: 1290 case Intrinsic::riscv_vlsseg5_mask: 1291 case Intrinsic::riscv_vlsseg6_mask: 1292 case Intrinsic::riscv_vlsseg7_mask: 1293 case Intrinsic::riscv_vlsseg8_mask: { 1294 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true); 1295 return; 1296 } 1297 case Intrinsic::riscv_vloxseg2: 1298 case Intrinsic::riscv_vloxseg3: 1299 case Intrinsic::riscv_vloxseg4: 1300 case Intrinsic::riscv_vloxseg5: 1301 case Intrinsic::riscv_vloxseg6: 1302 case Intrinsic::riscv_vloxseg7: 1303 case Intrinsic::riscv_vloxseg8: 1304 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true); 1305 return; 1306 case Intrinsic::riscv_vluxseg2: 1307 case Intrinsic::riscv_vluxseg3: 1308 case Intrinsic::riscv_vluxseg4: 1309 case Intrinsic::riscv_vluxseg5: 1310 case Intrinsic::riscv_vluxseg6: 1311 case Intrinsic::riscv_vluxseg7: 1312 case Intrinsic::riscv_vluxseg8: 1313 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false); 1314 return; 1315 case Intrinsic::riscv_vloxseg2_mask: 1316 case Intrinsic::riscv_vloxseg3_mask: 1317 case Intrinsic::riscv_vloxseg4_mask: 1318 case Intrinsic::riscv_vloxseg5_mask: 1319 case Intrinsic::riscv_vloxseg6_mask: 1320 case Intrinsic::riscv_vloxseg7_mask: 1321 case Intrinsic::riscv_vloxseg8_mask: 1322 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true); 1323 return; 1324 case Intrinsic::riscv_vluxseg2_mask: 1325 case Intrinsic::riscv_vluxseg3_mask: 1326 case Intrinsic::riscv_vluxseg4_mask: 1327 case Intrinsic::riscv_vluxseg5_mask: 1328 case Intrinsic::riscv_vluxseg6_mask: 1329 case Intrinsic::riscv_vluxseg7_mask: 1330 case Intrinsic::riscv_vluxseg8_mask: 1331 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false); 1332 return; 1333 case Intrinsic::riscv_vlseg8ff: 1334 case Intrinsic::riscv_vlseg7ff: 1335 case Intrinsic::riscv_vlseg6ff: 1336 case Intrinsic::riscv_vlseg5ff: 1337 case Intrinsic::riscv_vlseg4ff: 1338 case Intrinsic::riscv_vlseg3ff: 1339 case Intrinsic::riscv_vlseg2ff: { 1340 selectVLSEGFF(Node, /*IsMasked*/ false); 1341 return; 1342 } 1343 case Intrinsic::riscv_vlseg8ff_mask: 1344 case Intrinsic::riscv_vlseg7ff_mask: 1345 case Intrinsic::riscv_vlseg6ff_mask: 1346 case Intrinsic::riscv_vlseg5ff_mask: 1347 case Intrinsic::riscv_vlseg4ff_mask: 1348 case Intrinsic::riscv_vlseg3ff_mask: 1349 case Intrinsic::riscv_vlseg2ff_mask: { 1350 selectVLSEGFF(Node, /*IsMasked*/ true); 1351 return; 1352 } 1353 case Intrinsic::riscv_vloxei: 1354 case Intrinsic::riscv_vloxei_mask: 1355 case Intrinsic::riscv_vluxei: 1356 case Intrinsic::riscv_vluxei_mask: { 1357 bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask || 1358 IntNo == Intrinsic::riscv_vluxei_mask; 1359 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei || 1360 IntNo == Intrinsic::riscv_vloxei_mask; 1361 1362 MVT VT = Node->getSimpleValueType(0); 1363 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1364 1365 unsigned CurOp = 2; 1366 // Masked intrinsic only have TU version pseduo instructions. 1367 bool IsTU = IsMasked || (!IsMasked && !Node->getOperand(CurOp).isUndef()); 1368 SmallVector<SDValue, 8> Operands; 1369 if (IsTU) 1370 Operands.push_back(Node->getOperand(CurOp++)); 1371 else 1372 // Skip the undef passthru operand for nomask TA version pseudo 1373 CurOp++; 1374 1375 MVT IndexVT; 1376 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 1377 /*IsStridedOrIndexed*/ true, Operands, 1378 /*IsLoad=*/true, &IndexVT); 1379 1380 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 1381 "Element count mismatch"); 1382 1383 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1384 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 1385 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 1386 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 1387 report_fatal_error("The V extension does not support EEW=64 for index " 1388 "values when XLEN=32"); 1389 } 1390 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo( 1391 IsMasked, IsTU, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), 1392 static_cast<unsigned>(IndexLMUL)); 1393 MachineSDNode *Load = 1394 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1395 1396 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1397 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 1398 1399 ReplaceNode(Node, Load); 1400 return; 1401 } 1402 case Intrinsic::riscv_vlm: 1403 case Intrinsic::riscv_vle: 1404 case Intrinsic::riscv_vle_mask: 1405 case Intrinsic::riscv_vlse: 1406 case Intrinsic::riscv_vlse_mask: { 1407 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask || 1408 IntNo == Intrinsic::riscv_vlse_mask; 1409 bool IsStrided = 1410 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask; 1411 1412 MVT VT = Node->getSimpleValueType(0); 1413 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1414 1415 unsigned CurOp = 2; 1416 // The riscv_vlm intrinsic are always tail agnostic and no passthru operand. 1417 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm; 1418 // Masked intrinsic only have TU version pseduo instructions. 1419 bool IsTU = 1420 HasPassthruOperand && 1421 ((!IsMasked && !Node->getOperand(CurOp).isUndef()) || IsMasked); 1422 SmallVector<SDValue, 8> Operands; 1423 if (IsTU) 1424 Operands.push_back(Node->getOperand(CurOp++)); 1425 else if (HasPassthruOperand) 1426 // Skip the undef passthru operand for nomask TA version pseudo 1427 CurOp++; 1428 1429 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 1430 Operands, /*IsLoad=*/true); 1431 1432 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1433 const RISCV::VLEPseudo *P = 1434 RISCV::getVLEPseudo(IsMasked, IsTU, IsStrided, /*FF*/ false, Log2SEW, 1435 static_cast<unsigned>(LMUL)); 1436 MachineSDNode *Load = 1437 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1438 1439 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1440 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 1441 1442 ReplaceNode(Node, Load); 1443 return; 1444 } 1445 case Intrinsic::riscv_vleff: 1446 case Intrinsic::riscv_vleff_mask: { 1447 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask; 1448 1449 MVT VT = Node->getSimpleValueType(0); 1450 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1451 1452 unsigned CurOp = 2; 1453 // Masked intrinsic only have TU version pseduo instructions. 1454 bool IsTU = IsMasked || (!IsMasked && !Node->getOperand(CurOp).isUndef()); 1455 SmallVector<SDValue, 7> Operands; 1456 if (IsTU) 1457 Operands.push_back(Node->getOperand(CurOp++)); 1458 else 1459 // Skip the undef passthru operand for nomask TA version pseudo 1460 CurOp++; 1461 1462 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 1463 /*IsStridedOrIndexed*/ false, Operands, 1464 /*IsLoad=*/true); 1465 1466 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1467 const RISCV::VLEPseudo *P = 1468 RISCV::getVLEPseudo(IsMasked, IsTU, /*Strided*/ false, /*FF*/ true, 1469 Log2SEW, static_cast<unsigned>(LMUL)); 1470 MachineSDNode *Load = CurDAG->getMachineNode( 1471 P->Pseudo, DL, Node->getVTList(), Operands); 1472 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1473 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 1474 1475 ReplaceNode(Node, Load); 1476 return; 1477 } 1478 } 1479 break; 1480 } 1481 case ISD::INTRINSIC_VOID: { 1482 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 1483 switch (IntNo) { 1484 case Intrinsic::riscv_vsseg2: 1485 case Intrinsic::riscv_vsseg3: 1486 case Intrinsic::riscv_vsseg4: 1487 case Intrinsic::riscv_vsseg5: 1488 case Intrinsic::riscv_vsseg6: 1489 case Intrinsic::riscv_vsseg7: 1490 case Intrinsic::riscv_vsseg8: { 1491 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false); 1492 return; 1493 } 1494 case Intrinsic::riscv_vsseg2_mask: 1495 case Intrinsic::riscv_vsseg3_mask: 1496 case Intrinsic::riscv_vsseg4_mask: 1497 case Intrinsic::riscv_vsseg5_mask: 1498 case Intrinsic::riscv_vsseg6_mask: 1499 case Intrinsic::riscv_vsseg7_mask: 1500 case Intrinsic::riscv_vsseg8_mask: { 1501 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false); 1502 return; 1503 } 1504 case Intrinsic::riscv_vssseg2: 1505 case Intrinsic::riscv_vssseg3: 1506 case Intrinsic::riscv_vssseg4: 1507 case Intrinsic::riscv_vssseg5: 1508 case Intrinsic::riscv_vssseg6: 1509 case Intrinsic::riscv_vssseg7: 1510 case Intrinsic::riscv_vssseg8: { 1511 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true); 1512 return; 1513 } 1514 case Intrinsic::riscv_vssseg2_mask: 1515 case Intrinsic::riscv_vssseg3_mask: 1516 case Intrinsic::riscv_vssseg4_mask: 1517 case Intrinsic::riscv_vssseg5_mask: 1518 case Intrinsic::riscv_vssseg6_mask: 1519 case Intrinsic::riscv_vssseg7_mask: 1520 case Intrinsic::riscv_vssseg8_mask: { 1521 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true); 1522 return; 1523 } 1524 case Intrinsic::riscv_vsoxseg2: 1525 case Intrinsic::riscv_vsoxseg3: 1526 case Intrinsic::riscv_vsoxseg4: 1527 case Intrinsic::riscv_vsoxseg5: 1528 case Intrinsic::riscv_vsoxseg6: 1529 case Intrinsic::riscv_vsoxseg7: 1530 case Intrinsic::riscv_vsoxseg8: 1531 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true); 1532 return; 1533 case Intrinsic::riscv_vsuxseg2: 1534 case Intrinsic::riscv_vsuxseg3: 1535 case Intrinsic::riscv_vsuxseg4: 1536 case Intrinsic::riscv_vsuxseg5: 1537 case Intrinsic::riscv_vsuxseg6: 1538 case Intrinsic::riscv_vsuxseg7: 1539 case Intrinsic::riscv_vsuxseg8: 1540 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false); 1541 return; 1542 case Intrinsic::riscv_vsoxseg2_mask: 1543 case Intrinsic::riscv_vsoxseg3_mask: 1544 case Intrinsic::riscv_vsoxseg4_mask: 1545 case Intrinsic::riscv_vsoxseg5_mask: 1546 case Intrinsic::riscv_vsoxseg6_mask: 1547 case Intrinsic::riscv_vsoxseg7_mask: 1548 case Intrinsic::riscv_vsoxseg8_mask: 1549 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true); 1550 return; 1551 case Intrinsic::riscv_vsuxseg2_mask: 1552 case Intrinsic::riscv_vsuxseg3_mask: 1553 case Intrinsic::riscv_vsuxseg4_mask: 1554 case Intrinsic::riscv_vsuxseg5_mask: 1555 case Intrinsic::riscv_vsuxseg6_mask: 1556 case Intrinsic::riscv_vsuxseg7_mask: 1557 case Intrinsic::riscv_vsuxseg8_mask: 1558 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false); 1559 return; 1560 case Intrinsic::riscv_vsoxei: 1561 case Intrinsic::riscv_vsoxei_mask: 1562 case Intrinsic::riscv_vsuxei: 1563 case Intrinsic::riscv_vsuxei_mask: { 1564 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask || 1565 IntNo == Intrinsic::riscv_vsuxei_mask; 1566 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei || 1567 IntNo == Intrinsic::riscv_vsoxei_mask; 1568 1569 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 1570 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1571 1572 unsigned CurOp = 2; 1573 SmallVector<SDValue, 8> Operands; 1574 Operands.push_back(Node->getOperand(CurOp++)); // Store value. 1575 1576 MVT IndexVT; 1577 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 1578 /*IsStridedOrIndexed*/ true, Operands, 1579 /*IsLoad=*/false, &IndexVT); 1580 1581 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 1582 "Element count mismatch"); 1583 1584 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1585 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 1586 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 1587 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 1588 report_fatal_error("The V extension does not support EEW=64 for index " 1589 "values when XLEN=32"); 1590 } 1591 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo( 1592 IsMasked, /*TU*/ false, IsOrdered, IndexLog2EEW, 1593 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL)); 1594 MachineSDNode *Store = 1595 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1596 1597 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1598 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 1599 1600 ReplaceNode(Node, Store); 1601 return; 1602 } 1603 case Intrinsic::riscv_vsm: 1604 case Intrinsic::riscv_vse: 1605 case Intrinsic::riscv_vse_mask: 1606 case Intrinsic::riscv_vsse: 1607 case Intrinsic::riscv_vsse_mask: { 1608 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask || 1609 IntNo == Intrinsic::riscv_vsse_mask; 1610 bool IsStrided = 1611 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask; 1612 1613 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 1614 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1615 1616 unsigned CurOp = 2; 1617 SmallVector<SDValue, 8> Operands; 1618 Operands.push_back(Node->getOperand(CurOp++)); // Store value. 1619 1620 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 1621 Operands); 1622 1623 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1624 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo( 1625 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL)); 1626 MachineSDNode *Store = 1627 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1628 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1629 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 1630 1631 ReplaceNode(Node, Store); 1632 return; 1633 } 1634 } 1635 break; 1636 } 1637 case ISD::BITCAST: { 1638 MVT SrcVT = Node->getOperand(0).getSimpleValueType(); 1639 // Just drop bitcasts between vectors if both are fixed or both are 1640 // scalable. 1641 if ((VT.isScalableVector() && SrcVT.isScalableVector()) || 1642 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) { 1643 ReplaceUses(SDValue(Node, 0), Node->getOperand(0)); 1644 CurDAG->RemoveDeadNode(Node); 1645 return; 1646 } 1647 break; 1648 } 1649 case ISD::INSERT_SUBVECTOR: { 1650 SDValue V = Node->getOperand(0); 1651 SDValue SubV = Node->getOperand(1); 1652 SDLoc DL(SubV); 1653 auto Idx = Node->getConstantOperandVal(2); 1654 MVT SubVecVT = SubV.getSimpleValueType(); 1655 1656 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering(); 1657 MVT SubVecContainerVT = SubVecVT; 1658 // Establish the correct scalable-vector types for any fixed-length type. 1659 if (SubVecVT.isFixedLengthVector()) 1660 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT); 1661 if (VT.isFixedLengthVector()) 1662 VT = TLI.getContainerForFixedLengthVector(VT); 1663 1664 const auto *TRI = Subtarget->getRegisterInfo(); 1665 unsigned SubRegIdx; 1666 std::tie(SubRegIdx, Idx) = 1667 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 1668 VT, SubVecContainerVT, Idx, TRI); 1669 1670 // If the Idx hasn't been completely eliminated then this is a subvector 1671 // insert which doesn't naturally align to a vector register. These must 1672 // be handled using instructions to manipulate the vector registers. 1673 if (Idx != 0) 1674 break; 1675 1676 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT); 1677 bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 || 1678 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 || 1679 SubVecLMUL == RISCVII::VLMUL::LMUL_F8; 1680 (void)IsSubVecPartReg; // Silence unused variable warning without asserts. 1681 assert((!IsSubVecPartReg || V.isUndef()) && 1682 "Expecting lowering to have created legal INSERT_SUBVECTORs when " 1683 "the subvector is smaller than a full-sized register"); 1684 1685 // If we haven't set a SubRegIdx, then we must be going between 1686 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy. 1687 if (SubRegIdx == RISCV::NoSubRegister) { 1688 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(VT); 1689 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) == 1690 InRegClassID && 1691 "Unexpected subvector extraction"); 1692 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT); 1693 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, 1694 DL, VT, SubV, RC); 1695 ReplaceNode(Node, NewNode); 1696 return; 1697 } 1698 1699 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV); 1700 ReplaceNode(Node, Insert.getNode()); 1701 return; 1702 } 1703 case ISD::EXTRACT_SUBVECTOR: { 1704 SDValue V = Node->getOperand(0); 1705 auto Idx = Node->getConstantOperandVal(1); 1706 MVT InVT = V.getSimpleValueType(); 1707 SDLoc DL(V); 1708 1709 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering(); 1710 MVT SubVecContainerVT = VT; 1711 // Establish the correct scalable-vector types for any fixed-length type. 1712 if (VT.isFixedLengthVector()) 1713 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT); 1714 if (InVT.isFixedLengthVector()) 1715 InVT = TLI.getContainerForFixedLengthVector(InVT); 1716 1717 const auto *TRI = Subtarget->getRegisterInfo(); 1718 unsigned SubRegIdx; 1719 std::tie(SubRegIdx, Idx) = 1720 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 1721 InVT, SubVecContainerVT, Idx, TRI); 1722 1723 // If the Idx hasn't been completely eliminated then this is a subvector 1724 // extract which doesn't naturally align to a vector register. These must 1725 // be handled using instructions to manipulate the vector registers. 1726 if (Idx != 0) 1727 break; 1728 1729 // If we haven't set a SubRegIdx, then we must be going between 1730 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy. 1731 if (SubRegIdx == RISCV::NoSubRegister) { 1732 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT); 1733 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) == 1734 InRegClassID && 1735 "Unexpected subvector extraction"); 1736 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT); 1737 SDNode *NewNode = 1738 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC); 1739 ReplaceNode(Node, NewNode); 1740 return; 1741 } 1742 1743 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V); 1744 ReplaceNode(Node, Extract.getNode()); 1745 return; 1746 } 1747 case ISD::SPLAT_VECTOR: 1748 case RISCVISD::VMV_S_X_VL: 1749 case RISCVISD::VFMV_S_F_VL: 1750 case RISCVISD::VMV_V_X_VL: 1751 case RISCVISD::VFMV_V_F_VL: { 1752 // Try to match splat of a scalar load to a strided load with stride of x0. 1753 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL || 1754 Node->getOpcode() == RISCVISD::VFMV_S_F_VL; 1755 bool HasPassthruOperand = Node->getOpcode() != ISD::SPLAT_VECTOR; 1756 if (HasPassthruOperand && !Node->getOperand(0).isUndef()) 1757 break; 1758 SDValue Src = HasPassthruOperand ? Node->getOperand(1) : Node->getOperand(0); 1759 auto *Ld = dyn_cast<LoadSDNode>(Src); 1760 if (!Ld) 1761 break; 1762 EVT MemVT = Ld->getMemoryVT(); 1763 // The memory VT should be the same size as the element type. 1764 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize()) 1765 break; 1766 if (!IsProfitableToFold(Src, Node, Node) || 1767 !IsLegalToFold(Src, Node, Node, TM.getOptLevel())) 1768 break; 1769 1770 SDValue VL; 1771 if (Node->getOpcode() == ISD::SPLAT_VECTOR) 1772 VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT); 1773 else if (IsScalarMove) { 1774 // We could deal with more VL if we update the VSETVLI insert pass to 1775 // avoid introducing more VSETVLI. 1776 if (!isOneConstant(Node->getOperand(2))) 1777 break; 1778 selectVLOp(Node->getOperand(2), VL); 1779 } else 1780 selectVLOp(Node->getOperand(2), VL); 1781 1782 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1783 SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT); 1784 1785 SDValue Operands[] = {Ld->getBasePtr(), 1786 CurDAG->getRegister(RISCV::X0, XLenVT), VL, SEW, 1787 Ld->getChain()}; 1788 1789 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1790 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo( 1791 /*IsMasked*/ false, /*IsTU*/ false, /*IsStrided*/ true, /*FF*/ false, 1792 Log2SEW, static_cast<unsigned>(LMUL)); 1793 MachineSDNode *Load = 1794 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1795 1796 CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()}); 1797 1798 ReplaceNode(Node, Load); 1799 return; 1800 } 1801 } 1802 1803 // Select the default instruction. 1804 SelectCode(Node); 1805 } 1806 1807 bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand( 1808 const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { 1809 switch (ConstraintID) { 1810 case InlineAsm::Constraint_m: 1811 // We just support simple memory operands that have a single address 1812 // operand and need no special handling. 1813 OutOps.push_back(Op); 1814 return false; 1815 case InlineAsm::Constraint_A: 1816 OutOps.push_back(Op); 1817 return false; 1818 default: 1819 break; 1820 } 1821 1822 return true; 1823 } 1824 1825 bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base, 1826 SDValue &Offset) { 1827 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { 1828 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT()); 1829 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT()); 1830 return true; 1831 } 1832 1833 return false; 1834 } 1835 1836 // Select a frame index and an optional immediate offset from an ADD or OR. 1837 bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base, 1838 SDValue &Offset) { 1839 if (SelectAddrFrameIndex(Addr, Base, Offset)) 1840 return true; 1841 1842 if (!CurDAG->isBaseWithConstantOffset(Addr)) 1843 return false; 1844 1845 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) { 1846 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue(); 1847 if (isInt<12>(CVal)) { 1848 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), 1849 Subtarget->getXLenVT()); 1850 Offset = CurDAG->getTargetConstant(CVal, SDLoc(Addr), 1851 Subtarget->getXLenVT()); 1852 return true; 1853 } 1854 } 1855 1856 return false; 1857 } 1858 1859 bool RISCVDAGToDAGISel::SelectBaseAddr(SDValue Addr, SDValue &Base) { 1860 // If this is FrameIndex, select it directly. Otherwise just let it get 1861 // selected to a register independently. 1862 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) 1863 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT()); 1864 else 1865 Base = Addr; 1866 return true; 1867 } 1868 1869 bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base, 1870 SDValue &Offset) { 1871 if (SelectAddrFrameIndex(Addr, Base, Offset)) 1872 return true; 1873 1874 SDLoc DL(Addr); 1875 MVT VT = Addr.getSimpleValueType(); 1876 1877 if (CurDAG->isBaseWithConstantOffset(Addr)) { 1878 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue(); 1879 if (isInt<12>(CVal)) { 1880 Base = Addr.getOperand(0); 1881 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base)) 1882 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT); 1883 Offset = CurDAG->getTargetConstant(CVal, DL, VT); 1884 return true; 1885 } 1886 } 1887 1888 // Handle ADD with large immediates. 1889 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) { 1890 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue(); 1891 assert(!isInt<12>(CVal) && "simm12 not already handled?"); 1892 1893 if (isInt<12>(CVal / 2) && isInt<12>(CVal - CVal / 2)) { 1894 // We can use an ADDI for part of the offset and fold the rest into the 1895 // load/store. This mirrors the AddiPair PatFrag in RISCVInstrInfo.td. 1896 int64_t Adj = CVal < 0 ? -2048 : 2047; 1897 Base = SDValue( 1898 CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0), 1899 CurDAG->getTargetConstant(Adj, DL, VT)), 1900 0); 1901 Offset = CurDAG->getTargetConstant(CVal - Adj, DL, VT); 1902 return true; 1903 } 1904 } 1905 1906 Base = Addr; 1907 Offset = CurDAG->getTargetConstant(0, DL, VT); 1908 return true; 1909 } 1910 1911 bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth, 1912 SDValue &ShAmt) { 1913 // Shift instructions on RISCV only read the lower 5 or 6 bits of the shift 1914 // amount. If there is an AND on the shift amount, we can bypass it if it 1915 // doesn't affect any of those bits. 1916 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) { 1917 const APInt &AndMask = N->getConstantOperandAPInt(1); 1918 1919 // Since the max shift amount is a power of 2 we can subtract 1 to make a 1920 // mask that covers the bits needed to represent all shift amounts. 1921 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!"); 1922 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1); 1923 1924 if (ShMask.isSubsetOf(AndMask)) { 1925 ShAmt = N.getOperand(0); 1926 return true; 1927 } 1928 1929 // SimplifyDemandedBits may have optimized the mask so try restoring any 1930 // bits that are known zero. 1931 KnownBits Known = CurDAG->computeKnownBits(N->getOperand(0)); 1932 if (ShMask.isSubsetOf(AndMask | Known.Zero)) { 1933 ShAmt = N.getOperand(0); 1934 return true; 1935 } 1936 } else if (N.getOpcode() == ISD::SUB && 1937 isa<ConstantSDNode>(N.getOperand(0))) { 1938 uint64_t Imm = N.getConstantOperandVal(0); 1939 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to 1940 // generate a NEG instead of a SUB of a constant. 1941 if (Imm != 0 && Imm % ShiftWidth == 0) { 1942 SDLoc DL(N); 1943 EVT VT = N.getValueType(); 1944 SDValue Zero = CurDAG->getRegister(RISCV::X0, VT); 1945 unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB; 1946 MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero, 1947 N.getOperand(1)); 1948 ShAmt = SDValue(Neg, 0); 1949 return true; 1950 } 1951 } 1952 1953 ShAmt = N; 1954 return true; 1955 } 1956 1957 bool RISCVDAGToDAGISel::selectSExti32(SDValue N, SDValue &Val) { 1958 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG && 1959 cast<VTSDNode>(N.getOperand(1))->getVT() == MVT::i32) { 1960 Val = N.getOperand(0); 1961 return true; 1962 } 1963 MVT VT = N.getSimpleValueType(); 1964 if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - 32)) { 1965 Val = N; 1966 return true; 1967 } 1968 1969 return false; 1970 } 1971 1972 bool RISCVDAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) { 1973 if (N.getOpcode() == ISD::AND) { 1974 auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); 1975 if (C && C->getZExtValue() == UINT64_C(0xFFFFFFFF)) { 1976 Val = N.getOperand(0); 1977 return true; 1978 } 1979 } 1980 MVT VT = N.getSimpleValueType(); 1981 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), 32); 1982 if (CurDAG->MaskedValueIsZero(N, Mask)) { 1983 Val = N; 1984 return true; 1985 } 1986 1987 return false; 1988 } 1989 1990 // Return true if all users of this SDNode* only consume the lower \p Bits. 1991 // This can be used to form W instructions for add/sub/mul/shl even when the 1992 // root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if 1993 // SimplifyDemandedBits has made it so some users see a sext_inreg and some 1994 // don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave 1995 // the add/sub/mul/shl to become non-W instructions. By checking the users we 1996 // may be able to use a W instruction and CSE with the other instruction if 1997 // this has happened. We could try to detect that the CSE opportunity exists 1998 // before doing this, but that would be more complicated. 1999 // TODO: Does this need to look through AND/OR/XOR to their users to find more 2000 // opportunities. 2001 bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const { 2002 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB || 2003 Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL || 2004 Node->getOpcode() == ISD::SRL || 2005 Node->getOpcode() == ISD::SIGN_EXTEND_INREG || 2006 Node->getOpcode() == RISCVISD::GREV || 2007 Node->getOpcode() == RISCVISD::GORC || 2008 isa<ConstantSDNode>(Node)) && 2009 "Unexpected opcode"); 2010 2011 for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) { 2012 SDNode *User = *UI; 2013 // Users of this node should have already been instruction selected 2014 if (!User->isMachineOpcode()) 2015 return false; 2016 2017 // TODO: Add more opcodes? 2018 switch (User->getMachineOpcode()) { 2019 default: 2020 return false; 2021 case RISCV::ADDW: 2022 case RISCV::ADDIW: 2023 case RISCV::SUBW: 2024 case RISCV::MULW: 2025 case RISCV::SLLW: 2026 case RISCV::SLLIW: 2027 case RISCV::SRAW: 2028 case RISCV::SRAIW: 2029 case RISCV::SRLW: 2030 case RISCV::SRLIW: 2031 case RISCV::DIVW: 2032 case RISCV::DIVUW: 2033 case RISCV::REMW: 2034 case RISCV::REMUW: 2035 case RISCV::ROLW: 2036 case RISCV::RORW: 2037 case RISCV::RORIW: 2038 case RISCV::CLZW: 2039 case RISCV::CTZW: 2040 case RISCV::CPOPW: 2041 case RISCV::SLLI_UW: 2042 case RISCV::FMV_W_X: 2043 case RISCV::FCVT_H_W: 2044 case RISCV::FCVT_H_WU: 2045 case RISCV::FCVT_S_W: 2046 case RISCV::FCVT_S_WU: 2047 case RISCV::FCVT_D_W: 2048 case RISCV::FCVT_D_WU: 2049 if (Bits < 32) 2050 return false; 2051 break; 2052 case RISCV::SLLI: 2053 // SLLI only uses the lower (XLen - ShAmt) bits. 2054 if (Bits < Subtarget->getXLen() - User->getConstantOperandVal(1)) 2055 return false; 2056 break; 2057 case RISCV::ANDI: 2058 if (Bits < (64 - countLeadingZeros(User->getConstantOperandVal(1)))) 2059 return false; 2060 break; 2061 case RISCV::SEXT_B: 2062 if (Bits < 8) 2063 return false; 2064 break; 2065 case RISCV::SEXT_H: 2066 case RISCV::FMV_H_X: 2067 case RISCV::ZEXT_H_RV32: 2068 case RISCV::ZEXT_H_RV64: 2069 if (Bits < 16) 2070 return false; 2071 break; 2072 case RISCV::ADD_UW: 2073 case RISCV::SH1ADD_UW: 2074 case RISCV::SH2ADD_UW: 2075 case RISCV::SH3ADD_UW: 2076 // The first operand to add.uw/shXadd.uw is implicitly zero extended from 2077 // 32 bits. 2078 if (UI.getOperandNo() != 0 || Bits < 32) 2079 return false; 2080 break; 2081 case RISCV::SB: 2082 if (UI.getOperandNo() != 0 || Bits < 8) 2083 return false; 2084 break; 2085 case RISCV::SH: 2086 if (UI.getOperandNo() != 0 || Bits < 16) 2087 return false; 2088 break; 2089 case RISCV::SW: 2090 if (UI.getOperandNo() != 0 || Bits < 32) 2091 return false; 2092 break; 2093 } 2094 } 2095 2096 return true; 2097 } 2098 2099 // Select VL as a 5 bit immediate or a value that will become a register. This 2100 // allows us to choose betwen VSETIVLI or VSETVLI later. 2101 bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) { 2102 auto *C = dyn_cast<ConstantSDNode>(N); 2103 if (C && isUInt<5>(C->getZExtValue())) { 2104 VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N), 2105 N->getValueType(0)); 2106 } else if (C && C->isAllOnesValue()) { 2107 // Treat all ones as VLMax. 2108 VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N), 2109 N->getValueType(0)); 2110 } else if (isa<RegisterSDNode>(N) && 2111 cast<RegisterSDNode>(N)->getReg() == RISCV::X0) { 2112 // All our VL operands use an operand that allows GPRNoX0 or an immediate 2113 // as the register class. Convert X0 to a special immediate to pass the 2114 // MachineVerifier. This is recognized specially by the vsetvli insertion 2115 // pass. 2116 VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N), 2117 N->getValueType(0)); 2118 } else { 2119 VL = N; 2120 } 2121 2122 return true; 2123 } 2124 2125 bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) { 2126 if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef()) 2127 return false; 2128 SplatVal = N.getOperand(1); 2129 return true; 2130 } 2131 2132 using ValidateFn = bool (*)(int64_t); 2133 2134 static bool selectVSplatSimmHelper(SDValue N, SDValue &SplatVal, 2135 SelectionDAG &DAG, 2136 const RISCVSubtarget &Subtarget, 2137 ValidateFn ValidateImm) { 2138 if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() || 2139 !isa<ConstantSDNode>(N.getOperand(1))) 2140 return false; 2141 2142 int64_t SplatImm = 2143 cast<ConstantSDNode>(N.getOperand(1))->getSExtValue(); 2144 2145 // The semantics of RISCVISD::VMV_V_X_VL is that when the operand 2146 // type is wider than the resulting vector element type: an implicit 2147 // truncation first takes place. Therefore, perform a manual 2148 // truncation/sign-extension in order to ignore any truncated bits and catch 2149 // any zero-extended immediate. 2150 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first 2151 // sign-extending to (XLenVT -1). 2152 MVT XLenVT = Subtarget.getXLenVT(); 2153 assert(XLenVT == N.getOperand(1).getSimpleValueType() && 2154 "Unexpected splat operand type"); 2155 MVT EltVT = N.getSimpleValueType().getVectorElementType(); 2156 if (EltVT.bitsLT(XLenVT)) 2157 SplatImm = SignExtend64(SplatImm, EltVT.getSizeInBits()); 2158 2159 if (!ValidateImm(SplatImm)) 2160 return false; 2161 2162 SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), XLenVT); 2163 return true; 2164 } 2165 2166 bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) { 2167 return selectVSplatSimmHelper(N, SplatVal, *CurDAG, *Subtarget, 2168 [](int64_t Imm) { return isInt<5>(Imm); }); 2169 } 2170 2171 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) { 2172 return selectVSplatSimmHelper( 2173 N, SplatVal, *CurDAG, *Subtarget, 2174 [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; }); 2175 } 2176 2177 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N, 2178 SDValue &SplatVal) { 2179 return selectVSplatSimmHelper( 2180 N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) { 2181 return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16); 2182 }); 2183 } 2184 2185 bool RISCVDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &SplatVal) { 2186 if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() || 2187 !isa<ConstantSDNode>(N.getOperand(1))) 2188 return false; 2189 2190 int64_t SplatImm = 2191 cast<ConstantSDNode>(N.getOperand(1))->getSExtValue(); 2192 2193 if (!isUInt<5>(SplatImm)) 2194 return false; 2195 2196 SplatVal = 2197 CurDAG->getTargetConstant(SplatImm, SDLoc(N), Subtarget->getXLenVT()); 2198 2199 return true; 2200 } 2201 2202 bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width, 2203 SDValue &Imm) { 2204 if (auto *C = dyn_cast<ConstantSDNode>(N)) { 2205 int64_t ImmVal = SignExtend64(C->getSExtValue(), Width); 2206 2207 if (!isInt<5>(ImmVal)) 2208 return false; 2209 2210 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT()); 2211 return true; 2212 } 2213 2214 return false; 2215 } 2216 2217 // Merge an ADDI into the offset of a load/store instruction where possible. 2218 // (load (addi base, off1), off2) -> (load base, off1+off2) 2219 // (store val, (addi base, off1), off2) -> (store val, base, off1+off2) 2220 // (load (add base, (addi src, off1)), off2) 2221 // -> (load (add base, src), off1+off2) 2222 // (store val, (add base, (addi src, off1)), off2) 2223 // -> (store val, (add base, src), off1+off2) 2224 // This is possible when off1+off2 fits a 12-bit immediate. 2225 bool RISCVDAGToDAGISel::doPeepholeLoadStoreADDI(SDNode *N) { 2226 unsigned OffsetOpIdx, BaseOpIdx; 2227 if (!hasMemOffset(N, BaseOpIdx, OffsetOpIdx)) 2228 return false; 2229 2230 if (!isa<ConstantSDNode>(N->getOperand(OffsetOpIdx))) 2231 return false; 2232 2233 SDValue Base = N->getOperand(BaseOpIdx); 2234 2235 if (!Base.isMachineOpcode()) 2236 return false; 2237 2238 if (Base.getMachineOpcode() == RISCV::ADDI) { 2239 // If the base is an ADDI, we can merge it in to the load/store. 2240 } else if (Base.getMachineOpcode() == RISCV::ADDIW && 2241 isa<ConstantSDNode>(Base.getOperand(1)) && 2242 Base.getOperand(0).isMachineOpcode() && 2243 Base.getOperand(0).getMachineOpcode() == RISCV::LUI && 2244 isa<ConstantSDNode>(Base.getOperand(0).getOperand(0))) { 2245 // ADDIW can be merged if it's part of LUI+ADDIW constant materialization 2246 // and LUI+ADDI would have produced the same result. This is true for all 2247 // simm32 values except 0x7ffff800-0x7fffffff. 2248 int64_t Offset = 2249 SignExtend64<32>(Base.getOperand(0).getConstantOperandVal(0) << 12); 2250 Offset += cast<ConstantSDNode>(Base.getOperand(1))->getSExtValue(); 2251 if (!isInt<32>(Offset)) 2252 return false; 2253 } else 2254 return false; 2255 2256 SDValue ImmOperand = Base.getOperand(1); 2257 uint64_t Offset2 = N->getConstantOperandVal(OffsetOpIdx); 2258 2259 if (auto *Const = dyn_cast<ConstantSDNode>(ImmOperand)) { 2260 int64_t Offset1 = Const->getSExtValue(); 2261 int64_t CombinedOffset = Offset1 + Offset2; 2262 if (!isInt<12>(CombinedOffset)) 2263 return false; 2264 ImmOperand = CurDAG->getTargetConstant(CombinedOffset, SDLoc(ImmOperand), 2265 ImmOperand.getValueType()); 2266 } else if (auto *GA = dyn_cast<GlobalAddressSDNode>(ImmOperand)) { 2267 // If the off1 in (addi base, off1) is a global variable's address (its 2268 // low part, really), then we can rely on the alignment of that variable 2269 // to provide a margin of safety before off1 can overflow the 12 bits. 2270 // Check if off2 falls within that margin; if so off1+off2 can't overflow. 2271 const DataLayout &DL = CurDAG->getDataLayout(); 2272 Align Alignment = GA->getGlobal()->getPointerAlignment(DL); 2273 if (Offset2 != 0 && Alignment <= Offset2) 2274 return false; 2275 int64_t Offset1 = GA->getOffset(); 2276 int64_t CombinedOffset = Offset1 + Offset2; 2277 ImmOperand = CurDAG->getTargetGlobalAddress( 2278 GA->getGlobal(), SDLoc(ImmOperand), ImmOperand.getValueType(), 2279 CombinedOffset, GA->getTargetFlags()); 2280 } else if (auto *CP = dyn_cast<ConstantPoolSDNode>(ImmOperand)) { 2281 // Ditto. 2282 Align Alignment = CP->getAlign(); 2283 if (Offset2 != 0 && Alignment <= Offset2) 2284 return false; 2285 int64_t Offset1 = CP->getOffset(); 2286 int64_t CombinedOffset = Offset1 + Offset2; 2287 ImmOperand = CurDAG->getTargetConstantPool( 2288 CP->getConstVal(), ImmOperand.getValueType(), CP->getAlign(), 2289 CombinedOffset, CP->getTargetFlags()); 2290 } else { 2291 return false; 2292 } 2293 2294 LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: "); 2295 LLVM_DEBUG(Base->dump(CurDAG)); 2296 LLVM_DEBUG(dbgs() << "\nN: "); 2297 LLVM_DEBUG(N->dump(CurDAG)); 2298 LLVM_DEBUG(dbgs() << "\n"); 2299 2300 // Modify the offset operand of the load/store. 2301 if (BaseOpIdx == 0) { // Load 2302 N = CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand, 2303 N->getOperand(2)); 2304 } else { // Store 2305 N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0), 2306 ImmOperand, N->getOperand(3)); 2307 } 2308 2309 return true; 2310 } 2311 2312 // Try to remove sext.w if the input is a W instruction or can be made into 2313 // a W instruction cheaply. 2314 bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) { 2315 // Look for the sext.w pattern, addiw rd, rs1, 0. 2316 if (N->getMachineOpcode() != RISCV::ADDIW || 2317 !isNullConstant(N->getOperand(1))) 2318 return false; 2319 2320 SDValue N0 = N->getOperand(0); 2321 if (!N0.isMachineOpcode()) 2322 return false; 2323 2324 switch (N0.getMachineOpcode()) { 2325 default: 2326 break; 2327 case RISCV::ADD: 2328 case RISCV::ADDI: 2329 case RISCV::SUB: 2330 case RISCV::MUL: 2331 case RISCV::SLLI: { 2332 // Convert sext.w+add/sub/mul to their W instructions. This will create 2333 // a new independent instruction. This improves latency. 2334 unsigned Opc; 2335 switch (N0.getMachineOpcode()) { 2336 default: 2337 llvm_unreachable("Unexpected opcode!"); 2338 case RISCV::ADD: Opc = RISCV::ADDW; break; 2339 case RISCV::ADDI: Opc = RISCV::ADDIW; break; 2340 case RISCV::SUB: Opc = RISCV::SUBW; break; 2341 case RISCV::MUL: Opc = RISCV::MULW; break; 2342 case RISCV::SLLI: Opc = RISCV::SLLIW; break; 2343 } 2344 2345 SDValue N00 = N0.getOperand(0); 2346 SDValue N01 = N0.getOperand(1); 2347 2348 // Shift amount needs to be uimm5. 2349 if (N0.getMachineOpcode() == RISCV::SLLI && 2350 !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue())) 2351 break; 2352 2353 SDNode *Result = 2354 CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), 2355 N00, N01); 2356 ReplaceUses(N, Result); 2357 return true; 2358 } 2359 case RISCV::ADDW: 2360 case RISCV::ADDIW: 2361 case RISCV::SUBW: 2362 case RISCV::MULW: 2363 case RISCV::SLLIW: 2364 case RISCV::GREVIW: 2365 case RISCV::GORCIW: 2366 // Result is already sign extended just remove the sext.w. 2367 // NOTE: We only handle the nodes that are selected with hasAllWUsers. 2368 ReplaceUses(N, N0.getNode()); 2369 return true; 2370 } 2371 2372 return false; 2373 } 2374 2375 // Optimize masked RVV pseudo instructions with a known all-ones mask to their 2376 // corresponding "unmasked" pseudo versions. The mask we're interested in will 2377 // take the form of a V0 physical register operand, with a glued 2378 // register-setting instruction. 2379 bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) { 2380 const RISCV::RISCVMaskedPseudoInfo *I = 2381 RISCV::getMaskedPseudoInfo(N->getMachineOpcode()); 2382 if (!I) 2383 return false; 2384 2385 unsigned MaskOpIdx = I->MaskOpIdx; 2386 2387 // Check that we're using V0 as a mask register. 2388 if (!isa<RegisterSDNode>(N->getOperand(MaskOpIdx)) || 2389 cast<RegisterSDNode>(N->getOperand(MaskOpIdx))->getReg() != RISCV::V0) 2390 return false; 2391 2392 // The glued user defines V0. 2393 const auto *Glued = N->getGluedNode(); 2394 2395 if (!Glued || Glued->getOpcode() != ISD::CopyToReg) 2396 return false; 2397 2398 // Check that we're defining V0 as a mask register. 2399 if (!isa<RegisterSDNode>(Glued->getOperand(1)) || 2400 cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0) 2401 return false; 2402 2403 // Check the instruction defining V0; it needs to be a VMSET pseudo. 2404 SDValue MaskSetter = Glued->getOperand(2); 2405 2406 const auto IsVMSet = [](unsigned Opc) { 2407 return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 || 2408 Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 || 2409 Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 || 2410 Opc == RISCV::PseudoVMSET_M_B8; 2411 }; 2412 2413 // TODO: Check that the VMSET is the expected bitwidth? The pseudo has 2414 // undefined behaviour if it's the wrong bitwidth, so we could choose to 2415 // assume that it's all-ones? Same applies to its VL. 2416 if (!MaskSetter->isMachineOpcode() || !IsVMSet(MaskSetter.getMachineOpcode())) 2417 return false; 2418 2419 // Retrieve the tail policy operand index, if any. 2420 Optional<unsigned> TailPolicyOpIdx; 2421 const RISCVInstrInfo &TII = *Subtarget->getInstrInfo(); 2422 const MCInstrDesc &MaskedMCID = TII.get(N->getMachineOpcode()); 2423 2424 bool IsTA = true; 2425 if (RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags)) { 2426 // The last operand of the pseudo is the policy op, but we might have a 2427 // Glue operand last. We might also have a chain. 2428 TailPolicyOpIdx = N->getNumOperands() - 1; 2429 if (N->getOperand(*TailPolicyOpIdx).getValueType() == MVT::Glue) 2430 (*TailPolicyOpIdx)--; 2431 if (N->getOperand(*TailPolicyOpIdx).getValueType() == MVT::Other) 2432 (*TailPolicyOpIdx)--; 2433 2434 if (!(N->getConstantOperandVal(*TailPolicyOpIdx) & 2435 RISCVII::TAIL_AGNOSTIC)) { 2436 // Keep the true-masked instruction when there is no unmasked TU 2437 // instruction 2438 if (I->UnmaskedTUPseudo == I->MaskedPseudo && !N->getOperand(0).isUndef()) 2439 return false; 2440 // We can't use TA if the tie-operand is not IMPLICIT_DEF 2441 if (!N->getOperand(0).isUndef()) 2442 IsTA = false; 2443 } 2444 } 2445 2446 unsigned Opc = IsTA ? I->UnmaskedPseudo : I->UnmaskedTUPseudo; 2447 2448 // Check that we're dropping the mask operand and any policy operand 2449 // when we transform to this unmasked pseudo. Additionally, if this insturtion 2450 // is tail agnostic, the unmasked instruction should not have a merge op. 2451 uint64_t TSFlags = TII.get(Opc).TSFlags; 2452 assert((IsTA != RISCVII::hasMergeOp(TSFlags)) && 2453 RISCVII::hasDummyMaskOp(TSFlags) && 2454 !RISCVII::hasVecPolicyOp(TSFlags) && 2455 "Unexpected pseudo to transform to"); 2456 (void)TSFlags; 2457 2458 SmallVector<SDValue, 8> Ops; 2459 // Skip the merge operand at index 0 if IsTA 2460 for (unsigned I = IsTA, E = N->getNumOperands(); I != E; I++) { 2461 // Skip the mask, the policy, and the Glue. 2462 SDValue Op = N->getOperand(I); 2463 if (I == MaskOpIdx || I == TailPolicyOpIdx || 2464 Op.getValueType() == MVT::Glue) 2465 continue; 2466 Ops.push_back(Op); 2467 } 2468 2469 // Transitively apply any node glued to our new node. 2470 if (auto *TGlued = Glued->getGluedNode()) 2471 Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1)); 2472 2473 SDNode *Result = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops); 2474 ReplaceUses(N, Result); 2475 2476 return true; 2477 } 2478 2479 // This pass converts a legalized DAG into a RISCV-specific DAG, ready 2480 // for instruction scheduling. 2481 FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM, 2482 CodeGenOpt::Level OptLevel) { 2483 return new RISCVDAGToDAGISel(TM, OptLevel); 2484 } 2485