1 //===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISCV ------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines an instruction selector for the RISCV target. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "RISCVISelDAGToDAG.h" 14 #include "MCTargetDesc/RISCVMCTargetDesc.h" 15 #include "MCTargetDesc/RISCVMatInt.h" 16 #include "RISCVISelLowering.h" 17 #include "RISCVMachineFunctionInfo.h" 18 #include "llvm/CodeGen/MachineFrameInfo.h" 19 #include "llvm/IR/IntrinsicsRISCV.h" 20 #include "llvm/Support/Alignment.h" 21 #include "llvm/Support/Debug.h" 22 #include "llvm/Support/KnownBits.h" 23 #include "llvm/Support/MathExtras.h" 24 #include "llvm/Support/raw_ostream.h" 25 26 using namespace llvm; 27 28 #define DEBUG_TYPE "riscv-isel" 29 30 namespace llvm { 31 namespace RISCV { 32 #define GET_RISCVVSSEGTable_IMPL 33 #define GET_RISCVVLSEGTable_IMPL 34 #define GET_RISCVVLXSEGTable_IMPL 35 #define GET_RISCVVSXSEGTable_IMPL 36 #define GET_RISCVVLETable_IMPL 37 #define GET_RISCVVSETable_IMPL 38 #define GET_RISCVVLXTable_IMPL 39 #define GET_RISCVVSXTable_IMPL 40 #define GET_RISCVMaskedPseudosTable_IMPL 41 #include "RISCVGenSearchableTables.inc" 42 } // namespace RISCV 43 } // namespace llvm 44 45 void RISCVDAGToDAGISel::PreprocessISelDAG() { 46 for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), 47 E = CurDAG->allnodes_end(); 48 I != E;) { 49 SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues. 50 51 // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point 52 // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden. 
53 if (N->getOpcode() == ISD::SPLAT_VECTOR) { 54 MVT VT = N->getSimpleValueType(0); 55 unsigned Opc = 56 VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL; 57 SDLoc DL(N); 58 SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT()); 59 SDValue Result = CurDAG->getNode(Opc, DL, VT, N->getOperand(0), VL); 60 61 --I; 62 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); 63 ++I; 64 CurDAG->DeleteNode(N); 65 continue; 66 } 67 68 // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector 69 // load. Done after lowering and combining so that we have a chance to 70 // optimize this to VMV_V_X_VL when the upper bits aren't needed. 71 if (N->getOpcode() != RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) 72 continue; 73 74 assert(N->getNumOperands() == 3 && "Unexpected number of operands"); 75 MVT VT = N->getSimpleValueType(0); 76 SDValue Lo = N->getOperand(0); 77 SDValue Hi = N->getOperand(1); 78 SDValue VL = N->getOperand(2); 79 assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() && 80 Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 && 81 "Unexpected VTs!"); 82 MachineFunction &MF = CurDAG->getMachineFunction(); 83 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>(); 84 SDLoc DL(N); 85 86 // We use the same frame index we use for moving two i32s into 64-bit FPR. 87 // This is an analogous operation. 
88 int FI = FuncInfo->getMoveF64FrameIndex(MF); 89 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 90 const TargetLowering &TLI = CurDAG->getTargetLoweringInfo(); 91 SDValue StackSlot = 92 CurDAG->getFrameIndex(FI, TLI.getPointerTy(CurDAG->getDataLayout())); 93 94 SDValue Chain = CurDAG->getEntryNode(); 95 Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8)); 96 97 SDValue OffsetSlot = 98 CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), DL); 99 Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4), 100 Align(8)); 101 102 Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); 103 104 SDVTList VTs = CurDAG->getVTList({VT, MVT::Other}); 105 SDValue IntID = 106 CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64); 107 SDValue Ops[] = {Chain, 108 IntID, 109 CurDAG->getUNDEF(VT), 110 StackSlot, 111 CurDAG->getRegister(RISCV::X0, MVT::i64), 112 VL}; 113 114 SDValue Result = CurDAG->getMemIntrinsicNode( 115 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MVT::i64, MPI, Align(8), 116 MachineMemOperand::MOLoad); 117 118 // We're about to replace all uses of the SPLAT_VECTOR_SPLIT_I64 with the 119 // vlse we created. This will cause general havok on the dag because 120 // anything below the conversion could be folded into other existing nodes. 121 // To avoid invalidating 'I', back it up to the convert node. 122 --I; 123 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); 124 125 // Now that we did that, the node is dead. Increment the iterator to the 126 // next node to process, then delete N. 127 ++I; 128 CurDAG->DeleteNode(N); 129 } 130 } 131 132 void RISCVDAGToDAGISel::PostprocessISelDAG() { 133 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); 134 135 bool MadeChange = false; 136 while (Position != CurDAG->allnodes_begin()) { 137 SDNode *N = &*--Position; 138 // Skip dead nodes and any non-machine opcodes. 
139 if (N->use_empty() || !N->isMachineOpcode()) 140 continue; 141 142 MadeChange |= doPeepholeSExtW(N); 143 MadeChange |= doPeepholeLoadStoreADDI(N); 144 MadeChange |= doPeepholeMaskedRVV(N); 145 } 146 147 if (MadeChange) 148 CurDAG->RemoveDeadNodes(); 149 } 150 151 static SDNode *selectImmWithConstantPool(SelectionDAG *CurDAG, const SDLoc &DL, 152 const MVT VT, int64_t Imm, 153 const RISCVSubtarget &Subtarget) { 154 assert(VT == MVT::i64 && "Expecting MVT::i64"); 155 const RISCVTargetLowering *TLI = Subtarget.getTargetLowering(); 156 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(CurDAG->getConstantPool( 157 ConstantInt::get(EVT(VT).getTypeForEVT(*CurDAG->getContext()), Imm), VT)); 158 SDValue Addr = TLI->getAddr(CP, *CurDAG); 159 SDValue Offset = CurDAG->getTargetConstant(0, DL, VT); 160 // Since there is no data race, the chain can be the entry node. 161 SDNode *Load = CurDAG->getMachineNode(RISCV::LD, DL, VT, Addr, Offset, 162 CurDAG->getEntryNode()); 163 MachineFunction &MF = CurDAG->getMachineFunction(); 164 MachineMemOperand *MemOp = MF.getMachineMemOperand( 165 MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad, 166 LLT(VT), CP->getAlign()); 167 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Load), {MemOp}); 168 return Load; 169 } 170 171 static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, 172 int64_t Imm, const RISCVSubtarget &Subtarget) { 173 MVT XLenVT = Subtarget.getXLenVT(); 174 RISCVMatInt::InstSeq Seq = 175 RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits()); 176 177 // If Imm is expensive to build, then we put it into constant pool. 
178 if (Subtarget.useConstantPoolForLargeInts() && 179 Seq.size() > Subtarget.getMaxBuildIntsCost()) 180 return selectImmWithConstantPool(CurDAG, DL, VT, Imm, Subtarget); 181 182 SDNode *Result = nullptr; 183 SDValue SrcReg = CurDAG->getRegister(RISCV::X0, XLenVT); 184 for (RISCVMatInt::Inst &Inst : Seq) { 185 SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, XLenVT); 186 if (Inst.Opc == RISCV::LUI) 187 Result = CurDAG->getMachineNode(RISCV::LUI, DL, XLenVT, SDImm); 188 else if (Inst.Opc == RISCV::ADD_UW) 189 Result = CurDAG->getMachineNode(RISCV::ADD_UW, DL, XLenVT, SrcReg, 190 CurDAG->getRegister(RISCV::X0, XLenVT)); 191 else if (Inst.Opc == RISCV::SH1ADD || Inst.Opc == RISCV::SH2ADD || 192 Inst.Opc == RISCV::SH3ADD) 193 Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SrcReg); 194 else 195 Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SDImm); 196 197 // Only the first instruction has X0 as its source. 198 SrcReg = SDValue(Result, 0); 199 } 200 201 return Result; 202 } 203 204 static SDValue createTupleImpl(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, 205 unsigned RegClassID, unsigned SubReg0) { 206 assert(Regs.size() >= 2 && Regs.size() <= 8); 207 208 SDLoc DL(Regs[0]); 209 SmallVector<SDValue, 8> Ops; 210 211 Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32)); 212 213 for (unsigned I = 0; I < Regs.size(); ++I) { 214 Ops.push_back(Regs[I]); 215 Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32)); 216 } 217 SDNode *N = 218 CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops); 219 return SDValue(N, 0); 220 } 221 222 static SDValue createM1Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, 223 unsigned NF) { 224 static const unsigned RegClassIDs[] = { 225 RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID, 226 RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID, 227 RISCV::VRN8M1RegClassID}; 228 229 return createTupleImpl(CurDAG, Regs, 
RegClassIDs[NF - 2], RISCV::sub_vrm1_0); 230 } 231 232 static SDValue createM2Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, 233 unsigned NF) { 234 static const unsigned RegClassIDs[] = {RISCV::VRN2M2RegClassID, 235 RISCV::VRN3M2RegClassID, 236 RISCV::VRN4M2RegClassID}; 237 238 return createTupleImpl(CurDAG, Regs, RegClassIDs[NF - 2], RISCV::sub_vrm2_0); 239 } 240 241 static SDValue createM4Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, 242 unsigned NF) { 243 return createTupleImpl(CurDAG, Regs, RISCV::VRN2M4RegClassID, 244 RISCV::sub_vrm4_0); 245 } 246 247 static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, 248 unsigned NF, RISCVII::VLMUL LMUL) { 249 switch (LMUL) { 250 default: 251 llvm_unreachable("Invalid LMUL."); 252 case RISCVII::VLMUL::LMUL_F8: 253 case RISCVII::VLMUL::LMUL_F4: 254 case RISCVII::VLMUL::LMUL_F2: 255 case RISCVII::VLMUL::LMUL_1: 256 return createM1Tuple(CurDAG, Regs, NF); 257 case RISCVII::VLMUL::LMUL_2: 258 return createM2Tuple(CurDAG, Regs, NF); 259 case RISCVII::VLMUL::LMUL_4: 260 return createM4Tuple(CurDAG, Regs, NF); 261 } 262 } 263 264 void RISCVDAGToDAGISel::addVectorLoadStoreOperands( 265 SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp, 266 bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands, 267 bool IsLoad, MVT *IndexVT) { 268 SDValue Chain = Node->getOperand(0); 269 SDValue Glue; 270 271 SDValue Base; 272 SelectBaseAddr(Node->getOperand(CurOp++), Base); 273 Operands.push_back(Base); // Base pointer. 274 275 if (IsStridedOrIndexed) { 276 Operands.push_back(Node->getOperand(CurOp++)); // Index. 277 if (IndexVT) 278 *IndexVT = Operands.back()->getSimpleValueType(0); 279 } 280 281 if (IsMasked) { 282 // Mask needs to be copied to V0. 
283 SDValue Mask = Node->getOperand(CurOp++); 284 Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue()); 285 Glue = Chain.getValue(1); 286 Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType())); 287 } 288 SDValue VL; 289 selectVLOp(Node->getOperand(CurOp++), VL); 290 Operands.push_back(VL); 291 292 MVT XLenVT = Subtarget->getXLenVT(); 293 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT); 294 Operands.push_back(SEWOp); 295 296 // Masked load has the tail policy argument. 297 if (IsMasked && IsLoad) { 298 // Policy must be a constant. 299 uint64_t Policy = Node->getConstantOperandVal(CurOp++); 300 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT); 301 Operands.push_back(PolicyOp); 302 } 303 304 Operands.push_back(Chain); // Chain. 305 if (Glue) 306 Operands.push_back(Glue); 307 } 308 309 void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked, 310 bool IsStrided) { 311 SDLoc DL(Node); 312 unsigned NF = Node->getNumValues() - 1; 313 MVT VT = Node->getSimpleValueType(0); 314 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 315 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 316 317 unsigned CurOp = 2; 318 SmallVector<SDValue, 8> Operands; 319 if (IsMasked) { 320 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp, 321 Node->op_begin() + CurOp + NF); 322 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL); 323 Operands.push_back(MaskedOff); 324 CurOp += NF; 325 } 326 327 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 328 Operands, /*IsLoad=*/true); 329 330 const RISCV::VLSEGPseudo *P = 331 RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW, 332 static_cast<unsigned>(LMUL)); 333 MachineSDNode *Load = 334 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands); 335 336 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 337 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 338 339 SDValue SuperReg = SDValue(Load, 0); 340 for 
(unsigned I = 0; I < NF; ++I) { 341 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I); 342 ReplaceUses(SDValue(Node, I), 343 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg)); 344 } 345 346 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); 347 CurDAG->RemoveDeadNode(Node); 348 } 349 350 void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) { 351 SDLoc DL(Node); 352 unsigned NF = Node->getNumValues() - 2; // Do not count VL and Chain. 353 MVT VT = Node->getSimpleValueType(0); 354 MVT XLenVT = Subtarget->getXLenVT(); 355 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 356 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 357 358 unsigned CurOp = 2; 359 SmallVector<SDValue, 7> Operands; 360 if (IsMasked) { 361 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp, 362 Node->op_begin() + CurOp + NF); 363 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL); 364 Operands.push_back(MaskedOff); 365 CurOp += NF; 366 } 367 368 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 369 /*IsStridedOrIndexed*/ false, Operands, 370 /*IsLoad=*/true); 371 372 const RISCV::VLSEGPseudo *P = 373 RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true, 374 Log2SEW, static_cast<unsigned>(LMUL)); 375 MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, 376 MVT::Other, MVT::Glue, Operands); 377 SDNode *ReadVL = CurDAG->getMachineNode(RISCV::PseudoReadVL, DL, XLenVT, 378 /*Glue*/ SDValue(Load, 2)); 379 380 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 381 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 382 383 SDValue SuperReg = SDValue(Load, 0); 384 for (unsigned I = 0; I < NF; ++I) { 385 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I); 386 ReplaceUses(SDValue(Node, I), 387 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg)); 388 } 389 390 ReplaceUses(SDValue(Node, NF), SDValue(ReadVL, 0)); // VL 391 ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 1)); 
// Chain 392 CurDAG->RemoveDeadNode(Node); 393 } 394 395 void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked, 396 bool IsOrdered) { 397 SDLoc DL(Node); 398 unsigned NF = Node->getNumValues() - 1; 399 MVT VT = Node->getSimpleValueType(0); 400 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 401 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 402 403 unsigned CurOp = 2; 404 SmallVector<SDValue, 8> Operands; 405 if (IsMasked) { 406 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp, 407 Node->op_begin() + CurOp + NF); 408 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL); 409 Operands.push_back(MaskedOff); 410 CurOp += NF; 411 } 412 413 MVT IndexVT; 414 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 415 /*IsStridedOrIndexed*/ true, Operands, 416 /*IsLoad=*/true, &IndexVT); 417 418 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 419 "Element count mismatch"); 420 421 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 422 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 423 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 424 report_fatal_error("The V extension does not support EEW=64 for index " 425 "values when XLEN=32"); 426 } 427 const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo( 428 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), 429 static_cast<unsigned>(IndexLMUL)); 430 MachineSDNode *Load = 431 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands); 432 433 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 434 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 435 436 SDValue SuperReg = SDValue(Load, 0); 437 for (unsigned I = 0; I < NF; ++I) { 438 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I); 439 ReplaceUses(SDValue(Node, I), 440 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg)); 441 } 442 443 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); 444 CurDAG->RemoveDeadNode(Node); 445 
} 446 447 void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, bool IsMasked, 448 bool IsStrided) { 449 SDLoc DL(Node); 450 unsigned NF = Node->getNumOperands() - 4; 451 if (IsStrided) 452 NF--; 453 if (IsMasked) 454 NF--; 455 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 456 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 457 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 458 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF); 459 SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL); 460 461 SmallVector<SDValue, 8> Operands; 462 Operands.push_back(StoreVal); 463 unsigned CurOp = 2 + NF; 464 465 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 466 Operands); 467 468 const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo( 469 NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL)); 470 MachineSDNode *Store = 471 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands); 472 473 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 474 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 475 476 ReplaceNode(Node, Store); 477 } 478 479 void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked, 480 bool IsOrdered) { 481 SDLoc DL(Node); 482 unsigned NF = Node->getNumOperands() - 5; 483 if (IsMasked) 484 --NF; 485 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 486 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 487 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 488 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF); 489 SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL); 490 491 SmallVector<SDValue, 8> Operands; 492 Operands.push_back(StoreVal); 493 unsigned CurOp = 2 + NF; 494 495 MVT IndexVT; 496 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 497 /*IsStridedOrIndexed*/ true, Operands, 498 /*IsLoad=*/false, &IndexVT); 499 500 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 501 "Element 
count mismatch"); 502 503 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 504 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 505 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 506 report_fatal_error("The V extension does not support EEW=64 for index " 507 "values when XLEN=32"); 508 } 509 const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo( 510 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), 511 static_cast<unsigned>(IndexLMUL)); 512 MachineSDNode *Store = 513 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands); 514 515 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 516 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 517 518 ReplaceNode(Node, Store); 519 } 520 521 void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) { 522 if (!Subtarget->hasVInstructions()) 523 return; 524 525 assert((Node->getOpcode() == ISD::INTRINSIC_W_CHAIN || 526 Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN) && 527 "Unexpected opcode"); 528 529 SDLoc DL(Node); 530 MVT XLenVT = Subtarget->getXLenVT(); 531 532 bool HasChain = Node->getOpcode() == ISD::INTRINSIC_W_CHAIN; 533 unsigned IntNoOffset = HasChain ? 1 : 0; 534 unsigned IntNo = Node->getConstantOperandVal(IntNoOffset); 535 536 assert((IntNo == Intrinsic::riscv_vsetvli || 537 IntNo == Intrinsic::riscv_vsetvlimax || 538 IntNo == Intrinsic::riscv_vsetvli_opt || 539 IntNo == Intrinsic::riscv_vsetvlimax_opt) && 540 "Unexpected vsetvli intrinsic"); 541 542 bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax || 543 IntNo == Intrinsic::riscv_vsetvlimax_opt; 544 unsigned Offset = IntNoOffset + (VLMax ? 
1 : 2); 545 546 assert(Node->getNumOperands() == Offset + 2 && 547 "Unexpected number of operands"); 548 549 unsigned SEW = 550 RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7); 551 RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>( 552 Node->getConstantOperandVal(Offset + 1) & 0x7); 553 554 unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true, 555 /*MaskAgnostic*/ false); 556 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT); 557 558 SmallVector<EVT, 2> VTs = {XLenVT}; 559 if (HasChain) 560 VTs.push_back(MVT::Other); 561 562 SDValue VLOperand; 563 unsigned Opcode = RISCV::PseudoVSETVLI; 564 if (VLMax) { 565 VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT); 566 Opcode = RISCV::PseudoVSETVLIX0; 567 } else { 568 VLOperand = Node->getOperand(IntNoOffset + 1); 569 570 if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) { 571 uint64_t AVL = C->getZExtValue(); 572 if (isUInt<5>(AVL)) { 573 SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT); 574 SmallVector<SDValue, 3> Ops = {VLImm, VTypeIOp}; 575 if (HasChain) 576 Ops.push_back(Node->getOperand(0)); 577 ReplaceNode( 578 Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL, VTs, Ops)); 579 return; 580 } 581 } 582 } 583 584 SmallVector<SDValue, 3> Ops = {VLOperand, VTypeIOp}; 585 if (HasChain) 586 Ops.push_back(Node->getOperand(0)); 587 588 ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, VTs, Ops)); 589 } 590 591 void RISCVDAGToDAGISel::Select(SDNode *Node) { 592 // If we have a custom node, we have already selected. 593 if (Node->isMachineOpcode()) { 594 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n"); 595 Node->setNodeId(-1); 596 return; 597 } 598 599 // Instruction Selection not handled by the auto-generated tablegen selection 600 // should be handled here. 
601 unsigned Opcode = Node->getOpcode(); 602 MVT XLenVT = Subtarget->getXLenVT(); 603 SDLoc DL(Node); 604 MVT VT = Node->getSimpleValueType(0); 605 606 switch (Opcode) { 607 case ISD::Constant: { 608 auto *ConstNode = cast<ConstantSDNode>(Node); 609 if (VT == XLenVT && ConstNode->isZero()) { 610 SDValue New = 611 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, XLenVT); 612 ReplaceNode(Node, New.getNode()); 613 return; 614 } 615 int64_t Imm = ConstNode->getSExtValue(); 616 // If the upper XLen-16 bits are not used, try to convert this to a simm12 617 // by sign extending bit 15. 618 if (isUInt<16>(Imm) && isInt<12>(SignExtend64(Imm, 16)) && 619 hasAllHUsers(Node)) 620 Imm = SignExtend64(Imm, 16); 621 // If the upper 32-bits are not used try to convert this into a simm32 by 622 // sign extending bit 32. 623 if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node)) 624 Imm = SignExtend64(Imm, 32); 625 626 ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget)); 627 return; 628 } 629 case ISD::FrameIndex: { 630 SDValue Imm = CurDAG->getTargetConstant(0, DL, XLenVT); 631 int FI = cast<FrameIndexSDNode>(Node)->getIndex(); 632 SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT); 633 ReplaceNode(Node, CurDAG->getMachineNode(RISCV::ADDI, DL, VT, TFI, Imm)); 634 return; 635 } 636 case ISD::SRL: { 637 // Optimize (srl (and X, C2), C) -> 638 // (srli (slli X, (XLen-C3), (XLen-C3) + C) 639 // Where C2 is a mask with C3 trailing ones. 640 // Taking into account that the C2 may have had lower bits unset by 641 // SimplifyDemandedBits. This avoids materializing the C2 immediate. 642 // This pattern occurs when type legalizing right shifts for types with 643 // less than XLen bits. 
644 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 645 if (!N1C) 646 break; 647 SDValue N0 = Node->getOperand(0); 648 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() || 649 !isa<ConstantSDNode>(N0.getOperand(1))) 650 break; 651 unsigned ShAmt = N1C->getZExtValue(); 652 uint64_t Mask = N0.getConstantOperandVal(1); 653 Mask |= maskTrailingOnes<uint64_t>(ShAmt); 654 if (!isMask_64(Mask)) 655 break; 656 unsigned TrailingOnes = countTrailingOnes(Mask); 657 // 32 trailing ones should use srliw via tablegen pattern. 658 if (TrailingOnes == 32 || ShAmt >= TrailingOnes) 659 break; 660 unsigned LShAmt = Subtarget->getXLen() - TrailingOnes; 661 SDNode *SLLI = 662 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0), 663 CurDAG->getTargetConstant(LShAmt, DL, VT)); 664 SDNode *SRLI = CurDAG->getMachineNode( 665 RISCV::SRLI, DL, VT, SDValue(SLLI, 0), 666 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT)); 667 ReplaceNode(Node, SRLI); 668 return; 669 } 670 case ISD::SRA: { 671 // Optimize (sra (sext_inreg X, i16), C) -> 672 // (srai (slli X, (XLen-16), (XLen-16) + C) 673 // And (sra (sext_inreg X, i8), C) -> 674 // (srai (slli X, (XLen-8), (XLen-8) + C) 675 // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal. 676 // This transform matches the code we get without Zbb. The shifts are more 677 // compressible, and this can help expose CSE opportunities in the sdiv by 678 // constant optimization. 679 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 680 if (!N1C) 681 break; 682 SDValue N0 = Node->getOperand(0); 683 if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse()) 684 break; 685 unsigned ShAmt = N1C->getZExtValue(); 686 unsigned ExtSize = 687 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits(); 688 // ExtSize of 32 should use sraiw via tablegen pattern. 
689 if (ExtSize >= 32 || ShAmt >= ExtSize) 690 break; 691 unsigned LShAmt = Subtarget->getXLen() - ExtSize; 692 SDNode *SLLI = 693 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0), 694 CurDAG->getTargetConstant(LShAmt, DL, VT)); 695 SDNode *SRAI = CurDAG->getMachineNode( 696 RISCV::SRAI, DL, VT, SDValue(SLLI, 0), 697 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT)); 698 ReplaceNode(Node, SRAI); 699 return; 700 } 701 case ISD::AND: { 702 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 703 if (!N1C) 704 break; 705 706 SDValue N0 = Node->getOperand(0); 707 708 bool LeftShift = N0.getOpcode() == ISD::SHL; 709 if (!LeftShift && N0.getOpcode() != ISD::SRL) 710 break; 711 712 auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 713 if (!C) 714 break; 715 uint64_t C2 = C->getZExtValue(); 716 unsigned XLen = Subtarget->getXLen(); 717 if (!C2 || C2 >= XLen) 718 break; 719 720 uint64_t C1 = N1C->getZExtValue(); 721 722 // Keep track of whether this is an andi. 723 bool IsANDI = isInt<12>(N1C->getSExtValue()); 724 725 // Clear irrelevant bits in the mask. 726 if (LeftShift) 727 C1 &= maskTrailingZeros<uint64_t>(C2); 728 else 729 C1 &= maskTrailingOnes<uint64_t>(XLen - C2); 730 731 // Some transforms should only be done if the shift has a single use or 732 // the AND would become (srli (slli X, 32), 32) 733 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF); 734 735 SDValue X = N0.getOperand(0); 736 737 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask 738 // with c3 leading zeros. 739 if (!LeftShift && isMask_64(C1)) { 740 uint64_t C3 = XLen - (64 - countLeadingZeros(C1)); 741 if (C2 < C3) { 742 // If the number of leading zeros is C2+32 this can be SRLIW. 
743 if (C2 + 32 == C3) { 744 SDNode *SRLIW = 745 CurDAG->getMachineNode(RISCV::SRLIW, DL, XLenVT, X, 746 CurDAG->getTargetConstant(C2, DL, XLenVT)); 747 ReplaceNode(Node, SRLIW); 748 return; 749 } 750 751 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32) if 752 // c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1. 753 // 754 // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type 755 // legalized and goes through DAG combine. 756 if (C2 >= 32 && (C3 - C2) == 1 && N0.hasOneUse() && 757 X.getOpcode() == ISD::SIGN_EXTEND_INREG && 758 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) { 759 SDNode *SRAIW = 760 CurDAG->getMachineNode(RISCV::SRAIW, DL, XLenVT, X.getOperand(0), 761 CurDAG->getTargetConstant(31, DL, XLenVT)); 762 SDNode *SRLIW = CurDAG->getMachineNode( 763 RISCV::SRLIW, DL, XLenVT, SDValue(SRAIW, 0), 764 CurDAG->getTargetConstant(C3 - 32, DL, XLenVT)); 765 ReplaceNode(Node, SRLIW); 766 return; 767 } 768 769 // (srli (slli x, c3-c2), c3). 770 if (OneUseOrZExtW && !IsANDI) { 771 SDNode *SLLI = CurDAG->getMachineNode( 772 RISCV::SLLI, DL, XLenVT, X, 773 CurDAG->getTargetConstant(C3 - C2, DL, XLenVT)); 774 SDNode *SRLI = 775 CurDAG->getMachineNode(RISCV::SRLI, DL, XLenVT, SDValue(SLLI, 0), 776 CurDAG->getTargetConstant(C3, DL, XLenVT)); 777 ReplaceNode(Node, SRLI); 778 return; 779 } 780 } 781 } 782 783 // Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask 784 // shifted by c2 bits with c3 leading zeros. 785 if (LeftShift && isShiftedMask_64(C1)) { 786 uint64_t C3 = XLen - (64 - countLeadingZeros(C1)); 787 788 if (C2 + C3 < XLen && 789 C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + C3)) << C2)) { 790 // Use slli.uw when possible. 
791 if ((XLen - (C2 + C3)) == 32 && Subtarget->hasStdExtZba()) { 792 SDNode *SLLI_UW = 793 CurDAG->getMachineNode(RISCV::SLLI_UW, DL, XLenVT, X, 794 CurDAG->getTargetConstant(C2, DL, XLenVT)); 795 ReplaceNode(Node, SLLI_UW); 796 return; 797 } 798 799 // (srli (slli c2+c3), c3) 800 if (OneUseOrZExtW && !IsANDI) { 801 SDNode *SLLI = CurDAG->getMachineNode( 802 RISCV::SLLI, DL, XLenVT, X, 803 CurDAG->getTargetConstant(C2 + C3, DL, XLenVT)); 804 SDNode *SRLI = 805 CurDAG->getMachineNode(RISCV::SRLI, DL, XLenVT, SDValue(SLLI, 0), 806 CurDAG->getTargetConstant(C3, DL, XLenVT)); 807 ReplaceNode(Node, SRLI); 808 return; 809 } 810 } 811 } 812 813 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a 814 // shifted mask with c2 leading zeros and c3 trailing zeros. 815 if (!LeftShift && isShiftedMask_64(C1)) { 816 uint64_t Leading = XLen - (64 - countLeadingZeros(C1)); 817 uint64_t C3 = countTrailingZeros(C1); 818 if (Leading == C2 && C2 + C3 < XLen && OneUseOrZExtW && !IsANDI) { 819 SDNode *SRLI = CurDAG->getMachineNode( 820 RISCV::SRLI, DL, XLenVT, X, 821 CurDAG->getTargetConstant(C2 + C3, DL, XLenVT)); 822 SDNode *SLLI = 823 CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLI, 0), 824 CurDAG->getTargetConstant(C3, DL, XLenVT)); 825 ReplaceNode(Node, SLLI); 826 return; 827 } 828 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI. 829 if (Leading > 32 && (Leading - 32) == C2 && C2 + C3 < 32 && 830 OneUseOrZExtW && !IsANDI) { 831 SDNode *SRLIW = CurDAG->getMachineNode( 832 RISCV::SRLIW, DL, XLenVT, X, 833 CurDAG->getTargetConstant(C2 + C3, DL, XLenVT)); 834 SDNode *SLLI = 835 CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLIW, 0), 836 CurDAG->getTargetConstant(C3, DL, XLenVT)); 837 ReplaceNode(Node, SLLI); 838 return; 839 } 840 } 841 842 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a 843 // shifted mask with no leading zeros and c3 trailing zeros. 
844 if (LeftShift && isShiftedMask_64(C1)) { 845 uint64_t Leading = XLen - (64 - countLeadingZeros(C1)); 846 uint64_t C3 = countTrailingZeros(C1); 847 if (Leading == 0 && C2 < C3 && OneUseOrZExtW && !IsANDI) { 848 SDNode *SRLI = CurDAG->getMachineNode( 849 RISCV::SRLI, DL, XLenVT, X, 850 CurDAG->getTargetConstant(C3 - C2, DL, XLenVT)); 851 SDNode *SLLI = 852 CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLI, 0), 853 CurDAG->getTargetConstant(C3, DL, XLenVT)); 854 ReplaceNode(Node, SLLI); 855 return; 856 } 857 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI. 858 if (C2 < C3 && Leading + C2 == 32 && OneUseOrZExtW && !IsANDI) { 859 SDNode *SRLIW = CurDAG->getMachineNode( 860 RISCV::SRLIW, DL, XLenVT, X, 861 CurDAG->getTargetConstant(C3 - C2, DL, XLenVT)); 862 SDNode *SLLI = 863 CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLIW, 0), 864 CurDAG->getTargetConstant(C3, DL, XLenVT)); 865 ReplaceNode(Node, SLLI); 866 return; 867 } 868 } 869 870 break; 871 } 872 case ISD::MUL: { 873 // Special case for calculating (mul (and X, C2), C1) where the full product 874 // fits in XLen bits. We can shift X left by the number of leading zeros in 875 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final 876 // product has XLen trailing zeros, putting it in the output of MULHU. This 877 // can avoid materializing a constant in a register for C2. 878 879 // RHS should be a constant. 880 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 881 if (!N1C || !N1C->hasOneUse()) 882 break; 883 884 // LHS should be an AND with constant. 885 SDValue N0 = Node->getOperand(0); 886 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1))) 887 break; 888 889 uint64_t C2 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); 890 891 // Constant should be a mask. 892 if (!isMask_64(C2)) 893 break; 894 895 // This should be the only use of the AND unless we will use 896 // (SRLI (SLLI X, 32), 32). 
We don't use a shift pair for other AND 897 // constants. 898 if (!N0.hasOneUse() && C2 != UINT64_C(0xFFFFFFFF)) 899 break; 900 901 // If this can be an ANDI, ZEXT.H or ZEXT.W we don't need to do this 902 // optimization. 903 if (isInt<12>(C2) || 904 (C2 == UINT64_C(0xFFFF) && 905 (Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp())) || 906 (C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba())) 907 break; 908 909 // We need to shift left the AND input and C1 by a total of XLen bits. 910 911 // How far left do we need to shift the AND input? 912 unsigned XLen = Subtarget->getXLen(); 913 unsigned LeadingZeros = XLen - (64 - countLeadingZeros(C2)); 914 915 // The constant gets shifted by the remaining amount unless that would 916 // shift bits out. 917 uint64_t C1 = N1C->getZExtValue(); 918 unsigned ConstantShift = XLen - LeadingZeros; 919 if (ConstantShift > (XLen - (64 - countLeadingZeros(C1)))) 920 break; 921 922 uint64_t ShiftedC1 = C1 << ConstantShift; 923 // If this RV32, we need to sign extend the constant. 924 if (XLen == 32) 925 ShiftedC1 = SignExtend64(ShiftedC1, 32); 926 927 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))). 928 SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget); 929 SDNode *SLLI = 930 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0), 931 CurDAG->getTargetConstant(LeadingZeros, DL, VT)); 932 SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT, 933 SDValue(SLLI, 0), SDValue(Imm, 0)); 934 ReplaceNode(Node, MULHU); 935 return; 936 } 937 case ISD::INTRINSIC_WO_CHAIN: { 938 unsigned IntNo = Node->getConstantOperandVal(0); 939 switch (IntNo) { 940 // By default we do not custom select any intrinsic. 
941 default: 942 break; 943 case Intrinsic::riscv_vmsgeu: 944 case Intrinsic::riscv_vmsge: { 945 SDValue Src1 = Node->getOperand(1); 946 SDValue Src2 = Node->getOperand(2); 947 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu; 948 bool IsCmpUnsignedZero = false; 949 // Only custom select scalar second operand. 950 if (Src2.getValueType() != XLenVT) 951 break; 952 // Small constants are handled with patterns. 953 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) { 954 int64_t CVal = C->getSExtValue(); 955 if (CVal >= -15 && CVal <= 16) { 956 if (!IsUnsigned || CVal != 0) 957 break; 958 IsCmpUnsignedZero = true; 959 } 960 } 961 MVT Src1VT = Src1.getSimpleValueType(); 962 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode; 963 switch (RISCVTargetLowering::getLMUL(Src1VT)) { 964 default: 965 llvm_unreachable("Unexpected LMUL!"); 966 #define CASE_VMSLT_VMNAND_VMSET_OPCODES(lmulenum, suffix, suffix_b) \ 967 case RISCVII::VLMUL::lmulenum: \ 968 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \ 969 : RISCV::PseudoVMSLT_VX_##suffix; \ 970 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \ 971 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b; \ 972 break; 973 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F8, MF8, B1) 974 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F4, MF4, B2) 975 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F2, MF2, B4) 976 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_1, M1, B8) 977 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_2, M2, B16) 978 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_4, M4, B32) 979 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_8, M8, B64) 980 #undef CASE_VMSLT_VMNAND_VMSET_OPCODES 981 } 982 SDValue SEW = CurDAG->getTargetConstant( 983 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT); 984 SDValue VL; 985 selectVLOp(Node->getOperand(3), VL); 986 987 // If vmsgeu with 0 immediate, expand it to vmset. 
988 if (IsCmpUnsignedZero) { 989 ReplaceNode(Node, CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW)); 990 return; 991 } 992 993 // Expand to 994 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd 995 SDValue Cmp = SDValue( 996 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}), 997 0); 998 ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT, 999 {Cmp, Cmp, VL, SEW})); 1000 return; 1001 } 1002 case Intrinsic::riscv_vmsgeu_mask: 1003 case Intrinsic::riscv_vmsge_mask: { 1004 SDValue Src1 = Node->getOperand(2); 1005 SDValue Src2 = Node->getOperand(3); 1006 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask; 1007 bool IsCmpUnsignedZero = false; 1008 // Only custom select scalar second operand. 1009 if (Src2.getValueType() != XLenVT) 1010 break; 1011 // Small constants are handled with patterns. 1012 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) { 1013 int64_t CVal = C->getSExtValue(); 1014 if (CVal >= -15 && CVal <= 16) { 1015 if (!IsUnsigned || CVal != 0) 1016 break; 1017 IsCmpUnsignedZero = true; 1018 } 1019 } 1020 MVT Src1VT = Src1.getSimpleValueType(); 1021 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode, 1022 VMSetOpcode, VMANDOpcode; 1023 switch (RISCVTargetLowering::getLMUL(Src1VT)) { 1024 default: 1025 llvm_unreachable("Unexpected LMUL!"); 1026 #define CASE_VMSLT_VMSET_OPCODES(lmulenum, suffix, suffix_b) \ 1027 case RISCVII::VLMUL::lmulenum: \ 1028 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \ 1029 : RISCV::PseudoVMSLT_VX_##suffix; \ 1030 VMSLTMaskOpcode = IsUnsigned ? 
RISCV::PseudoVMSLTU_VX_##suffix##_MASK \ 1031 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \ 1032 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b; \ 1033 break; 1034 CASE_VMSLT_VMSET_OPCODES(LMUL_F8, MF8, B1) 1035 CASE_VMSLT_VMSET_OPCODES(LMUL_F4, MF4, B2) 1036 CASE_VMSLT_VMSET_OPCODES(LMUL_F2, MF2, B4) 1037 CASE_VMSLT_VMSET_OPCODES(LMUL_1, M1, B8) 1038 CASE_VMSLT_VMSET_OPCODES(LMUL_2, M2, B16) 1039 CASE_VMSLT_VMSET_OPCODES(LMUL_4, M4, B32) 1040 CASE_VMSLT_VMSET_OPCODES(LMUL_8, M8, B64) 1041 #undef CASE_VMSLT_VMSET_OPCODES 1042 } 1043 // Mask operations use the LMUL from the mask type. 1044 switch (RISCVTargetLowering::getLMUL(VT)) { 1045 default: 1046 llvm_unreachable("Unexpected LMUL!"); 1047 #define CASE_VMXOR_VMANDN_VMAND_OPCODES(lmulenum, suffix) \ 1048 case RISCVII::VLMUL::lmulenum: \ 1049 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \ 1050 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \ 1051 VMANDOpcode = RISCV::PseudoVMAND_MM_##suffix; \ 1052 break; 1053 CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_F8, MF8) 1054 CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_F4, MF4) 1055 CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_F2, MF2) 1056 CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_1, M1) 1057 CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_2, M2) 1058 CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_4, M4) 1059 CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_8, M8) 1060 #undef CASE_VMXOR_VMANDN_VMAND_OPCODES 1061 } 1062 SDValue SEW = CurDAG->getTargetConstant( 1063 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT); 1064 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT); 1065 SDValue VL; 1066 selectVLOp(Node->getOperand(5), VL); 1067 SDValue MaskedOff = Node->getOperand(1); 1068 SDValue Mask = Node->getOperand(4); 1069 1070 // If vmsgeu_mask with 0 immediate, expand it to {vmset, vmand}. 
1071 if (IsCmpUnsignedZero) { 1072 SDValue VMSet = 1073 SDValue(CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW), 0); 1074 ReplaceNode(Node, CurDAG->getMachineNode(VMANDOpcode, DL, VT, 1075 {Mask, VMSet, VL, MaskSEW})); 1076 return; 1077 } 1078 1079 // If the MaskedOff value and the Mask are the same value use 1080 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt 1081 // This avoids needing to copy v0 to vd before starting the next sequence. 1082 if (Mask == MaskedOff) { 1083 SDValue Cmp = SDValue( 1084 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}), 1085 0); 1086 ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT, 1087 {Mask, Cmp, VL, MaskSEW})); 1088 return; 1089 } 1090 1091 // Mask needs to be copied to V0. 1092 SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, 1093 RISCV::V0, Mask, SDValue()); 1094 SDValue Glue = Chain.getValue(1); 1095 SDValue V0 = CurDAG->getRegister(RISCV::V0, VT); 1096 1097 // Otherwise use 1098 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0 1099 SDValue Cmp = SDValue( 1100 CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT, 1101 {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}), 1102 0); 1103 ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT, 1104 {Cmp, Mask, VL, MaskSEW})); 1105 return; 1106 } 1107 case Intrinsic::riscv_vsetvli_opt: 1108 case Intrinsic::riscv_vsetvlimax_opt: 1109 return selectVSETVLI(Node); 1110 } 1111 break; 1112 } 1113 case ISD::INTRINSIC_W_CHAIN: { 1114 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 1115 switch (IntNo) { 1116 // By default we do not custom select any intrinsic. 
1117 default: 1118 break; 1119 case Intrinsic::riscv_vsetvli: 1120 case Intrinsic::riscv_vsetvlimax: 1121 return selectVSETVLI(Node); 1122 case Intrinsic::riscv_vlseg2: 1123 case Intrinsic::riscv_vlseg3: 1124 case Intrinsic::riscv_vlseg4: 1125 case Intrinsic::riscv_vlseg5: 1126 case Intrinsic::riscv_vlseg6: 1127 case Intrinsic::riscv_vlseg7: 1128 case Intrinsic::riscv_vlseg8: { 1129 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false); 1130 return; 1131 } 1132 case Intrinsic::riscv_vlseg2_mask: 1133 case Intrinsic::riscv_vlseg3_mask: 1134 case Intrinsic::riscv_vlseg4_mask: 1135 case Intrinsic::riscv_vlseg5_mask: 1136 case Intrinsic::riscv_vlseg6_mask: 1137 case Intrinsic::riscv_vlseg7_mask: 1138 case Intrinsic::riscv_vlseg8_mask: { 1139 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false); 1140 return; 1141 } 1142 case Intrinsic::riscv_vlsseg2: 1143 case Intrinsic::riscv_vlsseg3: 1144 case Intrinsic::riscv_vlsseg4: 1145 case Intrinsic::riscv_vlsseg5: 1146 case Intrinsic::riscv_vlsseg6: 1147 case Intrinsic::riscv_vlsseg7: 1148 case Intrinsic::riscv_vlsseg8: { 1149 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true); 1150 return; 1151 } 1152 case Intrinsic::riscv_vlsseg2_mask: 1153 case Intrinsic::riscv_vlsseg3_mask: 1154 case Intrinsic::riscv_vlsseg4_mask: 1155 case Intrinsic::riscv_vlsseg5_mask: 1156 case Intrinsic::riscv_vlsseg6_mask: 1157 case Intrinsic::riscv_vlsseg7_mask: 1158 case Intrinsic::riscv_vlsseg8_mask: { 1159 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true); 1160 return; 1161 } 1162 case Intrinsic::riscv_vloxseg2: 1163 case Intrinsic::riscv_vloxseg3: 1164 case Intrinsic::riscv_vloxseg4: 1165 case Intrinsic::riscv_vloxseg5: 1166 case Intrinsic::riscv_vloxseg6: 1167 case Intrinsic::riscv_vloxseg7: 1168 case Intrinsic::riscv_vloxseg8: 1169 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true); 1170 return; 1171 case Intrinsic::riscv_vluxseg2: 1172 case Intrinsic::riscv_vluxseg3: 1173 case Intrinsic::riscv_vluxseg4: 1174 case 
Intrinsic::riscv_vluxseg5: 1175 case Intrinsic::riscv_vluxseg6: 1176 case Intrinsic::riscv_vluxseg7: 1177 case Intrinsic::riscv_vluxseg8: 1178 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false); 1179 return; 1180 case Intrinsic::riscv_vloxseg2_mask: 1181 case Intrinsic::riscv_vloxseg3_mask: 1182 case Intrinsic::riscv_vloxseg4_mask: 1183 case Intrinsic::riscv_vloxseg5_mask: 1184 case Intrinsic::riscv_vloxseg6_mask: 1185 case Intrinsic::riscv_vloxseg7_mask: 1186 case Intrinsic::riscv_vloxseg8_mask: 1187 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true); 1188 return; 1189 case Intrinsic::riscv_vluxseg2_mask: 1190 case Intrinsic::riscv_vluxseg3_mask: 1191 case Intrinsic::riscv_vluxseg4_mask: 1192 case Intrinsic::riscv_vluxseg5_mask: 1193 case Intrinsic::riscv_vluxseg6_mask: 1194 case Intrinsic::riscv_vluxseg7_mask: 1195 case Intrinsic::riscv_vluxseg8_mask: 1196 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false); 1197 return; 1198 case Intrinsic::riscv_vlseg8ff: 1199 case Intrinsic::riscv_vlseg7ff: 1200 case Intrinsic::riscv_vlseg6ff: 1201 case Intrinsic::riscv_vlseg5ff: 1202 case Intrinsic::riscv_vlseg4ff: 1203 case Intrinsic::riscv_vlseg3ff: 1204 case Intrinsic::riscv_vlseg2ff: { 1205 selectVLSEGFF(Node, /*IsMasked*/ false); 1206 return; 1207 } 1208 case Intrinsic::riscv_vlseg8ff_mask: 1209 case Intrinsic::riscv_vlseg7ff_mask: 1210 case Intrinsic::riscv_vlseg6ff_mask: 1211 case Intrinsic::riscv_vlseg5ff_mask: 1212 case Intrinsic::riscv_vlseg4ff_mask: 1213 case Intrinsic::riscv_vlseg3ff_mask: 1214 case Intrinsic::riscv_vlseg2ff_mask: { 1215 selectVLSEGFF(Node, /*IsMasked*/ true); 1216 return; 1217 } 1218 case Intrinsic::riscv_vloxei: 1219 case Intrinsic::riscv_vloxei_mask: 1220 case Intrinsic::riscv_vluxei: 1221 case Intrinsic::riscv_vluxei_mask: { 1222 bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask || 1223 IntNo == Intrinsic::riscv_vluxei_mask; 1224 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei || 1225 IntNo == 
Intrinsic::riscv_vloxei_mask; 1226 1227 MVT VT = Node->getSimpleValueType(0); 1228 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1229 1230 unsigned CurOp = 2; 1231 // Masked intrinsic only have TU version pseduo instructions. 1232 bool IsTU = IsMasked || (!IsMasked && !Node->getOperand(CurOp).isUndef()); 1233 SmallVector<SDValue, 8> Operands; 1234 if (IsTU) 1235 Operands.push_back(Node->getOperand(CurOp++)); 1236 else 1237 // Skip the undef passthru operand for nomask TA version pseudo 1238 CurOp++; 1239 1240 MVT IndexVT; 1241 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 1242 /*IsStridedOrIndexed*/ true, Operands, 1243 /*IsLoad=*/true, &IndexVT); 1244 1245 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 1246 "Element count mismatch"); 1247 1248 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1249 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 1250 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 1251 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 1252 report_fatal_error("The V extension does not support EEW=64 for index " 1253 "values when XLEN=32"); 1254 } 1255 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo( 1256 IsMasked, IsTU, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), 1257 static_cast<unsigned>(IndexLMUL)); 1258 MachineSDNode *Load = 1259 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1260 1261 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1262 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 1263 1264 ReplaceNode(Node, Load); 1265 return; 1266 } 1267 case Intrinsic::riscv_vlm: 1268 case Intrinsic::riscv_vle: 1269 case Intrinsic::riscv_vle_mask: 1270 case Intrinsic::riscv_vlse: 1271 case Intrinsic::riscv_vlse_mask: { 1272 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask || 1273 IntNo == Intrinsic::riscv_vlse_mask; 1274 bool IsStrided = 1275 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask; 1276 1277 MVT 
VT = Node->getSimpleValueType(0); 1278 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1279 1280 unsigned CurOp = 2; 1281 // The riscv_vlm intrinsic are always tail agnostic and no passthru operand. 1282 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm; 1283 // Masked intrinsic only have TU version pseduo instructions. 1284 bool IsTU = 1285 HasPassthruOperand && 1286 ((!IsMasked && !Node->getOperand(CurOp).isUndef()) || IsMasked); 1287 SmallVector<SDValue, 8> Operands; 1288 if (IsTU) 1289 Operands.push_back(Node->getOperand(CurOp++)); 1290 else if (HasPassthruOperand) 1291 // Skip the undef passthru operand for nomask TA version pseudo 1292 CurOp++; 1293 1294 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 1295 Operands, /*IsLoad=*/true); 1296 1297 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1298 const RISCV::VLEPseudo *P = 1299 RISCV::getVLEPseudo(IsMasked, IsTU, IsStrided, /*FF*/ false, Log2SEW, 1300 static_cast<unsigned>(LMUL)); 1301 MachineSDNode *Load = 1302 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1303 1304 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1305 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 1306 1307 ReplaceNode(Node, Load); 1308 return; 1309 } 1310 case Intrinsic::riscv_vleff: 1311 case Intrinsic::riscv_vleff_mask: { 1312 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask; 1313 1314 MVT VT = Node->getSimpleValueType(0); 1315 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1316 1317 unsigned CurOp = 2; 1318 // Masked intrinsic only have TU version pseduo instructions. 
1319 bool IsTU = IsMasked || (!IsMasked && !Node->getOperand(CurOp).isUndef()); 1320 SmallVector<SDValue, 7> Operands; 1321 if (IsTU) 1322 Operands.push_back(Node->getOperand(CurOp++)); 1323 else 1324 // Skip the undef passthru operand for nomask TA version pseudo 1325 CurOp++; 1326 1327 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 1328 /*IsStridedOrIndexed*/ false, Operands, 1329 /*IsLoad=*/true); 1330 1331 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1332 const RISCV::VLEPseudo *P = 1333 RISCV::getVLEPseudo(IsMasked, IsTU, /*Strided*/ false, /*FF*/ true, 1334 Log2SEW, static_cast<unsigned>(LMUL)); 1335 MachineSDNode *Load = 1336 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), 1337 MVT::Other, MVT::Glue, Operands); 1338 SDNode *ReadVL = CurDAG->getMachineNode(RISCV::PseudoReadVL, DL, XLenVT, 1339 /*Glue*/ SDValue(Load, 2)); 1340 1341 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1342 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 1343 1344 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0)); 1345 ReplaceUses(SDValue(Node, 1), SDValue(ReadVL, 0)); // VL 1346 ReplaceUses(SDValue(Node, 2), SDValue(Load, 1)); // Chain 1347 CurDAG->RemoveDeadNode(Node); 1348 return; 1349 } 1350 } 1351 break; 1352 } 1353 case ISD::INTRINSIC_VOID: { 1354 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 1355 switch (IntNo) { 1356 case Intrinsic::riscv_vsseg2: 1357 case Intrinsic::riscv_vsseg3: 1358 case Intrinsic::riscv_vsseg4: 1359 case Intrinsic::riscv_vsseg5: 1360 case Intrinsic::riscv_vsseg6: 1361 case Intrinsic::riscv_vsseg7: 1362 case Intrinsic::riscv_vsseg8: { 1363 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false); 1364 return; 1365 } 1366 case Intrinsic::riscv_vsseg2_mask: 1367 case Intrinsic::riscv_vsseg3_mask: 1368 case Intrinsic::riscv_vsseg4_mask: 1369 case Intrinsic::riscv_vsseg5_mask: 1370 case Intrinsic::riscv_vsseg6_mask: 1371 case Intrinsic::riscv_vsseg7_mask: 1372 case 
Intrinsic::riscv_vsseg8_mask: { 1373 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false); 1374 return; 1375 } 1376 case Intrinsic::riscv_vssseg2: 1377 case Intrinsic::riscv_vssseg3: 1378 case Intrinsic::riscv_vssseg4: 1379 case Intrinsic::riscv_vssseg5: 1380 case Intrinsic::riscv_vssseg6: 1381 case Intrinsic::riscv_vssseg7: 1382 case Intrinsic::riscv_vssseg8: { 1383 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true); 1384 return; 1385 } 1386 case Intrinsic::riscv_vssseg2_mask: 1387 case Intrinsic::riscv_vssseg3_mask: 1388 case Intrinsic::riscv_vssseg4_mask: 1389 case Intrinsic::riscv_vssseg5_mask: 1390 case Intrinsic::riscv_vssseg6_mask: 1391 case Intrinsic::riscv_vssseg7_mask: 1392 case Intrinsic::riscv_vssseg8_mask: { 1393 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true); 1394 return; 1395 } 1396 case Intrinsic::riscv_vsoxseg2: 1397 case Intrinsic::riscv_vsoxseg3: 1398 case Intrinsic::riscv_vsoxseg4: 1399 case Intrinsic::riscv_vsoxseg5: 1400 case Intrinsic::riscv_vsoxseg6: 1401 case Intrinsic::riscv_vsoxseg7: 1402 case Intrinsic::riscv_vsoxseg8: 1403 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true); 1404 return; 1405 case Intrinsic::riscv_vsuxseg2: 1406 case Intrinsic::riscv_vsuxseg3: 1407 case Intrinsic::riscv_vsuxseg4: 1408 case Intrinsic::riscv_vsuxseg5: 1409 case Intrinsic::riscv_vsuxseg6: 1410 case Intrinsic::riscv_vsuxseg7: 1411 case Intrinsic::riscv_vsuxseg8: 1412 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false); 1413 return; 1414 case Intrinsic::riscv_vsoxseg2_mask: 1415 case Intrinsic::riscv_vsoxseg3_mask: 1416 case Intrinsic::riscv_vsoxseg4_mask: 1417 case Intrinsic::riscv_vsoxseg5_mask: 1418 case Intrinsic::riscv_vsoxseg6_mask: 1419 case Intrinsic::riscv_vsoxseg7_mask: 1420 case Intrinsic::riscv_vsoxseg8_mask: 1421 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true); 1422 return; 1423 case Intrinsic::riscv_vsuxseg2_mask: 1424 case Intrinsic::riscv_vsuxseg3_mask: 1425 case Intrinsic::riscv_vsuxseg4_mask: 
1426 case Intrinsic::riscv_vsuxseg5_mask: 1427 case Intrinsic::riscv_vsuxseg6_mask: 1428 case Intrinsic::riscv_vsuxseg7_mask: 1429 case Intrinsic::riscv_vsuxseg8_mask: 1430 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false); 1431 return; 1432 case Intrinsic::riscv_vsoxei: 1433 case Intrinsic::riscv_vsoxei_mask: 1434 case Intrinsic::riscv_vsuxei: 1435 case Intrinsic::riscv_vsuxei_mask: { 1436 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask || 1437 IntNo == Intrinsic::riscv_vsuxei_mask; 1438 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei || 1439 IntNo == Intrinsic::riscv_vsoxei_mask; 1440 1441 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 1442 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1443 1444 unsigned CurOp = 2; 1445 SmallVector<SDValue, 8> Operands; 1446 Operands.push_back(Node->getOperand(CurOp++)); // Store value. 1447 1448 MVT IndexVT; 1449 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 1450 /*IsStridedOrIndexed*/ true, Operands, 1451 /*IsLoad=*/false, &IndexVT); 1452 1453 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 1454 "Element count mismatch"); 1455 1456 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1457 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 1458 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 1459 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 1460 report_fatal_error("The V extension does not support EEW=64 for index " 1461 "values when XLEN=32"); 1462 } 1463 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo( 1464 IsMasked, /*TU*/ false, IsOrdered, IndexLog2EEW, 1465 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL)); 1466 MachineSDNode *Store = 1467 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1468 1469 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1470 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 1471 1472 ReplaceNode(Node, Store); 1473 return; 1474 } 1475 case 
Intrinsic::riscv_vsm: 1476 case Intrinsic::riscv_vse: 1477 case Intrinsic::riscv_vse_mask: 1478 case Intrinsic::riscv_vsse: 1479 case Intrinsic::riscv_vsse_mask: { 1480 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask || 1481 IntNo == Intrinsic::riscv_vsse_mask; 1482 bool IsStrided = 1483 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask; 1484 1485 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 1486 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1487 1488 unsigned CurOp = 2; 1489 SmallVector<SDValue, 8> Operands; 1490 Operands.push_back(Node->getOperand(CurOp++)); // Store value. 1491 1492 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 1493 Operands); 1494 1495 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1496 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo( 1497 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL)); 1498 MachineSDNode *Store = 1499 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1500 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1501 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 1502 1503 ReplaceNode(Node, Store); 1504 return; 1505 } 1506 } 1507 break; 1508 } 1509 case ISD::BITCAST: { 1510 MVT SrcVT = Node->getOperand(0).getSimpleValueType(); 1511 // Just drop bitcasts between vectors if both are fixed or both are 1512 // scalable. 
1513 if ((VT.isScalableVector() && SrcVT.isScalableVector()) || 1514 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) { 1515 ReplaceUses(SDValue(Node, 0), Node->getOperand(0)); 1516 CurDAG->RemoveDeadNode(Node); 1517 return; 1518 } 1519 break; 1520 } 1521 case ISD::INSERT_SUBVECTOR: { 1522 SDValue V = Node->getOperand(0); 1523 SDValue SubV = Node->getOperand(1); 1524 SDLoc DL(SubV); 1525 auto Idx = Node->getConstantOperandVal(2); 1526 MVT SubVecVT = SubV.getSimpleValueType(); 1527 1528 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering(); 1529 MVT SubVecContainerVT = SubVecVT; 1530 // Establish the correct scalable-vector types for any fixed-length type. 1531 if (SubVecVT.isFixedLengthVector()) 1532 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT); 1533 if (VT.isFixedLengthVector()) 1534 VT = TLI.getContainerForFixedLengthVector(VT); 1535 1536 const auto *TRI = Subtarget->getRegisterInfo(); 1537 unsigned SubRegIdx; 1538 std::tie(SubRegIdx, Idx) = 1539 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 1540 VT, SubVecContainerVT, Idx, TRI); 1541 1542 // If the Idx hasn't been completely eliminated then this is a subvector 1543 // insert which doesn't naturally align to a vector register. These must 1544 // be handled using instructions to manipulate the vector registers. 1545 if (Idx != 0) 1546 break; 1547 1548 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT); 1549 bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 || 1550 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 || 1551 SubVecLMUL == RISCVII::VLMUL::LMUL_F8; 1552 (void)IsSubVecPartReg; // Silence unused variable warning without asserts. 1553 assert((!IsSubVecPartReg || V.isUndef()) && 1554 "Expecting lowering to have created legal INSERT_SUBVECTORs when " 1555 "the subvector is smaller than a full-sized register"); 1556 1557 // If we haven't set a SubRegIdx, then we must be going between 1558 // equally-sized LMUL groups (e.g. 
VR -> VR). This can be done as a copy. 1559 if (SubRegIdx == RISCV::NoSubRegister) { 1560 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(VT); 1561 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) == 1562 InRegClassID && 1563 "Unexpected subvector extraction"); 1564 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT); 1565 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, 1566 DL, VT, SubV, RC); 1567 ReplaceNode(Node, NewNode); 1568 return; 1569 } 1570 1571 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV); 1572 ReplaceNode(Node, Insert.getNode()); 1573 return; 1574 } 1575 case ISD::EXTRACT_SUBVECTOR: { 1576 SDValue V = Node->getOperand(0); 1577 auto Idx = Node->getConstantOperandVal(1); 1578 MVT InVT = V.getSimpleValueType(); 1579 SDLoc DL(V); 1580 1581 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering(); 1582 MVT SubVecContainerVT = VT; 1583 // Establish the correct scalable-vector types for any fixed-length type. 1584 if (VT.isFixedLengthVector()) 1585 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT); 1586 if (InVT.isFixedLengthVector()) 1587 InVT = TLI.getContainerForFixedLengthVector(InVT); 1588 1589 const auto *TRI = Subtarget->getRegisterInfo(); 1590 unsigned SubRegIdx; 1591 std::tie(SubRegIdx, Idx) = 1592 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 1593 InVT, SubVecContainerVT, Idx, TRI); 1594 1595 // If the Idx hasn't been completely eliminated then this is a subvector 1596 // extract which doesn't naturally align to a vector register. These must 1597 // be handled using instructions to manipulate the vector registers. 1598 if (Idx != 0) 1599 break; 1600 1601 // If we haven't set a SubRegIdx, then we must be going between 1602 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy. 
1603 if (SubRegIdx == RISCV::NoSubRegister) { 1604 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT); 1605 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) == 1606 InRegClassID && 1607 "Unexpected subvector extraction"); 1608 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT); 1609 SDNode *NewNode = 1610 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC); 1611 ReplaceNode(Node, NewNode); 1612 return; 1613 } 1614 1615 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V); 1616 ReplaceNode(Node, Extract.getNode()); 1617 return; 1618 } 1619 case ISD::SPLAT_VECTOR: 1620 case RISCVISD::VMV_S_X_VL: 1621 case RISCVISD::VFMV_S_F_VL: 1622 case RISCVISD::VMV_V_X_VL: 1623 case RISCVISD::VFMV_V_F_VL: { 1624 // Try to match splat of a scalar load to a strided load with stride of x0. 1625 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL || 1626 Node->getOpcode() == RISCVISD::VFMV_S_F_VL; 1627 if (IsScalarMove && !Node->getOperand(0).isUndef()) 1628 break; 1629 SDValue Src = IsScalarMove ? Node->getOperand(1) : Node->getOperand(0); 1630 auto *Ld = dyn_cast<LoadSDNode>(Src); 1631 if (!Ld) 1632 break; 1633 EVT MemVT = Ld->getMemoryVT(); 1634 // The memory VT should be the same size as the element type. 1635 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize()) 1636 break; 1637 if (!IsProfitableToFold(Src, Node, Node) || 1638 !IsLegalToFold(Src, Node, Node, TM.getOptLevel())) 1639 break; 1640 1641 SDValue VL; 1642 if (Node->getOpcode() == ISD::SPLAT_VECTOR) 1643 VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT); 1644 else if (IsScalarMove) { 1645 // We could deal with more VL if we update the VSETVLI insert pass to 1646 // avoid introducing more VSETVLI. 
// Operand 2 is forwarded as the VL only when it is the constant 1; any other
// value abandons this match via the break below.
      if (!isOneConstant(Node->getOperand(2)))
        break;
      selectVLOp(Node->getOperand(2), VL);
    } else
      selectVLOp(Node->getOperand(1), VL);

    // The SEW operand is encoded as log2 of the element width in bits.
    unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
    SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);

    // Operand order: base pointer, X0 register, VL, SEW, chain. The pseudo is
    // looked up below with IsStrided = true, so the X0 register sits in the
    // stride position.
    SDValue Operands[] = {Ld->getBasePtr(),
                          CurDAG->getRegister(RISCV::X0, XLenVT), VL, SEW,
                          Ld->getChain()};

    RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
    const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
        /*IsMasked*/ false, /*IsTU*/ false, /*IsStrided*/ true, /*FF*/ false,
        Log2SEW, static_cast<unsigned>(LMUL));
    MachineSDNode *Load =
        CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);

    // Carry the memory operand over to the new machine node when Node is a
    // memory node.
    if (auto *MemOp = dyn_cast<MemSDNode>(Node))
      CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

    ReplaceNode(Node, Load);
    return;
  }
  }

  // Select the default instruction.
  SelectCode(Node);
}

// Select the address operand of an inline-asm memory constraint.
//
// For the 'm' and 'A' constraints the operand \p Op is appended to \p OutOps
// unchanged and false is returned. Every other constraint ID returns true
// without touching \p OutOps (presumably "not handled" to the caller --
// confirm against the SelectionDAGISel contract).
bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
    const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
  switch (ConstraintID) {
  case InlineAsm::Constraint_m:
    // We just support simple memory operands that have a single address
    // operand and need no special handling.
    OutOps.push_back(Op);
    return false;
  case InlineAsm::Constraint_A:
    // Handled exactly like 'm': the operand is passed through as-is.
    OutOps.push_back(Op);
    return false;
  default:
    break;
  }

  return true;
}

// Match \p Addr only when it is a frame index. On success \p Base is set to
// the equivalent target frame index of type XLenVT and true is returned;
// otherwise \p Base is left untouched and false is returned.
bool RISCVDAGToDAGISel::SelectAddrFI(SDValue Addr, SDValue &Base) {
  if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
    return true;
  }
  return false;
}

// Match any address as a base. Unlike SelectAddrFI this never fails: it
// always returns true and always writes \p Base.
bool RISCVDAGToDAGISel::SelectBaseAddr(SDValue Addr, SDValue &Base) {
  // If this is FrameIndex, select it directly. Otherwise just let it get
  // selected to a register independently.
  if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr))
    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
  else
    Base = Addr;
  return true;
}

// Pick the value to use as a shift amount, peeling off operations that cannot
// change the bits the shift reads.
//
// \p N is the candidate shift amount and \p ShiftWidth the shift width, which
// must be a power of two (asserted on the AND path). \p ShAmt receives one of:
//   - the input of an AND whose constant mask keeps all low
//     log2(ShiftWidth) bits (directly, or once known-zero bits are added back);
//   - a new SUB/SUBW machine node computing (X0 - X) when N is (C - X) with a
//     non-zero constant C that is a multiple of ShiftWidth;
//   - N itself otherwise.
// This function always returns true.
bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
                                        SDValue &ShAmt) {
  // Shift instructions on RISCV only read the lower 5 or 6 bits of the shift
  // amount. If there is an AND on the shift amount, we can bypass it if it
  // doesn't affect any of those bits.
  if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
    const APInt &AndMask = N->getConstantOperandAPInt(1);

    // Since the max shift amount is a power of 2 we can subtract 1 to make a
    // mask that covers the bits needed to represent all shift amounts.
    assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
    APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);

    // The AND keeps every bit the shift reads, so it is redundant.
    if (ShMask.isSubsetOf(AndMask)) {
      ShAmt = N.getOperand(0);
      return true;
    }

    // SimplifyDemandedBits may have optimized the mask so try restoring any
    // bits that are known zero.
    KnownBits Known = CurDAG->computeKnownBits(N->getOperand(0));
    if (ShMask.isSubsetOf(AndMask | Known.Zero)) {
      ShAmt = N.getOperand(0);
      return true;
    }
    // Otherwise fall through to the default at the bottom and keep the AND.
  } else if (N.getOpcode() == ISD::SUB &&
             isa<ConstantSDNode>(N.getOperand(0))) {
    uint64_t Imm = N.getConstantOperandVal(0);
    // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
    // generate a NEG instead of a SUB of a constant.
    if (Imm != 0 && Imm % ShiftWidth == 0) {
      SDLoc DL(N);
      EVT VT = N.getValueType();
      // The negate is built as a subtraction from the X0 register.
      SDValue Zero =
          CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
      // An i64 shift amount uses SUBW; every other type uses SUB.
      unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
      MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
                                                  N.getOperand(1));
      ShAmt = SDValue(Neg, 0);
      return true;
    }
  }

  // Nothing to peel off: use the shift amount unchanged.
  ShAmt = N;
  return true;
}

// Match a value that is sign extended from 32 bits.
//
// If \p N is a SIGN_EXTEND_INREG from i32, \p Val is its input. Otherwise, if
// N is known to have more than (bit width - 32) sign bits, \p Val is N
// itself. Returns false, leaving \p Val untouched, when neither holds.
bool RISCVDAGToDAGISel::selectSExti32(SDValue N, SDValue &Val) {
  if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      cast<VTSDNode>(N.getOperand(1))->getVT() == MVT::i32) {
    Val = N.getOperand(0);
    return true;
  }
  MVT VT = N.getSimpleValueType();
  // More than (width - 32) sign bits means bits 31 and up are all copies of
  // the same bit, i.e. the value is already sign extended from 32 bits.
  if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - 32)) {
    Val = N;
    return true;
  }

  return false;
}

// Match a value that is zero extended from 32 bits.
//
// If \p N is an AND with the constant 0xFFFFFFFF, \p Val is the AND's other
// input. Otherwise, if the upper 32 bits of N are known to be zero, \p Val is
// N itself. Returns false, leaving \p Val untouched, when neither holds.
bool RISCVDAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) {
  if (N.getOpcode() == ISD::AND) {
    auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (C && C->getZExtValue() == UINT64_C(0xFFFFFFFF)) {
      Val = N.getOperand(0);
      return true;
    }
  }
  MVT VT = N.getSimpleValueType();
  // Mask covering the top 32 bits of the value's type.
  APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), 32);
  if (CurDAG->MaskedValueIsZero(N, Mask)) {
    Val = N;
    return true;
  }

  return false;
}

// Return true if all users of this SDNode* only consume the lower \p Bits.
// This can be used to form W instructions for add/sub/mul/shl even when the
// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
// SimplifyDemandedBits has made it so some users see a sext_inreg and some
// don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
// the add/sub/mul/shl to become non-W instructions. By checking the users we
// may be able to use a W instruction and CSE with the other instruction if
// this has happened. We could try to detect that the CSE opportunity exists
// before doing this, but that would be more complicated.
// TODO: Does this need to look through AND/OR/XOR to their users to find more
// opportunities?
bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const {
  // \p Node must be an ADD, SUB, MUL, SHL, SRL, SIGN_EXTEND_INREG or a
  // constant. \p Bits is the number of low bits the caller can guarantee; the
  // result is true only if every user reads no more than that many.
  assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
          Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
          Node->getOpcode() == ISD::SRL ||
          Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
          isa<ConstantSDNode>(Node)) &&
         "Unexpected opcode");

  for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
    SDNode *User = *UI;
    // Users of this node should have already been instruction selected.
    // A user that is not yet a machine node is answered conservatively.
    if (!User->isMachineOpcode())
      return false;

    // TODO: Add more opcodes?
    switch (User->getMachineOpcode()) {
    default:
      // Unknown user: assume it may read every bit.
      return false;
    // Users that are satisfied by the low 32 bits.
    case RISCV::ADDW:
    case RISCV::ADDIW:
    case RISCV::SUBW:
    case RISCV::MULW:
    case RISCV::SLLW:
    case RISCV::SLLIW:
    case RISCV::SRAW:
    case RISCV::SRAIW:
    case RISCV::SRLW:
    case RISCV::SRLIW:
    case RISCV::DIVW:
    case RISCV::DIVUW:
    case RISCV::REMW:
    case RISCV::REMUW:
    case RISCV::ROLW:
    case RISCV::RORW:
    case RISCV::RORIW:
    case RISCV::CLZW:
    case RISCV::CTZW:
    case RISCV::CPOPW:
    case RISCV::SLLI_UW:
    case RISCV::FCVT_H_W:
    case RISCV::FCVT_H_WU:
    case RISCV::FCVT_S_W:
    case RISCV::FCVT_S_WU:
    case RISCV::FCVT_D_W:
    case RISCV::FCVT_D_WU:
      if (Bits < 32)
        return false;
      break;
    case RISCV::SLLI:
      // SLLI only uses the lower (XLen - ShAmt) bits.
      if (Bits < Subtarget->getXLen() - User->getConstantOperandVal(1))
        return false;
      break;
    case RISCV::ANDI:
      // 64 - countLeadingZeros(imm) is the index of the immediate's highest
      // set bit plus one, so no bit at or above that position survives the
      // AND (assumes the operand value is read as a 64-bit quantity).
      if (Bits < (64 - countLeadingZeros(User->getConstantOperandVal(1))))
        return false;
      break;
    case RISCV::SEXT_B:
      if (Bits < 8)
        return false;
      break;
    case RISCV::SEXT_H:
    case RISCV::ZEXT_H_RV32:
    case RISCV::ZEXT_H_RV64:
      if (Bits < 16)
        return false;
      break;
    case RISCV::ADD_UW:
    case RISCV::SH1ADD_UW:
    case RISCV::SH2ADD_UW:
    case RISCV::SH3ADD_UW:
      // The first operand to add.uw/shXadd.uw is implicitly zero extended from
      // 32 bits.
      if (UI.getOperandNo() != 0 || Bits < 32)
        return false;
      break;
    // For the stores below only a use as operand 0 qualifies; a use in any
    // other operand position is rejected regardless of Bits.
    case RISCV::SB:
      if (UI.getOperandNo() != 0 || Bits < 8)
        return false;
      break;
    case RISCV::SH:
      if (UI.getOperandNo() != 0 || Bits < 16)
        return false;
      break;
    case RISCV::SW:
      if (UI.getOperandNo() != 0 || Bits < 32)
        return false;
      break;
    }
  }

  // Every user passed its check (trivially true when there are no users).
  return true;
}

// Select VL as a 5 bit immediate or a value that will become a register. This
// allows us to choose between VSETIVLI or VSETVLI later.
//
// \p VL receives: a target constant for constants that fit in 5 unsigned bits;
// the RISCV::VLMaxSentinel target constant for an all-ones constant or the X0
// register; otherwise \p N unchanged. Always returns true.
bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
  auto *C = dyn_cast<ConstantSDNode>(N);
  if (C && isUInt<5>(C->getZExtValue())) {
    VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
                                   N->getValueType(0));
  } else if (C && C->isAllOnesValue()) {
    // Treat all ones as VLMax.
    VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
                                   N->getValueType(0));
  } else if (isa<RegisterSDNode>(N) &&
             cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
    // All our VL operands use an operand that allows GPRNoX0 or an immediate
    // as the register class. Convert X0 to a special immediate to pass the
    // MachineVerifier. This is recognized specially by the vsetvli insertion
    // pass.
    VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
                                   N->getValueType(0));
  } else {
    VL = N;
  }

  return true;
}

// Match a VMV_V_X_VL node and return its operand 0 in \p SplatVal. Returns
// false, leaving \p SplatVal untouched, for any other opcode.
bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
  if (N.getOpcode() != RISCVISD::VMV_V_X_VL)
    return false;
  SplatVal = N.getOperand(0);
  return true;
}

// Predicate deciding whether a (sign-extended) splat immediate is acceptable.
using ValidateFn = bool (*)(int64_t);

// Shared implementation of the signed splat-immediate matchers.
//
// Succeeds when \p N is a VMV_V_X_VL whose operand 0 is a constant that, after
// being sign extended from the vector element width, satisfies
// \p ValidateImm. On success \p SplatVal is the adjusted immediate as an
// XLenVT target constant.
static bool selectVSplatSimmHelper(SDValue N, SDValue &SplatVal,
                                   SelectionDAG &DAG,
                                   const RISCVSubtarget &Subtarget,
                                   ValidateFn ValidateImm) {
  if (N.getOpcode() != RISCVISD::VMV_V_X_VL ||
      !isa<ConstantSDNode>(N.getOperand(0)))
    return false;

  int64_t SplatImm = cast<ConstantSDNode>(N.getOperand(0))->getSExtValue();

  // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
  // type is wider than the resulting vector element type: an implicit
  // truncation first takes place. Therefore, perform a manual
  // truncation/sign-extension in order to ignore any truncated bits and catch
  // any zero-extended immediate.
  // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
  // sign-extending to (XLenVT -1).
  MVT XLenVT = Subtarget.getXLenVT();
  assert(XLenVT == N.getOperand(0).getSimpleValueType() &&
         "Unexpected splat operand type");
  MVT EltVT = N.getSimpleValueType().getVectorElementType();
  if (EltVT.bitsLT(XLenVT))
    SplatImm = SignExtend64(SplatImm, EltVT.getSizeInBits());

  if (!ValidateImm(SplatImm))
    return false;

  SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), XLenVT);
  return true;
}

// Match a constant splat whose immediate fits in 5 signed bits.
bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
  return selectVSplatSimmHelper(N, SplatVal, *CurDAG, *Subtarget,
                                [](int64_t Imm) { return isInt<5>(Imm); });
}

// Match a constant splat whose immediate is a 5-bit signed value other than
// -16, or exactly 16 -- i.e. the simm5 range shifted up by one.
bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
  return selectVSplatSimmHelper(
      N, SplatVal, *CurDAG, *Subtarget,
      [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; });
}

// Same range as selectVSplatSimm5Plus1, but additionally rejects zero.
bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
                                                      SDValue &SplatVal) {
  return selectVSplatSimmHelper(
      N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) {
        return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);
      });
}

// Match a constant splat whose immediate fits in 5 unsigned bits. On success
// \p SplatVal is that immediate as an XLenVT target constant.
bool RISCVDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &SplatVal) {
  if (N.getOpcode() != RISCVISD::VMV_V_X_VL ||
      !isa<ConstantSDNode>(N.getOperand(0)))
    return false;

  // Unlike the signed helper above, the value is not re-extended from the
  // element width before the range check.
  int64_t SplatImm = cast<ConstantSDNode>(N.getOperand(0))->getSExtValue();

  if (!isUInt<5>(SplatImm))
    return false;

  SplatVal =
      CurDAG->getTargetConstant(SplatImm, SDLoc(N), Subtarget->getXLenVT());

  return true;
}

// Match a scalar constant that, once sign extended from its low \p Width bits,
// fits in 5 signed bits. On success \p Imm is that value as an XLenVT target
// constant. Returns false for non-constants and out-of-range values.
bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
                                       SDValue &Imm) {
  if (auto *C = dyn_cast<ConstantSDNode>(N)) {
    int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);

    if (!isInt<5>(ImmVal))
      return false;

    Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT());
    return true;
  }

  return false;
}

// Merge an ADDI into the offset of a load/store instruction where possible.
// (load (addi base, off1), off2) -> (load base, off1+off2)
// (store val, (addi base, off1), off2) -> (store val, base, off1+off2)
// This is possible when off1+off2 fits a 12-bit immediate.
//
// Returns true if \p N was updated in place, false if it was left alone.
bool RISCVDAGToDAGISel::doPeepholeLoadStoreADDI(SDNode *N) {
  int OffsetOpIdx;
  int BaseOpIdx;

  // Only attempt this optimisation for I-type loads and S-type stores.
  switch (N->getMachineOpcode()) {
  default:
    return false;
  case RISCV::LB:
  case RISCV::LH:
  case RISCV::LW:
  case RISCV::LBU:
  case RISCV::LHU:
  case RISCV::LWU:
  case RISCV::LD:
  case RISCV::FLH:
  case RISCV::FLW:
  case RISCV::FLD:
    // Loads: (base, offset, ...).
    BaseOpIdx = 0;
    OffsetOpIdx = 1;
    break;
  case RISCV::SB:
  case RISCV::SH:
  case RISCV::SW:
  case RISCV::SD:
  case RISCV::FSH:
  case RISCV::FSW:
  case RISCV::FSD:
    // Stores: (val, base, offset, ...).
    BaseOpIdx = 1;
    OffsetOpIdx = 2;
    break;
  }

  if (!isa<ConstantSDNode>(N->getOperand(OffsetOpIdx)))
    return false;

  SDValue Base = N->getOperand(BaseOpIdx);

  // If the base is an ADDI, we can merge it in to the load/store.
  if (!Base.isMachineOpcode() || Base.getMachineOpcode() != RISCV::ADDI)
    return false;

  SDValue ImmOperand = Base.getOperand(1);
  // NOTE(review): off2 is held as an unsigned 64-bit value while off1 below is
  // sign extended. If the offset constant is narrower than 64 bits, a negative
  // off2 would presumably appear as a large positive number and simply fail
  // the checks below (a missed fold, not a wrong one) -- confirm.
  uint64_t Offset2 = N->getConstantOperandVal(OffsetOpIdx);

  if (auto *Const = dyn_cast<ConstantSDNode>(ImmOperand)) {
    int64_t Offset1 = Const->getSExtValue();
    int64_t CombinedOffset = Offset1 + Offset2;
    if (!isInt<12>(CombinedOffset))
      return false;
    ImmOperand = CurDAG->getTargetConstant(CombinedOffset, SDLoc(ImmOperand),
                                           ImmOperand.getValueType());
  } else if (auto *GA = dyn_cast<GlobalAddressSDNode>(ImmOperand)) {
    // If the off1 in (addi base, off1) is a global variable's address (its
    // low part, really), then we can rely on the alignment of that variable
    // to provide a margin of safety before off1 can overflow the 12 bits.
    // Check if off2 falls within that margin; if so off1+off2 can't overflow.
    const DataLayout &DL = CurDAG->getDataLayout();
    Align Alignment = GA->getGlobal()->getPointerAlignment(DL);
    if (Offset2 != 0 && Alignment <= Offset2)
      return false;
    int64_t Offset1 = GA->getOffset();
    int64_t CombinedOffset = Offset1 + Offset2;
    ImmOperand = CurDAG->getTargetGlobalAddress(
        GA->getGlobal(), SDLoc(ImmOperand), ImmOperand.getValueType(),
        CombinedOffset, GA->getTargetFlags());
  } else if (auto *CP = dyn_cast<ConstantPoolSDNode>(ImmOperand)) {
    // Same alignment-margin argument as for global addresses, using the
    // constant pool entry's alignment.
    Align Alignment = CP->getAlign();
    if (Offset2 != 0 && Alignment <= Offset2)
      return false;
    int64_t Offset1 = CP->getOffset();
    int64_t CombinedOffset = Offset1 + Offset2;
    ImmOperand = CurDAG->getTargetConstantPool(
        CP->getConstVal(), ImmOperand.getValueType(), CP->getAlign(),
        CombinedOffset, CP->getTargetFlags());
  } else {
    // Any other kind of ADDI immediate is not folded.
    return false;
  }

  LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
  LLVM_DEBUG(Base->dump(CurDAG));
  LLVM_DEBUG(dbgs() << "\nN: ");
  LLVM_DEBUG(N->dump(CurDAG));
  LLVM_DEBUG(dbgs() << "\n");

  // Modify the offset operand of the load/store. The base becomes the ADDI's
  // own input; the one remaining operand after the offset is passed through
  // unchanged.
  if (BaseOpIdx == 0) // Load
    CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand,
                               N->getOperand(2));
  else // Store
    CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0),
                               ImmOperand, N->getOperand(3));

  return true;
}

// Try to remove sext.w if the input is a W instruction or can be made into
// a W instruction cheaply.
//
// Returns true if the uses of \p N were redirected to another node.
bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
  // Look for the sext.w pattern, addiw rd, rs1, 0.
  if (N->getMachineOpcode() != RISCV::ADDIW ||
      !isNullConstant(N->getOperand(1)))
    return false;

  SDValue N0 = N->getOperand(0);
  if (!N0.isMachineOpcode())
    return false;

  switch (N0.getMachineOpcode()) {
  default:
    break;
  case RISCV::ADD:
  case RISCV::ADDI:
  case RISCV::SUB:
  case RISCV::MUL:
  case RISCV::SLLI: {
    // Convert sext.w+add/sub/mul to their W instructions. This will create
    // a new independent instruction. This improves latency.
    unsigned Opc;
    switch (N0.getMachineOpcode()) {
    default:
      llvm_unreachable("Unexpected opcode!");
    case RISCV::ADD:  Opc = RISCV::ADDW;  break;
    case RISCV::ADDI: Opc = RISCV::ADDIW; break;
    case RISCV::SUB:  Opc = RISCV::SUBW;  break;
    case RISCV::MUL:  Opc = RISCV::MULW;  break;
    case RISCV::SLLI: Opc = RISCV::SLLIW; break;
    }

    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);

    // Shift amount needs to be uimm5. A larger SLLI amount leaves the switch
    // and falls to the "return false" at the bottom.
    if (N0.getMachineOpcode() == RISCV::SLLI &&
        !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
      break;

    // Build the W form with the same two operands and give it N's uses. N0
    // itself is not replaced here.
    SDNode *Result =
        CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
                               N00, N01);
    ReplaceUses(N, Result);
    return true;
  }
  case RISCV::ADDW:
  case RISCV::ADDIW:
  case RISCV::SUBW:
  case RISCV::MULW:
  case RISCV::SLLIW:
    // Result is already sign extended just remove the sext.w.
    // NOTE: We only handle the nodes that are selected with hasAllWUsers.
    ReplaceUses(N, N0.getNode());
    return true;
  }

  return false;
}

// Optimize masked RVV pseudo instructions with a known all-ones mask to their
// corresponding "unmasked" pseudo versions. The mask we're interested in will
// take the form of a V0 physical register operand, with a glued
// register-setting instruction.
//
// Returns true if the uses of \p N were redirected to a new unmasked node.
bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) {
  // Only opcodes with an entry in the masked-pseudo table are candidates.
  const RISCV::RISCVMaskedPseudoInfo *I =
      RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
  if (!I)
    return false;

  unsigned MaskOpIdx = I->MaskOpIdx;

  // Check that we're using V0 as a mask register.
  if (!isa<RegisterSDNode>(N->getOperand(MaskOpIdx)) ||
      cast<RegisterSDNode>(N->getOperand(MaskOpIdx))->getReg() != RISCV::V0)
    return false;

  // The glued user defines V0.
  const auto *Glued = N->getGluedNode();

  if (!Glued || Glued->getOpcode() != ISD::CopyToReg)
    return false;

  // Check that we're defining V0 as a mask register.
  if (!isa<RegisterSDNode>(Glued->getOperand(1)) ||
      cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0)
    return false;

  // Check the instruction defining V0; it needs to be a VMSET pseudo.
  SDValue MaskSetter = Glued->getOperand(2);

  const auto IsVMSet = [](unsigned Opc) {
    return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
           Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
           Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
           Opc == RISCV::PseudoVMSET_M_B8;
  };

  // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
  // undefined behaviour if it's the wrong bitwidth, so we could choose to
  // assume that it's all-ones? Same applies to its VL.
  if (!MaskSetter->isMachineOpcode() || !IsVMSet(MaskSetter.getMachineOpcode()))
    return false;

  // Retrieve the tail policy operand index, if any.
  Optional<unsigned> TailPolicyOpIdx;
  const RISCVInstrInfo *TII = static_cast<const RISCVInstrInfo *>(
      CurDAG->getSubtarget().getInstrInfo());

  const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());

  if (RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags)) {
    // The last operand of the pseudo is the policy op, but we're expecting a
    // Glue operand last. We may also have a chain. Step backwards over
    // whichever of the two are present to land on the policy operand.
    TailPolicyOpIdx = N->getNumOperands() - 1;
    if (N->getOperand(*TailPolicyOpIdx).getValueType() == MVT::Glue)
      (*TailPolicyOpIdx)--;
    if (N->getOperand(*TailPolicyOpIdx).getValueType() == MVT::Other)
      (*TailPolicyOpIdx)--;

    // If the policy isn't TAIL_AGNOSTIC we can't perform this optimization.
    if (N->getConstantOperandVal(*TailPolicyOpIdx) != RISCVII::TAIL_AGNOSTIC)
      return false;
  }

  const MCInstrDesc &UnmaskedMCID = TII->get(I->UnmaskedPseudo);

  // Check that we're dropping the merge operand, the mask operand, and any
  // policy operand when we transform to this unmasked pseudo.
  assert(!RISCVII::hasMergeOp(UnmaskedMCID.TSFlags) &&
         RISCVII::hasDummyMaskOp(UnmaskedMCID.TSFlags) &&
         !RISCVII::hasVecPolicyOp(UnmaskedMCID.TSFlags) &&
         "Unexpected pseudo to transform to");
  // Keeps the variable "used" when the assert above is compiled out.
  (void)UnmaskedMCID;

  SmallVector<SDValue, 8> Ops;
  // Skip the merge operand at index 0.
  // NOTE: the loop counter I shadows the pseudo-info pointer I declared at
  // the top of the function; inside the loop, I is the operand index.
  for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
    // Skip the mask, the policy, and the Glue. When there is no policy
    // operand TailPolicyOpIdx is empty and never compares equal to I.
    SDValue Op = N->getOperand(I);
    if (I == MaskOpIdx || I == TailPolicyOpIdx ||
        Op.getValueType() == MVT::Glue)
      continue;
    Ops.push_back(Op);
  }

  // Transitively apply any node glued to our new node: the V0 copy is being
  // dropped, so anything it was glued to is attached through that node's last
  // result value.
  if (auto *TGlued = Glued->getGluedNode())
    Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));

  // Here I is again the pseudo-info pointer.
  SDNode *Result =
      CurDAG->getMachineNode(I->UnmaskedPseudo, SDLoc(N), N->getVTList(), Ops);
  ReplaceUses(N, Result);

  return true;
}

// This pass converts a legalized DAG into a RISCV-specific DAG, ready
// for instruction scheduling.
//
// Returns a newly allocated RISCVDAGToDAGISel built from \p TM.
FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM) {
  return new RISCVDAGToDAGISel(TM);
}