1 //===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISCV ------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines an instruction selector for the RISCV target. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "RISCVISelDAGToDAG.h" 14 #include "MCTargetDesc/RISCVMCTargetDesc.h" 15 #include "MCTargetDesc/RISCVMatInt.h" 16 #include "RISCVISelLowering.h" 17 #include "RISCVMachineFunctionInfo.h" 18 #include "llvm/CodeGen/MachineFrameInfo.h" 19 #include "llvm/IR/IntrinsicsRISCV.h" 20 #include "llvm/Support/Alignment.h" 21 #include "llvm/Support/Debug.h" 22 #include "llvm/Support/KnownBits.h" 23 #include "llvm/Support/MathExtras.h" 24 #include "llvm/Support/raw_ostream.h" 25 26 using namespace llvm; 27 28 #define DEBUG_TYPE "riscv-isel" 29 30 namespace llvm { 31 namespace RISCV { 32 #define GET_RISCVVSSEGTable_IMPL 33 #define GET_RISCVVLSEGTable_IMPL 34 #define GET_RISCVVLXSEGTable_IMPL 35 #define GET_RISCVVSXSEGTable_IMPL 36 #define GET_RISCVVLETable_IMPL 37 #define GET_RISCVVSETable_IMPL 38 #define GET_RISCVVLXTable_IMPL 39 #define GET_RISCVVSXTable_IMPL 40 #define GET_RISCVMaskedPseudosTable_IMPL 41 #include "RISCVGenSearchableTables.inc" 42 } // namespace RISCV 43 } // namespace llvm 44 45 void RISCVDAGToDAGISel::PreprocessISelDAG() { 46 for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), 47 E = CurDAG->allnodes_end(); 48 I != E;) { 49 SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues. 50 51 // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point 52 // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden. 53 if (N->getOpcode() == ISD::SPLAT_VECTOR) { 54 MVT VT = N->getSimpleValueType(0); 55 unsigned Opc = 56 VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL; 57 SDLoc DL(N); 58 SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT()); 59 SDValue Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), 60 N->getOperand(0), VL); 61 62 --I; 63 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); 64 ++I; 65 CurDAG->DeleteNode(N); 66 continue; 67 } 68 69 // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector 70 // load. Done after lowering and combining so that we have a chance to 71 // optimize this to VMV_V_X_VL when the upper bits aren't needed. 72 if (N->getOpcode() != RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) 73 continue; 74 75 assert(N->getNumOperands() == 4 && "Unexpected number of operands"); 76 MVT VT = N->getSimpleValueType(0); 77 SDValue Passthru = N->getOperand(0); 78 SDValue Lo = N->getOperand(1); 79 SDValue Hi = N->getOperand(2); 80 SDValue VL = N->getOperand(3); 81 assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() && 82 Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 && 83 "Unexpected VTs!"); 84 MachineFunction &MF = CurDAG->getMachineFunction(); 85 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>(); 86 SDLoc DL(N); 87 88 // We use the same frame index we use for moving two i32s into 64-bit FPR. 89 // This is an analogous operation. 90 int FI = FuncInfo->getMoveF64FrameIndex(MF); 91 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 92 const TargetLowering &TLI = CurDAG->getTargetLoweringInfo(); 93 SDValue StackSlot = 94 CurDAG->getFrameIndex(FI, TLI.getPointerTy(CurDAG->getDataLayout())); 95 96 SDValue Chain = CurDAG->getEntryNode(); 97 Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8)); 98 99 SDValue OffsetSlot = 100 CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), DL); 101 Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4), 102 Align(8)); 103 104 Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); 105 106 SDVTList VTs = CurDAG->getVTList({VT, MVT::Other}); 107 SDValue IntID = 108 CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64); 109 SDValue Ops[] = {Chain, 110 IntID, 111 Passthru, 112 StackSlot, 113 CurDAG->getRegister(RISCV::X0, MVT::i64), 114 VL}; 115 116 SDValue Result = CurDAG->getMemIntrinsicNode( 117 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MVT::i64, MPI, Align(8), 118 MachineMemOperand::MOLoad); 119 120 // We're about to replace all uses of the SPLAT_VECTOR_SPLIT_I64 with the 121 // vlse we created. This will cause general havok on the dag because 122 // anything below the conversion could be folded into other existing nodes. 123 // To avoid invalidating 'I', back it up to the convert node. 124 --I; 125 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); 126 127 // Now that we did that, the node is dead. Increment the iterator to the 128 // next node to process, then delete N. 129 ++I; 130 CurDAG->DeleteNode(N); 131 } 132 } 133 134 void RISCVDAGToDAGISel::PostprocessISelDAG() { 135 HandleSDNode Dummy(CurDAG->getRoot()); 136 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); 137 138 bool MadeChange = false; 139 while (Position != CurDAG->allnodes_begin()) { 140 SDNode *N = &*--Position; 141 // Skip dead nodes and any non-machine opcodes. 142 if (N->use_empty() || !N->isMachineOpcode()) 143 continue; 144 145 MadeChange |= doPeepholeSExtW(N); 146 MadeChange |= doPeepholeLoadStoreADDI(N); 147 MadeChange |= doPeepholeMaskedRVV(N); 148 } 149 150 CurDAG->setRoot(Dummy.getValue()); 151 152 if (MadeChange) 153 CurDAG->RemoveDeadNodes(); 154 } 155 156 static SDNode *selectImmWithConstantPool(SelectionDAG *CurDAG, const SDLoc &DL, 157 const MVT VT, int64_t Imm, 158 const RISCVSubtarget &Subtarget) { 159 assert(VT == MVT::i64 && "Expecting MVT::i64"); 160 const RISCVTargetLowering *TLI = Subtarget.getTargetLowering(); 161 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(CurDAG->getConstantPool( 162 ConstantInt::get(EVT(VT).getTypeForEVT(*CurDAG->getContext()), Imm), VT)); 163 SDValue Addr = TLI->getAddr(CP, *CurDAG); 164 SDValue Offset = CurDAG->getTargetConstant(0, DL, VT); 165 // Since there is no data race, the chain can be the entry node. 166 SDNode *Load = CurDAG->getMachineNode(RISCV::LD, DL, VT, Addr, Offset, 167 CurDAG->getEntryNode()); 168 MachineFunction &MF = CurDAG->getMachineFunction(); 169 MachineMemOperand *MemOp = MF.getMachineMemOperand( 170 MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad, 171 LLT(VT), CP->getAlign()); 172 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Load), {MemOp}); 173 return Load; 174 } 175 176 static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, 177 int64_t Imm, const RISCVSubtarget &Subtarget) { 178 MVT XLenVT = Subtarget.getXLenVT(); 179 RISCVMatInt::InstSeq Seq = 180 RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits()); 181 182 // If Imm is expensive to build, then we put it into constant pool. 183 if (Subtarget.useConstantPoolForLargeInts() && 184 Seq.size() > Subtarget.getMaxBuildIntsCost()) 185 return selectImmWithConstantPool(CurDAG, DL, VT, Imm, Subtarget); 186 187 SDNode *Result = nullptr; 188 SDValue SrcReg = CurDAG->getRegister(RISCV::X0, XLenVT); 189 for (RISCVMatInt::Inst &Inst : Seq) { 190 SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, XLenVT); 191 if (Inst.Opc == RISCV::LUI) 192 Result = CurDAG->getMachineNode(RISCV::LUI, DL, XLenVT, SDImm); 193 else if (Inst.Opc == RISCV::ADD_UW) 194 Result = CurDAG->getMachineNode(RISCV::ADD_UW, DL, XLenVT, SrcReg, 195 CurDAG->getRegister(RISCV::X0, XLenVT)); 196 else if (Inst.Opc == RISCV::SH1ADD || Inst.Opc == RISCV::SH2ADD || 197 Inst.Opc == RISCV::SH3ADD) 198 Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SrcReg); 199 else 200 Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SDImm); 201 202 // Only the first instruction has X0 as its source. 203 SrcReg = SDValue(Result, 0); 204 } 205 206 return Result; 207 } 208 209 static SDValue createTupleImpl(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, 210 unsigned RegClassID, unsigned SubReg0) { 211 assert(Regs.size() >= 2 && Regs.size() <= 8); 212 213 SDLoc DL(Regs[0]); 214 SmallVector<SDValue, 8> Ops; 215 216 Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32)); 217 218 for (unsigned I = 0; I < Regs.size(); ++I) { 219 Ops.push_back(Regs[I]); 220 Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32)); 221 } 222 SDNode *N = 223 CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops); 224 return SDValue(N, 0); 225 } 226 227 static SDValue createM1Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, 228 unsigned NF) { 229 static const unsigned RegClassIDs[] = { 230 RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID, 231 RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID, 232 RISCV::VRN8M1RegClassID}; 233 234 return createTupleImpl(CurDAG, Regs, RegClassIDs[NF - 2], RISCV::sub_vrm1_0); 235 } 236 237 static SDValue createM2Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, 238 unsigned NF) { 239 static const unsigned RegClassIDs[] = {RISCV::VRN2M2RegClassID, 240 RISCV::VRN3M2RegClassID, 241 RISCV::VRN4M2RegClassID}; 242 243 return createTupleImpl(CurDAG, Regs, RegClassIDs[NF - 2], RISCV::sub_vrm2_0); 244 } 245 246 static SDValue createM4Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, 247 unsigned NF) { 248 return createTupleImpl(CurDAG, Regs, RISCV::VRN2M4RegClassID, 249 RISCV::sub_vrm4_0); 250 } 251 252 static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, 253 unsigned NF, RISCVII::VLMUL LMUL) { 254 switch (LMUL) { 255 default: 256 llvm_unreachable("Invalid LMUL."); 257 case RISCVII::VLMUL::LMUL_F8: 258 case RISCVII::VLMUL::LMUL_F4: 259 case RISCVII::VLMUL::LMUL_F2: 260 case RISCVII::VLMUL::LMUL_1: 261 return createM1Tuple(CurDAG, Regs, NF); 262 case RISCVII::VLMUL::LMUL_2: 263 return createM2Tuple(CurDAG, Regs, NF); 264 case RISCVII::VLMUL::LMUL_4: 265 return createM4Tuple(CurDAG, Regs, NF); 266 } 267 } 268 269 void RISCVDAGToDAGISel::addVectorLoadStoreOperands( 270 SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp, 271 bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands, 272 bool IsLoad, MVT *IndexVT) { 273 SDValue Chain = Node->getOperand(0); 274 SDValue Glue; 275 276 SDValue Base; 277 SelectBaseAddr(Node->getOperand(CurOp++), Base); 278 Operands.push_back(Base); // Base pointer. 279 280 if (IsStridedOrIndexed) { 281 Operands.push_back(Node->getOperand(CurOp++)); // Index. 282 if (IndexVT) 283 *IndexVT = Operands.back()->getSimpleValueType(0); 284 } 285 286 if (IsMasked) { 287 // Mask needs to be copied to V0. 288 SDValue Mask = Node->getOperand(CurOp++); 289 Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue()); 290 Glue = Chain.getValue(1); 291 Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType())); 292 } 293 SDValue VL; 294 selectVLOp(Node->getOperand(CurOp++), VL); 295 Operands.push_back(VL); 296 297 MVT XLenVT = Subtarget->getXLenVT(); 298 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT); 299 Operands.push_back(SEWOp); 300 301 // Masked load has the tail policy argument. 302 if (IsMasked && IsLoad) { 303 // Policy must be a constant. 304 uint64_t Policy = Node->getConstantOperandVal(CurOp++); 305 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT); 306 Operands.push_back(PolicyOp); 307 } 308 309 Operands.push_back(Chain); // Chain. 310 if (Glue) 311 Operands.push_back(Glue); 312 } 313 314 void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked, 315 bool IsStrided) { 316 SDLoc DL(Node); 317 unsigned NF = Node->getNumValues() - 1; 318 MVT VT = Node->getSimpleValueType(0); 319 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 320 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 321 322 unsigned CurOp = 2; 323 SmallVector<SDValue, 8> Operands; 324 if (IsMasked) { 325 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp, 326 Node->op_begin() + CurOp + NF); 327 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL); 328 Operands.push_back(MaskedOff); 329 CurOp += NF; 330 } 331 332 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 333 Operands, /*IsLoad=*/true); 334 335 const RISCV::VLSEGPseudo *P = 336 RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW, 337 static_cast<unsigned>(LMUL)); 338 MachineSDNode *Load = 339 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands); 340 341 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 342 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 343 344 SDValue SuperReg = SDValue(Load, 0); 345 for (unsigned I = 0; I < NF; ++I) { 346 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I); 347 ReplaceUses(SDValue(Node, I), 348 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg)); 349 } 350 351 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); 352 CurDAG->RemoveDeadNode(Node); 353 } 354 355 void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) { 356 SDLoc DL(Node); 357 unsigned NF = Node->getNumValues() - 2; // Do not count VL and Chain. 358 MVT VT = Node->getSimpleValueType(0); 359 MVT XLenVT = Subtarget->getXLenVT(); 360 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 361 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 362 363 unsigned CurOp = 2; 364 SmallVector<SDValue, 7> Operands; 365 if (IsMasked) { 366 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp, 367 Node->op_begin() + CurOp + NF); 368 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL); 369 Operands.push_back(MaskedOff); 370 CurOp += NF; 371 } 372 373 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 374 /*IsStridedOrIndexed*/ false, Operands, 375 /*IsLoad=*/true); 376 377 const RISCV::VLSEGPseudo *P = 378 RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true, 379 Log2SEW, static_cast<unsigned>(LMUL)); 380 MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, 381 MVT::Other, MVT::Glue, Operands); 382 SDNode *ReadVL = CurDAG->getMachineNode(RISCV::PseudoReadVL, DL, XLenVT, 383 /*Glue*/ SDValue(Load, 2)); 384 385 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 386 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 387 388 SDValue SuperReg = SDValue(Load, 0); 389 for (unsigned I = 0; I < NF; ++I) { 390 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I); 391 ReplaceUses(SDValue(Node, I), 392 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg)); 393 } 394 395 ReplaceUses(SDValue(Node, NF), SDValue(ReadVL, 0)); // VL 396 ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 1)); // Chain 397 CurDAG->RemoveDeadNode(Node); 398 } 399 400 void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked, 401 bool IsOrdered) { 402 SDLoc DL(Node); 403 unsigned NF = Node->getNumValues() - 1; 404 MVT VT = Node->getSimpleValueType(0); 405 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 406 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 407 408 unsigned CurOp = 2; 409 SmallVector<SDValue, 8> Operands; 410 if (IsMasked) { 411 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp, 412 Node->op_begin() + CurOp + NF); 413 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL); 414 Operands.push_back(MaskedOff); 415 CurOp += NF; 416 } 417 418 MVT IndexVT; 419 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 420 /*IsStridedOrIndexed*/ true, Operands, 421 /*IsLoad=*/true, &IndexVT); 422 423 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 424 "Element count mismatch"); 425 426 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 427 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 428 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 429 report_fatal_error("The V extension does not support EEW=64 for index " 430 "values when XLEN=32"); 431 } 432 const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo( 433 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), 434 static_cast<unsigned>(IndexLMUL)); 435 MachineSDNode *Load = 436 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands); 437 438 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 439 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 440 441 SDValue SuperReg = SDValue(Load, 0); 442 for (unsigned I = 0; I < NF; ++I) { 443 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I); 444 ReplaceUses(SDValue(Node, I), 445 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg)); 446 } 447 448 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); 449 CurDAG->RemoveDeadNode(Node); 450 } 451 452 void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, bool IsMasked, 453 bool IsStrided) { 454 SDLoc DL(Node); 455 unsigned NF = Node->getNumOperands() - 4; 456 if (IsStrided) 457 NF--; 458 if (IsMasked) 459 NF--; 460 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 461 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 462 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 463 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF); 464 SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL); 465 466 SmallVector<SDValue, 8> Operands; 467 Operands.push_back(StoreVal); 468 unsigned CurOp = 2 + NF; 469 470 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 471 Operands); 472 473 const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo( 474 NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL)); 475 MachineSDNode *Store = 476 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands); 477 478 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 479 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 480 481 ReplaceNode(Node, Store); 482 } 483 484 void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked, 485 bool IsOrdered) { 486 SDLoc DL(Node); 487 unsigned NF = Node->getNumOperands() - 5; 488 if (IsMasked) 489 --NF; 490 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 491 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 492 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 493 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF); 494 SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL); 495 496 SmallVector<SDValue, 8> Operands; 497 Operands.push_back(StoreVal); 498 unsigned CurOp = 2 + NF; 499 500 MVT IndexVT; 501 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 502 /*IsStridedOrIndexed*/ true, Operands, 503 /*IsLoad=*/false, &IndexVT); 504 505 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 506 "Element count mismatch"); 507 508 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 509 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 510 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 511 report_fatal_error("The V extension does not support EEW=64 for index " 512 "values when XLEN=32"); 513 } 514 const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo( 515 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), 516 static_cast<unsigned>(IndexLMUL)); 517 MachineSDNode *Store = 518 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands); 519 520 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 521 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 522 523 ReplaceNode(Node, Store); 524 } 525 526 void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) { 527 if (!Subtarget->hasVInstructions()) 528 return; 529 530 assert((Node->getOpcode() == ISD::INTRINSIC_W_CHAIN || 531 Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN) && 532 "Unexpected opcode"); 533 534 SDLoc DL(Node); 535 MVT XLenVT = Subtarget->getXLenVT(); 536 537 bool HasChain = Node->getOpcode() == ISD::INTRINSIC_W_CHAIN; 538 unsigned IntNoOffset = HasChain ? 1 : 0; 539 unsigned IntNo = Node->getConstantOperandVal(IntNoOffset); 540 541 assert((IntNo == Intrinsic::riscv_vsetvli || 542 IntNo == Intrinsic::riscv_vsetvlimax || 543 IntNo == Intrinsic::riscv_vsetvli_opt || 544 IntNo == Intrinsic::riscv_vsetvlimax_opt) && 545 "Unexpected vsetvli intrinsic"); 546 547 bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax || 548 IntNo == Intrinsic::riscv_vsetvlimax_opt; 549 unsigned Offset = IntNoOffset + (VLMax ? 1 : 2); 550 551 assert(Node->getNumOperands() == Offset + 2 && 552 "Unexpected number of operands"); 553 554 unsigned SEW = 555 RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7); 556 RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>( 557 Node->getConstantOperandVal(Offset + 1) & 0x7); 558 559 unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true, 560 /*MaskAgnostic*/ false); 561 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT); 562 563 SmallVector<EVT, 2> VTs = {XLenVT}; 564 if (HasChain) 565 VTs.push_back(MVT::Other); 566 567 SDValue VLOperand; 568 unsigned Opcode = RISCV::PseudoVSETVLI; 569 if (VLMax) { 570 VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT); 571 Opcode = RISCV::PseudoVSETVLIX0; 572 } else { 573 VLOperand = Node->getOperand(IntNoOffset + 1); 574 575 if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) { 576 uint64_t AVL = C->getZExtValue(); 577 if (isUInt<5>(AVL)) { 578 SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT); 579 SmallVector<SDValue, 3> Ops = {VLImm, VTypeIOp}; 580 if (HasChain) 581 Ops.push_back(Node->getOperand(0)); 582 ReplaceNode( 583 Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL, VTs, Ops)); 584 return; 585 } 586 } 587 } 588 589 SmallVector<SDValue, 3> Ops = {VLOperand, VTypeIOp}; 590 if (HasChain) 591 Ops.push_back(Node->getOperand(0)); 592 593 ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, VTs, Ops)); 594 } 595 596 void RISCVDAGToDAGISel::Select(SDNode *Node) { 597 // If we have a custom node, we have already selected. 598 if (Node->isMachineOpcode()) { 599 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n"); 600 Node->setNodeId(-1); 601 return; 602 } 603 604 // Instruction Selection not handled by the auto-generated tablegen selection 605 // should be handled here. 606 unsigned Opcode = Node->getOpcode(); 607 MVT XLenVT = Subtarget->getXLenVT(); 608 SDLoc DL(Node); 609 MVT VT = Node->getSimpleValueType(0); 610 611 switch (Opcode) { 612 case ISD::Constant: { 613 auto *ConstNode = cast<ConstantSDNode>(Node); 614 if (VT == XLenVT && ConstNode->isZero()) { 615 SDValue New = 616 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, XLenVT); 617 ReplaceNode(Node, New.getNode()); 618 return; 619 } 620 int64_t Imm = ConstNode->getSExtValue(); 621 // If the upper XLen-16 bits are not used, try to convert this to a simm12 622 // by sign extending bit 15. 623 if (isUInt<16>(Imm) && isInt<12>(SignExtend64(Imm, 16)) && 624 hasAllHUsers(Node)) 625 Imm = SignExtend64(Imm, 16); 626 // If the upper 32-bits are not used try to convert this into a simm32 by 627 // sign extending bit 32. 628 if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node)) 629 Imm = SignExtend64(Imm, 32); 630 631 ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget)); 632 return; 633 } 634 case ISD::FrameIndex: { 635 SDValue Imm = CurDAG->getTargetConstant(0, DL, XLenVT); 636 int FI = cast<FrameIndexSDNode>(Node)->getIndex(); 637 SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT); 638 ReplaceNode(Node, CurDAG->getMachineNode(RISCV::ADDI, DL, VT, TFI, Imm)); 639 return; 640 } 641 case ISD::SRL: { 642 // Optimize (srl (and X, C2), C) -> 643 // (srli (slli X, (XLen-C3), (XLen-C3) + C) 644 // Where C2 is a mask with C3 trailing ones. 645 // Taking into account that the C2 may have had lower bits unset by 646 // SimplifyDemandedBits. This avoids materializing the C2 immediate. 647 // This pattern occurs when type legalizing right shifts for types with 648 // less than XLen bits. 649 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 650 if (!N1C) 651 break; 652 SDValue N0 = Node->getOperand(0); 653 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() || 654 !isa<ConstantSDNode>(N0.getOperand(1))) 655 break; 656 unsigned ShAmt = N1C->getZExtValue(); 657 uint64_t Mask = N0.getConstantOperandVal(1); 658 Mask |= maskTrailingOnes<uint64_t>(ShAmt); 659 if (!isMask_64(Mask)) 660 break; 661 unsigned TrailingOnes = countTrailingOnes(Mask); 662 // 32 trailing ones should use srliw via tablegen pattern. 663 if (TrailingOnes == 32 || ShAmt >= TrailingOnes) 664 break; 665 unsigned LShAmt = Subtarget->getXLen() - TrailingOnes; 666 SDNode *SLLI = 667 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0), 668 CurDAG->getTargetConstant(LShAmt, DL, VT)); 669 SDNode *SRLI = CurDAG->getMachineNode( 670 RISCV::SRLI, DL, VT, SDValue(SLLI, 0), 671 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT)); 672 ReplaceNode(Node, SRLI); 673 return; 674 } 675 case ISD::SRA: { 676 // Optimize (sra (sext_inreg X, i16), C) -> 677 // (srai (slli X, (XLen-16), (XLen-16) + C) 678 // And (sra (sext_inreg X, i8), C) -> 679 // (srai (slli X, (XLen-8), (XLen-8) + C) 680 // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal. 681 // This transform matches the code we get without Zbb. The shifts are more 682 // compressible, and this can help expose CSE opportunities in the sdiv by 683 // constant optimization. 684 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 685 if (!N1C) 686 break; 687 SDValue N0 = Node->getOperand(0); 688 if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse()) 689 break; 690 unsigned ShAmt = N1C->getZExtValue(); 691 unsigned ExtSize = 692 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits(); 693 // ExtSize of 32 should use sraiw via tablegen pattern. 694 if (ExtSize >= 32 || ShAmt >= ExtSize) 695 break; 696 unsigned LShAmt = Subtarget->getXLen() - ExtSize; 697 SDNode *SLLI = 698 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0), 699 CurDAG->getTargetConstant(LShAmt, DL, VT)); 700 SDNode *SRAI = CurDAG->getMachineNode( 701 RISCV::SRAI, DL, VT, SDValue(SLLI, 0), 702 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT)); 703 ReplaceNode(Node, SRAI); 704 return; 705 } 706 case ISD::AND: { 707 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 708 if (!N1C) 709 break; 710 711 SDValue N0 = Node->getOperand(0); 712 713 bool LeftShift = N0.getOpcode() == ISD::SHL; 714 if (!LeftShift && N0.getOpcode() != ISD::SRL) 715 break; 716 717 auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 718 if (!C) 719 break; 720 uint64_t C2 = C->getZExtValue(); 721 unsigned XLen = Subtarget->getXLen(); 722 if (!C2 || C2 >= XLen) 723 break; 724 725 uint64_t C1 = N1C->getZExtValue(); 726 727 // Keep track of whether this is a c.andi. If we can't use c.andi, the 728 // shift pair might offer more compression opportunities. 729 // TODO: We could check for C extension here, but we don't have many lit 730 // tests with the C extension enabled so not checking gets better coverage. 731 // TODO: What if ANDI faster than shift? 732 bool IsCANDI = isInt<6>(N1C->getSExtValue()); 733 734 // Clear irrelevant bits in the mask. 735 if (LeftShift) 736 C1 &= maskTrailingZeros<uint64_t>(C2); 737 else 738 C1 &= maskTrailingOnes<uint64_t>(XLen - C2); 739 740 // Some transforms should only be done if the shift has a single use or 741 // the AND would become (srli (slli X, 32), 32) 742 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF); 743 744 SDValue X = N0.getOperand(0); 745 746 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask 747 // with c3 leading zeros. 748 if (!LeftShift && isMask_64(C1)) { 749 uint64_t C3 = XLen - (64 - countLeadingZeros(C1)); 750 if (C2 < C3) { 751 // If the number of leading zeros is C2+32 this can be SRLIW. 752 if (C2 + 32 == C3) { 753 SDNode *SRLIW = 754 CurDAG->getMachineNode(RISCV::SRLIW, DL, XLenVT, X, 755 CurDAG->getTargetConstant(C2, DL, XLenVT)); 756 ReplaceNode(Node, SRLIW); 757 return; 758 } 759 760 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32) if 761 // c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1. 762 // 763 // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type 764 // legalized and goes through DAG combine. 765 if (C2 >= 32 && (C3 - C2) == 1 && N0.hasOneUse() && 766 X.getOpcode() == ISD::SIGN_EXTEND_INREG && 767 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) { 768 SDNode *SRAIW = 769 CurDAG->getMachineNode(RISCV::SRAIW, DL, XLenVT, X.getOperand(0), 770 CurDAG->getTargetConstant(31, DL, XLenVT)); 771 SDNode *SRLIW = CurDAG->getMachineNode( 772 RISCV::SRLIW, DL, XLenVT, SDValue(SRAIW, 0), 773 CurDAG->getTargetConstant(C3 - 32, DL, XLenVT)); 774 ReplaceNode(Node, SRLIW); 775 return; 776 } 777 778 // (srli (slli x, c3-c2), c3). 779 // Skip if we could use (zext.w (sraiw X, C2)). 780 bool Skip = Subtarget->hasStdExtZba() && C3 == 32 && 781 X.getOpcode() == ISD::SIGN_EXTEND_INREG && 782 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32; 783 // Also Skip if we can use bexti. 784 Skip |= Subtarget->hasStdExtZbs() && C3 == XLen - 1; 785 if (OneUseOrZExtW && !Skip) { 786 SDNode *SLLI = CurDAG->getMachineNode( 787 RISCV::SLLI, DL, XLenVT, X, 788 CurDAG->getTargetConstant(C3 - C2, DL, XLenVT)); 789 SDNode *SRLI = 790 CurDAG->getMachineNode(RISCV::SRLI, DL, XLenVT, SDValue(SLLI, 0), 791 CurDAG->getTargetConstant(C3, DL, XLenVT)); 792 ReplaceNode(Node, SRLI); 793 return; 794 } 795 } 796 } 797 798 // Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask 799 // shifted by c2 bits with c3 leading zeros. 800 if (LeftShift && isShiftedMask_64(C1)) { 801 uint64_t C3 = XLen - (64 - countLeadingZeros(C1)); 802 803 if (C2 + C3 < XLen && 804 C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + C3)) << C2)) { 805 // Use slli.uw when possible. 806 if ((XLen - (C2 + C3)) == 32 && Subtarget->hasStdExtZba()) { 807 SDNode *SLLI_UW = 808 CurDAG->getMachineNode(RISCV::SLLI_UW, DL, XLenVT, X, 809 CurDAG->getTargetConstant(C2, DL, XLenVT)); 810 ReplaceNode(Node, SLLI_UW); 811 return; 812 } 813 814 // (srli (slli c2+c3), c3) 815 if (OneUseOrZExtW && !IsCANDI) { 816 SDNode *SLLI = CurDAG->getMachineNode( 817 RISCV::SLLI, DL, XLenVT, X, 818 CurDAG->getTargetConstant(C2 + C3, DL, XLenVT)); 819 SDNode *SRLI = 820 CurDAG->getMachineNode(RISCV::SRLI, DL, XLenVT, SDValue(SLLI, 0), 821 CurDAG->getTargetConstant(C3, DL, XLenVT)); 822 ReplaceNode(Node, SRLI); 823 return; 824 } 825 } 826 } 827 828 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a 829 // shifted mask with c2 leading zeros and c3 trailing zeros. 830 if (!LeftShift && isShiftedMask_64(C1)) { 831 uint64_t Leading = XLen - (64 - countLeadingZeros(C1)); 832 uint64_t C3 = countTrailingZeros(C1); 833 if (Leading == C2 && C2 + C3 < XLen && OneUseOrZExtW && !IsCANDI) { 834 unsigned SrliOpc = RISCV::SRLI; 835 // If the input is zexti32 we should use SRLIW. 836 if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) && 837 X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) { 838 SrliOpc = RISCV::SRLIW; 839 X = X.getOperand(0); 840 } 841 SDNode *SRLI = CurDAG->getMachineNode( 842 SrliOpc, DL, XLenVT, X, 843 CurDAG->getTargetConstant(C2 + C3, DL, XLenVT)); 844 SDNode *SLLI = 845 CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLI, 0), 846 CurDAG->getTargetConstant(C3, DL, XLenVT)); 847 ReplaceNode(Node, SLLI); 848 return; 849 } 850 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI. 851 if (Leading > 32 && (Leading - 32) == C2 && C2 + C3 < 32 && 852 OneUseOrZExtW && !IsCANDI) { 853 SDNode *SRLIW = CurDAG->getMachineNode( 854 RISCV::SRLIW, DL, XLenVT, X, 855 CurDAG->getTargetConstant(C2 + C3, DL, XLenVT)); 856 SDNode *SLLI = 857 CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLIW, 0), 858 CurDAG->getTargetConstant(C3, DL, XLenVT)); 859 ReplaceNode(Node, SLLI); 860 return; 861 } 862 } 863 864 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a 865 // shifted mask with no leading zeros and c3 trailing zeros. 866 if (LeftShift && isShiftedMask_64(C1)) { 867 uint64_t Leading = XLen - (64 - countLeadingZeros(C1)); 868 uint64_t C3 = countTrailingZeros(C1); 869 if (Leading == 0 && C2 < C3 && OneUseOrZExtW && !IsCANDI) { 870 SDNode *SRLI = CurDAG->getMachineNode( 871 RISCV::SRLI, DL, XLenVT, X, 872 CurDAG->getTargetConstant(C3 - C2, DL, XLenVT)); 873 SDNode *SLLI = 874 CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLI, 0), 875 CurDAG->getTargetConstant(C3, DL, XLenVT)); 876 ReplaceNode(Node, SLLI); 877 return; 878 } 879 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI. 880 if (C2 < C3 && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) { 881 SDNode *SRLIW = CurDAG->getMachineNode( 882 RISCV::SRLIW, DL, XLenVT, X, 883 CurDAG->getTargetConstant(C3 - C2, DL, XLenVT)); 884 SDNode *SLLI = 885 CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLIW, 0), 886 CurDAG->getTargetConstant(C3, DL, XLenVT)); 887 ReplaceNode(Node, SLLI); 888 return; 889 } 890 } 891 892 break; 893 } 894 case ISD::MUL: { 895 // Special case for calculating (mul (and X, C2), C1) where the full product 896 // fits in XLen bits. We can shift X left by the number of leading zeros in 897 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final 898 // product has XLen trailing zeros, putting it in the output of MULHU. This 899 // can avoid materializing a constant in a register for C2. 900 901 // RHS should be a constant. 902 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 903 if (!N1C || !N1C->hasOneUse()) 904 break; 905 906 // LHS should be an AND with constant. 907 SDValue N0 = Node->getOperand(0); 908 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1))) 909 break; 910 911 uint64_t C2 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); 912 913 // Constant should be a mask. 914 if (!isMask_64(C2)) 915 break; 916 917 // This should be the only use of the AND unless we will use 918 // (SRLI (SLLI X, 32), 32). We don't use a shift pair for other AND 919 // constants. 920 if (!N0.hasOneUse() && C2 != UINT64_C(0xFFFFFFFF)) 921 break; 922 923 // If this can be an ANDI, ZEXT.H or ZEXT.W we don't need to do this 924 // optimization. 925 if (isInt<12>(C2) || 926 (C2 == UINT64_C(0xFFFF) && 927 (Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp())) || 928 (C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba())) 929 break; 930 931 // We need to shift left the AND input and C1 by a total of XLen bits. 932 933 // How far left do we need to shift the AND input? 934 unsigned XLen = Subtarget->getXLen(); 935 unsigned LeadingZeros = XLen - (64 - countLeadingZeros(C2)); 936 937 // The constant gets shifted by the remaining amount unless that would 938 // shift bits out. 939 uint64_t C1 = N1C->getZExtValue(); 940 unsigned ConstantShift = XLen - LeadingZeros; 941 if (ConstantShift > (XLen - (64 - countLeadingZeros(C1)))) 942 break; 943 944 uint64_t ShiftedC1 = C1 << ConstantShift; 945 // If this RV32, we need to sign extend the constant. 946 if (XLen == 32) 947 ShiftedC1 = SignExtend64(ShiftedC1, 32); 948 949 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))). 950 SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget); 951 SDNode *SLLI = 952 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0), 953 CurDAG->getTargetConstant(LeadingZeros, DL, VT)); 954 SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT, 955 SDValue(SLLI, 0), SDValue(Imm, 0)); 956 ReplaceNode(Node, MULHU); 957 return; 958 } 959 case ISD::INTRINSIC_WO_CHAIN: { 960 unsigned IntNo = Node->getConstantOperandVal(0); 961 switch (IntNo) { 962 // By default we do not custom select any intrinsic. 963 default: 964 break; 965 case Intrinsic::riscv_vmsgeu: 966 case Intrinsic::riscv_vmsge: { 967 SDValue Src1 = Node->getOperand(1); 968 SDValue Src2 = Node->getOperand(2); 969 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu; 970 bool IsCmpUnsignedZero = false; 971 // Only custom select scalar second operand. 972 if (Src2.getValueType() != XLenVT) 973 break; 974 // Small constants are handled with patterns. 975 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) { 976 int64_t CVal = C->getSExtValue(); 977 if (CVal >= -15 && CVal <= 16) { 978 if (!IsUnsigned || CVal != 0) 979 break; 980 IsCmpUnsignedZero = true; 981 } 982 } 983 MVT Src1VT = Src1.getSimpleValueType(); 984 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode; 985 switch (RISCVTargetLowering::getLMUL(Src1VT)) { 986 default: 987 llvm_unreachable("Unexpected LMUL!"); 988 #define CASE_VMSLT_VMNAND_VMSET_OPCODES(lmulenum, suffix, suffix_b) \ 989 case RISCVII::VLMUL::lmulenum: \ 990 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \ 991 : RISCV::PseudoVMSLT_VX_##suffix; \ 992 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \ 993 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b; \ 994 break; 995 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F8, MF8, B1) 996 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F4, MF4, B2) 997 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F2, MF2, B4) 998 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_1, M1, B8) 999 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_2, M2, B16) 1000 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_4, M4, B32) 1001 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_8, M8, B64) 1002 #undef CASE_VMSLT_VMNAND_VMSET_OPCODES 1003 } 1004 SDValue SEW = CurDAG->getTargetConstant( 1005 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT); 1006 SDValue VL; 1007 selectVLOp(Node->getOperand(3), VL); 1008 1009 // If vmsgeu with 0 immediate, expand it to vmset. 1010 if (IsCmpUnsignedZero) { 1011 ReplaceNode(Node, CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW)); 1012 return; 1013 } 1014 1015 // Expand to 1016 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd 1017 SDValue Cmp = SDValue( 1018 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}), 1019 0); 1020 ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT, 1021 {Cmp, Cmp, VL, SEW})); 1022 return; 1023 } 1024 case Intrinsic::riscv_vmsgeu_mask: 1025 case Intrinsic::riscv_vmsge_mask: { 1026 SDValue Src1 = Node->getOperand(2); 1027 SDValue Src2 = Node->getOperand(3); 1028 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask; 1029 bool IsCmpUnsignedZero = false; 1030 // Only custom select scalar second operand. 1031 if (Src2.getValueType() != XLenVT) 1032 break; 1033 // Small constants are handled with patterns. 1034 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) { 1035 int64_t CVal = C->getSExtValue(); 1036 if (CVal >= -15 && CVal <= 16) { 1037 if (!IsUnsigned || CVal != 0) 1038 break; 1039 IsCmpUnsignedZero = true; 1040 } 1041 } 1042 MVT Src1VT = Src1.getSimpleValueType(); 1043 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode, 1044 VMOROpcode; 1045 switch (RISCVTargetLowering::getLMUL(Src1VT)) { 1046 default: 1047 llvm_unreachable("Unexpected LMUL!"); 1048 #define CASE_VMSLT_OPCODES(lmulenum, suffix, suffix_b) \ 1049 case RISCVII::VLMUL::lmulenum: \ 1050 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \ 1051 : RISCV::PseudoVMSLT_VX_##suffix; \ 1052 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \ 1053 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \ 1054 break; 1055 CASE_VMSLT_OPCODES(LMUL_F8, MF8, B1) 1056 CASE_VMSLT_OPCODES(LMUL_F4, MF4, B2) 1057 CASE_VMSLT_OPCODES(LMUL_F2, MF2, B4) 1058 CASE_VMSLT_OPCODES(LMUL_1, M1, B8) 1059 CASE_VMSLT_OPCODES(LMUL_2, M2, B16) 1060 CASE_VMSLT_OPCODES(LMUL_4, M4, B32) 1061 CASE_VMSLT_OPCODES(LMUL_8, M8, B64) 1062 #undef CASE_VMSLT_OPCODES 1063 } 1064 // Mask operations use the LMUL from the mask type. 1065 switch (RISCVTargetLowering::getLMUL(VT)) { 1066 default: 1067 llvm_unreachable("Unexpected LMUL!"); 1068 #define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \ 1069 case RISCVII::VLMUL::lmulenum: \ 1070 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \ 1071 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \ 1072 VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \ 1073 break; 1074 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, MF8) 1075 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, MF4) 1076 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, MF2) 1077 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, M1) 1078 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, M2) 1079 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, M4) 1080 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, M8) 1081 #undef CASE_VMXOR_VMANDN_VMOR_OPCODES 1082 } 1083 SDValue SEW = CurDAG->getTargetConstant( 1084 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT); 1085 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT); 1086 SDValue VL; 1087 selectVLOp(Node->getOperand(5), VL); 1088 SDValue MaskedOff = Node->getOperand(1); 1089 SDValue Mask = Node->getOperand(4); 1090 1091 // If vmsgeu_mask with 0 immediate, expand it to vmor mask, maskedoff. 1092 if (IsCmpUnsignedZero) { 1093 // We don't need vmor if the MaskedOff and the Mask are the same 1094 // value. 1095 if (Mask == MaskedOff) { 1096 ReplaceUses(Node, Mask.getNode()); 1097 return; 1098 } 1099 ReplaceNode(Node, 1100 CurDAG->getMachineNode(VMOROpcode, DL, VT, 1101 {Mask, MaskedOff, VL, MaskSEW})); 1102 return; 1103 } 1104 1105 // If the MaskedOff value and the Mask are the same value use 1106 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt 1107 // This avoids needing to copy v0 to vd before starting the next sequence. 1108 if (Mask == MaskedOff) { 1109 SDValue Cmp = SDValue( 1110 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}), 1111 0); 1112 ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT, 1113 {Mask, Cmp, VL, MaskSEW})); 1114 return; 1115 } 1116 1117 // Mask needs to be copied to V0. 1118 SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, 1119 RISCV::V0, Mask, SDValue()); 1120 SDValue Glue = Chain.getValue(1); 1121 SDValue V0 = CurDAG->getRegister(RISCV::V0, VT); 1122 1123 // Otherwise use 1124 // vmslt{u}.vx vd, va, x, v0.t; if mask policy is agnostic. 1125 SDValue Cmp = SDValue( 1126 CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT, 1127 {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}), 1128 0); 1129 if (MaskedOff.isUndef()) { 1130 ReplaceNode(Node, Cmp.getNode()); 1131 return; 1132 } 1133 // Need vmxor.mm vd, vd, v0 to assign inactive value. 1134 ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT, 1135 {Cmp, Mask, VL, MaskSEW})); 1136 return; 1137 } 1138 case Intrinsic::riscv_vsetvli_opt: 1139 case Intrinsic::riscv_vsetvlimax_opt: 1140 return selectVSETVLI(Node); 1141 } 1142 break; 1143 } 1144 case ISD::INTRINSIC_W_CHAIN: { 1145 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 1146 switch (IntNo) { 1147 // By default we do not custom select any intrinsic. 1148 default: 1149 break; 1150 case Intrinsic::riscv_vsetvli: 1151 case Intrinsic::riscv_vsetvlimax: 1152 return selectVSETVLI(Node); 1153 case Intrinsic::riscv_vlseg2: 1154 case Intrinsic::riscv_vlseg3: 1155 case Intrinsic::riscv_vlseg4: 1156 case Intrinsic::riscv_vlseg5: 1157 case Intrinsic::riscv_vlseg6: 1158 case Intrinsic::riscv_vlseg7: 1159 case Intrinsic::riscv_vlseg8: { 1160 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false); 1161 return; 1162 } 1163 case Intrinsic::riscv_vlseg2_mask: 1164 case Intrinsic::riscv_vlseg3_mask: 1165 case Intrinsic::riscv_vlseg4_mask: 1166 case Intrinsic::riscv_vlseg5_mask: 1167 case Intrinsic::riscv_vlseg6_mask: 1168 case Intrinsic::riscv_vlseg7_mask: 1169 case Intrinsic::riscv_vlseg8_mask: { 1170 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false); 1171 return; 1172 } 1173 case Intrinsic::riscv_vlsseg2: 1174 case Intrinsic::riscv_vlsseg3: 1175 case Intrinsic::riscv_vlsseg4: 1176 case Intrinsic::riscv_vlsseg5: 1177 case Intrinsic::riscv_vlsseg6: 1178 case Intrinsic::riscv_vlsseg7: 1179 case Intrinsic::riscv_vlsseg8: { 1180 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true); 1181 return; 1182 } 1183 case Intrinsic::riscv_vlsseg2_mask: 1184 case Intrinsic::riscv_vlsseg3_mask: 1185 case Intrinsic::riscv_vlsseg4_mask: 1186 case Intrinsic::riscv_vlsseg5_mask: 1187 case Intrinsic::riscv_vlsseg6_mask: 1188 case Intrinsic::riscv_vlsseg7_mask: 1189 case Intrinsic::riscv_vlsseg8_mask: { 1190 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true); 1191 return; 1192 } 1193 case Intrinsic::riscv_vloxseg2: 1194 case Intrinsic::riscv_vloxseg3: 1195 case Intrinsic::riscv_vloxseg4: 1196 case Intrinsic::riscv_vloxseg5: 1197 case Intrinsic::riscv_vloxseg6: 1198 case Intrinsic::riscv_vloxseg7: 1199 case Intrinsic::riscv_vloxseg8: 1200 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true); 1201 return; 1202 case Intrinsic::riscv_vluxseg2: 1203 case Intrinsic::riscv_vluxseg3: 1204 case Intrinsic::riscv_vluxseg4: 1205 case Intrinsic::riscv_vluxseg5: 1206 case Intrinsic::riscv_vluxseg6: 1207 case Intrinsic::riscv_vluxseg7: 1208 case Intrinsic::riscv_vluxseg8: 1209 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false); 1210 return; 1211 case Intrinsic::riscv_vloxseg2_mask: 1212 case Intrinsic::riscv_vloxseg3_mask: 1213 case Intrinsic::riscv_vloxseg4_mask: 1214 case Intrinsic::riscv_vloxseg5_mask: 1215 case Intrinsic::riscv_vloxseg6_mask: 1216 case Intrinsic::riscv_vloxseg7_mask: 1217 case Intrinsic::riscv_vloxseg8_mask: 1218 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true); 1219 return; 1220 case Intrinsic::riscv_vluxseg2_mask: 1221 case Intrinsic::riscv_vluxseg3_mask: 1222 case Intrinsic::riscv_vluxseg4_mask: 1223 case Intrinsic::riscv_vluxseg5_mask: 1224 case Intrinsic::riscv_vluxseg6_mask: 1225 case Intrinsic::riscv_vluxseg7_mask: 1226 case Intrinsic::riscv_vluxseg8_mask: 1227 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false); 1228 return; 1229 case Intrinsic::riscv_vlseg8ff: 1230 case Intrinsic::riscv_vlseg7ff: 1231 case Intrinsic::riscv_vlseg6ff: 1232 case Intrinsic::riscv_vlseg5ff: 1233 case Intrinsic::riscv_vlseg4ff: 1234 case Intrinsic::riscv_vlseg3ff: 1235 case Intrinsic::riscv_vlseg2ff: { 1236 selectVLSEGFF(Node, /*IsMasked*/ false); 1237 return; 1238 } 1239 case Intrinsic::riscv_vlseg8ff_mask: 1240 case Intrinsic::riscv_vlseg7ff_mask: 1241 case Intrinsic::riscv_vlseg6ff_mask: 1242 case Intrinsic::riscv_vlseg5ff_mask: 1243 case Intrinsic::riscv_vlseg4ff_mask: 1244 case Intrinsic::riscv_vlseg3ff_mask: 1245 case Intrinsic::riscv_vlseg2ff_mask: { 1246 selectVLSEGFF(Node, /*IsMasked*/ true); 1247 return; 1248 } 1249 case Intrinsic::riscv_vloxei: 1250 case Intrinsic::riscv_vloxei_mask: 1251 case Intrinsic::riscv_vluxei: 1252 case Intrinsic::riscv_vluxei_mask: { 1253 bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask || 1254 IntNo == Intrinsic::riscv_vluxei_mask; 1255 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei || 1256 IntNo == Intrinsic::riscv_vloxei_mask; 1257 1258 MVT VT = Node->getSimpleValueType(0); 1259 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1260 1261 unsigned CurOp = 2; 1262 // Masked intrinsic only have TU version pseduo instructions. 1263 bool IsTU = IsMasked || (!IsMasked && !Node->getOperand(CurOp).isUndef()); 1264 SmallVector<SDValue, 8> Operands; 1265 if (IsTU) 1266 Operands.push_back(Node->getOperand(CurOp++)); 1267 else 1268 // Skip the undef passthru operand for nomask TA version pseudo 1269 CurOp++; 1270 1271 MVT IndexVT; 1272 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 1273 /*IsStridedOrIndexed*/ true, Operands, 1274 /*IsLoad=*/true, &IndexVT); 1275 1276 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 1277 "Element count mismatch"); 1278 1279 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1280 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 1281 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 1282 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 1283 report_fatal_error("The V extension does not support EEW=64 for index " 1284 "values when XLEN=32"); 1285 } 1286 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo( 1287 IsMasked, IsTU, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), 1288 static_cast<unsigned>(IndexLMUL)); 1289 MachineSDNode *Load = 1290 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1291 1292 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1293 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 1294 1295 ReplaceNode(Node, Load); 1296 return; 1297 } 1298 case Intrinsic::riscv_vlm: 1299 case Intrinsic::riscv_vle: 1300 case Intrinsic::riscv_vle_mask: 1301 case Intrinsic::riscv_vlse: 1302 case Intrinsic::riscv_vlse_mask: { 1303 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask || 1304 IntNo == Intrinsic::riscv_vlse_mask; 1305 bool IsStrided = 1306 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask; 1307 1308 MVT VT = Node->getSimpleValueType(0); 1309 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1310 1311 unsigned CurOp = 2; 1312 // The riscv_vlm intrinsic are always tail agnostic and no passthru operand. 1313 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm; 1314 // Masked intrinsic only have TU version pseduo instructions. 1315 bool IsTU = 1316 HasPassthruOperand && 1317 ((!IsMasked && !Node->getOperand(CurOp).isUndef()) || IsMasked); 1318 SmallVector<SDValue, 8> Operands; 1319 if (IsTU) 1320 Operands.push_back(Node->getOperand(CurOp++)); 1321 else if (HasPassthruOperand) 1322 // Skip the undef passthru operand for nomask TA version pseudo 1323 CurOp++; 1324 1325 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 1326 Operands, /*IsLoad=*/true); 1327 1328 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1329 const RISCV::VLEPseudo *P = 1330 RISCV::getVLEPseudo(IsMasked, IsTU, IsStrided, /*FF*/ false, Log2SEW, 1331 static_cast<unsigned>(LMUL)); 1332 MachineSDNode *Load = 1333 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1334 1335 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1336 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 1337 1338 ReplaceNode(Node, Load); 1339 return; 1340 } 1341 case Intrinsic::riscv_vleff: 1342 case Intrinsic::riscv_vleff_mask: { 1343 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask; 1344 1345 MVT VT = Node->getSimpleValueType(0); 1346 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1347 1348 unsigned CurOp = 2; 1349 // Masked intrinsic only have TU version pseduo instructions. 1350 bool IsTU = IsMasked || (!IsMasked && !Node->getOperand(CurOp).isUndef()); 1351 SmallVector<SDValue, 7> Operands; 1352 if (IsTU) 1353 Operands.push_back(Node->getOperand(CurOp++)); 1354 else 1355 // Skip the undef passthru operand for nomask TA version pseudo 1356 CurOp++; 1357 1358 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 1359 /*IsStridedOrIndexed*/ false, Operands, 1360 /*IsLoad=*/true); 1361 1362 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1363 const RISCV::VLEPseudo *P = 1364 RISCV::getVLEPseudo(IsMasked, IsTU, /*Strided*/ false, /*FF*/ true, 1365 Log2SEW, static_cast<unsigned>(LMUL)); 1366 MachineSDNode *Load = 1367 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), 1368 MVT::Other, MVT::Glue, Operands); 1369 SDNode *ReadVL = CurDAG->getMachineNode(RISCV::PseudoReadVL, DL, XLenVT, 1370 /*Glue*/ SDValue(Load, 2)); 1371 1372 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1373 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 1374 1375 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0)); 1376 ReplaceUses(SDValue(Node, 1), SDValue(ReadVL, 0)); // VL 1377 ReplaceUses(SDValue(Node, 2), SDValue(Load, 1)); // Chain 1378 CurDAG->RemoveDeadNode(Node); 1379 return; 1380 } 1381 } 1382 break; 1383 } 1384 case ISD::INTRINSIC_VOID: { 1385 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 1386 switch (IntNo) { 1387 case Intrinsic::riscv_vsseg2: 1388 case Intrinsic::riscv_vsseg3: 1389 case Intrinsic::riscv_vsseg4: 1390 case Intrinsic::riscv_vsseg5: 1391 case Intrinsic::riscv_vsseg6: 1392 case Intrinsic::riscv_vsseg7: 1393 case Intrinsic::riscv_vsseg8: { 1394 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false); 1395 return; 1396 } 1397 case Intrinsic::riscv_vsseg2_mask: 1398 case Intrinsic::riscv_vsseg3_mask: 1399 case Intrinsic::riscv_vsseg4_mask: 1400 case Intrinsic::riscv_vsseg5_mask: 1401 case Intrinsic::riscv_vsseg6_mask: 1402 case Intrinsic::riscv_vsseg7_mask: 1403 case Intrinsic::riscv_vsseg8_mask: { 1404 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false); 1405 return; 1406 } 1407 case Intrinsic::riscv_vssseg2: 1408 case Intrinsic::riscv_vssseg3: 1409 case Intrinsic::riscv_vssseg4: 1410 case Intrinsic::riscv_vssseg5: 1411 case Intrinsic::riscv_vssseg6: 1412 case Intrinsic::riscv_vssseg7: 1413 case Intrinsic::riscv_vssseg8: { 1414 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true); 1415 return; 1416 } 1417 case Intrinsic::riscv_vssseg2_mask: 1418 case Intrinsic::riscv_vssseg3_mask: 1419 case Intrinsic::riscv_vssseg4_mask: 1420 case Intrinsic::riscv_vssseg5_mask: 1421 case Intrinsic::riscv_vssseg6_mask: 1422 case Intrinsic::riscv_vssseg7_mask: 1423 case Intrinsic::riscv_vssseg8_mask: { 1424 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true); 1425 return; 1426 } 1427 case Intrinsic::riscv_vsoxseg2: 1428 case Intrinsic::riscv_vsoxseg3: 1429 case Intrinsic::riscv_vsoxseg4: 1430 case Intrinsic::riscv_vsoxseg5: 1431 case Intrinsic::riscv_vsoxseg6: 1432 case Intrinsic::riscv_vsoxseg7: 1433 case Intrinsic::riscv_vsoxseg8: 1434 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true); 1435 return; 1436 case Intrinsic::riscv_vsuxseg2: 1437 case Intrinsic::riscv_vsuxseg3: 1438 case Intrinsic::riscv_vsuxseg4: 1439 case Intrinsic::riscv_vsuxseg5: 1440 case Intrinsic::riscv_vsuxseg6: 1441 case Intrinsic::riscv_vsuxseg7: 1442 case Intrinsic::riscv_vsuxseg8: 1443 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false); 1444 return; 1445 case Intrinsic::riscv_vsoxseg2_mask: 1446 case Intrinsic::riscv_vsoxseg3_mask: 1447 case Intrinsic::riscv_vsoxseg4_mask: 1448 case Intrinsic::riscv_vsoxseg5_mask: 1449 case Intrinsic::riscv_vsoxseg6_mask: 1450 case Intrinsic::riscv_vsoxseg7_mask: 1451 case Intrinsic::riscv_vsoxseg8_mask: 1452 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true); 1453 return; 1454 case Intrinsic::riscv_vsuxseg2_mask: 1455 case Intrinsic::riscv_vsuxseg3_mask: 1456 case Intrinsic::riscv_vsuxseg4_mask: 1457 case Intrinsic::riscv_vsuxseg5_mask: 1458 case Intrinsic::riscv_vsuxseg6_mask: 1459 case Intrinsic::riscv_vsuxseg7_mask: 1460 case Intrinsic::riscv_vsuxseg8_mask: 1461 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false); 1462 return; 1463 case Intrinsic::riscv_vsoxei: 1464 case Intrinsic::riscv_vsoxei_mask: 1465 case Intrinsic::riscv_vsuxei: 1466 case Intrinsic::riscv_vsuxei_mask: { 1467 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask || 1468 IntNo == Intrinsic::riscv_vsuxei_mask; 1469 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei || 1470 IntNo == Intrinsic::riscv_vsoxei_mask; 1471 1472 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 1473 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1474 1475 unsigned CurOp = 2; 1476 SmallVector<SDValue, 8> Operands; 1477 Operands.push_back(Node->getOperand(CurOp++)); // Store value. 1478 1479 MVT IndexVT; 1480 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 1481 /*IsStridedOrIndexed*/ true, Operands, 1482 /*IsLoad=*/false, &IndexVT); 1483 1484 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 1485 "Element count mismatch"); 1486 1487 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1488 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 1489 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 1490 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 1491 report_fatal_error("The V extension does not support EEW=64 for index " 1492 "values when XLEN=32"); 1493 } 1494 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo( 1495 IsMasked, /*TU*/ false, IsOrdered, IndexLog2EEW, 1496 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL)); 1497 MachineSDNode *Store = 1498 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1499 1500 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1501 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 1502 1503 ReplaceNode(Node, Store); 1504 return; 1505 } 1506 case Intrinsic::riscv_vsm: 1507 case Intrinsic::riscv_vse: 1508 case Intrinsic::riscv_vse_mask: 1509 case Intrinsic::riscv_vsse: 1510 case Intrinsic::riscv_vsse_mask: { 1511 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask || 1512 IntNo == Intrinsic::riscv_vsse_mask; 1513 bool IsStrided = 1514 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask; 1515 1516 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 1517 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1518 1519 unsigned CurOp = 2; 1520 SmallVector<SDValue, 8> Operands; 1521 Operands.push_back(Node->getOperand(CurOp++)); // Store value. 1522 1523 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 1524 Operands); 1525 1526 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1527 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo( 1528 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL)); 1529 MachineSDNode *Store = 1530 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1531 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1532 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 1533 1534 ReplaceNode(Node, Store); 1535 return; 1536 } 1537 } 1538 break; 1539 } 1540 case ISD::BITCAST: { 1541 MVT SrcVT = Node->getOperand(0).getSimpleValueType(); 1542 // Just drop bitcasts between vectors if both are fixed or both are 1543 // scalable. 1544 if ((VT.isScalableVector() && SrcVT.isScalableVector()) || 1545 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) { 1546 ReplaceUses(SDValue(Node, 0), Node->getOperand(0)); 1547 CurDAG->RemoveDeadNode(Node); 1548 return; 1549 } 1550 break; 1551 } 1552 case ISD::INSERT_SUBVECTOR: { 1553 SDValue V = Node->getOperand(0); 1554 SDValue SubV = Node->getOperand(1); 1555 SDLoc DL(SubV); 1556 auto Idx = Node->getConstantOperandVal(2); 1557 MVT SubVecVT = SubV.getSimpleValueType(); 1558 1559 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering(); 1560 MVT SubVecContainerVT = SubVecVT; 1561 // Establish the correct scalable-vector types for any fixed-length type. 1562 if (SubVecVT.isFixedLengthVector()) 1563 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT); 1564 if (VT.isFixedLengthVector()) 1565 VT = TLI.getContainerForFixedLengthVector(VT); 1566 1567 const auto *TRI = Subtarget->getRegisterInfo(); 1568 unsigned SubRegIdx; 1569 std::tie(SubRegIdx, Idx) = 1570 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 1571 VT, SubVecContainerVT, Idx, TRI); 1572 1573 // If the Idx hasn't been completely eliminated then this is a subvector 1574 // insert which doesn't naturally align to a vector register. These must 1575 // be handled using instructions to manipulate the vector registers. 1576 if (Idx != 0) 1577 break; 1578 1579 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT); 1580 bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 || 1581 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 || 1582 SubVecLMUL == RISCVII::VLMUL::LMUL_F8; 1583 (void)IsSubVecPartReg; // Silence unused variable warning without asserts. 1584 assert((!IsSubVecPartReg || V.isUndef()) && 1585 "Expecting lowering to have created legal INSERT_SUBVECTORs when " 1586 "the subvector is smaller than a full-sized register"); 1587 1588 // If we haven't set a SubRegIdx, then we must be going between 1589 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy. 1590 if (SubRegIdx == RISCV::NoSubRegister) { 1591 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(VT); 1592 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) == 1593 InRegClassID && 1594 "Unexpected subvector extraction"); 1595 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT); 1596 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, 1597 DL, VT, SubV, RC); 1598 ReplaceNode(Node, NewNode); 1599 return; 1600 } 1601 1602 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV); 1603 ReplaceNode(Node, Insert.getNode()); 1604 return; 1605 } 1606 case ISD::EXTRACT_SUBVECTOR: { 1607 SDValue V = Node->getOperand(0); 1608 auto Idx = Node->getConstantOperandVal(1); 1609 MVT InVT = V.getSimpleValueType(); 1610 SDLoc DL(V); 1611 1612 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering(); 1613 MVT SubVecContainerVT = VT; 1614 // Establish the correct scalable-vector types for any fixed-length type. 1615 if (VT.isFixedLengthVector()) 1616 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT); 1617 if (InVT.isFixedLengthVector()) 1618 InVT = TLI.getContainerForFixedLengthVector(InVT); 1619 1620 const auto *TRI = Subtarget->getRegisterInfo(); 1621 unsigned SubRegIdx; 1622 std::tie(SubRegIdx, Idx) = 1623 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 1624 InVT, SubVecContainerVT, Idx, TRI); 1625 1626 // If the Idx hasn't been completely eliminated then this is a subvector 1627 // extract which doesn't naturally align to a vector register. These must 1628 // be handled using instructions to manipulate the vector registers. 1629 if (Idx != 0) 1630 break; 1631 1632 // If we haven't set a SubRegIdx, then we must be going between 1633 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy. 1634 if (SubRegIdx == RISCV::NoSubRegister) { 1635 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT); 1636 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) == 1637 InRegClassID && 1638 "Unexpected subvector extraction"); 1639 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT); 1640 SDNode *NewNode = 1641 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC); 1642 ReplaceNode(Node, NewNode); 1643 return; 1644 } 1645 1646 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V); 1647 ReplaceNode(Node, Extract.getNode()); 1648 return; 1649 } 1650 case ISD::SPLAT_VECTOR: 1651 case RISCVISD::VMV_S_X_VL: 1652 case RISCVISD::VFMV_S_F_VL: 1653 case RISCVISD::VMV_V_X_VL: 1654 case RISCVISD::VFMV_V_F_VL: { 1655 // Try to match splat of a scalar load to a strided load with stride of x0. 1656 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL || 1657 Node->getOpcode() == RISCVISD::VFMV_S_F_VL; 1658 bool HasPassthruOperand = Node->getOpcode() != ISD::SPLAT_VECTOR; 1659 if (HasPassthruOperand && !Node->getOperand(0).isUndef()) 1660 break; 1661 SDValue Src = HasPassthruOperand ? Node->getOperand(1) : Node->getOperand(0); 1662 auto *Ld = dyn_cast<LoadSDNode>(Src); 1663 if (!Ld) 1664 break; 1665 EVT MemVT = Ld->getMemoryVT(); 1666 // The memory VT should be the same size as the element type. 1667 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize()) 1668 break; 1669 if (!IsProfitableToFold(Src, Node, Node) || 1670 !IsLegalToFold(Src, Node, Node, TM.getOptLevel())) 1671 break; 1672 1673 SDValue VL; 1674 if (Node->getOpcode() == ISD::SPLAT_VECTOR) 1675 VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT); 1676 else if (IsScalarMove) { 1677 // We could deal with more VL if we update the VSETVLI insert pass to 1678 // avoid introducing more VSETVLI. 1679 if (!isOneConstant(Node->getOperand(2))) 1680 break; 1681 selectVLOp(Node->getOperand(2), VL); 1682 } else 1683 selectVLOp(Node->getOperand(2), VL); 1684 1685 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1686 SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT); 1687 1688 SDValue Operands[] = {Ld->getBasePtr(), 1689 CurDAG->getRegister(RISCV::X0, XLenVT), VL, SEW, 1690 Ld->getChain()}; 1691 1692 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1693 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo( 1694 /*IsMasked*/ false, /*IsTU*/ false, /*IsStrided*/ true, /*FF*/ false, 1695 Log2SEW, static_cast<unsigned>(LMUL)); 1696 MachineSDNode *Load = 1697 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1698 1699 CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()}); 1700 1701 ReplaceNode(Node, Load); 1702 return; 1703 } 1704 } 1705 1706 // Select the default instruction. 1707 SelectCode(Node); 1708 } 1709 1710 bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand( 1711 const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { 1712 switch (ConstraintID) { 1713 case InlineAsm::Constraint_m: 1714 // We just support simple memory operands that have a single address 1715 // operand and need no special handling. 1716 OutOps.push_back(Op); 1717 return false; 1718 case InlineAsm::Constraint_A: 1719 OutOps.push_back(Op); 1720 return false; 1721 default: 1722 break; 1723 } 1724 1725 return true; 1726 } 1727 1728 bool RISCVDAGToDAGISel::SelectAddrFI(SDValue Addr, SDValue &Base) { 1729 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { 1730 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT()); 1731 return true; 1732 } 1733 return false; 1734 } 1735 1736 bool RISCVDAGToDAGISel::SelectBaseAddr(SDValue Addr, SDValue &Base) { 1737 // If this is FrameIndex, select it directly. Otherwise just let it get 1738 // selected to a register independently. 1739 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) 1740 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT()); 1741 else 1742 Base = Addr; 1743 return true; 1744 } 1745 1746 bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth, 1747 SDValue &ShAmt) { 1748 // Shift instructions on RISCV only read the lower 5 or 6 bits of the shift 1749 // amount. If there is an AND on the shift amount, we can bypass it if it 1750 // doesn't affect any of those bits. 1751 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) { 1752 const APInt &AndMask = N->getConstantOperandAPInt(1); 1753 1754 // Since the max shift amount is a power of 2 we can subtract 1 to make a 1755 // mask that covers the bits needed to represent all shift amounts. 1756 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!"); 1757 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1); 1758 1759 if (ShMask.isSubsetOf(AndMask)) { 1760 ShAmt = N.getOperand(0); 1761 return true; 1762 } 1763 1764 // SimplifyDemandedBits may have optimized the mask so try restoring any 1765 // bits that are known zero. 1766 KnownBits Known = CurDAG->computeKnownBits(N->getOperand(0)); 1767 if (ShMask.isSubsetOf(AndMask | Known.Zero)) { 1768 ShAmt = N.getOperand(0); 1769 return true; 1770 } 1771 } else if (N.getOpcode() == ISD::SUB && 1772 isa<ConstantSDNode>(N.getOperand(0))) { 1773 uint64_t Imm = N.getConstantOperandVal(0); 1774 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to 1775 // generate a NEG instead of a SUB of a constant. 1776 if (Imm != 0 && Imm % ShiftWidth == 0) { 1777 SDLoc DL(N); 1778 EVT VT = N.getValueType(); 1779 SDValue Zero = 1780 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT); 1781 unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB; 1782 MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero, 1783 N.getOperand(1)); 1784 ShAmt = SDValue(Neg, 0); 1785 return true; 1786 } 1787 } 1788 1789 ShAmt = N; 1790 return true; 1791 } 1792 1793 bool RISCVDAGToDAGISel::selectSExti32(SDValue N, SDValue &Val) { 1794 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG && 1795 cast<VTSDNode>(N.getOperand(1))->getVT() == MVT::i32) { 1796 Val = N.getOperand(0); 1797 return true; 1798 } 1799 MVT VT = N.getSimpleValueType(); 1800 if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - 32)) { 1801 Val = N; 1802 return true; 1803 } 1804 1805 return false; 1806 } 1807 1808 bool RISCVDAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) { 1809 if (N.getOpcode() == ISD::AND) { 1810 auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); 1811 if (C && C->getZExtValue() == UINT64_C(0xFFFFFFFF)) { 1812 Val = N.getOperand(0); 1813 return true; 1814 } 1815 } 1816 MVT VT = N.getSimpleValueType(); 1817 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), 32); 1818 if (CurDAG->MaskedValueIsZero(N, Mask)) { 1819 Val = N; 1820 return true; 1821 } 1822 1823 return false; 1824 } 1825 1826 // Return true if all users of this SDNode* only consume the lower \p Bits. 1827 // This can be used to form W instructions for add/sub/mul/shl even when the 1828 // root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if 1829 // SimplifyDemandedBits has made it so some users see a sext_inreg and some 1830 // don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave 1831 // the add/sub/mul/shl to become non-W instructions. By checking the users we 1832 // may be able to use a W instruction and CSE with the other instruction if 1833 // this has happened. We could try to detect that the CSE opportunity exists 1834 // before doing this, but that would be more complicated. 1835 // TODO: Does this need to look through AND/OR/XOR to their users to find more 1836 // opportunities. 1837 bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const { 1838 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB || 1839 Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL || 1840 Node->getOpcode() == ISD::SRL || 1841 Node->getOpcode() == ISD::SIGN_EXTEND_INREG || 1842 Node->getOpcode() == RISCVISD::GREV || 1843 Node->getOpcode() == RISCVISD::GORC || 1844 isa<ConstantSDNode>(Node)) && 1845 "Unexpected opcode"); 1846 1847 for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) { 1848 SDNode *User = *UI; 1849 // Users of this node should have already been instruction selected 1850 if (!User->isMachineOpcode()) 1851 return false; 1852 1853 // TODO: Add more opcodes? 1854 switch (User->getMachineOpcode()) { 1855 default: 1856 return false; 1857 case RISCV::ADDW: 1858 case RISCV::ADDIW: 1859 case RISCV::SUBW: 1860 case RISCV::MULW: 1861 case RISCV::SLLW: 1862 case RISCV::SLLIW: 1863 case RISCV::SRAW: 1864 case RISCV::SRAIW: 1865 case RISCV::SRLW: 1866 case RISCV::SRLIW: 1867 case RISCV::DIVW: 1868 case RISCV::DIVUW: 1869 case RISCV::REMW: 1870 case RISCV::REMUW: 1871 case RISCV::ROLW: 1872 case RISCV::RORW: 1873 case RISCV::RORIW: 1874 case RISCV::CLZW: 1875 case RISCV::CTZW: 1876 case RISCV::CPOPW: 1877 case RISCV::SLLI_UW: 1878 case RISCV::FMV_W_X: 1879 case RISCV::FCVT_H_W: 1880 case RISCV::FCVT_H_WU: 1881 case RISCV::FCVT_S_W: 1882 case RISCV::FCVT_S_WU: 1883 case RISCV::FCVT_D_W: 1884 case RISCV::FCVT_D_WU: 1885 if (Bits < 32) 1886 return false; 1887 break; 1888 case RISCV::SLLI: 1889 // SLLI only uses the lower (XLen - ShAmt) bits. 1890 if (Bits < Subtarget->getXLen() - User->getConstantOperandVal(1)) 1891 return false; 1892 break; 1893 case RISCV::ANDI: 1894 if (Bits < (64 - countLeadingZeros(User->getConstantOperandVal(1)))) 1895 return false; 1896 break; 1897 case RISCV::SEXT_B: 1898 if (Bits < 8) 1899 return false; 1900 break; 1901 case RISCV::SEXT_H: 1902 case RISCV::FMV_H_X: 1903 case RISCV::ZEXT_H_RV32: 1904 case RISCV::ZEXT_H_RV64: 1905 if (Bits < 16) 1906 return false; 1907 break; 1908 case RISCV::ADD_UW: 1909 case RISCV::SH1ADD_UW: 1910 case RISCV::SH2ADD_UW: 1911 case RISCV::SH3ADD_UW: 1912 // The first operand to add.uw/shXadd.uw is implicitly zero extended from 1913 // 32 bits. 1914 if (UI.getOperandNo() != 0 || Bits < 32) 1915 return false; 1916 break; 1917 case RISCV::SB: 1918 if (UI.getOperandNo() != 0 || Bits < 8) 1919 return false; 1920 break; 1921 case RISCV::SH: 1922 if (UI.getOperandNo() != 0 || Bits < 16) 1923 return false; 1924 break; 1925 case RISCV::SW: 1926 if (UI.getOperandNo() != 0 || Bits < 32) 1927 return false; 1928 break; 1929 } 1930 } 1931 1932 return true; 1933 } 1934 1935 // Select VL as a 5 bit immediate or a value that will become a register. This 1936 // allows us to choose betwen VSETIVLI or VSETVLI later. 1937 bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) { 1938 auto *C = dyn_cast<ConstantSDNode>(N); 1939 if (C && isUInt<5>(C->getZExtValue())) { 1940 VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N), 1941 N->getValueType(0)); 1942 } else if (C && C->isAllOnesValue()) { 1943 // Treat all ones as VLMax. 1944 VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N), 1945 N->getValueType(0)); 1946 } else if (isa<RegisterSDNode>(N) && 1947 cast<RegisterSDNode>(N)->getReg() == RISCV::X0) { 1948 // All our VL operands use an operand that allows GPRNoX0 or an immediate 1949 // as the register class. Convert X0 to a special immediate to pass the 1950 // MachineVerifier. This is recognized specially by the vsetvli insertion 1951 // pass. 1952 VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N), 1953 N->getValueType(0)); 1954 } else { 1955 VL = N; 1956 } 1957 1958 return true; 1959 } 1960 1961 bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) { 1962 if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef()) 1963 return false; 1964 SplatVal = N.getOperand(1); 1965 return true; 1966 } 1967 1968 using ValidateFn = bool (*)(int64_t); 1969 1970 static bool selectVSplatSimmHelper(SDValue N, SDValue &SplatVal, 1971 SelectionDAG &DAG, 1972 const RISCVSubtarget &Subtarget, 1973 ValidateFn ValidateImm) { 1974 if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() || 1975 !isa<ConstantSDNode>(N.getOperand(1))) 1976 return false; 1977 1978 int64_t SplatImm = 1979 cast<ConstantSDNode>(N.getOperand(1))->getSExtValue(); 1980 1981 // The semantics of RISCVISD::VMV_V_X_VL is that when the operand 1982 // type is wider than the resulting vector element type: an implicit 1983 // truncation first takes place. Therefore, perform a manual 1984 // truncation/sign-extension in order to ignore any truncated bits and catch 1985 // any zero-extended immediate. 1986 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first 1987 // sign-extending to (XLenVT -1). 1988 MVT XLenVT = Subtarget.getXLenVT(); 1989 assert(XLenVT == N.getOperand(1).getSimpleValueType() && 1990 "Unexpected splat operand type"); 1991 MVT EltVT = N.getSimpleValueType().getVectorElementType(); 1992 if (EltVT.bitsLT(XLenVT)) 1993 SplatImm = SignExtend64(SplatImm, EltVT.getSizeInBits()); 1994 1995 if (!ValidateImm(SplatImm)) 1996 return false; 1997 1998 SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), XLenVT); 1999 return true; 2000 } 2001 2002 bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) { 2003 return selectVSplatSimmHelper(N, SplatVal, *CurDAG, *Subtarget, 2004 [](int64_t Imm) { return isInt<5>(Imm); }); 2005 } 2006 2007 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) { 2008 return selectVSplatSimmHelper( 2009 N, SplatVal, *CurDAG, *Subtarget, 2010 [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; }); 2011 } 2012 2013 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N, 2014 SDValue &SplatVal) { 2015 return selectVSplatSimmHelper( 2016 N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) { 2017 return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16); 2018 }); 2019 } 2020 2021 bool RISCVDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &SplatVal) { 2022 if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() || 2023 !isa<ConstantSDNode>(N.getOperand(1))) 2024 return false; 2025 2026 int64_t SplatImm = 2027 cast<ConstantSDNode>(N.getOperand(1))->getSExtValue(); 2028 2029 if (!isUInt<5>(SplatImm)) 2030 return false; 2031 2032 SplatVal = 2033 CurDAG->getTargetConstant(SplatImm, SDLoc(N), Subtarget->getXLenVT()); 2034 2035 return true; 2036 } 2037 2038 bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width, 2039 SDValue &Imm) { 2040 if (auto *C = dyn_cast<ConstantSDNode>(N)) { 2041 int64_t ImmVal = SignExtend64(C->getSExtValue(), Width); 2042 2043 if (!isInt<5>(ImmVal)) 2044 return false; 2045 2046 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT()); 2047 return true; 2048 } 2049 2050 return false; 2051 } 2052 2053 // Merge an ADDI into the offset of a load/store instruction where possible. 2054 // (load (addi base, off1), off2) -> (load base, off1+off2) 2055 // (store val, (addi base, off1), off2) -> (store val, base, off1+off2) 2056 // This is possible when off1+off2 fits a 12-bit immediate. 2057 bool RISCVDAGToDAGISel::doPeepholeLoadStoreADDI(SDNode *N) { 2058 int OffsetOpIdx; 2059 int BaseOpIdx; 2060 2061 // Only attempt this optimisation for I-type loads and S-type stores. 2062 switch (N->getMachineOpcode()) { 2063 default: 2064 return false; 2065 case RISCV::LB: 2066 case RISCV::LH: 2067 case RISCV::LW: 2068 case RISCV::LBU: 2069 case RISCV::LHU: 2070 case RISCV::LWU: 2071 case RISCV::LD: 2072 case RISCV::FLH: 2073 case RISCV::FLW: 2074 case RISCV::FLD: 2075 BaseOpIdx = 0; 2076 OffsetOpIdx = 1; 2077 break; 2078 case RISCV::SB: 2079 case RISCV::SH: 2080 case RISCV::SW: 2081 case RISCV::SD: 2082 case RISCV::FSH: 2083 case RISCV::FSW: 2084 case RISCV::FSD: 2085 BaseOpIdx = 1; 2086 OffsetOpIdx = 2; 2087 break; 2088 } 2089 2090 if (!isa<ConstantSDNode>(N->getOperand(OffsetOpIdx))) 2091 return false; 2092 2093 SDValue Base = N->getOperand(BaseOpIdx); 2094 2095 // If the base is an ADDI, we can merge it in to the load/store. 2096 if (!Base.isMachineOpcode() || Base.getMachineOpcode() != RISCV::ADDI) 2097 return false; 2098 2099 SDValue ImmOperand = Base.getOperand(1); 2100 uint64_t Offset2 = N->getConstantOperandVal(OffsetOpIdx); 2101 2102 if (auto *Const = dyn_cast<ConstantSDNode>(ImmOperand)) { 2103 int64_t Offset1 = Const->getSExtValue(); 2104 int64_t CombinedOffset = Offset1 + Offset2; 2105 if (!isInt<12>(CombinedOffset)) 2106 return false; 2107 ImmOperand = CurDAG->getTargetConstant(CombinedOffset, SDLoc(ImmOperand), 2108 ImmOperand.getValueType()); 2109 } else if (auto *GA = dyn_cast<GlobalAddressSDNode>(ImmOperand)) { 2110 // If the off1 in (addi base, off1) is a global variable's address (its 2111 // low part, really), then we can rely on the alignment of that variable 2112 // to provide a margin of safety before off1 can overflow the 12 bits. 2113 // Check if off2 falls within that margin; if so off1+off2 can't overflow. 2114 const DataLayout &DL = CurDAG->getDataLayout(); 2115 Align Alignment = GA->getGlobal()->getPointerAlignment(DL); 2116 if (Offset2 != 0 && Alignment <= Offset2) 2117 return false; 2118 int64_t Offset1 = GA->getOffset(); 2119 int64_t CombinedOffset = Offset1 + Offset2; 2120 ImmOperand = CurDAG->getTargetGlobalAddress( 2121 GA->getGlobal(), SDLoc(ImmOperand), ImmOperand.getValueType(), 2122 CombinedOffset, GA->getTargetFlags()); 2123 } else if (auto *CP = dyn_cast<ConstantPoolSDNode>(ImmOperand)) { 2124 // Ditto. 2125 Align Alignment = CP->getAlign(); 2126 if (Offset2 != 0 && Alignment <= Offset2) 2127 return false; 2128 int64_t Offset1 = CP->getOffset(); 2129 int64_t CombinedOffset = Offset1 + Offset2; 2130 ImmOperand = CurDAG->getTargetConstantPool( 2131 CP->getConstVal(), ImmOperand.getValueType(), CP->getAlign(), 2132 CombinedOffset, CP->getTargetFlags()); 2133 } else { 2134 return false; 2135 } 2136 2137 LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: "); 2138 LLVM_DEBUG(Base->dump(CurDAG)); 2139 LLVM_DEBUG(dbgs() << "\nN: "); 2140 LLVM_DEBUG(N->dump(CurDAG)); 2141 LLVM_DEBUG(dbgs() << "\n"); 2142 2143 // Modify the offset operand of the load/store. 2144 if (BaseOpIdx == 0) // Load 2145 CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand, 2146 N->getOperand(2)); 2147 else // Store 2148 CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0), 2149 ImmOperand, N->getOperand(3)); 2150 2151 return true; 2152 } 2153 2154 // Try to remove sext.w if the input is a W instruction or can be made into 2155 // a W instruction cheaply. 2156 bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) { 2157 // Look for the sext.w pattern, addiw rd, rs1, 0. 2158 if (N->getMachineOpcode() != RISCV::ADDIW || 2159 !isNullConstant(N->getOperand(1))) 2160 return false; 2161 2162 SDValue N0 = N->getOperand(0); 2163 if (!N0.isMachineOpcode()) 2164 return false; 2165 2166 switch (N0.getMachineOpcode()) { 2167 default: 2168 break; 2169 case RISCV::ADD: 2170 case RISCV::ADDI: 2171 case RISCV::SUB: 2172 case RISCV::MUL: 2173 case RISCV::SLLI: { 2174 // Convert sext.w+add/sub/mul to their W instructions. This will create 2175 // a new independent instruction. This improves latency. 2176 unsigned Opc; 2177 switch (N0.getMachineOpcode()) { 2178 default: 2179 llvm_unreachable("Unexpected opcode!"); 2180 case RISCV::ADD: Opc = RISCV::ADDW; break; 2181 case RISCV::ADDI: Opc = RISCV::ADDIW; break; 2182 case RISCV::SUB: Opc = RISCV::SUBW; break; 2183 case RISCV::MUL: Opc = RISCV::MULW; break; 2184 case RISCV::SLLI: Opc = RISCV::SLLIW; break; 2185 } 2186 2187 SDValue N00 = N0.getOperand(0); 2188 SDValue N01 = N0.getOperand(1); 2189 2190 // Shift amount needs to be uimm5. 2191 if (N0.getMachineOpcode() == RISCV::SLLI && 2192 !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue())) 2193 break; 2194 2195 SDNode *Result = 2196 CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), 2197 N00, N01); 2198 ReplaceUses(N, Result); 2199 return true; 2200 } 2201 case RISCV::ADDW: 2202 case RISCV::ADDIW: 2203 case RISCV::SUBW: 2204 case RISCV::MULW: 2205 case RISCV::SLLIW: 2206 case RISCV::GREVIW: 2207 case RISCV::GORCIW: 2208 // Result is already sign extended just remove the sext.w. 2209 // NOTE: We only handle the nodes that are selected with hasAllWUsers. 2210 ReplaceUses(N, N0.getNode()); 2211 return true; 2212 } 2213 2214 return false; 2215 } 2216 2217 // Optimize masked RVV pseudo instructions with a known all-ones mask to their 2218 // corresponding "unmasked" pseudo versions. The mask we're interested in will 2219 // take the form of a V0 physical register operand, with a glued 2220 // register-setting instruction. 2221 bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) { 2222 const RISCV::RISCVMaskedPseudoInfo *I = 2223 RISCV::getMaskedPseudoInfo(N->getMachineOpcode()); 2224 if (!I) 2225 return false; 2226 2227 unsigned MaskOpIdx = I->MaskOpIdx; 2228 2229 // Check that we're using V0 as a mask register. 2230 if (!isa<RegisterSDNode>(N->getOperand(MaskOpIdx)) || 2231 cast<RegisterSDNode>(N->getOperand(MaskOpIdx))->getReg() != RISCV::V0) 2232 return false; 2233 2234 // The glued user defines V0. 2235 const auto *Glued = N->getGluedNode(); 2236 2237 if (!Glued || Glued->getOpcode() != ISD::CopyToReg) 2238 return false; 2239 2240 // Check that we're defining V0 as a mask register. 2241 if (!isa<RegisterSDNode>(Glued->getOperand(1)) || 2242 cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0) 2243 return false; 2244 2245 // Check the instruction defining V0; it needs to be a VMSET pseudo. 2246 SDValue MaskSetter = Glued->getOperand(2); 2247 2248 const auto IsVMSet = [](unsigned Opc) { 2249 return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 || 2250 Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 || 2251 Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 || 2252 Opc == RISCV::PseudoVMSET_M_B8; 2253 }; 2254 2255 // TODO: Check that the VMSET is the expected bitwidth? The pseudo has 2256 // undefined behaviour if it's the wrong bitwidth, so we could choose to 2257 // assume that it's all-ones? Same applies to its VL. 2258 if (!MaskSetter->isMachineOpcode() || !IsVMSet(MaskSetter.getMachineOpcode())) 2259 return false; 2260 2261 // Retrieve the tail policy operand index, if any. 2262 Optional<unsigned> TailPolicyOpIdx; 2263 const RISCVInstrInfo *TII = static_cast<const RISCVInstrInfo *>( 2264 CurDAG->getSubtarget().getInstrInfo()); 2265 2266 const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode()); 2267 2268 bool IsTA = true; 2269 if (RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags)) { 2270 // The last operand of the pseudo is the policy op, but we might have a 2271 // Glue operand last. We might also have a chain. 2272 TailPolicyOpIdx = N->getNumOperands() - 1; 2273 if (N->getOperand(*TailPolicyOpIdx).getValueType() == MVT::Glue) 2274 (*TailPolicyOpIdx)--; 2275 if (N->getOperand(*TailPolicyOpIdx).getValueType() == MVT::Other) 2276 (*TailPolicyOpIdx)--; 2277 2278 if (!(N->getConstantOperandVal(*TailPolicyOpIdx) & 2279 RISCVII::TAIL_AGNOSTIC)) { 2280 // Keep the true-masked instruction when there is no unmasked TU 2281 // instruction 2282 if (I->UnmaskedTUPseudo == I->MaskedPseudo && !N->getOperand(0).isUndef()) 2283 return false; 2284 // We can't use TA if the tie-operand is not IMPLICIT_DEF 2285 if (!N->getOperand(0).isUndef()) 2286 IsTA = false; 2287 } 2288 } 2289 2290 if (IsTA) { 2291 uint64_t TSFlags = TII->get(I->UnmaskedPseudo).TSFlags; 2292 2293 // Check that we're dropping the merge operand, the mask operand, and any 2294 // policy operand when we transform to this unmasked pseudo. 2295 assert(!RISCVII::hasMergeOp(TSFlags) && RISCVII::hasDummyMaskOp(TSFlags) && 2296 !RISCVII::hasVecPolicyOp(TSFlags) && 2297 "Unexpected pseudo to transform to"); 2298 (void)TSFlags; 2299 } else { 2300 uint64_t TSFlags = TII->get(I->UnmaskedTUPseudo).TSFlags; 2301 2302 // Check that we're dropping the mask operand, and any policy operand 2303 // when we transform to this unmasked tu pseudo. 2304 assert(RISCVII::hasMergeOp(TSFlags) && RISCVII::hasDummyMaskOp(TSFlags) && 2305 !RISCVII::hasVecPolicyOp(TSFlags) && 2306 "Unexpected pseudo to transform to"); 2307 (void)TSFlags; 2308 } 2309 2310 unsigned Opc = IsTA ? I->UnmaskedPseudo : I->UnmaskedTUPseudo; 2311 SmallVector<SDValue, 8> Ops; 2312 // Skip the merge operand at index 0 if IsTA 2313 for (unsigned I = IsTA, E = N->getNumOperands(); I != E; I++) { 2314 // Skip the mask, the policy, and the Glue. 2315 SDValue Op = N->getOperand(I); 2316 if (I == MaskOpIdx || I == TailPolicyOpIdx || 2317 Op.getValueType() == MVT::Glue) 2318 continue; 2319 Ops.push_back(Op); 2320 } 2321 2322 // Transitively apply any node glued to our new node. 2323 if (auto *TGlued = Glued->getGluedNode()) 2324 Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1)); 2325 2326 SDNode *Result = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops); 2327 ReplaceUses(N, Result); 2328 2329 return true; 2330 } 2331 2332 // This pass converts a legalized DAG into a RISCV-specific DAG, ready 2333 // for instruction scheduling. 2334 FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM) { 2335 return new RISCVDAGToDAGISel(TM); 2336 } 2337