1 //===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISCV ------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines an instruction selector for the RISCV target. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "RISCVISelDAGToDAG.h" 14 #include "MCTargetDesc/RISCVMCTargetDesc.h" 15 #include "MCTargetDesc/RISCVMatInt.h" 16 #include "RISCVISelLowering.h" 17 #include "RISCVMachineFunctionInfo.h" 18 #include "llvm/CodeGen/MachineFrameInfo.h" 19 #include "llvm/IR/IntrinsicsRISCV.h" 20 #include "llvm/Support/Alignment.h" 21 #include "llvm/Support/Debug.h" 22 #include "llvm/Support/KnownBits.h" 23 #include "llvm/Support/MathExtras.h" 24 #include "llvm/Support/raw_ostream.h" 25 26 using namespace llvm; 27 28 #define DEBUG_TYPE "riscv-isel" 29 30 namespace llvm { 31 namespace RISCV { 32 #define GET_RISCVVSSEGTable_IMPL 33 #define GET_RISCVVLSEGTable_IMPL 34 #define GET_RISCVVLXSEGTable_IMPL 35 #define GET_RISCVVSXSEGTable_IMPL 36 #define GET_RISCVVLETable_IMPL 37 #define GET_RISCVVSETable_IMPL 38 #define GET_RISCVVLXTable_IMPL 39 #define GET_RISCVVSXTable_IMPL 40 #define GET_RISCVMaskedPseudosTable_IMPL 41 #include "RISCVGenSearchableTables.inc" 42 } // namespace RISCV 43 } // namespace llvm 44 45 void RISCVDAGToDAGISel::PreprocessISelDAG() { 46 for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), 47 E = CurDAG->allnodes_end(); 48 I != E;) { 49 SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues. 50 51 // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point 52 // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden. 53 if (N->getOpcode() == ISD::SPLAT_VECTOR) { 54 MVT VT = N->getSimpleValueType(0); 55 unsigned Opc = 56 VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL; 57 SDLoc DL(N); 58 SDValue VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, DL, 59 Subtarget->getXLenVT()); 60 SDValue Result = CurDAG->getNode(Opc, DL, VT, N->getOperand(0), VL); 61 62 --I; 63 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); 64 ++I; 65 CurDAG->DeleteNode(N); 66 continue; 67 } 68 69 // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector 70 // load. Done after lowering and combining so that we have a chance to 71 // optimize this to VMV_V_X_VL when the upper bits aren't needed. 72 if (N->getOpcode() != RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) 73 continue; 74 75 assert(N->getNumOperands() == 3 && "Unexpected number of operands"); 76 MVT VT = N->getSimpleValueType(0); 77 SDValue Lo = N->getOperand(0); 78 SDValue Hi = N->getOperand(1); 79 SDValue VL = N->getOperand(2); 80 assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() && 81 Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 && 82 "Unexpected VTs!"); 83 MachineFunction &MF = CurDAG->getMachineFunction(); 84 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>(); 85 SDLoc DL(N); 86 87 // We use the same frame index we use for moving two i32s into 64-bit FPR. 88 // This is an analogous operation. 89 int FI = FuncInfo->getMoveF64FrameIndex(MF); 90 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 91 const TargetLowering &TLI = CurDAG->getTargetLoweringInfo(); 92 SDValue StackSlot = 93 CurDAG->getFrameIndex(FI, TLI.getPointerTy(CurDAG->getDataLayout())); 94 95 SDValue Chain = CurDAG->getEntryNode(); 96 Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8)); 97 98 SDValue OffsetSlot = 99 CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), DL); 100 Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4), 101 Align(8)); 102 103 Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); 104 105 SDVTList VTs = CurDAG->getVTList({VT, MVT::Other}); 106 SDValue IntID = 107 CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64); 108 SDValue Ops[] = {Chain, 109 IntID, 110 CurDAG->getUNDEF(VT), 111 StackSlot, 112 CurDAG->getRegister(RISCV::X0, MVT::i64), 113 VL}; 114 115 SDValue Result = CurDAG->getMemIntrinsicNode( 116 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MVT::i64, MPI, Align(8), 117 MachineMemOperand::MOLoad); 118 119 // We're about to replace all uses of the SPLAT_VECTOR_SPLIT_I64 with the 120 // vlse we created. This will cause general havok on the dag because 121 // anything below the conversion could be folded into other existing nodes. 122 // To avoid invalidating 'I', back it up to the convert node. 123 --I; 124 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); 125 126 // Now that we did that, the node is dead. Increment the iterator to the 127 // next node to process, then delete N. 128 ++I; 129 CurDAG->DeleteNode(N); 130 } 131 } 132 133 void RISCVDAGToDAGISel::PostprocessISelDAG() { 134 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); 135 136 bool MadeChange = false; 137 while (Position != CurDAG->allnodes_begin()) { 138 SDNode *N = &*--Position; 139 // Skip dead nodes and any non-machine opcodes. 140 if (N->use_empty() || !N->isMachineOpcode()) 141 continue; 142 143 MadeChange |= doPeepholeSExtW(N); 144 MadeChange |= doPeepholeLoadStoreADDI(N); 145 MadeChange |= doPeepholeMaskedRVV(N); 146 } 147 148 if (MadeChange) 149 CurDAG->RemoveDeadNodes(); 150 } 151 152 static SDNode *selectImmWithConstantPool(SelectionDAG *CurDAG, const SDLoc &DL, 153 const MVT VT, int64_t Imm, 154 const RISCVSubtarget &Subtarget) { 155 assert(VT == MVT::i64 && "Expecting MVT::i64"); 156 const RISCVTargetLowering *TLI = Subtarget.getTargetLowering(); 157 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(CurDAG->getConstantPool( 158 ConstantInt::get(EVT(VT).getTypeForEVT(*CurDAG->getContext()), Imm), VT)); 159 SDValue Addr = TLI->getAddr(CP, *CurDAG); 160 SDValue Offset = CurDAG->getTargetConstant(0, DL, VT); 161 // Since there is no data race, the chain can be the entry node. 162 SDNode *Load = CurDAG->getMachineNode(RISCV::LD, DL, VT, Addr, Offset, 163 CurDAG->getEntryNode()); 164 MachineFunction &MF = CurDAG->getMachineFunction(); 165 MachineMemOperand *MemOp = MF.getMachineMemOperand( 166 MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad, 167 LLT(VT), CP->getAlign()); 168 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Load), {MemOp}); 169 return Load; 170 } 171 172 static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, 173 int64_t Imm, const RISCVSubtarget &Subtarget) { 174 MVT XLenVT = Subtarget.getXLenVT(); 175 RISCVMatInt::InstSeq Seq = 176 RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits()); 177 178 // If Imm is expensive to build, then we put it into constant pool. 179 if (Subtarget.useConstantPoolForLargeInts() && 180 Seq.size() > Subtarget.getMaxBuildIntsCost()) 181 return selectImmWithConstantPool(CurDAG, DL, VT, Imm, Subtarget); 182 183 SDNode *Result = nullptr; 184 SDValue SrcReg = CurDAG->getRegister(RISCV::X0, XLenVT); 185 for (RISCVMatInt::Inst &Inst : Seq) { 186 SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, XLenVT); 187 if (Inst.Opc == RISCV::LUI) 188 Result = CurDAG->getMachineNode(RISCV::LUI, DL, XLenVT, SDImm); 189 else if (Inst.Opc == RISCV::ADD_UW) 190 Result = CurDAG->getMachineNode(RISCV::ADD_UW, DL, XLenVT, SrcReg, 191 CurDAG->getRegister(RISCV::X0, XLenVT)); 192 else if (Inst.Opc == RISCV::SH1ADD || Inst.Opc == RISCV::SH2ADD || 193 Inst.Opc == RISCV::SH3ADD) 194 Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SrcReg); 195 else 196 Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SDImm); 197 198 // Only the first instruction has X0 as its source. 199 SrcReg = SDValue(Result, 0); 200 } 201 202 return Result; 203 } 204 205 static SDValue createTupleImpl(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, 206 unsigned RegClassID, unsigned SubReg0) { 207 assert(Regs.size() >= 2 && Regs.size() <= 8); 208 209 SDLoc DL(Regs[0]); 210 SmallVector<SDValue, 8> Ops; 211 212 Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32)); 213 214 for (unsigned I = 0; I < Regs.size(); ++I) { 215 Ops.push_back(Regs[I]); 216 Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32)); 217 } 218 SDNode *N = 219 CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops); 220 return SDValue(N, 0); 221 } 222 223 static SDValue createM1Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, 224 unsigned NF) { 225 static const unsigned RegClassIDs[] = { 226 RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID, 227 RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID, 228 RISCV::VRN8M1RegClassID}; 229 230 return createTupleImpl(CurDAG, Regs, RegClassIDs[NF - 2], RISCV::sub_vrm1_0); 231 } 232 233 static SDValue createM2Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, 234 unsigned NF) { 235 static const unsigned RegClassIDs[] = {RISCV::VRN2M2RegClassID, 236 RISCV::VRN3M2RegClassID, 237 RISCV::VRN4M2RegClassID}; 238 239 return createTupleImpl(CurDAG, Regs, RegClassIDs[NF - 2], RISCV::sub_vrm2_0); 240 } 241 242 static SDValue createM4Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, 243 unsigned NF) { 244 return createTupleImpl(CurDAG, Regs, RISCV::VRN2M4RegClassID, 245 RISCV::sub_vrm4_0); 246 } 247 248 static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, 249 unsigned NF, RISCVII::VLMUL LMUL) { 250 switch (LMUL) { 251 default: 252 llvm_unreachable("Invalid LMUL."); 253 case RISCVII::VLMUL::LMUL_F8: 254 case RISCVII::VLMUL::LMUL_F4: 255 case RISCVII::VLMUL::LMUL_F2: 256 case RISCVII::VLMUL::LMUL_1: 257 return createM1Tuple(CurDAG, Regs, NF); 258 case RISCVII::VLMUL::LMUL_2: 259 return createM2Tuple(CurDAG, Regs, NF); 260 case RISCVII::VLMUL::LMUL_4: 261 return createM4Tuple(CurDAG, Regs, NF); 262 } 263 } 264 265 void RISCVDAGToDAGISel::addVectorLoadStoreOperands( 266 SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp, 267 bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands, 268 bool IsLoad, MVT *IndexVT) { 269 SDValue Chain = Node->getOperand(0); 270 SDValue Glue; 271 272 SDValue Base; 273 SelectBaseAddr(Node->getOperand(CurOp++), Base); 274 Operands.push_back(Base); // Base pointer. 275 276 if (IsStridedOrIndexed) { 277 Operands.push_back(Node->getOperand(CurOp++)); // Index. 278 if (IndexVT) 279 *IndexVT = Operands.back()->getSimpleValueType(0); 280 } 281 282 if (IsMasked) { 283 // Mask needs to be copied to V0. 284 SDValue Mask = Node->getOperand(CurOp++); 285 Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue()); 286 Glue = Chain.getValue(1); 287 Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType())); 288 } 289 SDValue VL; 290 selectVLOp(Node->getOperand(CurOp++), VL); 291 Operands.push_back(VL); 292 293 MVT XLenVT = Subtarget->getXLenVT(); 294 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT); 295 Operands.push_back(SEWOp); 296 297 // Masked load has the tail policy argument. 298 if (IsMasked && IsLoad) { 299 // Policy must be a constant. 300 uint64_t Policy = Node->getConstantOperandVal(CurOp++); 301 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT); 302 Operands.push_back(PolicyOp); 303 } 304 305 Operands.push_back(Chain); // Chain. 306 if (Glue) 307 Operands.push_back(Glue); 308 } 309 310 void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked, 311 bool IsStrided) { 312 SDLoc DL(Node); 313 unsigned NF = Node->getNumValues() - 1; 314 MVT VT = Node->getSimpleValueType(0); 315 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 316 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 317 318 unsigned CurOp = 2; 319 SmallVector<SDValue, 8> Operands; 320 if (IsMasked) { 321 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp, 322 Node->op_begin() + CurOp + NF); 323 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL); 324 Operands.push_back(MaskedOff); 325 CurOp += NF; 326 } 327 328 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 329 Operands, /*IsLoad=*/true); 330 331 const RISCV::VLSEGPseudo *P = 332 RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW, 333 static_cast<unsigned>(LMUL)); 334 MachineSDNode *Load = 335 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands); 336 337 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 338 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 339 340 SDValue SuperReg = SDValue(Load, 0); 341 for (unsigned I = 0; I < NF; ++I) { 342 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I); 343 ReplaceUses(SDValue(Node, I), 344 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg)); 345 } 346 347 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); 348 CurDAG->RemoveDeadNode(Node); 349 } 350 351 void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) { 352 SDLoc DL(Node); 353 unsigned NF = Node->getNumValues() - 2; // Do not count VL and Chain. 354 MVT VT = Node->getSimpleValueType(0); 355 MVT XLenVT = Subtarget->getXLenVT(); 356 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 357 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 358 359 unsigned CurOp = 2; 360 SmallVector<SDValue, 7> Operands; 361 if (IsMasked) { 362 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp, 363 Node->op_begin() + CurOp + NF); 364 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL); 365 Operands.push_back(MaskedOff); 366 CurOp += NF; 367 } 368 369 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 370 /*IsStridedOrIndexed*/ false, Operands, 371 /*IsLoad=*/true); 372 373 const RISCV::VLSEGPseudo *P = 374 RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true, 375 Log2SEW, static_cast<unsigned>(LMUL)); 376 MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, 377 MVT::Other, MVT::Glue, Operands); 378 SDNode *ReadVL = CurDAG->getMachineNode(RISCV::PseudoReadVL, DL, XLenVT, 379 /*Glue*/ SDValue(Load, 2)); 380 381 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 382 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 383 384 SDValue SuperReg = SDValue(Load, 0); 385 for (unsigned I = 0; I < NF; ++I) { 386 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I); 387 ReplaceUses(SDValue(Node, I), 388 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg)); 389 } 390 391 ReplaceUses(SDValue(Node, NF), SDValue(ReadVL, 0)); // VL 392 ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 1)); // Chain 393 CurDAG->RemoveDeadNode(Node); 394 } 395 396 void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked, 397 bool IsOrdered) { 398 SDLoc DL(Node); 399 unsigned NF = Node->getNumValues() - 1; 400 MVT VT = Node->getSimpleValueType(0); 401 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 402 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 403 404 unsigned CurOp = 2; 405 SmallVector<SDValue, 8> Operands; 406 if (IsMasked) { 407 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp, 408 Node->op_begin() + CurOp + NF); 409 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL); 410 Operands.push_back(MaskedOff); 411 CurOp += NF; 412 } 413 414 MVT IndexVT; 415 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 416 /*IsStridedOrIndexed*/ true, Operands, 417 /*IsLoad=*/true, &IndexVT); 418 419 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 420 "Element count mismatch"); 421 422 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 423 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 424 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 425 report_fatal_error("The V extension does not support EEW=64 for index " 426 "values when XLEN=32"); 427 } 428 const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo( 429 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), 430 static_cast<unsigned>(IndexLMUL)); 431 MachineSDNode *Load = 432 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands); 433 434 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 435 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 436 437 SDValue SuperReg = SDValue(Load, 0); 438 for (unsigned I = 0; I < NF; ++I) { 439 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I); 440 ReplaceUses(SDValue(Node, I), 441 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg)); 442 } 443 444 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); 445 CurDAG->RemoveDeadNode(Node); 446 } 447 448 void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, bool IsMasked, 449 bool IsStrided) { 450 SDLoc DL(Node); 451 unsigned NF = Node->getNumOperands() - 4; 452 if (IsStrided) 453 NF--; 454 if (IsMasked) 455 NF--; 456 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 457 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 458 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 459 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF); 460 SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL); 461 462 SmallVector<SDValue, 8> Operands; 463 Operands.push_back(StoreVal); 464 unsigned CurOp = 2 + NF; 465 466 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 467 Operands); 468 469 const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo( 470 NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL)); 471 MachineSDNode *Store = 472 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands); 473 474 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 475 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 476 477 ReplaceNode(Node, Store); 478 } 479 480 void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked, 481 bool IsOrdered) { 482 SDLoc DL(Node); 483 unsigned NF = Node->getNumOperands() - 5; 484 if (IsMasked) 485 --NF; 486 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 487 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 488 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 489 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF); 490 SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL); 491 492 SmallVector<SDValue, 8> Operands; 493 Operands.push_back(StoreVal); 494 unsigned CurOp = 2 + NF; 495 496 MVT IndexVT; 497 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 498 /*IsStridedOrIndexed*/ true, Operands, 499 /*IsLoad=*/false, &IndexVT); 500 501 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 502 "Element count mismatch"); 503 504 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 505 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 506 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 507 report_fatal_error("The V extension does not support EEW=64 for index " 508 "values when XLEN=32"); 509 } 510 const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo( 511 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), 512 static_cast<unsigned>(IndexLMUL)); 513 MachineSDNode *Store = 514 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands); 515 516 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 517 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 518 519 ReplaceNode(Node, Store); 520 } 521 522 void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) { 523 if (!Subtarget->hasVInstructions()) 524 return; 525 526 assert((Node->getOpcode() == ISD::INTRINSIC_W_CHAIN || 527 Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN) && 528 "Unexpected opcode"); 529 530 SDLoc DL(Node); 531 MVT XLenVT = Subtarget->getXLenVT(); 532 533 bool HasChain = Node->getOpcode() == ISD::INTRINSIC_W_CHAIN; 534 unsigned IntNoOffset = HasChain ? 1 : 0; 535 unsigned IntNo = Node->getConstantOperandVal(IntNoOffset); 536 537 assert((IntNo == Intrinsic::riscv_vsetvli || 538 IntNo == Intrinsic::riscv_vsetvlimax || 539 IntNo == Intrinsic::riscv_vsetvli_opt || 540 IntNo == Intrinsic::riscv_vsetvlimax_opt) && 541 "Unexpected vsetvli intrinsic"); 542 543 bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax || 544 IntNo == Intrinsic::riscv_vsetvlimax_opt; 545 unsigned Offset = IntNoOffset + (VLMax ? 1 : 2); 546 547 assert(Node->getNumOperands() == Offset + 2 && 548 "Unexpected number of operands"); 549 550 unsigned SEW = 551 RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7); 552 RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>( 553 Node->getConstantOperandVal(Offset + 1) & 0x7); 554 555 unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true, 556 /*MaskAgnostic*/ false); 557 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT); 558 559 SmallVector<EVT, 2> VTs = {XLenVT}; 560 if (HasChain) 561 VTs.push_back(MVT::Other); 562 563 SDValue VLOperand; 564 unsigned Opcode = RISCV::PseudoVSETVLI; 565 if (VLMax) { 566 VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT); 567 Opcode = RISCV::PseudoVSETVLIX0; 568 } else { 569 VLOperand = Node->getOperand(IntNoOffset + 1); 570 571 if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) { 572 uint64_t AVL = C->getZExtValue(); 573 if (isUInt<5>(AVL)) { 574 SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT); 575 SmallVector<SDValue, 3> Ops = {VLImm, VTypeIOp}; 576 if (HasChain) 577 Ops.push_back(Node->getOperand(0)); 578 ReplaceNode( 579 Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL, VTs, Ops)); 580 return; 581 } 582 } 583 } 584 585 SmallVector<SDValue, 3> Ops = {VLOperand, VTypeIOp}; 586 if (HasChain) 587 Ops.push_back(Node->getOperand(0)); 588 589 ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, VTs, Ops)); 590 } 591 592 void RISCVDAGToDAGISel::Select(SDNode *Node) { 593 // If we have a custom node, we have already selected. 594 if (Node->isMachineOpcode()) { 595 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n"); 596 Node->setNodeId(-1); 597 return; 598 } 599 600 // Instruction Selection not handled by the auto-generated tablegen selection 601 // should be handled here. 602 unsigned Opcode = Node->getOpcode(); 603 MVT XLenVT = Subtarget->getXLenVT(); 604 SDLoc DL(Node); 605 MVT VT = Node->getSimpleValueType(0); 606 607 switch (Opcode) { 608 case ISD::Constant: { 609 auto *ConstNode = cast<ConstantSDNode>(Node); 610 if (VT == XLenVT && ConstNode->isZero()) { 611 SDValue New = 612 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, XLenVT); 613 ReplaceNode(Node, New.getNode()); 614 return; 615 } 616 int64_t Imm = ConstNode->getSExtValue(); 617 // If the upper XLen-16 bits are not used, try to convert this to a simm12 618 // by sign extending bit 15. 619 if (isUInt<16>(Imm) && isInt<12>(SignExtend64(Imm, 16)) && 620 hasAllHUsers(Node)) 621 Imm = SignExtend64(Imm, 16); 622 // If the upper 32-bits are not used try to convert this into a simm32 by 623 // sign extending bit 32. 624 if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node)) 625 Imm = SignExtend64(Imm, 32); 626 627 ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget)); 628 return; 629 } 630 case ISD::FrameIndex: { 631 SDValue Imm = CurDAG->getTargetConstant(0, DL, XLenVT); 632 int FI = cast<FrameIndexSDNode>(Node)->getIndex(); 633 SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT); 634 ReplaceNode(Node, CurDAG->getMachineNode(RISCV::ADDI, DL, VT, TFI, Imm)); 635 return; 636 } 637 case ISD::SRL: { 638 // Optimize (srl (and X, C2), C) -> 639 // (srli (slli X, (XLen-C3), (XLen-C3) + C) 640 // Where C2 is a mask with C3 trailing ones. 641 // Taking into account that the C2 may have had lower bits unset by 642 // SimplifyDemandedBits. This avoids materializing the C2 immediate. 643 // This pattern occurs when type legalizing right shifts for types with 644 // less than XLen bits. 645 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 646 if (!N1C) 647 break; 648 SDValue N0 = Node->getOperand(0); 649 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() || 650 !isa<ConstantSDNode>(N0.getOperand(1))) 651 break; 652 unsigned ShAmt = N1C->getZExtValue(); 653 uint64_t Mask = N0.getConstantOperandVal(1); 654 Mask |= maskTrailingOnes<uint64_t>(ShAmt); 655 if (!isMask_64(Mask)) 656 break; 657 unsigned TrailingOnes = countTrailingOnes(Mask); 658 // 32 trailing ones should use srliw via tablegen pattern. 659 if (TrailingOnes == 32 || ShAmt >= TrailingOnes) 660 break; 661 unsigned LShAmt = Subtarget->getXLen() - TrailingOnes; 662 SDNode *SLLI = 663 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0), 664 CurDAG->getTargetConstant(LShAmt, DL, VT)); 665 SDNode *SRLI = CurDAG->getMachineNode( 666 RISCV::SRLI, DL, VT, SDValue(SLLI, 0), 667 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT)); 668 ReplaceNode(Node, SRLI); 669 return; 670 } 671 case ISD::SRA: { 672 // Optimize (sra (sext_inreg X, i16), C) -> 673 // (srai (slli X, (XLen-16), (XLen-16) + C) 674 // And (sra (sext_inreg X, i8), C) -> 675 // (srai (slli X, (XLen-8), (XLen-8) + C) 676 // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal. 677 // This transform matches the code we get without Zbb. The shifts are more 678 // compressible, and this can help expose CSE opportunities in the sdiv by 679 // constant optimization. 680 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 681 if (!N1C) 682 break; 683 SDValue N0 = Node->getOperand(0); 684 if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse()) 685 break; 686 unsigned ShAmt = N1C->getZExtValue(); 687 unsigned ExtSize = 688 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits(); 689 // ExtSize of 32 should use sraiw via tablegen pattern. 690 if (ExtSize >= 32 || ShAmt >= ExtSize) 691 break; 692 unsigned LShAmt = Subtarget->getXLen() - ExtSize; 693 SDNode *SLLI = 694 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0), 695 CurDAG->getTargetConstant(LShAmt, DL, VT)); 696 SDNode *SRAI = CurDAG->getMachineNode( 697 RISCV::SRAI, DL, VT, SDValue(SLLI, 0), 698 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT)); 699 ReplaceNode(Node, SRAI); 700 return; 701 } 702 case ISD::AND: { 703 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 704 if (!N1C) 705 break; 706 707 SDValue N0 = Node->getOperand(0); 708 709 bool LeftShift = N0.getOpcode() == ISD::SHL; 710 if (!LeftShift && N0.getOpcode() != ISD::SRL) 711 break; 712 713 auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 714 if (!C) 715 break; 716 uint64_t C2 = C->getZExtValue(); 717 unsigned XLen = Subtarget->getXLen(); 718 if (!C2 || C2 >= XLen) 719 break; 720 721 uint64_t C1 = N1C->getZExtValue(); 722 723 // Keep track of whether this is an andi. 724 bool IsANDI = isInt<12>(N1C->getSExtValue()); 725 726 // Clear irrelevant bits in the mask. 727 if (LeftShift) 728 C1 &= maskTrailingZeros<uint64_t>(C2); 729 else 730 C1 &= maskTrailingOnes<uint64_t>(XLen - C2); 731 732 // Some transforms should only be done if the shift has a single use or 733 // the AND would become (srli (slli X, 32), 32) 734 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF); 735 736 SDValue X = N0.getOperand(0); 737 738 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask 739 // with c3 leading zeros. 740 if (!LeftShift && isMask_64(C1)) { 741 uint64_t C3 = XLen - (64 - countLeadingZeros(C1)); 742 if (C2 < C3) { 743 // If the number of leading zeros is C2+32 this can be SRLIW. 744 if (C2 + 32 == C3) { 745 SDNode *SRLIW = 746 CurDAG->getMachineNode(RISCV::SRLIW, DL, XLenVT, X, 747 CurDAG->getTargetConstant(C2, DL, XLenVT)); 748 ReplaceNode(Node, SRLIW); 749 return; 750 } 751 752 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32) if 753 // c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1. 754 // 755 // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type 756 // legalized and goes through DAG combine. 757 if (C2 >= 32 && (C3 - C2) == 1 && N0.hasOneUse() && 758 X.getOpcode() == ISD::SIGN_EXTEND_INREG && 759 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) { 760 SDNode *SRAIW = 761 CurDAG->getMachineNode(RISCV::SRAIW, DL, XLenVT, X.getOperand(0), 762 CurDAG->getTargetConstant(31, DL, XLenVT)); 763 SDNode *SRLIW = CurDAG->getMachineNode( 764 RISCV::SRLIW, DL, XLenVT, SDValue(SRAIW, 0), 765 CurDAG->getTargetConstant(C3 - 32, DL, XLenVT)); 766 ReplaceNode(Node, SRLIW); 767 return; 768 } 769 770 // (srli (slli x, c3-c2), c3). 771 if (OneUseOrZExtW && !IsANDI) { 772 SDNode *SLLI = CurDAG->getMachineNode( 773 RISCV::SLLI, DL, XLenVT, X, 774 CurDAG->getTargetConstant(C3 - C2, DL, XLenVT)); 775 SDNode *SRLI = 776 CurDAG->getMachineNode(RISCV::SRLI, DL, XLenVT, SDValue(SLLI, 0), 777 CurDAG->getTargetConstant(C3, DL, XLenVT)); 778 ReplaceNode(Node, SRLI); 779 return; 780 } 781 } 782 } 783 784 // Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask 785 // shifted by c2 bits with c3 leading zeros. 786 if (LeftShift && isShiftedMask_64(C1)) { 787 uint64_t C3 = XLen - (64 - countLeadingZeros(C1)); 788 789 if (C2 + C3 < XLen && 790 C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + C3)) << C2)) { 791 // Use slli.uw when possible. 792 if ((XLen - (C2 + C3)) == 32 && Subtarget->hasStdExtZba()) { 793 SDNode *SLLI_UW = 794 CurDAG->getMachineNode(RISCV::SLLI_UW, DL, XLenVT, X, 795 CurDAG->getTargetConstant(C2, DL, XLenVT)); 796 ReplaceNode(Node, SLLI_UW); 797 return; 798 } 799 800 // (srli (slli c2+c3), c3) 801 if (OneUseOrZExtW && !IsANDI) { 802 SDNode *SLLI = CurDAG->getMachineNode( 803 RISCV::SLLI, DL, XLenVT, X, 804 CurDAG->getTargetConstant(C2 + C3, DL, XLenVT)); 805 SDNode *SRLI = 806 CurDAG->getMachineNode(RISCV::SRLI, DL, XLenVT, SDValue(SLLI, 0), 807 CurDAG->getTargetConstant(C3, DL, XLenVT)); 808 ReplaceNode(Node, SRLI); 809 return; 810 } 811 } 812 } 813 814 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a 815 // shifted mask with c2 leading zeros and c3 trailing zeros. 816 if (!LeftShift && isShiftedMask_64(C1)) { 817 uint64_t Leading = XLen - (64 - countLeadingZeros(C1)); 818 uint64_t C3 = countTrailingZeros(C1); 819 if (Leading == C2 && C2 + C3 < XLen && OneUseOrZExtW && !IsANDI) { 820 SDNode *SRLI = CurDAG->getMachineNode( 821 RISCV::SRLI, DL, XLenVT, X, 822 CurDAG->getTargetConstant(C2 + C3, DL, XLenVT)); 823 SDNode *SLLI = 824 CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLI, 0), 825 CurDAG->getTargetConstant(C3, DL, XLenVT)); 826 ReplaceNode(Node, SLLI); 827 return; 828 } 829 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI. 830 if (Leading > 32 && (Leading - 32) == C2 && C2 + C3 < 32 && 831 OneUseOrZExtW && !IsANDI) { 832 SDNode *SRLIW = CurDAG->getMachineNode( 833 RISCV::SRLIW, DL, XLenVT, X, 834 CurDAG->getTargetConstant(C2 + C3, DL, XLenVT)); 835 SDNode *SLLI = 836 CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLIW, 0), 837 CurDAG->getTargetConstant(C3, DL, XLenVT)); 838 ReplaceNode(Node, SLLI); 839 return; 840 } 841 } 842 843 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a 844 // shifted mask with no leading zeros and c3 trailing zeros. 845 if (LeftShift && isShiftedMask_64(C1)) { 846 uint64_t Leading = XLen - (64 - countLeadingZeros(C1)); 847 uint64_t C3 = countTrailingZeros(C1); 848 if (Leading == 0 && C2 < C3 && OneUseOrZExtW && !IsANDI) { 849 SDNode *SRLI = CurDAG->getMachineNode( 850 RISCV::SRLI, DL, XLenVT, X, 851 CurDAG->getTargetConstant(C3 - C2, DL, XLenVT)); 852 SDNode *SLLI = 853 CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLI, 0), 854 CurDAG->getTargetConstant(C3, DL, XLenVT)); 855 ReplaceNode(Node, SLLI); 856 return; 857 } 858 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI. 859 if (C2 < C3 && Leading + C2 == 32 && OneUseOrZExtW && !IsANDI) { 860 SDNode *SRLIW = CurDAG->getMachineNode( 861 RISCV::SRLIW, DL, XLenVT, X, 862 CurDAG->getTargetConstant(C3 - C2, DL, XLenVT)); 863 SDNode *SLLI = 864 CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLIW, 0), 865 CurDAG->getTargetConstant(C3, DL, XLenVT)); 866 ReplaceNode(Node, SLLI); 867 return; 868 } 869 } 870 871 break; 872 } 873 case ISD::MUL: { 874 // Special case for calculating (mul (and X, C2), C1) where the full product 875 // fits in XLen bits. We can shift X left by the number of leading zeros in 876 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final 877 // product has XLen trailing zeros, putting it in the output of MULHU. This 878 // can avoid materializing a constant in a register for C2. 879 880 // RHS should be a constant. 881 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 882 if (!N1C || !N1C->hasOneUse()) 883 break; 884 885 // LHS should be an AND with constant. 886 SDValue N0 = Node->getOperand(0); 887 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1))) 888 break; 889 890 uint64_t C2 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); 891 892 // Constant should be a mask. 893 if (!isMask_64(C2)) 894 break; 895 896 // This should be the only use of the AND unless we will use 897 // (SRLI (SLLI X, 32), 32). We don't use a shift pair for other AND 898 // constants. 899 if (!N0.hasOneUse() && C2 != UINT64_C(0xFFFFFFFF)) 900 break; 901 902 // If this can be an ANDI, ZEXT.H or ZEXT.W we don't need to do this 903 // optimization. 904 if (isInt<12>(C2) || 905 (C2 == UINT64_C(0xFFFF) && 906 (Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp())) || 907 (C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba())) 908 break; 909 910 // We need to shift left the AND input and C1 by a total of XLen bits. 911 912 // How far left do we need to shift the AND input? 913 unsigned XLen = Subtarget->getXLen(); 914 unsigned LeadingZeros = XLen - (64 - countLeadingZeros(C2)); 915 916 // The constant gets shifted by the remaining amount unless that would 917 // shift bits out. 918 uint64_t C1 = N1C->getZExtValue(); 919 unsigned ConstantShift = XLen - LeadingZeros; 920 if (ConstantShift > (XLen - (64 - countLeadingZeros(C1)))) 921 break; 922 923 uint64_t ShiftedC1 = C1 << ConstantShift; 924 // If this RV32, we need to sign extend the constant. 925 if (XLen == 32) 926 ShiftedC1 = SignExtend64(ShiftedC1, 32); 927 928 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))). 929 SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget); 930 SDNode *SLLI = 931 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0), 932 CurDAG->getTargetConstant(LeadingZeros, DL, VT)); 933 SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT, 934 SDValue(SLLI, 0), SDValue(Imm, 0)); 935 ReplaceNode(Node, MULHU); 936 return; 937 } 938 case ISD::INTRINSIC_WO_CHAIN: { 939 unsigned IntNo = Node->getConstantOperandVal(0); 940 switch (IntNo) { 941 // By default we do not custom select any intrinsic. 942 default: 943 break; 944 case Intrinsic::riscv_vmsgeu: 945 case Intrinsic::riscv_vmsge: { 946 SDValue Src1 = Node->getOperand(1); 947 SDValue Src2 = Node->getOperand(2); 948 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu; 949 bool IsCmpUnsignedZero = false; 950 // Only custom select scalar second operand. 951 if (Src2.getValueType() != XLenVT) 952 break; 953 // Small constants are handled with patterns. 954 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) { 955 int64_t CVal = C->getSExtValue(); 956 if (CVal >= -15 && CVal <= 16) { 957 if (!IsUnsigned || CVal != 0) 958 break; 959 IsCmpUnsignedZero = true; 960 } 961 } 962 MVT Src1VT = Src1.getSimpleValueType(); 963 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode; 964 switch (RISCVTargetLowering::getLMUL(Src1VT)) { 965 default: 966 llvm_unreachable("Unexpected LMUL!"); 967 #define CASE_VMSLT_VMNAND_VMSET_OPCODES(lmulenum, suffix, suffix_b) \ 968 case RISCVII::VLMUL::lmulenum: \ 969 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \ 970 : RISCV::PseudoVMSLT_VX_##suffix; \ 971 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \ 972 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b; \ 973 break; 974 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F8, MF8, B1) 975 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F4, MF4, B2) 976 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F2, MF2, B4) 977 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_1, M1, B8) 978 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_2, M2, B16) 979 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_4, M4, B32) 980 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_8, M8, B64) 981 #undef CASE_VMSLT_VMNAND_VMSET_OPCODES 982 } 983 SDValue SEW = CurDAG->getTargetConstant( 984 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT); 985 SDValue VL; 986 selectVLOp(Node->getOperand(3), VL); 987 988 // If vmsgeu with 0 immediate, expand it to vmset. 989 if (IsCmpUnsignedZero) { 990 ReplaceNode(Node, CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW)); 991 return; 992 } 993 994 // Expand to 995 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd 996 SDValue Cmp = SDValue( 997 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}), 998 0); 999 ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT, 1000 {Cmp, Cmp, VL, SEW})); 1001 return; 1002 } 1003 case Intrinsic::riscv_vmsgeu_mask: 1004 case Intrinsic::riscv_vmsge_mask: { 1005 SDValue Src1 = Node->getOperand(2); 1006 SDValue Src2 = Node->getOperand(3); 1007 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask; 1008 bool IsCmpUnsignedZero = false; 1009 // Only custom select scalar second operand. 1010 if (Src2.getValueType() != XLenVT) 1011 break; 1012 // Small constants are handled with patterns. 1013 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) { 1014 int64_t CVal = C->getSExtValue(); 1015 if (CVal >= -15 && CVal <= 16) { 1016 if (!IsUnsigned || CVal != 0) 1017 break; 1018 IsCmpUnsignedZero = true; 1019 } 1020 } 1021 MVT Src1VT = Src1.getSimpleValueType(); 1022 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode, 1023 VMSetOpcode, VMANDOpcode; 1024 switch (RISCVTargetLowering::getLMUL(Src1VT)) { 1025 default: 1026 llvm_unreachable("Unexpected LMUL!"); 1027 #define CASE_VMSLT_VMSET_OPCODES(lmulenum, suffix, suffix_b) \ 1028 case RISCVII::VLMUL::lmulenum: \ 1029 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \ 1030 : RISCV::PseudoVMSLT_VX_##suffix; \ 1031 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \ 1032 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \ 1033 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b; \ 1034 break; 1035 CASE_VMSLT_VMSET_OPCODES(LMUL_F8, MF8, B1) 1036 CASE_VMSLT_VMSET_OPCODES(LMUL_F4, MF4, B2) 1037 CASE_VMSLT_VMSET_OPCODES(LMUL_F2, MF2, B4) 1038 CASE_VMSLT_VMSET_OPCODES(LMUL_1, M1, B8) 1039 CASE_VMSLT_VMSET_OPCODES(LMUL_2, M2, B16) 1040 CASE_VMSLT_VMSET_OPCODES(LMUL_4, M4, B32) 1041 CASE_VMSLT_VMSET_OPCODES(LMUL_8, M8, B64) 1042 #undef CASE_VMSLT_VMSET_OPCODES 1043 } 1044 // Mask operations use the LMUL from the mask type. 1045 switch (RISCVTargetLowering::getLMUL(VT)) { 1046 default: 1047 llvm_unreachable("Unexpected LMUL!"); 1048 #define CASE_VMXOR_VMANDN_VMAND_OPCODES(lmulenum, suffix) \ 1049 case RISCVII::VLMUL::lmulenum: \ 1050 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \ 1051 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \ 1052 VMANDOpcode = RISCV::PseudoVMAND_MM_##suffix; \ 1053 break; 1054 CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_F8, MF8) 1055 CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_F4, MF4) 1056 CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_F2, MF2) 1057 CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_1, M1) 1058 CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_2, M2) 1059 CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_4, M4) 1060 CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_8, M8) 1061 #undef CASE_VMXOR_VMANDN_VMAND_OPCODES 1062 } 1063 SDValue SEW = CurDAG->getTargetConstant( 1064 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT); 1065 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT); 1066 SDValue VL; 1067 selectVLOp(Node->getOperand(5), VL); 1068 SDValue MaskedOff = Node->getOperand(1); 1069 SDValue Mask = Node->getOperand(4); 1070 1071 // If vmsgeu_mask with 0 immediate, expand it to {vmset, vmand}. 1072 if (IsCmpUnsignedZero) { 1073 SDValue VMSet = 1074 SDValue(CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW), 0); 1075 ReplaceNode(Node, CurDAG->getMachineNode(VMANDOpcode, DL, VT, 1076 {Mask, VMSet, VL, MaskSEW})); 1077 return; 1078 } 1079 1080 // If the MaskedOff value and the Mask are the same value use 1081 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt 1082 // This avoids needing to copy v0 to vd before starting the next sequence. 1083 if (Mask == MaskedOff) { 1084 SDValue Cmp = SDValue( 1085 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}), 1086 0); 1087 ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT, 1088 {Mask, Cmp, VL, MaskSEW})); 1089 return; 1090 } 1091 1092 // Mask needs to be copied to V0. 1093 SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, 1094 RISCV::V0, Mask, SDValue()); 1095 SDValue Glue = Chain.getValue(1); 1096 SDValue V0 = CurDAG->getRegister(RISCV::V0, VT); 1097 1098 // Otherwise use 1099 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0 1100 SDValue Cmp = SDValue( 1101 CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT, 1102 {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}), 1103 0); 1104 ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT, 1105 {Cmp, Mask, VL, MaskSEW})); 1106 return; 1107 } 1108 case Intrinsic::riscv_vsetvli_opt: 1109 case Intrinsic::riscv_vsetvlimax_opt: 1110 return selectVSETVLI(Node); 1111 } 1112 break; 1113 } 1114 case ISD::INTRINSIC_W_CHAIN: { 1115 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 1116 switch (IntNo) { 1117 // By default we do not custom select any intrinsic. 1118 default: 1119 break; 1120 case Intrinsic::riscv_vsetvli: 1121 case Intrinsic::riscv_vsetvlimax: 1122 return selectVSETVLI(Node); 1123 case Intrinsic::riscv_vlseg2: 1124 case Intrinsic::riscv_vlseg3: 1125 case Intrinsic::riscv_vlseg4: 1126 case Intrinsic::riscv_vlseg5: 1127 case Intrinsic::riscv_vlseg6: 1128 case Intrinsic::riscv_vlseg7: 1129 case Intrinsic::riscv_vlseg8: { 1130 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false); 1131 return; 1132 } 1133 case Intrinsic::riscv_vlseg2_mask: 1134 case Intrinsic::riscv_vlseg3_mask: 1135 case Intrinsic::riscv_vlseg4_mask: 1136 case Intrinsic::riscv_vlseg5_mask: 1137 case Intrinsic::riscv_vlseg6_mask: 1138 case Intrinsic::riscv_vlseg7_mask: 1139 case Intrinsic::riscv_vlseg8_mask: { 1140 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false); 1141 return; 1142 } 1143 case Intrinsic::riscv_vlsseg2: 1144 case Intrinsic::riscv_vlsseg3: 1145 case Intrinsic::riscv_vlsseg4: 1146 case Intrinsic::riscv_vlsseg5: 1147 case Intrinsic::riscv_vlsseg6: 1148 case Intrinsic::riscv_vlsseg7: 1149 case Intrinsic::riscv_vlsseg8: { 1150 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true); 1151 return; 1152 } 1153 case Intrinsic::riscv_vlsseg2_mask: 1154 case Intrinsic::riscv_vlsseg3_mask: 1155 case Intrinsic::riscv_vlsseg4_mask: 1156 case Intrinsic::riscv_vlsseg5_mask: 1157 case Intrinsic::riscv_vlsseg6_mask: 1158 case Intrinsic::riscv_vlsseg7_mask: 1159 case Intrinsic::riscv_vlsseg8_mask: { 1160 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true); 1161 return; 1162 } 1163 case Intrinsic::riscv_vloxseg2: 1164 case Intrinsic::riscv_vloxseg3: 1165 case Intrinsic::riscv_vloxseg4: 1166 case Intrinsic::riscv_vloxseg5: 1167 case Intrinsic::riscv_vloxseg6: 1168 case Intrinsic::riscv_vloxseg7: 1169 case Intrinsic::riscv_vloxseg8: 1170 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true); 1171 return; 1172 case Intrinsic::riscv_vluxseg2: 1173 case Intrinsic::riscv_vluxseg3: 1174 case Intrinsic::riscv_vluxseg4: 1175 case Intrinsic::riscv_vluxseg5: 1176 case Intrinsic::riscv_vluxseg6: 1177 case Intrinsic::riscv_vluxseg7: 1178 case Intrinsic::riscv_vluxseg8: 1179 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false); 1180 return; 1181 case Intrinsic::riscv_vloxseg2_mask: 1182 case Intrinsic::riscv_vloxseg3_mask: 1183 case Intrinsic::riscv_vloxseg4_mask: 1184 case Intrinsic::riscv_vloxseg5_mask: 1185 case Intrinsic::riscv_vloxseg6_mask: 1186 case Intrinsic::riscv_vloxseg7_mask: 1187 case Intrinsic::riscv_vloxseg8_mask: 1188 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true); 1189 return; 1190 case Intrinsic::riscv_vluxseg2_mask: 1191 case Intrinsic::riscv_vluxseg3_mask: 1192 case Intrinsic::riscv_vluxseg4_mask: 1193 case Intrinsic::riscv_vluxseg5_mask: 1194 case Intrinsic::riscv_vluxseg6_mask: 1195 case Intrinsic::riscv_vluxseg7_mask: 1196 case Intrinsic::riscv_vluxseg8_mask: 1197 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false); 1198 return; 1199 case Intrinsic::riscv_vlseg8ff: 1200 case Intrinsic::riscv_vlseg7ff: 1201 case Intrinsic::riscv_vlseg6ff: 1202 case Intrinsic::riscv_vlseg5ff: 1203 case Intrinsic::riscv_vlseg4ff: 1204 case Intrinsic::riscv_vlseg3ff: 1205 case Intrinsic::riscv_vlseg2ff: { 1206 selectVLSEGFF(Node, /*IsMasked*/ false); 1207 return; 1208 } 1209 case Intrinsic::riscv_vlseg8ff_mask: 1210 case Intrinsic::riscv_vlseg7ff_mask: 1211 case Intrinsic::riscv_vlseg6ff_mask: 1212 case Intrinsic::riscv_vlseg5ff_mask: 1213 case Intrinsic::riscv_vlseg4ff_mask: 1214 case Intrinsic::riscv_vlseg3ff_mask: 1215 case Intrinsic::riscv_vlseg2ff_mask: { 1216 selectVLSEGFF(Node, /*IsMasked*/ true); 1217 return; 1218 } 1219 case Intrinsic::riscv_vloxei: 1220 case Intrinsic::riscv_vloxei_mask: 1221 case Intrinsic::riscv_vluxei: 1222 case Intrinsic::riscv_vluxei_mask: { 1223 bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask || 1224 IntNo == Intrinsic::riscv_vluxei_mask; 1225 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei || 1226 IntNo == Intrinsic::riscv_vloxei_mask; 1227 1228 MVT VT = Node->getSimpleValueType(0); 1229 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1230 1231 unsigned CurOp = 2; 1232 // Masked intrinsic only have TU version pseduo instructions. 1233 bool IsTU = IsMasked || (!IsMasked && !Node->getOperand(CurOp).isUndef()); 1234 SmallVector<SDValue, 8> Operands; 1235 if (IsTU) 1236 Operands.push_back(Node->getOperand(CurOp++)); 1237 else 1238 // Skip the undef passthru operand for nomask TA version pseudo 1239 CurOp++; 1240 1241 MVT IndexVT; 1242 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 1243 /*IsStridedOrIndexed*/ true, Operands, 1244 /*IsLoad=*/true, &IndexVT); 1245 1246 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 1247 "Element count mismatch"); 1248 1249 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1250 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 1251 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 1252 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 1253 report_fatal_error("The V extension does not support EEW=64 for index " 1254 "values when XLEN=32"); 1255 } 1256 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo( 1257 IsMasked, IsTU, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), 1258 static_cast<unsigned>(IndexLMUL)); 1259 MachineSDNode *Load = 1260 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1261 1262 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1263 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 1264 1265 ReplaceNode(Node, Load); 1266 return; 1267 } 1268 case Intrinsic::riscv_vlm: 1269 case Intrinsic::riscv_vle: 1270 case Intrinsic::riscv_vle_mask: 1271 case Intrinsic::riscv_vlse: 1272 case Intrinsic::riscv_vlse_mask: { 1273 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask || 1274 IntNo == Intrinsic::riscv_vlse_mask; 1275 bool IsStrided = 1276 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask; 1277 1278 MVT VT = Node->getSimpleValueType(0); 1279 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1280 1281 unsigned CurOp = 2; 1282 // The riscv_vlm intrinsic are always tail agnostic and no passthru operand. 1283 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm; 1284 // Masked intrinsic only have TU version pseduo instructions. 1285 bool IsTU = 1286 HasPassthruOperand && 1287 ((!IsMasked && !Node->getOperand(CurOp).isUndef()) || IsMasked); 1288 SmallVector<SDValue, 8> Operands; 1289 if (IsTU) 1290 Operands.push_back(Node->getOperand(CurOp++)); 1291 else if (HasPassthruOperand) 1292 // Skip the undef passthru operand for nomask TA version pseudo 1293 CurOp++; 1294 1295 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 1296 Operands, /*IsLoad=*/true); 1297 1298 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1299 const RISCV::VLEPseudo *P = 1300 RISCV::getVLEPseudo(IsMasked, IsTU, IsStrided, /*FF*/ false, Log2SEW, 1301 static_cast<unsigned>(LMUL)); 1302 MachineSDNode *Load = 1303 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1304 1305 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1306 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 1307 1308 ReplaceNode(Node, Load); 1309 return; 1310 } 1311 case Intrinsic::riscv_vleff: 1312 case Intrinsic::riscv_vleff_mask: { 1313 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask; 1314 1315 MVT VT = Node->getSimpleValueType(0); 1316 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1317 1318 unsigned CurOp = 2; 1319 // Masked intrinsic only have TU version pseduo instructions. 1320 bool IsTU = IsMasked || (!IsMasked && !Node->getOperand(CurOp).isUndef()); 1321 SmallVector<SDValue, 7> Operands; 1322 if (IsTU) 1323 Operands.push_back(Node->getOperand(CurOp++)); 1324 else 1325 // Skip the undef passthru operand for nomask TA version pseudo 1326 CurOp++; 1327 1328 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 1329 /*IsStridedOrIndexed*/ false, Operands, 1330 /*IsLoad=*/true); 1331 1332 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1333 const RISCV::VLEPseudo *P = 1334 RISCV::getVLEPseudo(IsMasked, IsTU, /*Strided*/ false, /*FF*/ true, 1335 Log2SEW, static_cast<unsigned>(LMUL)); 1336 MachineSDNode *Load = 1337 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), 1338 MVT::Other, MVT::Glue, Operands); 1339 SDNode *ReadVL = CurDAG->getMachineNode(RISCV::PseudoReadVL, DL, XLenVT, 1340 /*Glue*/ SDValue(Load, 2)); 1341 1342 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1343 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 1344 1345 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0)); 1346 ReplaceUses(SDValue(Node, 1), SDValue(ReadVL, 0)); // VL 1347 ReplaceUses(SDValue(Node, 2), SDValue(Load, 1)); // Chain 1348 CurDAG->RemoveDeadNode(Node); 1349 return; 1350 } 1351 } 1352 break; 1353 } 1354 case ISD::INTRINSIC_VOID: { 1355 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 1356 switch (IntNo) { 1357 case Intrinsic::riscv_vsseg2: 1358 case Intrinsic::riscv_vsseg3: 1359 case Intrinsic::riscv_vsseg4: 1360 case Intrinsic::riscv_vsseg5: 1361 case Intrinsic::riscv_vsseg6: 1362 case Intrinsic::riscv_vsseg7: 1363 case Intrinsic::riscv_vsseg8: { 1364 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false); 1365 return; 1366 } 1367 case Intrinsic::riscv_vsseg2_mask: 1368 case Intrinsic::riscv_vsseg3_mask: 1369 case Intrinsic::riscv_vsseg4_mask: 1370 case Intrinsic::riscv_vsseg5_mask: 1371 case Intrinsic::riscv_vsseg6_mask: 1372 case Intrinsic::riscv_vsseg7_mask: 1373 case Intrinsic::riscv_vsseg8_mask: { 1374 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false); 1375 return; 1376 } 1377 case Intrinsic::riscv_vssseg2: 1378 case Intrinsic::riscv_vssseg3: 1379 case Intrinsic::riscv_vssseg4: 1380 case Intrinsic::riscv_vssseg5: 1381 case Intrinsic::riscv_vssseg6: 1382 case Intrinsic::riscv_vssseg7: 1383 case Intrinsic::riscv_vssseg8: { 1384 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true); 1385 return; 1386 } 1387 case Intrinsic::riscv_vssseg2_mask: 1388 case Intrinsic::riscv_vssseg3_mask: 1389 case Intrinsic::riscv_vssseg4_mask: 1390 case Intrinsic::riscv_vssseg5_mask: 1391 case Intrinsic::riscv_vssseg6_mask: 1392 case Intrinsic::riscv_vssseg7_mask: 1393 case Intrinsic::riscv_vssseg8_mask: { 1394 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true); 1395 return; 1396 } 1397 case Intrinsic::riscv_vsoxseg2: 1398 case Intrinsic::riscv_vsoxseg3: 1399 case Intrinsic::riscv_vsoxseg4: 1400 case Intrinsic::riscv_vsoxseg5: 1401 case Intrinsic::riscv_vsoxseg6: 1402 case Intrinsic::riscv_vsoxseg7: 1403 case Intrinsic::riscv_vsoxseg8: 1404 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true); 1405 return; 1406 case Intrinsic::riscv_vsuxseg2: 1407 case Intrinsic::riscv_vsuxseg3: 1408 case Intrinsic::riscv_vsuxseg4: 1409 case Intrinsic::riscv_vsuxseg5: 1410 case Intrinsic::riscv_vsuxseg6: 1411 case Intrinsic::riscv_vsuxseg7: 1412 case Intrinsic::riscv_vsuxseg8: 1413 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false); 1414 return; 1415 case Intrinsic::riscv_vsoxseg2_mask: 1416 case Intrinsic::riscv_vsoxseg3_mask: 1417 case Intrinsic::riscv_vsoxseg4_mask: 1418 case Intrinsic::riscv_vsoxseg5_mask: 1419 case Intrinsic::riscv_vsoxseg6_mask: 1420 case Intrinsic::riscv_vsoxseg7_mask: 1421 case Intrinsic::riscv_vsoxseg8_mask: 1422 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true); 1423 return; 1424 case Intrinsic::riscv_vsuxseg2_mask: 1425 case Intrinsic::riscv_vsuxseg3_mask: 1426 case Intrinsic::riscv_vsuxseg4_mask: 1427 case Intrinsic::riscv_vsuxseg5_mask: 1428 case Intrinsic::riscv_vsuxseg6_mask: 1429 case Intrinsic::riscv_vsuxseg7_mask: 1430 case Intrinsic::riscv_vsuxseg8_mask: 1431 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false); 1432 return; 1433 case Intrinsic::riscv_vsoxei: 1434 case Intrinsic::riscv_vsoxei_mask: 1435 case Intrinsic::riscv_vsuxei: 1436 case Intrinsic::riscv_vsuxei_mask: { 1437 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask || 1438 IntNo == Intrinsic::riscv_vsuxei_mask; 1439 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei || 1440 IntNo == Intrinsic::riscv_vsoxei_mask; 1441 1442 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 1443 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1444 1445 unsigned CurOp = 2; 1446 SmallVector<SDValue, 8> Operands; 1447 Operands.push_back(Node->getOperand(CurOp++)); // Store value. 1448 1449 MVT IndexVT; 1450 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 1451 /*IsStridedOrIndexed*/ true, Operands, 1452 /*IsLoad=*/false, &IndexVT); 1453 1454 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 1455 "Element count mismatch"); 1456 1457 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1458 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 1459 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 1460 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 1461 report_fatal_error("The V extension does not support EEW=64 for index " 1462 "values when XLEN=32"); 1463 } 1464 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo( 1465 IsMasked, /*TU*/ false, IsOrdered, IndexLog2EEW, 1466 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL)); 1467 MachineSDNode *Store = 1468 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1469 1470 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1471 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 1472 1473 ReplaceNode(Node, Store); 1474 return; 1475 } 1476 case Intrinsic::riscv_vsm: 1477 case Intrinsic::riscv_vse: 1478 case Intrinsic::riscv_vse_mask: 1479 case Intrinsic::riscv_vsse: 1480 case Intrinsic::riscv_vsse_mask: { 1481 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask || 1482 IntNo == Intrinsic::riscv_vsse_mask; 1483 bool IsStrided = 1484 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask; 1485 1486 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 1487 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1488 1489 unsigned CurOp = 2; 1490 SmallVector<SDValue, 8> Operands; 1491 Operands.push_back(Node->getOperand(CurOp++)); // Store value. 1492 1493 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 1494 Operands); 1495 1496 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1497 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo( 1498 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL)); 1499 MachineSDNode *Store = 1500 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1501 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1502 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 1503 1504 ReplaceNode(Node, Store); 1505 return; 1506 } 1507 } 1508 break; 1509 } 1510 case ISD::BITCAST: { 1511 MVT SrcVT = Node->getOperand(0).getSimpleValueType(); 1512 // Just drop bitcasts between vectors if both are fixed or both are 1513 // scalable. 1514 if ((VT.isScalableVector() && SrcVT.isScalableVector()) || 1515 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) { 1516 ReplaceUses(SDValue(Node, 0), Node->getOperand(0)); 1517 CurDAG->RemoveDeadNode(Node); 1518 return; 1519 } 1520 break; 1521 } 1522 case ISD::INSERT_SUBVECTOR: { 1523 SDValue V = Node->getOperand(0); 1524 SDValue SubV = Node->getOperand(1); 1525 SDLoc DL(SubV); 1526 auto Idx = Node->getConstantOperandVal(2); 1527 MVT SubVecVT = SubV.getSimpleValueType(); 1528 1529 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering(); 1530 MVT SubVecContainerVT = SubVecVT; 1531 // Establish the correct scalable-vector types for any fixed-length type. 1532 if (SubVecVT.isFixedLengthVector()) 1533 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT); 1534 if (VT.isFixedLengthVector()) 1535 VT = TLI.getContainerForFixedLengthVector(VT); 1536 1537 const auto *TRI = Subtarget->getRegisterInfo(); 1538 unsigned SubRegIdx; 1539 std::tie(SubRegIdx, Idx) = 1540 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 1541 VT, SubVecContainerVT, Idx, TRI); 1542 1543 // If the Idx hasn't been completely eliminated then this is a subvector 1544 // insert which doesn't naturally align to a vector register. These must 1545 // be handled using instructions to manipulate the vector registers. 1546 if (Idx != 0) 1547 break; 1548 1549 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT); 1550 bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 || 1551 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 || 1552 SubVecLMUL == RISCVII::VLMUL::LMUL_F8; 1553 (void)IsSubVecPartReg; // Silence unused variable warning without asserts. 1554 assert((!IsSubVecPartReg || V.isUndef()) && 1555 "Expecting lowering to have created legal INSERT_SUBVECTORs when " 1556 "the subvector is smaller than a full-sized register"); 1557 1558 // If we haven't set a SubRegIdx, then we must be going between 1559 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy. 1560 if (SubRegIdx == RISCV::NoSubRegister) { 1561 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(VT); 1562 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) == 1563 InRegClassID && 1564 "Unexpected subvector extraction"); 1565 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT); 1566 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, 1567 DL, VT, SubV, RC); 1568 ReplaceNode(Node, NewNode); 1569 return; 1570 } 1571 1572 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV); 1573 ReplaceNode(Node, Insert.getNode()); 1574 return; 1575 } 1576 case ISD::EXTRACT_SUBVECTOR: { 1577 SDValue V = Node->getOperand(0); 1578 auto Idx = Node->getConstantOperandVal(1); 1579 MVT InVT = V.getSimpleValueType(); 1580 SDLoc DL(V); 1581 1582 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering(); 1583 MVT SubVecContainerVT = VT; 1584 // Establish the correct scalable-vector types for any fixed-length type. 1585 if (VT.isFixedLengthVector()) 1586 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT); 1587 if (InVT.isFixedLengthVector()) 1588 InVT = TLI.getContainerForFixedLengthVector(InVT); 1589 1590 const auto *TRI = Subtarget->getRegisterInfo(); 1591 unsigned SubRegIdx; 1592 std::tie(SubRegIdx, Idx) = 1593 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 1594 InVT, SubVecContainerVT, Idx, TRI); 1595 1596 // If the Idx hasn't been completely eliminated then this is a subvector 1597 // extract which doesn't naturally align to a vector register. These must 1598 // be handled using instructions to manipulate the vector registers. 1599 if (Idx != 0) 1600 break; 1601 1602 // If we haven't set a SubRegIdx, then we must be going between 1603 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy. 1604 if (SubRegIdx == RISCV::NoSubRegister) { 1605 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT); 1606 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) == 1607 InRegClassID && 1608 "Unexpected subvector extraction"); 1609 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT); 1610 SDNode *NewNode = 1611 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC); 1612 ReplaceNode(Node, NewNode); 1613 return; 1614 } 1615 1616 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V); 1617 ReplaceNode(Node, Extract.getNode()); 1618 return; 1619 } 1620 case ISD::SPLAT_VECTOR: 1621 case RISCVISD::VMV_S_X_VL: 1622 case RISCVISD::VFMV_S_F_VL: 1623 case RISCVISD::VMV_V_X_VL: 1624 case RISCVISD::VFMV_V_F_VL: { 1625 // Try to match splat of a scalar load to a strided load with stride of x0. 1626 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL || 1627 Node->getOpcode() == RISCVISD::VFMV_S_F_VL; 1628 if (IsScalarMove && !Node->getOperand(0).isUndef()) 1629 break; 1630 SDValue Src = IsScalarMove ? Node->getOperand(1) : Node->getOperand(0); 1631 auto *Ld = dyn_cast<LoadSDNode>(Src); 1632 if (!Ld) 1633 break; 1634 EVT MemVT = Ld->getMemoryVT(); 1635 // The memory VT should be the same size as the element type. 1636 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize()) 1637 break; 1638 if (!IsProfitableToFold(Src, Node, Node) || 1639 !IsLegalToFold(Src, Node, Node, TM.getOptLevel())) 1640 break; 1641 1642 SDValue VL; 1643 if (Node->getOpcode() == ISD::SPLAT_VECTOR) 1644 VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT); 1645 else if (IsScalarMove) { 1646 // We could deal with more VL if we update the VSETVLI insert pass to 1647 // avoid introducing more VSETVLI. 1648 if (!isOneConstant(Node->getOperand(2))) 1649 break; 1650 selectVLOp(Node->getOperand(2), VL); 1651 } else 1652 selectVLOp(Node->getOperand(1), VL); 1653 1654 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1655 SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT); 1656 1657 SDValue Operands[] = {Ld->getBasePtr(), 1658 CurDAG->getRegister(RISCV::X0, XLenVT), VL, SEW, 1659 Ld->getChain()}; 1660 1661 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1662 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo( 1663 /*IsMasked*/ false, /*IsTU*/ false, /*IsStrided*/ true, /*FF*/ false, 1664 Log2SEW, static_cast<unsigned>(LMUL)); 1665 MachineSDNode *Load = 1666 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1667 1668 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1669 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 1670 1671 ReplaceNode(Node, Load); 1672 return; 1673 } 1674 } 1675 1676 // Select the default instruction. 1677 SelectCode(Node); 1678 } 1679 1680 bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand( 1681 const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { 1682 switch (ConstraintID) { 1683 case InlineAsm::Constraint_m: 1684 // We just support simple memory operands that have a single address 1685 // operand and need no special handling. 1686 OutOps.push_back(Op); 1687 return false; 1688 case InlineAsm::Constraint_A: 1689 OutOps.push_back(Op); 1690 return false; 1691 default: 1692 break; 1693 } 1694 1695 return true; 1696 } 1697 1698 bool RISCVDAGToDAGISel::SelectAddrFI(SDValue Addr, SDValue &Base) { 1699 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { 1700 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT()); 1701 return true; 1702 } 1703 return false; 1704 } 1705 1706 bool RISCVDAGToDAGISel::SelectBaseAddr(SDValue Addr, SDValue &Base) { 1707 // If this is FrameIndex, select it directly. Otherwise just let it get 1708 // selected to a register independently. 1709 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) 1710 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT()); 1711 else 1712 Base = Addr; 1713 return true; 1714 } 1715 1716 bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth, 1717 SDValue &ShAmt) { 1718 // Shift instructions on RISCV only read the lower 5 or 6 bits of the shift 1719 // amount. If there is an AND on the shift amount, we can bypass it if it 1720 // doesn't affect any of those bits. 1721 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) { 1722 const APInt &AndMask = N->getConstantOperandAPInt(1); 1723 1724 // Since the max shift amount is a power of 2 we can subtract 1 to make a 1725 // mask that covers the bits needed to represent all shift amounts. 1726 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!"); 1727 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1); 1728 1729 if (ShMask.isSubsetOf(AndMask)) { 1730 ShAmt = N.getOperand(0); 1731 return true; 1732 } 1733 1734 // SimplifyDemandedBits may have optimized the mask so try restoring any 1735 // bits that are known zero. 1736 KnownBits Known = CurDAG->computeKnownBits(N->getOperand(0)); 1737 if (ShMask.isSubsetOf(AndMask | Known.Zero)) { 1738 ShAmt = N.getOperand(0); 1739 return true; 1740 } 1741 } else if (N.getOpcode() == ISD::SUB && 1742 isa<ConstantSDNode>(N.getOperand(0))) { 1743 uint64_t Imm = N.getConstantOperandVal(0); 1744 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to 1745 // generate a NEG instead of a SUB of a constant. 1746 if (Imm != 0 && Imm % ShiftWidth == 0) { 1747 SDLoc DL(N); 1748 EVT VT = N.getValueType(); 1749 SDValue Zero = 1750 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT); 1751 unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB; 1752 MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero, 1753 N.getOperand(1)); 1754 ShAmt = SDValue(Neg, 0); 1755 return true; 1756 } 1757 } 1758 1759 ShAmt = N; 1760 return true; 1761 } 1762 1763 bool RISCVDAGToDAGISel::selectSExti32(SDValue N, SDValue &Val) { 1764 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG && 1765 cast<VTSDNode>(N.getOperand(1))->getVT() == MVT::i32) { 1766 Val = N.getOperand(0); 1767 return true; 1768 } 1769 MVT VT = N.getSimpleValueType(); 1770 if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - 32)) { 1771 Val = N; 1772 return true; 1773 } 1774 1775 return false; 1776 } 1777 1778 bool RISCVDAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) { 1779 if (N.getOpcode() == ISD::AND) { 1780 auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); 1781 if (C && C->getZExtValue() == UINT64_C(0xFFFFFFFF)) { 1782 Val = N.getOperand(0); 1783 return true; 1784 } 1785 } 1786 MVT VT = N.getSimpleValueType(); 1787 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), 32); 1788 if (CurDAG->MaskedValueIsZero(N, Mask)) { 1789 Val = N; 1790 return true; 1791 } 1792 1793 return false; 1794 } 1795 1796 // Return true if all users of this SDNode* only consume the lower \p Bits. 1797 // This can be used to form W instructions for add/sub/mul/shl even when the 1798 // root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if 1799 // SimplifyDemandedBits has made it so some users see a sext_inreg and some 1800 // don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave 1801 // the add/sub/mul/shl to become non-W instructions. By checking the users we 1802 // may be able to use a W instruction and CSE with the other instruction if 1803 // this has happened. We could try to detect that the CSE opportunity exists 1804 // before doing this, but that would be more complicated. 1805 // TODO: Does this need to look through AND/OR/XOR to their users to find more 1806 // opportunities. 1807 bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const { 1808 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB || 1809 Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL || 1810 Node->getOpcode() == ISD::SRL || 1811 Node->getOpcode() == ISD::SIGN_EXTEND_INREG || 1812 isa<ConstantSDNode>(Node)) && 1813 "Unexpected opcode"); 1814 1815 for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) { 1816 SDNode *User = *UI; 1817 // Users of this node should have already been instruction selected 1818 if (!User->isMachineOpcode()) 1819 return false; 1820 1821 // TODO: Add more opcodes? 1822 switch (User->getMachineOpcode()) { 1823 default: 1824 return false; 1825 case RISCV::ADDW: 1826 case RISCV::ADDIW: 1827 case RISCV::SUBW: 1828 case RISCV::MULW: 1829 case RISCV::SLLW: 1830 case RISCV::SLLIW: 1831 case RISCV::SRAW: 1832 case RISCV::SRAIW: 1833 case RISCV::SRLW: 1834 case RISCV::SRLIW: 1835 case RISCV::DIVW: 1836 case RISCV::DIVUW: 1837 case RISCV::REMW: 1838 case RISCV::REMUW: 1839 case RISCV::ROLW: 1840 case RISCV::RORW: 1841 case RISCV::RORIW: 1842 case RISCV::CLZW: 1843 case RISCV::CTZW: 1844 case RISCV::CPOPW: 1845 case RISCV::SLLI_UW: 1846 case RISCV::FCVT_H_W: 1847 case RISCV::FCVT_H_WU: 1848 case RISCV::FCVT_S_W: 1849 case RISCV::FCVT_S_WU: 1850 case RISCV::FCVT_D_W: 1851 case RISCV::FCVT_D_WU: 1852 if (Bits < 32) 1853 return false; 1854 break; 1855 case RISCV::SLLI: 1856 // SLLI only uses the lower (XLen - ShAmt) bits. 1857 if (Bits < Subtarget->getXLen() - User->getConstantOperandVal(1)) 1858 return false; 1859 break; 1860 case RISCV::ANDI: 1861 if (Bits < (64 - countLeadingZeros(User->getConstantOperandVal(1)))) 1862 return false; 1863 break; 1864 case RISCV::SEXT_B: 1865 if (Bits < 8) 1866 return false; 1867 break; 1868 case RISCV::SEXT_H: 1869 case RISCV::ZEXT_H_RV32: 1870 case RISCV::ZEXT_H_RV64: 1871 if (Bits < 16) 1872 return false; 1873 break; 1874 case RISCV::ADD_UW: 1875 case RISCV::SH1ADD_UW: 1876 case RISCV::SH2ADD_UW: 1877 case RISCV::SH3ADD_UW: 1878 // The first operand to add.uw/shXadd.uw is implicitly zero extended from 1879 // 32 bits. 1880 if (UI.getOperandNo() != 0 || Bits < 32) 1881 return false; 1882 break; 1883 case RISCV::SB: 1884 if (UI.getOperandNo() != 0 || Bits < 8) 1885 return false; 1886 break; 1887 case RISCV::SH: 1888 if (UI.getOperandNo() != 0 || Bits < 16) 1889 return false; 1890 break; 1891 case RISCV::SW: 1892 if (UI.getOperandNo() != 0 || Bits < 32) 1893 return false; 1894 break; 1895 } 1896 } 1897 1898 return true; 1899 } 1900 1901 // Select VL as a 5 bit immediate or a value that will become a register. This 1902 // allows us to choose betwen VSETIVLI or VSETVLI later. 1903 bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) { 1904 auto *C = dyn_cast<ConstantSDNode>(N); 1905 if (C && isUInt<5>(C->getZExtValue())) 1906 VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N), 1907 N->getValueType(0)); 1908 else if (C && C->isAllOnesValue() && C->getOpcode() != ISD::TargetConstant) 1909 VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N), 1910 N->getValueType(0)); 1911 else 1912 VL = N; 1913 1914 return true; 1915 } 1916 1917 bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) { 1918 if (N.getOpcode() != RISCVISD::VMV_V_X_VL) 1919 return false; 1920 SplatVal = N.getOperand(0); 1921 return true; 1922 } 1923 1924 using ValidateFn = bool (*)(int64_t); 1925 1926 static bool selectVSplatSimmHelper(SDValue N, SDValue &SplatVal, 1927 SelectionDAG &DAG, 1928 const RISCVSubtarget &Subtarget, 1929 ValidateFn ValidateImm) { 1930 if (N.getOpcode() != RISCVISD::VMV_V_X_VL || 1931 !isa<ConstantSDNode>(N.getOperand(0))) 1932 return false; 1933 1934 int64_t SplatImm = cast<ConstantSDNode>(N.getOperand(0))->getSExtValue(); 1935 1936 // The semantics of RISCVISD::VMV_V_X_VL is that when the operand 1937 // type is wider than the resulting vector element type: an implicit 1938 // truncation first takes place. Therefore, perform a manual 1939 // truncation/sign-extension in order to ignore any truncated bits and catch 1940 // any zero-extended immediate. 1941 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first 1942 // sign-extending to (XLenVT -1). 1943 MVT XLenVT = Subtarget.getXLenVT(); 1944 assert(XLenVT == N.getOperand(0).getSimpleValueType() && 1945 "Unexpected splat operand type"); 1946 MVT EltVT = N.getSimpleValueType().getVectorElementType(); 1947 if (EltVT.bitsLT(XLenVT)) 1948 SplatImm = SignExtend64(SplatImm, EltVT.getSizeInBits()); 1949 1950 if (!ValidateImm(SplatImm)) 1951 return false; 1952 1953 SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), XLenVT); 1954 return true; 1955 } 1956 1957 bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) { 1958 return selectVSplatSimmHelper(N, SplatVal, *CurDAG, *Subtarget, 1959 [](int64_t Imm) { return isInt<5>(Imm); }); 1960 } 1961 1962 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) { 1963 return selectVSplatSimmHelper( 1964 N, SplatVal, *CurDAG, *Subtarget, 1965 [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; }); 1966 } 1967 1968 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N, 1969 SDValue &SplatVal) { 1970 return selectVSplatSimmHelper( 1971 N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) { 1972 return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16); 1973 }); 1974 } 1975 1976 bool RISCVDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &SplatVal) { 1977 if (N.getOpcode() != RISCVISD::VMV_V_X_VL || 1978 !isa<ConstantSDNode>(N.getOperand(0))) 1979 return false; 1980 1981 int64_t SplatImm = cast<ConstantSDNode>(N.getOperand(0))->getSExtValue(); 1982 1983 if (!isUInt<5>(SplatImm)) 1984 return false; 1985 1986 SplatVal = 1987 CurDAG->getTargetConstant(SplatImm, SDLoc(N), Subtarget->getXLenVT()); 1988 1989 return true; 1990 } 1991 1992 bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width, 1993 SDValue &Imm) { 1994 if (auto *C = dyn_cast<ConstantSDNode>(N)) { 1995 int64_t ImmVal = SignExtend64(C->getSExtValue(), Width); 1996 1997 if (!isInt<5>(ImmVal)) 1998 return false; 1999 2000 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT()); 2001 return true; 2002 } 2003 2004 return false; 2005 } 2006 2007 // Merge an ADDI into the offset of a load/store instruction where possible. 2008 // (load (addi base, off1), off2) -> (load base, off1+off2) 2009 // (store val, (addi base, off1), off2) -> (store val, base, off1+off2) 2010 // This is possible when off1+off2 fits a 12-bit immediate. 2011 bool RISCVDAGToDAGISel::doPeepholeLoadStoreADDI(SDNode *N) { 2012 int OffsetOpIdx; 2013 int BaseOpIdx; 2014 2015 // Only attempt this optimisation for I-type loads and S-type stores. 2016 switch (N->getMachineOpcode()) { 2017 default: 2018 return false; 2019 case RISCV::LB: 2020 case RISCV::LH: 2021 case RISCV::LW: 2022 case RISCV::LBU: 2023 case RISCV::LHU: 2024 case RISCV::LWU: 2025 case RISCV::LD: 2026 case RISCV::FLH: 2027 case RISCV::FLW: 2028 case RISCV::FLD: 2029 BaseOpIdx = 0; 2030 OffsetOpIdx = 1; 2031 break; 2032 case RISCV::SB: 2033 case RISCV::SH: 2034 case RISCV::SW: 2035 case RISCV::SD: 2036 case RISCV::FSH: 2037 case RISCV::FSW: 2038 case RISCV::FSD: 2039 BaseOpIdx = 1; 2040 OffsetOpIdx = 2; 2041 break; 2042 } 2043 2044 if (!isa<ConstantSDNode>(N->getOperand(OffsetOpIdx))) 2045 return false; 2046 2047 SDValue Base = N->getOperand(BaseOpIdx); 2048 2049 // If the base is an ADDI, we can merge it in to the load/store. 2050 if (!Base.isMachineOpcode() || Base.getMachineOpcode() != RISCV::ADDI) 2051 return false; 2052 2053 SDValue ImmOperand = Base.getOperand(1); 2054 uint64_t Offset2 = N->getConstantOperandVal(OffsetOpIdx); 2055 2056 if (auto *Const = dyn_cast<ConstantSDNode>(ImmOperand)) { 2057 int64_t Offset1 = Const->getSExtValue(); 2058 int64_t CombinedOffset = Offset1 + Offset2; 2059 if (!isInt<12>(CombinedOffset)) 2060 return false; 2061 ImmOperand = CurDAG->getTargetConstant(CombinedOffset, SDLoc(ImmOperand), 2062 ImmOperand.getValueType()); 2063 } else if (auto *GA = dyn_cast<GlobalAddressSDNode>(ImmOperand)) { 2064 // If the off1 in (addi base, off1) is a global variable's address (its 2065 // low part, really), then we can rely on the alignment of that variable 2066 // to provide a margin of safety before off1 can overflow the 12 bits. 2067 // Check if off2 falls within that margin; if so off1+off2 can't overflow. 2068 const DataLayout &DL = CurDAG->getDataLayout(); 2069 Align Alignment = GA->getGlobal()->getPointerAlignment(DL); 2070 if (Offset2 != 0 && Alignment <= Offset2) 2071 return false; 2072 int64_t Offset1 = GA->getOffset(); 2073 int64_t CombinedOffset = Offset1 + Offset2; 2074 ImmOperand = CurDAG->getTargetGlobalAddress( 2075 GA->getGlobal(), SDLoc(ImmOperand), ImmOperand.getValueType(), 2076 CombinedOffset, GA->getTargetFlags()); 2077 } else if (auto *CP = dyn_cast<ConstantPoolSDNode>(ImmOperand)) { 2078 // Ditto. 2079 Align Alignment = CP->getAlign(); 2080 if (Offset2 != 0 && Alignment <= Offset2) 2081 return false; 2082 int64_t Offset1 = CP->getOffset(); 2083 int64_t CombinedOffset = Offset1 + Offset2; 2084 ImmOperand = CurDAG->getTargetConstantPool( 2085 CP->getConstVal(), ImmOperand.getValueType(), CP->getAlign(), 2086 CombinedOffset, CP->getTargetFlags()); 2087 } else { 2088 return false; 2089 } 2090 2091 LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: "); 2092 LLVM_DEBUG(Base->dump(CurDAG)); 2093 LLVM_DEBUG(dbgs() << "\nN: "); 2094 LLVM_DEBUG(N->dump(CurDAG)); 2095 LLVM_DEBUG(dbgs() << "\n"); 2096 2097 // Modify the offset operand of the load/store. 2098 if (BaseOpIdx == 0) // Load 2099 CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand, 2100 N->getOperand(2)); 2101 else // Store 2102 CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0), 2103 ImmOperand, N->getOperand(3)); 2104 2105 return true; 2106 } 2107 2108 // Try to remove sext.w if the input is a W instruction or can be made into 2109 // a W instruction cheaply. 2110 bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) { 2111 // Look for the sext.w pattern, addiw rd, rs1, 0. 2112 if (N->getMachineOpcode() != RISCV::ADDIW || 2113 !isNullConstant(N->getOperand(1))) 2114 return false; 2115 2116 SDValue N0 = N->getOperand(0); 2117 if (!N0.isMachineOpcode()) 2118 return false; 2119 2120 switch (N0.getMachineOpcode()) { 2121 default: 2122 break; 2123 case RISCV::ADD: 2124 case RISCV::ADDI: 2125 case RISCV::SUB: 2126 case RISCV::MUL: 2127 case RISCV::SLLI: { 2128 // Convert sext.w+add/sub/mul to their W instructions. This will create 2129 // a new independent instruction. This improves latency. 2130 unsigned Opc; 2131 switch (N0.getMachineOpcode()) { 2132 default: 2133 llvm_unreachable("Unexpected opcode!"); 2134 case RISCV::ADD: Opc = RISCV::ADDW; break; 2135 case RISCV::ADDI: Opc = RISCV::ADDIW; break; 2136 case RISCV::SUB: Opc = RISCV::SUBW; break; 2137 case RISCV::MUL: Opc = RISCV::MULW; break; 2138 case RISCV::SLLI: Opc = RISCV::SLLIW; break; 2139 } 2140 2141 SDValue N00 = N0.getOperand(0); 2142 SDValue N01 = N0.getOperand(1); 2143 2144 // Shift amount needs to be uimm5. 2145 if (N0.getMachineOpcode() == RISCV::SLLI && 2146 !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue())) 2147 break; 2148 2149 SDNode *Result = 2150 CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), 2151 N00, N01); 2152 ReplaceUses(N, Result); 2153 return true; 2154 } 2155 case RISCV::ADDW: 2156 case RISCV::ADDIW: 2157 case RISCV::SUBW: 2158 case RISCV::MULW: 2159 case RISCV::SLLIW: 2160 // Result is already sign extended just remove the sext.w. 2161 // NOTE: We only handle the nodes that are selected with hasAllWUsers. 2162 ReplaceUses(N, N0.getNode()); 2163 return true; 2164 } 2165 2166 return false; 2167 } 2168 2169 // Optimize masked RVV pseudo instructions with a known all-ones mask to their 2170 // corresponding "unmasked" pseudo versions. The mask we're interested in will 2171 // take the form of a V0 physical register operand, with a glued 2172 // register-setting instruction. 2173 bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) { 2174 const RISCV::RISCVMaskedPseudoInfo *I = 2175 RISCV::getMaskedPseudoInfo(N->getMachineOpcode()); 2176 if (!I) 2177 return false; 2178 2179 unsigned MaskOpIdx = I->MaskOpIdx; 2180 2181 // Check that we're using V0 as a mask register. 2182 if (!isa<RegisterSDNode>(N->getOperand(MaskOpIdx)) || 2183 cast<RegisterSDNode>(N->getOperand(MaskOpIdx))->getReg() != RISCV::V0) 2184 return false; 2185 2186 // The glued user defines V0. 2187 const auto *Glued = N->getGluedNode(); 2188 2189 if (!Glued || Glued->getOpcode() != ISD::CopyToReg) 2190 return false; 2191 2192 // Check that we're defining V0 as a mask register. 2193 if (!isa<RegisterSDNode>(Glued->getOperand(1)) || 2194 cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0) 2195 return false; 2196 2197 // Check the instruction defining V0; it needs to be a VMSET pseudo. 2198 SDValue MaskSetter = Glued->getOperand(2); 2199 2200 const auto IsVMSet = [](unsigned Opc) { 2201 return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 || 2202 Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 || 2203 Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 || 2204 Opc == RISCV::PseudoVMSET_M_B8; 2205 }; 2206 2207 // TODO: Check that the VMSET is the expected bitwidth? The pseudo has 2208 // undefined behaviour if it's the wrong bitwidth, so we could choose to 2209 // assume that it's all-ones? Same applies to its VL. 2210 if (!MaskSetter->isMachineOpcode() || !IsVMSet(MaskSetter.getMachineOpcode())) 2211 return false; 2212 2213 // Retrieve the tail policy operand index, if any. 2214 Optional<unsigned> TailPolicyOpIdx; 2215 const RISCVInstrInfo *TII = static_cast<const RISCVInstrInfo *>( 2216 CurDAG->getSubtarget().getInstrInfo()); 2217 2218 const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode()); 2219 2220 if (RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags)) { 2221 // The last operand of the pseudo is the policy op, but we're expecting a 2222 // Glue operand last. We may also have a chain. 2223 TailPolicyOpIdx = N->getNumOperands() - 1; 2224 if (N->getOperand(*TailPolicyOpIdx).getValueType() == MVT::Glue) 2225 (*TailPolicyOpIdx)--; 2226 if (N->getOperand(*TailPolicyOpIdx).getValueType() == MVT::Other) 2227 (*TailPolicyOpIdx)--; 2228 2229 // If the policy isn't TAIL_AGNOSTIC we can't perform this optimization. 2230 if (N->getConstantOperandVal(*TailPolicyOpIdx) != RISCVII::TAIL_AGNOSTIC) 2231 return false; 2232 } 2233 2234 const MCInstrDesc &UnmaskedMCID = TII->get(I->UnmaskedPseudo); 2235 2236 // Check that we're dropping the merge operand, the mask operand, and any 2237 // policy operand when we transform to this unmasked pseudo. 2238 assert(!RISCVII::hasMergeOp(UnmaskedMCID.TSFlags) && 2239 RISCVII::hasDummyMaskOp(UnmaskedMCID.TSFlags) && 2240 !RISCVII::hasVecPolicyOp(UnmaskedMCID.TSFlags) && 2241 "Unexpected pseudo to transform to"); 2242 2243 SmallVector<SDValue, 8> Ops; 2244 // Skip the merge operand at index 0. 2245 for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) { 2246 // Skip the mask, the policy, and the Glue. 2247 SDValue Op = N->getOperand(I); 2248 if (I == MaskOpIdx || I == TailPolicyOpIdx || 2249 Op.getValueType() == MVT::Glue) 2250 continue; 2251 Ops.push_back(Op); 2252 } 2253 2254 // Transitively apply any node glued to our new node. 2255 if (auto *TGlued = Glued->getGluedNode()) 2256 Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1)); 2257 2258 SDNode *Result = 2259 CurDAG->getMachineNode(I->UnmaskedPseudo, SDLoc(N), N->getVTList(), Ops); 2260 ReplaceUses(N, Result); 2261 2262 return true; 2263 } 2264 2265 // This pass converts a legalized DAG into a RISCV-specific DAG, ready 2266 // for instruction scheduling. 2267 FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM) { 2268 return new RISCVDAGToDAGISel(TM); 2269 } 2270