1 //===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISCV ------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines an instruction selector for the RISCV target. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "RISCVISelDAGToDAG.h" 14 #include "MCTargetDesc/RISCVMCTargetDesc.h" 15 #include "MCTargetDesc/RISCVMatInt.h" 16 #include "RISCVISelLowering.h" 17 #include "RISCVMachineFunctionInfo.h" 18 #include "llvm/CodeGen/MachineFrameInfo.h" 19 #include "llvm/IR/IntrinsicsRISCV.h" 20 #include "llvm/Support/Alignment.h" 21 #include "llvm/Support/Debug.h" 22 #include "llvm/Support/KnownBits.h" 23 #include "llvm/Support/MathExtras.h" 24 #include "llvm/Support/raw_ostream.h" 25 26 using namespace llvm; 27 28 #define DEBUG_TYPE "riscv-isel" 29 30 namespace llvm { 31 namespace RISCV { 32 #define GET_RISCVVSSEGTable_IMPL 33 #define GET_RISCVVLSEGTable_IMPL 34 #define GET_RISCVVLXSEGTable_IMPL 35 #define GET_RISCVVSXSEGTable_IMPL 36 #define GET_RISCVVLETable_IMPL 37 #define GET_RISCVVSETable_IMPL 38 #define GET_RISCVVLXTable_IMPL 39 #define GET_RISCVVSXTable_IMPL 40 #include "RISCVGenSearchableTables.inc" 41 } // namespace RISCV 42 } // namespace llvm 43 44 void RISCVDAGToDAGISel::PreprocessISelDAG() { 45 for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), 46 E = CurDAG->allnodes_end(); 47 I != E;) { 48 SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues. 49 50 // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector 51 // load. Done after lowering and combining so that we have a chance to 52 // optimize this to VMV_V_X_VL when the upper bits aren't needed. 
53 if (N->getOpcode() != RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) 54 continue; 55 56 assert(N->getNumOperands() == 3 && "Unexpected number of operands"); 57 MVT VT = N->getSimpleValueType(0); 58 SDValue Lo = N->getOperand(0); 59 SDValue Hi = N->getOperand(1); 60 SDValue VL = N->getOperand(2); 61 assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() && 62 Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 && 63 "Unexpected VTs!"); 64 MachineFunction &MF = CurDAG->getMachineFunction(); 65 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>(); 66 SDLoc DL(N); 67 68 // We use the same frame index we use for moving two i32s into 64-bit FPR. 69 // This is an analogous operation. 70 int FI = FuncInfo->getMoveF64FrameIndex(MF); 71 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 72 const TargetLowering &TLI = CurDAG->getTargetLoweringInfo(); 73 SDValue StackSlot = 74 CurDAG->getFrameIndex(FI, TLI.getPointerTy(CurDAG->getDataLayout())); 75 76 SDValue Chain = CurDAG->getEntryNode(); 77 Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8)); 78 79 SDValue OffsetSlot = 80 CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), DL); 81 Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4), 82 Align(8)); 83 84 Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); 85 86 SDVTList VTs = CurDAG->getVTList({VT, MVT::Other}); 87 SDValue IntID = 88 CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64); 89 SDValue Ops[] = {Chain, IntID, StackSlot, 90 CurDAG->getRegister(RISCV::X0, MVT::i64), VL}; 91 92 SDValue Result = CurDAG->getMemIntrinsicNode( 93 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MVT::i64, MPI, Align(8), 94 MachineMemOperand::MOLoad); 95 96 // We're about to replace all uses of the SPLAT_VECTOR_SPLIT_I64 with the 97 // vlse we created. This will cause general havok on the dag because 98 // anything below the conversion could be folded into other existing nodes. 
99 // To avoid invalidating 'I', back it up to the convert node. 100 --I; 101 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); 102 103 // Now that we did that, the node is dead. Increment the iterator to the 104 // next node to process, then delete N. 105 ++I; 106 CurDAG->DeleteNode(N); 107 } 108 } 109 110 void RISCVDAGToDAGISel::PostprocessISelDAG() { 111 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); 112 113 bool MadeChange = false; 114 while (Position != CurDAG->allnodes_begin()) { 115 SDNode *N = &*--Position; 116 // Skip dead nodes and any non-machine opcodes. 117 if (N->use_empty() || !N->isMachineOpcode()) 118 continue; 119 120 MadeChange |= doPeepholeSExtW(N); 121 MadeChange |= doPeepholeLoadStoreADDI(N); 122 } 123 124 if (MadeChange) 125 CurDAG->RemoveDeadNodes(); 126 } 127 128 static SDNode *selectImmWithConstantPool(SelectionDAG *CurDAG, const SDLoc &DL, 129 const MVT VT, int64_t Imm, 130 const RISCVSubtarget &Subtarget) { 131 assert(VT == MVT::i64 && "Expecting MVT::i64"); 132 const RISCVTargetLowering *TLI = Subtarget.getTargetLowering(); 133 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(CurDAG->getConstantPool( 134 ConstantInt::get(EVT(VT).getTypeForEVT(*CurDAG->getContext()), Imm), VT)); 135 SDValue Addr = TLI->getAddr(CP, *CurDAG); 136 SDValue Offset = CurDAG->getTargetConstant(0, DL, VT); 137 // Since there is no data race, the chain can be the entry node. 
138 SDNode *Load = CurDAG->getMachineNode(RISCV::LD, DL, VT, Addr, Offset, 139 CurDAG->getEntryNode()); 140 MachineFunction &MF = CurDAG->getMachineFunction(); 141 MachineMemOperand *MemOp = MF.getMachineMemOperand( 142 MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad, 143 LLT(VT), CP->getAlign()); 144 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Load), {MemOp}); 145 return Load; 146 } 147 148 static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, 149 int64_t Imm, const RISCVSubtarget &Subtarget) { 150 MVT XLenVT = Subtarget.getXLenVT(); 151 RISCVMatInt::InstSeq Seq = 152 RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits()); 153 154 // If Imm is expensive to build, then we put it into constant pool. 155 if (Subtarget.useConstantPoolForLargeInts() && 156 Seq.size() > Subtarget.getMaxBuildIntsCost()) 157 return selectImmWithConstantPool(CurDAG, DL, VT, Imm, Subtarget); 158 159 SDNode *Result = nullptr; 160 SDValue SrcReg = CurDAG->getRegister(RISCV::X0, XLenVT); 161 for (RISCVMatInt::Inst &Inst : Seq) { 162 SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, XLenVT); 163 if (Inst.Opc == RISCV::LUI) 164 Result = CurDAG->getMachineNode(RISCV::LUI, DL, XLenVT, SDImm); 165 else if (Inst.Opc == RISCV::ADDUW) 166 Result = CurDAG->getMachineNode(RISCV::ADDUW, DL, XLenVT, SrcReg, 167 CurDAG->getRegister(RISCV::X0, XLenVT)); 168 else if (Inst.Opc == RISCV::SH1ADD || Inst.Opc == RISCV::SH2ADD || 169 Inst.Opc == RISCV::SH3ADD) 170 Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SrcReg); 171 else 172 Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SDImm); 173 174 // Only the first instruction has X0 as its source. 
175 SrcReg = SDValue(Result, 0); 176 } 177 178 return Result; 179 } 180 181 static SDValue createTupleImpl(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, 182 unsigned RegClassID, unsigned SubReg0) { 183 assert(Regs.size() >= 2 && Regs.size() <= 8); 184 185 SDLoc DL(Regs[0]); 186 SmallVector<SDValue, 8> Ops; 187 188 Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32)); 189 190 for (unsigned I = 0; I < Regs.size(); ++I) { 191 Ops.push_back(Regs[I]); 192 Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32)); 193 } 194 SDNode *N = 195 CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops); 196 return SDValue(N, 0); 197 } 198 199 static SDValue createM1Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, 200 unsigned NF) { 201 static const unsigned RegClassIDs[] = { 202 RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID, 203 RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID, 204 RISCV::VRN8M1RegClassID}; 205 206 return createTupleImpl(CurDAG, Regs, RegClassIDs[NF - 2], RISCV::sub_vrm1_0); 207 } 208 209 static SDValue createM2Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, 210 unsigned NF) { 211 static const unsigned RegClassIDs[] = {RISCV::VRN2M2RegClassID, 212 RISCV::VRN3M2RegClassID, 213 RISCV::VRN4M2RegClassID}; 214 215 return createTupleImpl(CurDAG, Regs, RegClassIDs[NF - 2], RISCV::sub_vrm2_0); 216 } 217 218 static SDValue createM4Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, 219 unsigned NF) { 220 return createTupleImpl(CurDAG, Regs, RISCV::VRN2M4RegClassID, 221 RISCV::sub_vrm4_0); 222 } 223 224 static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, 225 unsigned NF, RISCVII::VLMUL LMUL) { 226 switch (LMUL) { 227 default: 228 llvm_unreachable("Invalid LMUL."); 229 case RISCVII::VLMUL::LMUL_F8: 230 case RISCVII::VLMUL::LMUL_F4: 231 case RISCVII::VLMUL::LMUL_F2: 232 case RISCVII::VLMUL::LMUL_1: 233 return createM1Tuple(CurDAG, Regs, NF); 234 case 
RISCVII::VLMUL::LMUL_2: 235 return createM2Tuple(CurDAG, Regs, NF); 236 case RISCVII::VLMUL::LMUL_4: 237 return createM4Tuple(CurDAG, Regs, NF); 238 } 239 } 240 241 void RISCVDAGToDAGISel::addVectorLoadStoreOperands( 242 SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp, 243 bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands, 244 bool IsLoad, MVT *IndexVT) { 245 SDValue Chain = Node->getOperand(0); 246 SDValue Glue; 247 248 SDValue Base; 249 SelectBaseAddr(Node->getOperand(CurOp++), Base); 250 Operands.push_back(Base); // Base pointer. 251 252 if (IsStridedOrIndexed) { 253 Operands.push_back(Node->getOperand(CurOp++)); // Index. 254 if (IndexVT) 255 *IndexVT = Operands.back()->getSimpleValueType(0); 256 } 257 258 if (IsMasked) { 259 // Mask needs to be copied to V0. 260 SDValue Mask = Node->getOperand(CurOp++); 261 Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue()); 262 Glue = Chain.getValue(1); 263 Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType())); 264 } 265 SDValue VL; 266 selectVLOp(Node->getOperand(CurOp++), VL); 267 Operands.push_back(VL); 268 269 MVT XLenVT = Subtarget->getXLenVT(); 270 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT); 271 Operands.push_back(SEWOp); 272 273 // Masked load has the tail policy argument. 274 if (IsMasked && IsLoad) { 275 // Policy must be a constant. 276 uint64_t Policy = Node->getConstantOperandVal(CurOp++); 277 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT); 278 Operands.push_back(PolicyOp); 279 } 280 281 Operands.push_back(Chain); // Chain. 
282 if (Glue) 283 Operands.push_back(Glue); 284 } 285 286 void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked, 287 bool IsStrided) { 288 SDLoc DL(Node); 289 unsigned NF = Node->getNumValues() - 1; 290 MVT VT = Node->getSimpleValueType(0); 291 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 292 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 293 294 unsigned CurOp = 2; 295 SmallVector<SDValue, 8> Operands; 296 if (IsMasked) { 297 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp, 298 Node->op_begin() + CurOp + NF); 299 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL); 300 Operands.push_back(MaskedOff); 301 CurOp += NF; 302 } 303 304 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 305 Operands, /*IsLoad=*/true); 306 307 const RISCV::VLSEGPseudo *P = 308 RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW, 309 static_cast<unsigned>(LMUL)); 310 MachineSDNode *Load = 311 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands); 312 313 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 314 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 315 316 SDValue SuperReg = SDValue(Load, 0); 317 for (unsigned I = 0; I < NF; ++I) { 318 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I); 319 ReplaceUses(SDValue(Node, I), 320 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg)); 321 } 322 323 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); 324 CurDAG->RemoveDeadNode(Node); 325 } 326 327 void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) { 328 SDLoc DL(Node); 329 unsigned NF = Node->getNumValues() - 2; // Do not count VL and Chain. 
330 MVT VT = Node->getSimpleValueType(0); 331 MVT XLenVT = Subtarget->getXLenVT(); 332 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 333 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 334 335 unsigned CurOp = 2; 336 SmallVector<SDValue, 7> Operands; 337 if (IsMasked) { 338 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp, 339 Node->op_begin() + CurOp + NF); 340 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL); 341 Operands.push_back(MaskedOff); 342 CurOp += NF; 343 } 344 345 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 346 /*IsStridedOrIndexed*/ false, Operands, 347 /*IsLoad=*/true); 348 349 const RISCV::VLSEGPseudo *P = 350 RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true, 351 Log2SEW, static_cast<unsigned>(LMUL)); 352 MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, 353 MVT::Other, MVT::Glue, Operands); 354 SDNode *ReadVL = CurDAG->getMachineNode(RISCV::PseudoReadVL, DL, XLenVT, 355 /*Glue*/ SDValue(Load, 2)); 356 357 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 358 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 359 360 SDValue SuperReg = SDValue(Load, 0); 361 for (unsigned I = 0; I < NF; ++I) { 362 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I); 363 ReplaceUses(SDValue(Node, I), 364 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg)); 365 } 366 367 ReplaceUses(SDValue(Node, NF), SDValue(ReadVL, 0)); // VL 368 ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 1)); // Chain 369 CurDAG->RemoveDeadNode(Node); 370 } 371 372 void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked, 373 bool IsOrdered) { 374 SDLoc DL(Node); 375 unsigned NF = Node->getNumValues() - 1; 376 MVT VT = Node->getSimpleValueType(0); 377 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 378 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 379 380 unsigned CurOp = 2; 381 SmallVector<SDValue, 8> Operands; 382 if (IsMasked) { 383 SmallVector<SDValue, 
8> Regs(Node->op_begin() + CurOp, 384 Node->op_begin() + CurOp + NF); 385 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL); 386 Operands.push_back(MaskedOff); 387 CurOp += NF; 388 } 389 390 MVT IndexVT; 391 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 392 /*IsStridedOrIndexed*/ true, Operands, 393 /*IsLoad=*/true, &IndexVT); 394 395 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 396 "Element count mismatch"); 397 398 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 399 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 400 const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo( 401 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), 402 static_cast<unsigned>(IndexLMUL)); 403 MachineSDNode *Load = 404 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands); 405 406 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 407 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 408 409 SDValue SuperReg = SDValue(Load, 0); 410 for (unsigned I = 0; I < NF; ++I) { 411 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I); 412 ReplaceUses(SDValue(Node, I), 413 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg)); 414 } 415 416 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); 417 CurDAG->RemoveDeadNode(Node); 418 } 419 420 void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, bool IsMasked, 421 bool IsStrided) { 422 SDLoc DL(Node); 423 unsigned NF = Node->getNumOperands() - 4; 424 if (IsStrided) 425 NF--; 426 if (IsMasked) 427 NF--; 428 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 429 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 430 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 431 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF); 432 SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL); 433 434 SmallVector<SDValue, 8> Operands; 435 Operands.push_back(StoreVal); 436 unsigned CurOp = 2 + NF; 
437 438 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 439 Operands); 440 441 const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo( 442 NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL)); 443 MachineSDNode *Store = 444 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands); 445 446 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 447 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 448 449 ReplaceNode(Node, Store); 450 } 451 452 void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked, 453 bool IsOrdered) { 454 SDLoc DL(Node); 455 unsigned NF = Node->getNumOperands() - 5; 456 if (IsMasked) 457 --NF; 458 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 459 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 460 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 461 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF); 462 SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL); 463 464 SmallVector<SDValue, 8> Operands; 465 Operands.push_back(StoreVal); 466 unsigned CurOp = 2 + NF; 467 468 MVT IndexVT; 469 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 470 /*IsStridedOrIndexed*/ true, Operands, 471 /*IsLoad=*/false, &IndexVT); 472 473 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 474 "Element count mismatch"); 475 476 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 477 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 478 const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo( 479 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), 480 static_cast<unsigned>(IndexLMUL)); 481 MachineSDNode *Store = 482 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands); 483 484 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 485 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 486 487 ReplaceNode(Node, Store); 488 } 489 490 491 void RISCVDAGToDAGISel::Select(SDNode *Node) { 
492 // If we have a custom node, we have already selected. 493 if (Node->isMachineOpcode()) { 494 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n"); 495 Node->setNodeId(-1); 496 return; 497 } 498 499 // Instruction Selection not handled by the auto-generated tablegen selection 500 // should be handled here. 501 unsigned Opcode = Node->getOpcode(); 502 MVT XLenVT = Subtarget->getXLenVT(); 503 SDLoc DL(Node); 504 MVT VT = Node->getSimpleValueType(0); 505 506 switch (Opcode) { 507 case ISD::Constant: { 508 auto *ConstNode = cast<ConstantSDNode>(Node); 509 if (VT == XLenVT && ConstNode->isZero()) { 510 SDValue New = 511 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, XLenVT); 512 ReplaceNode(Node, New.getNode()); 513 return; 514 } 515 int64_t Imm = ConstNode->getSExtValue(); 516 // If the upper XLen-16 bits are not used, try to convert this to a simm12 517 // by sign extending bit 15. 518 if (isUInt<16>(Imm) && isInt<12>(SignExtend64(Imm, 16)) && 519 hasAllHUsers(Node)) 520 Imm = SignExtend64(Imm, 16); 521 // If the upper 32-bits are not used try to convert this into a simm32 by 522 // sign extending bit 32. 523 if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node)) 524 Imm = SignExtend64(Imm, 32); 525 526 ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget)); 527 return; 528 } 529 case ISD::FrameIndex: { 530 SDValue Imm = CurDAG->getTargetConstant(0, DL, XLenVT); 531 int FI = cast<FrameIndexSDNode>(Node)->getIndex(); 532 SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT); 533 ReplaceNode(Node, CurDAG->getMachineNode(RISCV::ADDI, DL, VT, TFI, Imm)); 534 return; 535 } 536 case ISD::SRL: { 537 // We don't need this transform if zext.h is supported. 538 if (Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp()) 539 break; 540 // Optimize (srl (and X, 0xffff), C) -> 541 // (srli (slli X, (XLen-16), (XLen-16) + C) 542 // Taking into account that the 0xffff may have had lower bits unset by 543 // SimplifyDemandedBits. 
This avoids materializing the 0xffff immediate. 544 // This pattern occurs when type legalizing i16 right shifts. 545 // FIXME: This could be extended to other AND masks. 546 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 547 if (N1C) { 548 uint64_t ShAmt = N1C->getZExtValue(); 549 SDValue N0 = Node->getOperand(0); 550 if (ShAmt < 16 && N0.getOpcode() == ISD::AND && N0.hasOneUse() && 551 isa<ConstantSDNode>(N0.getOperand(1))) { 552 uint64_t Mask = N0.getConstantOperandVal(1); 553 Mask |= maskTrailingOnes<uint64_t>(ShAmt); 554 if (Mask == 0xffff) { 555 unsigned LShAmt = Subtarget->getXLen() - 16; 556 SDNode *SLLI = 557 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0), 558 CurDAG->getTargetConstant(LShAmt, DL, VT)); 559 SDNode *SRLI = CurDAG->getMachineNode( 560 RISCV::SRLI, DL, VT, SDValue(SLLI, 0), 561 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT)); 562 ReplaceNode(Node, SRLI); 563 return; 564 } 565 } 566 } 567 568 break; 569 } 570 case ISD::AND: { 571 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 572 if (!N1C) 573 break; 574 575 SDValue N0 = Node->getOperand(0); 576 577 bool LeftShift = N0.getOpcode() == ISD::SHL; 578 if (!LeftShift && N0.getOpcode() != ISD::SRL) 579 break; 580 581 auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 582 if (!C) 583 break; 584 uint64_t C2 = C->getZExtValue(); 585 unsigned XLen = Subtarget->getXLen(); 586 if (!C2 || C2 >= XLen) 587 break; 588 589 uint64_t C1 = N1C->getZExtValue(); 590 591 // Keep track of whether this is a andi, zext.h, or zext.w. 592 bool ZExtOrANDI = isInt<12>(N1C->getSExtValue()); 593 if (C1 == UINT64_C(0xFFFF) && 594 (Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp())) 595 ZExtOrANDI = true; 596 if (C1 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba()) 597 ZExtOrANDI = true; 598 599 // Clear irrelevant bits in the mask. 
600 if (LeftShift) 601 C1 &= maskTrailingZeros<uint64_t>(C2); 602 else 603 C1 &= maskTrailingOnes<uint64_t>(XLen - C2); 604 605 // Some transforms should only be done if the shift has a single use or 606 // the AND would become (srli (slli X, 32), 32) 607 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF); 608 609 SDValue X = N0.getOperand(0); 610 611 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask 612 // with c3 leading zeros. 613 if (!LeftShift && isMask_64(C1)) { 614 uint64_t C3 = XLen - (64 - countLeadingZeros(C1)); 615 if (C2 < C3) { 616 // If the number of leading zeros is C2+32 this can be SRLIW. 617 if (C2 + 32 == C3) { 618 SDNode *SRLIW = 619 CurDAG->getMachineNode(RISCV::SRLIW, DL, XLenVT, X, 620 CurDAG->getTargetConstant(C2, DL, XLenVT)); 621 ReplaceNode(Node, SRLIW); 622 return; 623 } 624 625 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32) if 626 // c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1. 627 // 628 // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type 629 // legalized and goes through DAG combine. 630 SDValue Y; 631 if (C2 >= 32 && (C3 - C2) == 1 && N0.hasOneUse() && 632 selectSExti32(X, Y)) { 633 SDNode *SRAIW = 634 CurDAG->getMachineNode(RISCV::SRAIW, DL, XLenVT, Y, 635 CurDAG->getTargetConstant(31, DL, XLenVT)); 636 SDNode *SRLIW = CurDAG->getMachineNode( 637 RISCV::SRLIW, DL, XLenVT, SDValue(SRAIW, 0), 638 CurDAG->getTargetConstant(C3 - 32, DL, XLenVT)); 639 ReplaceNode(Node, SRLIW); 640 return; 641 } 642 643 // (srli (slli x, c3-c2), c3). 
644 if (OneUseOrZExtW && !ZExtOrANDI) { 645 SDNode *SLLI = CurDAG->getMachineNode( 646 RISCV::SLLI, DL, XLenVT, X, 647 CurDAG->getTargetConstant(C3 - C2, DL, XLenVT)); 648 SDNode *SRLI = 649 CurDAG->getMachineNode(RISCV::SRLI, DL, XLenVT, SDValue(SLLI, 0), 650 CurDAG->getTargetConstant(C3, DL, XLenVT)); 651 ReplaceNode(Node, SRLI); 652 return; 653 } 654 } 655 } 656 657 // Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask 658 // shifted by c2 bits with c3 leading zeros. 659 if (LeftShift && isShiftedMask_64(C1)) { 660 uint64_t C3 = XLen - (64 - countLeadingZeros(C1)); 661 662 if (C2 + C3 < XLen && 663 C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + C3)) << C2)) { 664 // Use slli.uw when possible. 665 if ((XLen - (C2 + C3)) == 32 && Subtarget->hasStdExtZba()) { 666 SDNode *SLLIUW = 667 CurDAG->getMachineNode(RISCV::SLLIUW, DL, XLenVT, X, 668 CurDAG->getTargetConstant(C2, DL, XLenVT)); 669 ReplaceNode(Node, SLLIUW); 670 return; 671 } 672 673 // (srli (slli c2+c3), c3) 674 if (OneUseOrZExtW && !ZExtOrANDI) { 675 SDNode *SLLI = CurDAG->getMachineNode( 676 RISCV::SLLI, DL, XLenVT, X, 677 CurDAG->getTargetConstant(C2 + C3, DL, XLenVT)); 678 SDNode *SRLI = 679 CurDAG->getMachineNode(RISCV::SRLI, DL, XLenVT, SDValue(SLLI, 0), 680 CurDAG->getTargetConstant(C3, DL, XLenVT)); 681 ReplaceNode(Node, SRLI); 682 return; 683 } 684 } 685 } 686 687 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a 688 // shifted mask with c2 leading zeros and c3 trailing zeros. 
689 if (!LeftShift && isShiftedMask_64(C1)) { 690 uint64_t Leading = XLen - (64 - countLeadingZeros(C1)); 691 uint64_t C3 = countTrailingZeros(C1); 692 if (Leading == C2 && C2 + C3 < XLen && OneUseOrZExtW && !ZExtOrANDI) { 693 SDNode *SRLI = CurDAG->getMachineNode( 694 RISCV::SRLI, DL, XLenVT, X, 695 CurDAG->getTargetConstant(C2 + C3, DL, XLenVT)); 696 SDNode *SLLI = 697 CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLI, 0), 698 CurDAG->getTargetConstant(C3, DL, XLenVT)); 699 ReplaceNode(Node, SLLI); 700 return; 701 } 702 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI. 703 if (Leading > 32 && (Leading - 32) == C2 && C2 + C3 < 32 && 704 OneUseOrZExtW && !ZExtOrANDI) { 705 SDNode *SRLIW = CurDAG->getMachineNode( 706 RISCV::SRLIW, DL, XLenVT, X, 707 CurDAG->getTargetConstant(C2 + C3, DL, XLenVT)); 708 SDNode *SLLI = 709 CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLIW, 0), 710 CurDAG->getTargetConstant(C3, DL, XLenVT)); 711 ReplaceNode(Node, SLLI); 712 return; 713 } 714 } 715 716 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a 717 // shifted mask with no leading zeros and c3 trailing zeros. 718 if (LeftShift && isShiftedMask_64(C1)) { 719 uint64_t Leading = XLen - (64 - countLeadingZeros(C1)); 720 uint64_t C3 = countTrailingZeros(C1); 721 if (Leading == 0 && C2 < C3 && OneUseOrZExtW && !ZExtOrANDI) { 722 SDNode *SRLI = CurDAG->getMachineNode( 723 RISCV::SRLI, DL, XLenVT, X, 724 CurDAG->getTargetConstant(C3 - C2, DL, XLenVT)); 725 SDNode *SLLI = 726 CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLI, 0), 727 CurDAG->getTargetConstant(C3, DL, XLenVT)); 728 ReplaceNode(Node, SLLI); 729 return; 730 } 731 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI. 
732 if (C2 < C3 && Leading + C2 == 32 && OneUseOrZExtW && !ZExtOrANDI) { 733 SDNode *SRLIW = CurDAG->getMachineNode( 734 RISCV::SRLIW, DL, XLenVT, X, 735 CurDAG->getTargetConstant(C3 - C2, DL, XLenVT)); 736 SDNode *SLLI = 737 CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLIW, 0), 738 CurDAG->getTargetConstant(C3, DL, XLenVT)); 739 ReplaceNode(Node, SLLI); 740 return; 741 } 742 } 743 744 break; 745 } 746 case ISD::MUL: { 747 // Special case for calculating (mul (and X, C2), C1) where the full product 748 // fits in XLen bits. We can shift X left by the number of leading zeros in 749 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final 750 // product has XLen trailing zeros, putting it in the output of MULHU. This 751 // can avoid materializing a constant in a register for C2. 752 753 // RHS should be a constant. 754 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 755 if (!N1C || !N1C->hasOneUse()) 756 break; 757 758 // LHS should be an AND with constant. 759 SDValue N0 = Node->getOperand(0); 760 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1))) 761 break; 762 763 uint64_t C2 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); 764 765 // Constant should be a mask. 766 if (!isMask_64(C2)) 767 break; 768 769 // This should be the only use of the AND unless we will use 770 // (SRLI (SLLI X, 32), 32). We don't use a shift pair for other AND 771 // constants. 772 if (!N0.hasOneUse() && C2 != UINT64_C(0xFFFFFFFF)) 773 break; 774 775 // If this can be an ANDI, ZEXT.H or ZEXT.W we don't need to do this 776 // optimization. 777 if (isInt<12>(C2) || 778 (C2 == UINT64_C(0xFFFF) && 779 (Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp())) || 780 (C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba())) 781 break; 782 783 // We need to shift left the AND input and C1 by a total of XLen bits. 784 785 // How far left do we need to shift the AND input? 
786 unsigned XLen = Subtarget->getXLen(); 787 unsigned LeadingZeros = XLen - (64 - countLeadingZeros(C2)); 788 789 // The constant gets shifted by the remaining amount unless that would 790 // shift bits out. 791 uint64_t C1 = N1C->getZExtValue(); 792 unsigned ConstantShift = XLen - LeadingZeros; 793 if (ConstantShift > (XLen - (64 - countLeadingZeros(C1)))) 794 break; 795 796 uint64_t ShiftedC1 = C1 << ConstantShift; 797 // If this RV32, we need to sign extend the constant. 798 if (XLen == 32) 799 ShiftedC1 = SignExtend64(ShiftedC1, 32); 800 801 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))). 802 SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget); 803 SDNode *SLLI = 804 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0), 805 CurDAG->getTargetConstant(LeadingZeros, DL, VT)); 806 SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT, 807 SDValue(SLLI, 0), SDValue(Imm, 0)); 808 ReplaceNode(Node, MULHU); 809 return; 810 } 811 case ISD::INTRINSIC_WO_CHAIN: { 812 unsigned IntNo = Node->getConstantOperandVal(0); 813 switch (IntNo) { 814 // By default we do not custom select any intrinsic. 815 default: 816 break; 817 case Intrinsic::riscv_vmsgeu: 818 case Intrinsic::riscv_vmsge: { 819 SDValue Src1 = Node->getOperand(1); 820 SDValue Src2 = Node->getOperand(2); 821 // Only custom select scalar second operand. 822 if (Src2.getValueType() != XLenVT) 823 break; 824 // Small constants are handled with patterns. 825 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) { 826 int64_t CVal = C->getSExtValue(); 827 if (CVal >= -15 && CVal <= 16) 828 break; 829 } 830 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu; 831 MVT Src1VT = Src1.getSimpleValueType(); 832 unsigned VMSLTOpcode, VMNANDOpcode; 833 switch (RISCVTargetLowering::getLMUL(Src1VT)) { 834 default: 835 llvm_unreachable("Unexpected LMUL!"); 836 #define CASE_VMSLT_VMNAND_OPCODES(lmulenum, suffix) \ 837 case RISCVII::VLMUL::lmulenum: \ 838 VMSLTOpcode = IsUnsigned ? 
RISCV::PseudoVMSLTU_VX_##suffix \ 839 : RISCV::PseudoVMSLT_VX_##suffix; \ 840 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \ 841 break; 842 CASE_VMSLT_VMNAND_OPCODES(LMUL_F8, MF8) 843 CASE_VMSLT_VMNAND_OPCODES(LMUL_F4, MF4) 844 CASE_VMSLT_VMNAND_OPCODES(LMUL_F2, MF2) 845 CASE_VMSLT_VMNAND_OPCODES(LMUL_1, M1) 846 CASE_VMSLT_VMNAND_OPCODES(LMUL_2, M2) 847 CASE_VMSLT_VMNAND_OPCODES(LMUL_4, M4) 848 CASE_VMSLT_VMNAND_OPCODES(LMUL_8, M8) 849 #undef CASE_VMSLT_VMNAND_OPCODES 850 } 851 SDValue SEW = CurDAG->getTargetConstant( 852 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT); 853 SDValue VL; 854 selectVLOp(Node->getOperand(3), VL); 855 856 // Expand to 857 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd 858 SDValue Cmp = SDValue( 859 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}), 860 0); 861 ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT, 862 {Cmp, Cmp, VL, SEW})); 863 return; 864 } 865 case Intrinsic::riscv_vmsgeu_mask: 866 case Intrinsic::riscv_vmsge_mask: { 867 SDValue Src1 = Node->getOperand(2); 868 SDValue Src2 = Node->getOperand(3); 869 // Only custom select scalar second operand. 870 if (Src2.getValueType() != XLenVT) 871 break; 872 // Small constants are handled with patterns. 873 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) { 874 int64_t CVal = C->getSExtValue(); 875 if (CVal >= -15 && CVal <= 16) 876 break; 877 } 878 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask; 879 MVT Src1VT = Src1.getSimpleValueType(); 880 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode; 881 switch (RISCVTargetLowering::getLMUL(Src1VT)) { 882 default: 883 llvm_unreachable("Unexpected LMUL!"); 884 #define CASE_VMSLT_OPCODES(lmulenum, suffix) \ 885 case RISCVII::VLMUL::lmulenum: \ 886 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \ 887 : RISCV::PseudoVMSLT_VX_##suffix; \ 888 VMSLTMaskOpcode = IsUnsigned ? 
RISCV::PseudoVMSLTU_VX_##suffix##_MASK \ 889 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \ 890 break; 891 CASE_VMSLT_OPCODES(LMUL_F8, MF8) 892 CASE_VMSLT_OPCODES(LMUL_F4, MF4) 893 CASE_VMSLT_OPCODES(LMUL_F2, MF2) 894 CASE_VMSLT_OPCODES(LMUL_1, M1) 895 CASE_VMSLT_OPCODES(LMUL_2, M2) 896 CASE_VMSLT_OPCODES(LMUL_4, M4) 897 CASE_VMSLT_OPCODES(LMUL_8, M8) 898 #undef CASE_VMSLT_OPCODES 899 } 900 // Mask operations use the LMUL from the mask type. 901 switch (RISCVTargetLowering::getLMUL(VT)) { 902 default: 903 llvm_unreachable("Unexpected LMUL!"); 904 #define CASE_VMXOR_VANDN_OPCODES(lmulenum, suffix) \ 905 case RISCVII::VLMUL::lmulenum: \ 906 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \ 907 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \ 908 break; 909 CASE_VMXOR_VANDN_OPCODES(LMUL_F8, MF8) 910 CASE_VMXOR_VANDN_OPCODES(LMUL_F4, MF4) 911 CASE_VMXOR_VANDN_OPCODES(LMUL_F2, MF2) 912 CASE_VMXOR_VANDN_OPCODES(LMUL_1, M1) 913 CASE_VMXOR_VANDN_OPCODES(LMUL_2, M2) 914 CASE_VMXOR_VANDN_OPCODES(LMUL_4, M4) 915 CASE_VMXOR_VANDN_OPCODES(LMUL_8, M8) 916 #undef CASE_VMXOR_VANDN_OPCODES 917 } 918 SDValue SEW = CurDAG->getTargetConstant( 919 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT); 920 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT); 921 SDValue VL; 922 selectVLOp(Node->getOperand(5), VL); 923 SDValue MaskedOff = Node->getOperand(1); 924 SDValue Mask = Node->getOperand(4); 925 // If the MaskedOff value and the Mask are the same value use 926 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt 927 // This avoids needing to copy v0 to vd before starting the next sequence. 928 if (Mask == MaskedOff) { 929 SDValue Cmp = SDValue( 930 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}), 931 0); 932 ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT, 933 {Mask, Cmp, VL, MaskSEW})); 934 return; 935 } 936 937 // Mask needs to be copied to V0. 
938 SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, 939 RISCV::V0, Mask, SDValue()); 940 SDValue Glue = Chain.getValue(1); 941 SDValue V0 = CurDAG->getRegister(RISCV::V0, VT); 942 943 // Otherwise use 944 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0 945 SDValue Cmp = SDValue( 946 CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT, 947 {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}), 948 0); 949 ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT, 950 {Cmp, Mask, VL, MaskSEW})); 951 return; 952 } 953 } 954 break; 955 } 956 case ISD::INTRINSIC_W_CHAIN: { 957 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 958 switch (IntNo) { 959 // By default we do not custom select any intrinsic. 960 default: 961 break; 962 963 case Intrinsic::riscv_vsetvli: 964 case Intrinsic::riscv_vsetvlimax: { 965 if (!Subtarget->hasVInstructions()) 966 break; 967 968 bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax; 969 unsigned Offset = VLMax ? 2 : 3; 970 971 assert(Node->getNumOperands() == Offset + 2 && 972 "Unexpected number of operands"); 973 974 unsigned SEW = 975 RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7); 976 RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>( 977 Node->getConstantOperandVal(Offset + 1) & 0x7); 978 979 unsigned VTypeI = RISCVVType::encodeVTYPE( 980 VLMul, SEW, /*TailAgnostic*/ true, /*MaskAgnostic*/ false); 981 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT); 982 983 SDValue VLOperand; 984 unsigned Opcode = RISCV::PseudoVSETVLI; 985 if (VLMax) { 986 VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT); 987 Opcode = RISCV::PseudoVSETVLIX0; 988 } else { 989 VLOperand = Node->getOperand(2); 990 991 if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) { 992 uint64_t AVL = C->getZExtValue(); 993 if (isUInt<5>(AVL)) { 994 SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT); 995 ReplaceNode( 996 Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL, XLenVT, 997 MVT::Other, VLImm, 
VTypeIOp, 998 /* Chain */ Node->getOperand(0))); 999 return; 1000 } 1001 } 1002 } 1003 1004 ReplaceNode(Node, 1005 CurDAG->getMachineNode(Opcode, DL, XLenVT, 1006 MVT::Other, VLOperand, VTypeIOp, 1007 /* Chain */ Node->getOperand(0))); 1008 return; 1009 } 1010 case Intrinsic::riscv_vlseg2: 1011 case Intrinsic::riscv_vlseg3: 1012 case Intrinsic::riscv_vlseg4: 1013 case Intrinsic::riscv_vlseg5: 1014 case Intrinsic::riscv_vlseg6: 1015 case Intrinsic::riscv_vlseg7: 1016 case Intrinsic::riscv_vlseg8: { 1017 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false); 1018 return; 1019 } 1020 case Intrinsic::riscv_vlseg2_mask: 1021 case Intrinsic::riscv_vlseg3_mask: 1022 case Intrinsic::riscv_vlseg4_mask: 1023 case Intrinsic::riscv_vlseg5_mask: 1024 case Intrinsic::riscv_vlseg6_mask: 1025 case Intrinsic::riscv_vlseg7_mask: 1026 case Intrinsic::riscv_vlseg8_mask: { 1027 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false); 1028 return; 1029 } 1030 case Intrinsic::riscv_vlsseg2: 1031 case Intrinsic::riscv_vlsseg3: 1032 case Intrinsic::riscv_vlsseg4: 1033 case Intrinsic::riscv_vlsseg5: 1034 case Intrinsic::riscv_vlsseg6: 1035 case Intrinsic::riscv_vlsseg7: 1036 case Intrinsic::riscv_vlsseg8: { 1037 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true); 1038 return; 1039 } 1040 case Intrinsic::riscv_vlsseg2_mask: 1041 case Intrinsic::riscv_vlsseg3_mask: 1042 case Intrinsic::riscv_vlsseg4_mask: 1043 case Intrinsic::riscv_vlsseg5_mask: 1044 case Intrinsic::riscv_vlsseg6_mask: 1045 case Intrinsic::riscv_vlsseg7_mask: 1046 case Intrinsic::riscv_vlsseg8_mask: { 1047 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true); 1048 return; 1049 } 1050 case Intrinsic::riscv_vloxseg2: 1051 case Intrinsic::riscv_vloxseg3: 1052 case Intrinsic::riscv_vloxseg4: 1053 case Intrinsic::riscv_vloxseg5: 1054 case Intrinsic::riscv_vloxseg6: 1055 case Intrinsic::riscv_vloxseg7: 1056 case Intrinsic::riscv_vloxseg8: 1057 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true); 1058 
return; 1059 case Intrinsic::riscv_vluxseg2: 1060 case Intrinsic::riscv_vluxseg3: 1061 case Intrinsic::riscv_vluxseg4: 1062 case Intrinsic::riscv_vluxseg5: 1063 case Intrinsic::riscv_vluxseg6: 1064 case Intrinsic::riscv_vluxseg7: 1065 case Intrinsic::riscv_vluxseg8: 1066 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false); 1067 return; 1068 case Intrinsic::riscv_vloxseg2_mask: 1069 case Intrinsic::riscv_vloxseg3_mask: 1070 case Intrinsic::riscv_vloxseg4_mask: 1071 case Intrinsic::riscv_vloxseg5_mask: 1072 case Intrinsic::riscv_vloxseg6_mask: 1073 case Intrinsic::riscv_vloxseg7_mask: 1074 case Intrinsic::riscv_vloxseg8_mask: 1075 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true); 1076 return; 1077 case Intrinsic::riscv_vluxseg2_mask: 1078 case Intrinsic::riscv_vluxseg3_mask: 1079 case Intrinsic::riscv_vluxseg4_mask: 1080 case Intrinsic::riscv_vluxseg5_mask: 1081 case Intrinsic::riscv_vluxseg6_mask: 1082 case Intrinsic::riscv_vluxseg7_mask: 1083 case Intrinsic::riscv_vluxseg8_mask: 1084 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false); 1085 return; 1086 case Intrinsic::riscv_vlseg8ff: 1087 case Intrinsic::riscv_vlseg7ff: 1088 case Intrinsic::riscv_vlseg6ff: 1089 case Intrinsic::riscv_vlseg5ff: 1090 case Intrinsic::riscv_vlseg4ff: 1091 case Intrinsic::riscv_vlseg3ff: 1092 case Intrinsic::riscv_vlseg2ff: { 1093 selectVLSEGFF(Node, /*IsMasked*/ false); 1094 return; 1095 } 1096 case Intrinsic::riscv_vlseg8ff_mask: 1097 case Intrinsic::riscv_vlseg7ff_mask: 1098 case Intrinsic::riscv_vlseg6ff_mask: 1099 case Intrinsic::riscv_vlseg5ff_mask: 1100 case Intrinsic::riscv_vlseg4ff_mask: 1101 case Intrinsic::riscv_vlseg3ff_mask: 1102 case Intrinsic::riscv_vlseg2ff_mask: { 1103 selectVLSEGFF(Node, /*IsMasked*/ true); 1104 return; 1105 } 1106 case Intrinsic::riscv_vloxei: 1107 case Intrinsic::riscv_vloxei_mask: 1108 case Intrinsic::riscv_vluxei: 1109 case Intrinsic::riscv_vluxei_mask: { 1110 bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask || 1111 
IntNo == Intrinsic::riscv_vluxei_mask; 1112 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei || 1113 IntNo == Intrinsic::riscv_vloxei_mask; 1114 1115 MVT VT = Node->getSimpleValueType(0); 1116 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1117 1118 unsigned CurOp = 2; 1119 SmallVector<SDValue, 8> Operands; 1120 if (IsMasked) 1121 Operands.push_back(Node->getOperand(CurOp++)); 1122 1123 MVT IndexVT; 1124 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 1125 /*IsStridedOrIndexed*/ true, Operands, 1126 /*IsLoad=*/true, &IndexVT); 1127 1128 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 1129 "Element count mismatch"); 1130 1131 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1132 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 1133 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 1134 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo( 1135 IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), 1136 static_cast<unsigned>(IndexLMUL)); 1137 MachineSDNode *Load = 1138 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1139 1140 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1141 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 1142 1143 ReplaceNode(Node, Load); 1144 return; 1145 } 1146 case Intrinsic::riscv_vlm: 1147 case Intrinsic::riscv_vle: 1148 case Intrinsic::riscv_vle_mask: 1149 case Intrinsic::riscv_vlse: 1150 case Intrinsic::riscv_vlse_mask: { 1151 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask || 1152 IntNo == Intrinsic::riscv_vlse_mask; 1153 bool IsStrided = 1154 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask; 1155 1156 MVT VT = Node->getSimpleValueType(0); 1157 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1158 1159 unsigned CurOp = 2; 1160 SmallVector<SDValue, 8> Operands; 1161 if (IsMasked) 1162 Operands.push_back(Node->getOperand(CurOp++)); 1163 1164 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, 
IsMasked, IsStrided, 1165 Operands, /*IsLoad=*/true); 1166 1167 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1168 const RISCV::VLEPseudo *P = 1169 RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW, 1170 static_cast<unsigned>(LMUL)); 1171 MachineSDNode *Load = 1172 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1173 1174 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1175 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 1176 1177 ReplaceNode(Node, Load); 1178 return; 1179 } 1180 case Intrinsic::riscv_vleff: 1181 case Intrinsic::riscv_vleff_mask: { 1182 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask; 1183 1184 MVT VT = Node->getSimpleValueType(0); 1185 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1186 1187 unsigned CurOp = 2; 1188 SmallVector<SDValue, 7> Operands; 1189 if (IsMasked) 1190 Operands.push_back(Node->getOperand(CurOp++)); 1191 1192 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 1193 /*IsStridedOrIndexed*/ false, Operands, 1194 /*IsLoad=*/true); 1195 1196 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1197 const RISCV::VLEPseudo *P = 1198 RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true, Log2SEW, 1199 static_cast<unsigned>(LMUL)); 1200 MachineSDNode *Load = 1201 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), 1202 MVT::Other, MVT::Glue, Operands); 1203 SDNode *ReadVL = CurDAG->getMachineNode(RISCV::PseudoReadVL, DL, XLenVT, 1204 /*Glue*/ SDValue(Load, 2)); 1205 1206 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1207 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 1208 1209 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0)); 1210 ReplaceUses(SDValue(Node, 1), SDValue(ReadVL, 0)); // VL 1211 ReplaceUses(SDValue(Node, 2), SDValue(Load, 1)); // Chain 1212 CurDAG->RemoveDeadNode(Node); 1213 return; 1214 } 1215 } 1216 break; 1217 } 1218 case ISD::INTRINSIC_VOID: { 1219 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 
1220 switch (IntNo) { 1221 case Intrinsic::riscv_vsseg2: 1222 case Intrinsic::riscv_vsseg3: 1223 case Intrinsic::riscv_vsseg4: 1224 case Intrinsic::riscv_vsseg5: 1225 case Intrinsic::riscv_vsseg6: 1226 case Intrinsic::riscv_vsseg7: 1227 case Intrinsic::riscv_vsseg8: { 1228 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false); 1229 return; 1230 } 1231 case Intrinsic::riscv_vsseg2_mask: 1232 case Intrinsic::riscv_vsseg3_mask: 1233 case Intrinsic::riscv_vsseg4_mask: 1234 case Intrinsic::riscv_vsseg5_mask: 1235 case Intrinsic::riscv_vsseg6_mask: 1236 case Intrinsic::riscv_vsseg7_mask: 1237 case Intrinsic::riscv_vsseg8_mask: { 1238 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false); 1239 return; 1240 } 1241 case Intrinsic::riscv_vssseg2: 1242 case Intrinsic::riscv_vssseg3: 1243 case Intrinsic::riscv_vssseg4: 1244 case Intrinsic::riscv_vssseg5: 1245 case Intrinsic::riscv_vssseg6: 1246 case Intrinsic::riscv_vssseg7: 1247 case Intrinsic::riscv_vssseg8: { 1248 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true); 1249 return; 1250 } 1251 case Intrinsic::riscv_vssseg2_mask: 1252 case Intrinsic::riscv_vssseg3_mask: 1253 case Intrinsic::riscv_vssseg4_mask: 1254 case Intrinsic::riscv_vssseg5_mask: 1255 case Intrinsic::riscv_vssseg6_mask: 1256 case Intrinsic::riscv_vssseg7_mask: 1257 case Intrinsic::riscv_vssseg8_mask: { 1258 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true); 1259 return; 1260 } 1261 case Intrinsic::riscv_vsoxseg2: 1262 case Intrinsic::riscv_vsoxseg3: 1263 case Intrinsic::riscv_vsoxseg4: 1264 case Intrinsic::riscv_vsoxseg5: 1265 case Intrinsic::riscv_vsoxseg6: 1266 case Intrinsic::riscv_vsoxseg7: 1267 case Intrinsic::riscv_vsoxseg8: 1268 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true); 1269 return; 1270 case Intrinsic::riscv_vsuxseg2: 1271 case Intrinsic::riscv_vsuxseg3: 1272 case Intrinsic::riscv_vsuxseg4: 1273 case Intrinsic::riscv_vsuxseg5: 1274 case Intrinsic::riscv_vsuxseg6: 1275 case Intrinsic::riscv_vsuxseg7: 1276 case 
Intrinsic::riscv_vsuxseg8: 1277 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false); 1278 return; 1279 case Intrinsic::riscv_vsoxseg2_mask: 1280 case Intrinsic::riscv_vsoxseg3_mask: 1281 case Intrinsic::riscv_vsoxseg4_mask: 1282 case Intrinsic::riscv_vsoxseg5_mask: 1283 case Intrinsic::riscv_vsoxseg6_mask: 1284 case Intrinsic::riscv_vsoxseg7_mask: 1285 case Intrinsic::riscv_vsoxseg8_mask: 1286 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true); 1287 return; 1288 case Intrinsic::riscv_vsuxseg2_mask: 1289 case Intrinsic::riscv_vsuxseg3_mask: 1290 case Intrinsic::riscv_vsuxseg4_mask: 1291 case Intrinsic::riscv_vsuxseg5_mask: 1292 case Intrinsic::riscv_vsuxseg6_mask: 1293 case Intrinsic::riscv_vsuxseg7_mask: 1294 case Intrinsic::riscv_vsuxseg8_mask: 1295 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false); 1296 return; 1297 case Intrinsic::riscv_vsoxei: 1298 case Intrinsic::riscv_vsoxei_mask: 1299 case Intrinsic::riscv_vsuxei: 1300 case Intrinsic::riscv_vsuxei_mask: { 1301 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask || 1302 IntNo == Intrinsic::riscv_vsuxei_mask; 1303 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei || 1304 IntNo == Intrinsic::riscv_vsoxei_mask; 1305 1306 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 1307 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1308 1309 unsigned CurOp = 2; 1310 SmallVector<SDValue, 8> Operands; 1311 Operands.push_back(Node->getOperand(CurOp++)); // Store value. 
1312 1313 MVT IndexVT; 1314 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 1315 /*IsStridedOrIndexed*/ true, Operands, 1316 /*IsLoad=*/false, &IndexVT); 1317 1318 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 1319 "Element count mismatch"); 1320 1321 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1322 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 1323 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 1324 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo( 1325 IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), 1326 static_cast<unsigned>(IndexLMUL)); 1327 MachineSDNode *Store = 1328 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1329 1330 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1331 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 1332 1333 ReplaceNode(Node, Store); 1334 return; 1335 } 1336 case Intrinsic::riscv_vsm: 1337 case Intrinsic::riscv_vse: 1338 case Intrinsic::riscv_vse_mask: 1339 case Intrinsic::riscv_vsse: 1340 case Intrinsic::riscv_vsse_mask: { 1341 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask || 1342 IntNo == Intrinsic::riscv_vsse_mask; 1343 bool IsStrided = 1344 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask; 1345 1346 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 1347 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1348 1349 unsigned CurOp = 2; 1350 SmallVector<SDValue, 8> Operands; 1351 Operands.push_back(Node->getOperand(CurOp++)); // Store value. 
1352 1353 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 1354 Operands); 1355 1356 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1357 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo( 1358 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL)); 1359 MachineSDNode *Store = 1360 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1361 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1362 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 1363 1364 ReplaceNode(Node, Store); 1365 return; 1366 } 1367 } 1368 break; 1369 } 1370 case ISD::BITCAST: { 1371 MVT SrcVT = Node->getOperand(0).getSimpleValueType(); 1372 // Just drop bitcasts between vectors if both are fixed or both are 1373 // scalable. 1374 if ((VT.isScalableVector() && SrcVT.isScalableVector()) || 1375 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) { 1376 ReplaceUses(SDValue(Node, 0), Node->getOperand(0)); 1377 CurDAG->RemoveDeadNode(Node); 1378 return; 1379 } 1380 break; 1381 } 1382 case ISD::INSERT_SUBVECTOR: { 1383 SDValue V = Node->getOperand(0); 1384 SDValue SubV = Node->getOperand(1); 1385 SDLoc DL(SubV); 1386 auto Idx = Node->getConstantOperandVal(2); 1387 MVT SubVecVT = SubV.getSimpleValueType(); 1388 1389 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering(); 1390 MVT SubVecContainerVT = SubVecVT; 1391 // Establish the correct scalable-vector types for any fixed-length type. 
1392 if (SubVecVT.isFixedLengthVector()) 1393 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT); 1394 if (VT.isFixedLengthVector()) 1395 VT = TLI.getContainerForFixedLengthVector(VT); 1396 1397 const auto *TRI = Subtarget->getRegisterInfo(); 1398 unsigned SubRegIdx; 1399 std::tie(SubRegIdx, Idx) = 1400 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 1401 VT, SubVecContainerVT, Idx, TRI); 1402 1403 // If the Idx hasn't been completely eliminated then this is a subvector 1404 // insert which doesn't naturally align to a vector register. These must 1405 // be handled using instructions to manipulate the vector registers. 1406 if (Idx != 0) 1407 break; 1408 1409 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT); 1410 bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 || 1411 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 || 1412 SubVecLMUL == RISCVII::VLMUL::LMUL_F8; 1413 (void)IsSubVecPartReg; // Silence unused variable warning without asserts. 1414 assert((!IsSubVecPartReg || V.isUndef()) && 1415 "Expecting lowering to have created legal INSERT_SUBVECTORs when " 1416 "the subvector is smaller than a full-sized register"); 1417 1418 // If we haven't set a SubRegIdx, then we must be going between 1419 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy. 
1420 if (SubRegIdx == RISCV::NoSubRegister) { 1421 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(VT); 1422 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) == 1423 InRegClassID && 1424 "Unexpected subvector extraction"); 1425 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT); 1426 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, 1427 DL, VT, SubV, RC); 1428 ReplaceNode(Node, NewNode); 1429 return; 1430 } 1431 1432 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV); 1433 ReplaceNode(Node, Insert.getNode()); 1434 return; 1435 } 1436 case ISD::EXTRACT_SUBVECTOR: { 1437 SDValue V = Node->getOperand(0); 1438 auto Idx = Node->getConstantOperandVal(1); 1439 MVT InVT = V.getSimpleValueType(); 1440 SDLoc DL(V); 1441 1442 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering(); 1443 MVT SubVecContainerVT = VT; 1444 // Establish the correct scalable-vector types for any fixed-length type. 1445 if (VT.isFixedLengthVector()) 1446 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT); 1447 if (InVT.isFixedLengthVector()) 1448 InVT = TLI.getContainerForFixedLengthVector(InVT); 1449 1450 const auto *TRI = Subtarget->getRegisterInfo(); 1451 unsigned SubRegIdx; 1452 std::tie(SubRegIdx, Idx) = 1453 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 1454 InVT, SubVecContainerVT, Idx, TRI); 1455 1456 // If the Idx hasn't been completely eliminated then this is a subvector 1457 // extract which doesn't naturally align to a vector register. These must 1458 // be handled using instructions to manipulate the vector registers. 1459 if (Idx != 0) 1460 break; 1461 1462 // If we haven't set a SubRegIdx, then we must be going between 1463 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy. 
1464 if (SubRegIdx == RISCV::NoSubRegister) { 1465 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT); 1466 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) == 1467 InRegClassID && 1468 "Unexpected subvector extraction"); 1469 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT); 1470 SDNode *NewNode = 1471 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC); 1472 ReplaceNode(Node, NewNode); 1473 return; 1474 } 1475 1476 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V); 1477 ReplaceNode(Node, Extract.getNode()); 1478 return; 1479 } 1480 case ISD::SPLAT_VECTOR: 1481 case RISCVISD::VMV_V_X_VL: 1482 case RISCVISD::VFMV_V_F_VL: { 1483 // Try to match splat of a scalar load to a strided load with stride of x0. 1484 SDValue Src = Node->getOperand(0); 1485 auto *Ld = dyn_cast<LoadSDNode>(Src); 1486 if (!Ld) 1487 break; 1488 EVT MemVT = Ld->getMemoryVT(); 1489 // The memory VT should be the same size as the element type. 
1490 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize()) 1491 break; 1492 if (!IsProfitableToFold(Src, Node, Node) || 1493 !IsLegalToFold(Src, Node, Node, TM.getOptLevel())) 1494 break; 1495 1496 SDValue VL; 1497 if (Node->getOpcode() == ISD::SPLAT_VECTOR) 1498 VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT); 1499 else 1500 selectVLOp(Node->getOperand(1), VL); 1501 1502 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1503 SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT); 1504 1505 SDValue Operands[] = {Ld->getBasePtr(), 1506 CurDAG->getRegister(RISCV::X0, XLenVT), VL, SEW, 1507 Ld->getChain()}; 1508 1509 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1510 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo( 1511 /*IsMasked*/ false, /*IsStrided*/ true, /*FF*/ false, Log2SEW, 1512 static_cast<unsigned>(LMUL)); 1513 MachineSDNode *Load = 1514 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1515 1516 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1517 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 1518 1519 ReplaceNode(Node, Load); 1520 return; 1521 } 1522 } 1523 1524 // Select the default instruction. 1525 SelectCode(Node); 1526 } 1527 1528 bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand( 1529 const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { 1530 switch (ConstraintID) { 1531 case InlineAsm::Constraint_m: 1532 // We just support simple memory operands that have a single address 1533 // operand and need no special handling. 
1534 OutOps.push_back(Op); 1535 return false; 1536 case InlineAsm::Constraint_A: 1537 OutOps.push_back(Op); 1538 return false; 1539 default: 1540 break; 1541 } 1542 1543 return true; 1544 } 1545 1546 bool RISCVDAGToDAGISel::SelectAddrFI(SDValue Addr, SDValue &Base) { 1547 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { 1548 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT()); 1549 return true; 1550 } 1551 return false; 1552 } 1553 1554 bool RISCVDAGToDAGISel::SelectBaseAddr(SDValue Addr, SDValue &Base) { 1555 // If this is FrameIndex, select it directly. Otherwise just let it get 1556 // selected to a register independently. 1557 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) 1558 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT()); 1559 else 1560 Base = Addr; 1561 return true; 1562 } 1563 1564 bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth, 1565 SDValue &ShAmt) { 1566 // Shift instructions on RISCV only read the lower 5 or 6 bits of the shift 1567 // amount. If there is an AND on the shift amount, we can bypass it if it 1568 // doesn't affect any of those bits. 1569 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) { 1570 const APInt &AndMask = N->getConstantOperandAPInt(1); 1571 1572 // Since the max shift amount is a power of 2 we can subtract 1 to make a 1573 // mask that covers the bits needed to represent all shift amounts. 1574 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!"); 1575 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1); 1576 1577 if (ShMask.isSubsetOf(AndMask)) { 1578 ShAmt = N.getOperand(0); 1579 return true; 1580 } 1581 1582 // SimplifyDemandedBits may have optimized the mask so try restoring any 1583 // bits that are known zero. 
1584 KnownBits Known = CurDAG->computeKnownBits(N->getOperand(0)); 1585 if (ShMask.isSubsetOf(AndMask | Known.Zero)) { 1586 ShAmt = N.getOperand(0); 1587 return true; 1588 } 1589 } 1590 1591 ShAmt = N; 1592 return true; 1593 } 1594 1595 bool RISCVDAGToDAGISel::selectSExti32(SDValue N, SDValue &Val) { 1596 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG && 1597 cast<VTSDNode>(N.getOperand(1))->getVT() == MVT::i32) { 1598 Val = N.getOperand(0); 1599 return true; 1600 } 1601 MVT VT = N.getSimpleValueType(); 1602 if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - 32)) { 1603 Val = N; 1604 return true; 1605 } 1606 1607 return false; 1608 } 1609 1610 bool RISCVDAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) { 1611 if (N.getOpcode() == ISD::AND) { 1612 auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); 1613 if (C && C->getZExtValue() == UINT64_C(0xFFFFFFFF)) { 1614 Val = N.getOperand(0); 1615 return true; 1616 } 1617 } 1618 MVT VT = N.getSimpleValueType(); 1619 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), 32); 1620 if (CurDAG->MaskedValueIsZero(N, Mask)) { 1621 Val = N; 1622 return true; 1623 } 1624 1625 return false; 1626 } 1627 1628 // Return true if all users of this SDNode* only consume the lower \p Bits. 1629 // This can be used to form W instructions for add/sub/mul/shl even when the 1630 // root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if 1631 // SimplifyDemandedBits has made it so some users see a sext_inreg and some 1632 // don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave 1633 // the add/sub/mul/shl to become non-W instructions. By checking the users we 1634 // may be able to use a W instruction and CSE with the other instruction if 1635 // this has happened. We could try to detect that the CSE opportunity exists 1636 // before doing this, but that would be more complicated. 1637 // TODO: Does this need to look through AND/OR/XOR to their users to find more 1638 // opportunities. 
bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const {
  // Only the opcodes listed here are queried by the W-instruction forming
  // patterns; anything else indicates a caller bug.
  assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
          Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
          Node->getOpcode() == ISD::SRL ||
          Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
          isa<ConstantSDNode>(Node)) &&
         "Unexpected opcode");

  // Walk every use; each user must provably consume no more than \p Bits low
  // bits of this node's result, otherwise conservatively answer false.
  for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
    SDNode *User = *UI;
    // Users of this node should have already been instruction selected
    if (!User->isMachineOpcode())
      return false;

    // TODO: Add more opcodes?
    switch (User->getMachineOpcode()) {
    default:
      // Unknown user: assume it may read all bits.
      return false;
    // These *W instructions and int->fp conversions from a 32-bit integer
    // only read the lower 32 bits of their integer input.
    case RISCV::ADDW:
    case RISCV::ADDIW:
    case RISCV::SUBW:
    case RISCV::MULW:
    case RISCV::SLLW:
    case RISCV::SLLIW:
    case RISCV::SRAW:
    case RISCV::SRAIW:
    case RISCV::SRLW:
    case RISCV::SRLIW:
    case RISCV::DIVW:
    case RISCV::DIVUW:
    case RISCV::REMW:
    case RISCV::REMUW:
    case RISCV::ROLW:
    case RISCV::RORW:
    case RISCV::RORIW:
    case RISCV::CLZW:
    case RISCV::CTZW:
    case RISCV::CPOPW:
    case RISCV::SLLIUW:
    case RISCV::FCVT_H_W:
    case RISCV::FCVT_H_WU:
    case RISCV::FCVT_S_W:
    case RISCV::FCVT_S_WU:
    case RISCV::FCVT_D_W:
    case RISCV::FCVT_D_WU:
      if (Bits < 32)
        return false;
      break;
    case RISCV::SLLI:
      // SLLI only uses the lower (XLen - ShAmt) bits.
      if (Bits < Subtarget->getXLen() - User->getConstantOperandVal(1))
        return false;
      break;
    case RISCV::ANDI:
      // ANDI only demands bits below the highest set bit of its immediate
      // (64 - countLeadingZeros(imm) is the immediate's active bit width).
      if (Bits < (64 - countLeadingZeros(User->getConstantOperandVal(1))))
        return false;
      break;
    case RISCV::SEXTB:
      // sext.b reads only the low 8 bits.
      if (Bits < 8)
        return false;
      break;
    case RISCV::SEXTH:
    case RISCV::ZEXTH_RV32:
    case RISCV::ZEXTH_RV64:
      // sext.h/zext.h read only the low 16 bits.
      if (Bits < 16)
        return false;
      break;
    case RISCV::ADDUW:
    case RISCV::SH1ADDUW:
    case RISCV::SH2ADDUW:
    case RISCV::SH3ADDUW:
      // The first operand to add.uw/shXadd.uw is implicitly zero extended from
      // 32 bits.
      if (UI.getOperandNo() != 0 || Bits < 32)
        return false;
      break;
    // Stores read only the low 8/16/32 bits of the stored value, which is
    // operand 0; any other operand use (the address) needs all bits.
    case RISCV::SB:
      if (UI.getOperandNo() != 0 || Bits < 8)
        return false;
      break;
    case RISCV::SH:
      if (UI.getOperandNo() != 0 || Bits < 16)
        return false;
      break;
    case RISCV::SW:
      if (UI.getOperandNo() != 0 || Bits < 32)
        return false;
      break;
    }
  }

  return true;
}

// Select VL as a 5 bit immediate or a value that will become a register. This
// allows us to choose between VSETIVLI or VSETVLI later.
bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
  // A constant VL that fits uimm5 is materialized as a target constant so the
  // patterns can later encode it directly in a VSETIVLI.
  auto *C = dyn_cast<ConstantSDNode>(N);
  if (C && isUInt<5>(C->getZExtValue()))
    VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
                                   N->getValueType(0));
  else
    // Anything else is left untouched and will end up in a register, making
    // this suitable for VSETVLI.
    VL = N;

  // Always succeeds: every VL value can be selected one way or the other.
  return true;
}

// Match any splat of a scalar (constant or not); the scalar operand is
// returned in SplatVal with no range checking.
bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
  if (N.getOpcode() != ISD::SPLAT_VECTOR &&
      N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64 &&
      N.getOpcode() != RISCVISD::VMV_V_X_VL)
    return false;
  SplatVal = N.getOperand(0);
  return true;
}

// Predicate deciding whether a candidate splat immediate is acceptable.
using ValidateFn = bool (*)(int64_t);

// Shared implementation for the selectVSplatSimm5* selectors below: match a
// constant splat, normalize the immediate to the element type's width, then
// accept it only if ValidateImm approves.
static bool selectVSplatSimmHelper(SDValue N, SDValue &SplatVal,
                                   SelectionDAG &DAG,
                                   const RISCVSubtarget &Subtarget,
                                   ValidateFn ValidateImm) {
  if ((N.getOpcode() != ISD::SPLAT_VECTOR &&
       N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64 &&
       N.getOpcode() != RISCVISD::VMV_V_X_VL) ||
      !isa<ConstantSDNode>(N.getOperand(0)))
    return false;

  int64_t SplatImm = cast<ConstantSDNode>(N.getOperand(0))->getSExtValue();

  // ISD::SPLAT_VECTOR, RISCVISD::SPLAT_VECTOR_I64 and RISCVISD::VMV_V_X_VL
  // share semantics when the operand type is wider than the resulting vector
  // element type: an implicit truncation first takes place. Therefore, perform
  // a manual truncation/sign-extension in order to ignore any truncated bits
  // and catch any zero-extended immediate.
  // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
  // sign-extending to (XLenVT -1).
  MVT XLenVT = Subtarget.getXLenVT();
  assert(XLenVT == N.getOperand(0).getSimpleValueType() &&
         "Unexpected splat operand type");
  MVT EltVT = N.getSimpleValueType().getVectorElementType();
  if (EltVT.bitsLT(XLenVT))
    SplatImm = SignExtend64(SplatImm, EltVT.getSizeInBits());

  if (!ValidateImm(SplatImm))
    return false;

  // Return the (possibly re-sign-extended) immediate as an XLenVT constant.
  SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), XLenVT);
  return true;
}

// Match a constant splat whose value fits simm5 ([-16, 15]).
bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
  return selectVSplatSimmHelper(N, SplatVal, *CurDAG, *Subtarget,
                                [](int64_t Imm) { return isInt<5>(Imm); });
}

// Match a constant splat in [-15, 16] — the simm5 range shifted up by one
// (presumably the matching pattern encodes Imm-1; confirm against the .td
// uses of this selector).
bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
  return selectVSplatSimmHelper(
      N, SplatVal, *CurDAG, *Subtarget,
      [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; });
}

// Same range as selectVSplatSimm5Plus1, but additionally rejects zero.
bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
                                                      SDValue &SplatVal) {
  return selectVSplatSimmHelper(
      N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) {
        return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);
      });
}

// Match a constant splat whose value fits uimm5 ([0, 31]). Note that, unlike
// selectVSplatSimmHelper, no truncation/sign-extension to the element width
// is performed here before the range check.
bool RISCVDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &SplatVal) {
  if ((N.getOpcode() != ISD::SPLAT_VECTOR &&
       N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64 &&
       N.getOpcode() != RISCVISD::VMV_V_X_VL) ||
      !isa<ConstantSDNode>(N.getOperand(0)))
    return false;

  int64_t SplatImm = cast<ConstantSDNode>(N.getOperand(0))->getSExtValue();

  if (!isUInt<5>(SplatImm))
    return false;

  SplatVal =
      CurDAG->getTargetConstant(SplatImm, SDLoc(N), Subtarget->getXLenVT());

  return true;
}

// Match a scalar constant that, after sign-extension from Width bits, fits
// simm5; returns it as an XLenVT target constant in Imm.
bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
                                       SDValue &Imm) {
  if (auto *C = dyn_cast<ConstantSDNode>(N)) {
    // Only the low Width bits are meaningful; normalize before range-checking.
    int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);

    if (!isInt<5>(ImmVal))
      return false;

    Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT());
    return true;
  }

  return false;
}

// Merge an ADDI into the offset of a load/store instruction where possible.
// (load (addi base, off1), off2) -> (load base, off1+off2)
// (store val, (addi base, off1), off2) -> (store val, base, off1+off2)
// This is possible when off1+off2 fits a 12-bit immediate.
// Returns true if N was mutated in place (via UpdateNodeOperands).
bool RISCVDAGToDAGISel::doPeepholeLoadStoreADDI(SDNode *N) {
  int OffsetOpIdx;
  int BaseOpIdx;

  // Only attempt this optimisation for I-type loads and S-type stores.
  switch (N->getMachineOpcode()) {
  default:
    return false;
  case RISCV::LB:
  case RISCV::LH:
  case RISCV::LW:
  case RISCV::LBU:
  case RISCV::LHU:
  case RISCV::LWU:
  case RISCV::LD:
  case RISCV::FLH:
  case RISCV::FLW:
  case RISCV::FLD:
    // Loads: operands are (base, offset, ...).
    BaseOpIdx = 0;
    OffsetOpIdx = 1;
    break;
  case RISCV::SB:
  case RISCV::SH:
  case RISCV::SW:
  case RISCV::SD:
  case RISCV::FSH:
  case RISCV::FSW:
  case RISCV::FSD:
    // Stores: operands are (value, base, offset, ...).
    BaseOpIdx = 1;
    OffsetOpIdx = 2;
    break;
  }

  // off2 must be a known constant to fold.
  if (!isa<ConstantSDNode>(N->getOperand(OffsetOpIdx)))
    return false;

  SDValue Base = N->getOperand(BaseOpIdx);

  // If the base is an ADDI, we can merge it in to the load/store.
  if (!Base.isMachineOpcode() || Base.getMachineOpcode() != RISCV::ADDI)
    return false;

  SDValue ImmOperand = Base.getOperand(1);
  uint64_t Offset2 = N->getConstantOperandVal(OffsetOpIdx);

  if (auto *Const = dyn_cast<ConstantSDNode>(ImmOperand)) {
    // Plain constant off1: fold only when the sum still fits simm12.
    int64_t Offset1 = Const->getSExtValue();
    int64_t CombinedOffset = Offset1 + Offset2;
    if (!isInt<12>(CombinedOffset))
      return false;
    ImmOperand = CurDAG->getTargetConstant(CombinedOffset, SDLoc(ImmOperand),
                                           ImmOperand.getValueType());
  } else if (auto *GA = dyn_cast<GlobalAddressSDNode>(ImmOperand)) {
    // If the off1 in (addi base, off1) is a global variable's address (its
    // low part, really), then we can rely on the alignment of that variable
    // to provide a margin of safety before off1 can overflow the 12 bits.
    // Check if off2 falls within that margin; if so off1+off2 can't overflow.
    const DataLayout &DL = CurDAG->getDataLayout();
    Align Alignment = GA->getGlobal()->getPointerAlignment(DL);
    if (Offset2 != 0 && Alignment <= Offset2)
      return false;
    int64_t Offset1 = GA->getOffset();
    int64_t CombinedOffset = Offset1 + Offset2;
    ImmOperand = CurDAG->getTargetGlobalAddress(
        GA->getGlobal(), SDLoc(ImmOperand), ImmOperand.getValueType(),
        CombinedOffset, GA->getTargetFlags());
  } else if (auto *CP = dyn_cast<ConstantPoolSDNode>(ImmOperand)) {
    // Ditto. Constant-pool entries carry their own alignment guarantee.
    Align Alignment = CP->getAlign();
    if (Offset2 != 0 && Alignment <= Offset2)
      return false;
    int64_t Offset1 = CP->getOffset();
    int64_t CombinedOffset = Offset1 + Offset2;
    ImmOperand = CurDAG->getTargetConstantPool(
        CP->getConstVal(), ImmOperand.getValueType(), CP->getAlign(),
        CombinedOffset, CP->getTargetFlags());
  } else {
    return false;
  }

  LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
  LLVM_DEBUG(Base->dump(CurDAG));
  LLVM_DEBUG(dbgs() << "\nN: ");
  LLVM_DEBUG(N->dump(CurDAG));
  LLVM_DEBUG(dbgs() << "\n");

  // Modify the offset operand of the load/store. The trailing operand
  // (N->getOperand(2)/getOperand(3)) is carried over unchanged — presumably
  // the chain; confirm against the instruction definitions.
  if (BaseOpIdx == 0) // Load
    CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand,
                               N->getOperand(2));
  else // Store
    CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0),
                               ImmOperand, N->getOperand(3));

  return true;
}

// Try to remove sext.w if the input is a W instruction or can be made into
// a W instruction cheaply.
bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
  // Look for the sext.w pattern, addiw rd, rs1, 0.
  if (N->getMachineOpcode() != RISCV::ADDIW ||
      !isNullConstant(N->getOperand(1)))
    return false;

  SDValue N0 = N->getOperand(0);
  if (!N0.isMachineOpcode())
    return false;

  switch (N0.getMachineOpcode()) {
  default:
    break;
  case RISCV::ADD:
  case RISCV::ADDI:
  case RISCV::SUB:
  case RISCV::MUL:
  case RISCV::SLLI: {
    // Convert sext.w+add/sub/mul to their W instructions. This will create
    // a new independent instruction. This improves latency.
    unsigned Opc;
    switch (N0.getMachineOpcode()) {
    default:
      llvm_unreachable("Unexpected opcode!");
    case RISCV::ADD: Opc = RISCV::ADDW; break;
    case RISCV::ADDI: Opc = RISCV::ADDIW; break;
    case RISCV::SUB: Opc = RISCV::SUBW; break;
    case RISCV::MUL: Opc = RISCV::MULW; break;
    case RISCV::SLLI: Opc = RISCV::SLLIW; break;
    }

    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);

    // Shift amount needs to be uimm5.
    if (N0.getMachineOpcode() == RISCV::SLLI &&
        !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
      break;

    // Build the W-form instruction with the same operands and replace all
    // uses of the sext.w with it.
    SDNode *Result =
        CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
                               N00, N01);
    ReplaceUses(N, Result);
    return true;
  }
  case RISCV::ADDW:
  case RISCV::ADDIW:
  case RISCV::SUBW:
  case RISCV::MULW:
  case RISCV::SLLIW:
    // Result is already sign extended just remove the sext.w.
    // NOTE: We only handle the nodes that are selected with hasAllWUsers.
    ReplaceUses(N, N0.getNode());
    return true;
  }

  // No profitable rewrite found; leave the sext.w in place.
  return false;
}

// This pass converts a legalized DAG into a RISCV-specific DAG, ready
// for instruction scheduling.
FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM) {
  return new RISCVDAGToDAGISel(TM);
}