//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISCV ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the RISCV target.
//
//===----------------------------------------------------------------------===//

#include "RISCVISelDAGToDAG.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCVISelLowering.h"
#include "RISCVMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-isel"

namespace llvm {
namespace RISCV {
#define GET_RISCVVSSEGTable_IMPL
#define GET_RISCVVLSEGTable_IMPL
#define GET_RISCVVLXSEGTable_IMPL
#define GET_RISCVVSXSEGTable_IMPL
#define GET_RISCVVLETable_IMPL
#define GET_RISCVVSETable_IMPL
#define GET_RISCVVLXTable_IMPL
#define GET_RISCVVSXTable_IMPL
#include "RISCVGenSearchableTables.inc"
} // namespace RISCV
} // namespace llvm

void RISCVDAGToDAGISel::PreprocessISelDAG() {
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
                                       E = CurDAG->allnodes_end();
       I != E;) {
    SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.

    // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
    // load. Done after lowering and combining so that we have a chance to
    // optimize this to VMV_V_X_VL when the upper bits aren't needed.
    if (N->getOpcode() != RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL)
      continue;

    assert(N->getNumOperands() == 3 && "Unexpected number of operands");
    MVT VT = N->getSimpleValueType(0);
    SDValue Lo = N->getOperand(0);
    SDValue Hi = N->getOperand(1);
    SDValue VL = N->getOperand(2);
    assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
           Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
           "Unexpected VTs!");
    MachineFunction &MF = CurDAG->getMachineFunction();
    RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
    SDLoc DL(N);

    // We use the same frame index we use for moving two i32s into 64-bit FPR.
    // This is an analogous operation.
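    // Illustrative sketch of the sequence built below (register names are
    // placeholders, not what selection will necessarily pick):
    //   sw       lo, 0(slot)
    //   sw       hi, 4(slot)
    //   vlse64.v vd, (slot), zero   ; x0 stride re-reads the same 8 bytes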
    int FI = FuncInfo->getMoveF64FrameIndex(MF);
    MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
    const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
    SDValue StackSlot =
        CurDAG->getFrameIndex(FI, TLI.getPointerTy(CurDAG->getDataLayout()));

    SDValue Chain = CurDAG->getEntryNode();
    Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));

    SDValue OffsetSlot =
        CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), DL);
    Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
                          Align(8));

    Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);

    SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
    SDValue IntID =
        CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
    SDValue Ops[] = {Chain, IntID, StackSlot,
                     CurDAG->getRegister(RISCV::X0, MVT::i64), VL};

    SDValue Result = CurDAG->getMemIntrinsicNode(
        ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MVT::i64, MPI, Align(8),
        MachineMemOperand::MOLoad);

    // We're about to replace all uses of the SPLAT_VECTOR_SPLIT_I64 with the
    // vlse we created. This will cause general havoc on the dag because
    // anything below the conversion could be folded into other existing nodes.
    // To avoid invalidating 'I', back it up to the convert node.
    --I;
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);

    // Now that we did that, the node is dead. Increment the iterator to the
    // next node to process, then delete N.
    ++I;
    CurDAG->DeleteNode(N);
  }
}

void RISCVDAGToDAGISel::PostprocessISelDAG() {
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    // Skip dead nodes and any non-machine opcodes.
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    MadeChange |= doPeepholeSExtW(N);
    MadeChange |= doPeepholeLoadStoreADDI(N);
  }

  if (MadeChange)
    CurDAG->RemoveDeadNodes();
}

static SDNode *selectImmWithConstantPool(SelectionDAG *CurDAG, const SDLoc &DL,
                                         const MVT VT, int64_t Imm,
                                         const RISCVSubtarget &Subtarget) {
  assert(VT == MVT::i64 && "Expecting MVT::i64");
  const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(CurDAG->getConstantPool(
      ConstantInt::get(EVT(VT).getTypeForEVT(*CurDAG->getContext()), Imm), VT));
  SDValue Addr = TLI->getAddr(CP, *CurDAG);
  SDValue Offset = CurDAG->getTargetConstant(0, DL, VT);
  // Since there is no data race, the chain can be the entry node.
  SDNode *Load = CurDAG->getMachineNode(RISCV::LD, DL, VT, Addr, Offset,
                                        CurDAG->getEntryNode());
  MachineFunction &MF = CurDAG->getMachineFunction();
  MachineMemOperand *MemOp = MF.getMachineMemOperand(
      MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
      LLT(VT), CP->getAlign());
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Load), {MemOp});
  return Load;
}

static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
                         int64_t Imm, const RISCVSubtarget &Subtarget) {
  MVT XLenVT = Subtarget.getXLenVT();
  RISCVMatInt::InstSeq Seq =
      RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());

  // If Imm is expensive to build, then we put it into constant pool.
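  // For example, a 64-bit constant such as 0x1234567890ABCDEF needs a chain of
  // LUI/ADDI/SLLI instructions on RV64; when that chain exceeds the configured
  // cost threshold it is usually cheaper to load the value with a single LD
  // from the constant pool instead.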
  if (Subtarget.useConstantPoolForLargeInts() &&
      Seq.size() > Subtarget.getMaxBuildIntsCost())
    return selectImmWithConstantPool(CurDAG, DL, VT, Imm, Subtarget);

  SDNode *Result = nullptr;
  SDValue SrcReg = CurDAG->getRegister(RISCV::X0, XLenVT);
  for (RISCVMatInt::Inst &Inst : Seq) {
    SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, XLenVT);
    if (Inst.Opc == RISCV::LUI)
      Result = CurDAG->getMachineNode(RISCV::LUI, DL, XLenVT, SDImm);
    else if (Inst.Opc == RISCV::ADDUW)
      Result = CurDAG->getMachineNode(RISCV::ADDUW, DL, XLenVT, SrcReg,
                                      CurDAG->getRegister(RISCV::X0, XLenVT));
    else if (Inst.Opc == RISCV::SH1ADD || Inst.Opc == RISCV::SH2ADD ||
             Inst.Opc == RISCV::SH3ADD)
      Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SrcReg);
    else
      Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SDImm);

    // Only the first instruction has X0 as its source.
    SrcReg = SDValue(Result, 0);
  }

  return Result;
}

static SDValue createTupleImpl(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
                               unsigned RegClassID, unsigned SubReg0) {
  assert(Regs.size() >= 2 && Regs.size() <= 8);

  SDLoc DL(Regs[0]);
  SmallVector<SDValue, 8> Ops;

  Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32));

  for (unsigned I = 0; I < Regs.size(); ++I) {
    Ops.push_back(Regs[I]);
    Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32));
  }
  SDNode *N =
      CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
  return SDValue(N, 0);
}

static SDValue createM1Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
                             unsigned NF) {
  static const unsigned RegClassIDs[] = {
      RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID,
      RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID,
      RISCV::VRN8M1RegClassID};

  return createTupleImpl(CurDAG, Regs, RegClassIDs[NF - 2], RISCV::sub_vrm1_0);
}

static SDValue createM2Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
                             unsigned NF) {
  static const unsigned RegClassIDs[] = {RISCV::VRN2M2RegClassID,
                                         RISCV::VRN3M2RegClassID,
                                         RISCV::VRN4M2RegClassID};

  return createTupleImpl(CurDAG, Regs, RegClassIDs[NF - 2], RISCV::sub_vrm2_0);
}

static SDValue createM4Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
                             unsigned NF) {
  return createTupleImpl(CurDAG, Regs, RISCV::VRN2M4RegClassID,
                         RISCV::sub_vrm4_0);
}

static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
                           unsigned NF, RISCVII::VLMUL LMUL) {
  switch (LMUL) {
  default:
    llvm_unreachable("Invalid LMUL.");
  case RISCVII::VLMUL::LMUL_F8:
  case RISCVII::VLMUL::LMUL_F4:
  case RISCVII::VLMUL::LMUL_F2:
  case RISCVII::VLMUL::LMUL_1:
    return createM1Tuple(CurDAG, Regs, NF);
  case RISCVII::VLMUL::LMUL_2:
    return createM2Tuple(CurDAG, Regs, NF);
  case RISCVII::VLMUL::LMUL_4:
    return createM4Tuple(CurDAG, Regs, NF);
  }
}

void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
    SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
    bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
    bool IsLoad, MVT *IndexVT) {
  SDValue Chain = Node->getOperand(0);
  SDValue Glue;

  SDValue Base;
  SelectBaseAddr(Node->getOperand(CurOp++), Base);
  Operands.push_back(Base); // Base pointer.
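  // Operands are appended in the order the load/store pseudos expect them:
  // base pointer (above), optional stride or index, optional mask (copied into
  // V0), VL, SEW, a tail-policy immediate for masked loads, the chain, and
  // glue when a mask copy was created.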

  if (IsStridedOrIndexed) {
    Operands.push_back(Node->getOperand(CurOp++)); // Index.
    if (IndexVT)
      *IndexVT = Operands.back()->getSimpleValueType(0);
  }

  if (IsMasked) {
    // Mask needs to be copied to V0.
    SDValue Mask = Node->getOperand(CurOp++);
    Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());
    Glue = Chain.getValue(1);
    Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));
  }
  SDValue VL;
  selectVLOp(Node->getOperand(CurOp++), VL);
  Operands.push_back(VL);

  MVT XLenVT = Subtarget->getXLenVT();
  SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
  Operands.push_back(SEWOp);

  // Masked load has the tail policy argument.
  if (IsMasked && IsLoad) {
    // Policy must be a constant.
    uint64_t Policy = Node->getConstantOperandVal(CurOp++);
    SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
    Operands.push_back(PolicyOp);
  }

  Operands.push_back(Chain); // Chain.
  if (Glue)
    Operands.push_back(Glue);
}

void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked,
                                    bool IsStrided) {
  SDLoc DL(Node);
  unsigned NF = Node->getNumValues() - 1;
  MVT VT = Node->getSimpleValueType(0);
  unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  unsigned CurOp = 2;
  SmallVector<SDValue, 8> Operands;
  if (IsMasked) {
    SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
                                 Node->op_begin() + CurOp + NF);
    SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
    Operands.push_back(MaskedOff);
    CurOp += NF;
  }

  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
                             Operands, /*IsLoad=*/true);

  const RISCV::VLSEGPseudo *P =
      RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
                            static_cast<unsigned>(LMUL));
  MachineSDNode *Load =
      CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);

  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

  SDValue SuperReg = SDValue(Load, 0);
  for (unsigned I = 0; I < NF; ++I) {
    unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
    ReplaceUses(SDValue(Node, I),
                CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
  }

  ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
  CurDAG->RemoveDeadNode(Node);
}

void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) {
  SDLoc DL(Node);
  unsigned NF = Node->getNumValues() - 2; // Do not count VL and Chain.
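  // Fault-only-first loads also define a new VL; it is read back below with
  // PseudoReadVL and returned as the node's second-to-last result.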
  MVT VT = Node->getSimpleValueType(0);
  MVT XLenVT = Subtarget->getXLenVT();
  unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  unsigned CurOp = 2;
  SmallVector<SDValue, 7> Operands;
  if (IsMasked) {
    SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
                                 Node->op_begin() + CurOp + NF);
    SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
    Operands.push_back(MaskedOff);
    CurOp += NF;
  }

  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ false, Operands,
                             /*IsLoad=*/true);

  const RISCV::VLSEGPseudo *P =
      RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
                            Log2SEW, static_cast<unsigned>(LMUL));
  MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
                                               MVT::Other, MVT::Glue, Operands);
  SDNode *ReadVL = CurDAG->getMachineNode(RISCV::PseudoReadVL, DL, XLenVT,
                                          /*Glue*/ SDValue(Load, 2));

  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

  SDValue SuperReg = SDValue(Load, 0);
  for (unsigned I = 0; I < NF; ++I) {
    unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
    ReplaceUses(SDValue(Node, I),
                CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
  }

  ReplaceUses(SDValue(Node, NF), SDValue(ReadVL, 0));   // VL
  ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 1)); // Chain
  CurDAG->RemoveDeadNode(Node);
}

void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked,
                                     bool IsOrdered) {
  SDLoc DL(Node);
  unsigned NF = Node->getNumValues() - 1;
  MVT VT = Node->getSimpleValueType(0);
  unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  unsigned CurOp = 2;
  SmallVector<SDValue, 8> Operands;
  if (IsMasked) {
    SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
                                 Node->op_begin() + CurOp + NF);
    SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
    Operands.push_back(MaskedOff);
    CurOp += NF;
  }

  MVT IndexVT;
  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ true, Operands,
                             /*IsLoad=*/true, &IndexVT);

  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
         "Element count mismatch");

  RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
  unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
  if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
    report_fatal_error("The V extension does not support EEW=64 for index "
                       "values when XLEN=32");
  }
  const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
      NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
      static_cast<unsigned>(IndexLMUL));
  MachineSDNode *Load =
      CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);

  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

  SDValue SuperReg = SDValue(Load, 0);
  for (unsigned I = 0; I < NF; ++I) {
    unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
    ReplaceUses(SDValue(Node, I),
                CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
  }

  ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
  CurDAG->RemoveDeadNode(Node);
}
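
// For the store forms below, NF is recovered from the operand count. As an
// illustrative example, an unmasked unit-stride vsseg3 intrinsic has the
// operands (chain, intrinsic id, 3 segment values, base pointer, VL), so
// NF = 7 - 4 = 3; strided and masked variants each carry one extra operand.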
void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, bool IsMasked,
                                    bool IsStrided) {
  SDLoc DL(Node);
  unsigned NF = Node->getNumOperands() - 4;
  if (IsStrided)
    NF--;
  if (IsMasked)
    NF--;
  MVT VT = Node->getOperand(2)->getSimpleValueType(0);
  unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
  SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
  SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);

  SmallVector<SDValue, 8> Operands;
  Operands.push_back(StoreVal);
  unsigned CurOp = 2 + NF;

  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
                             Operands);

  const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
      NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
  MachineSDNode *Store =
      CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);

  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});

  ReplaceNode(Node, Store);
}

void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked,
                                     bool IsOrdered) {
  SDLoc DL(Node);
  unsigned NF = Node->getNumOperands() - 5;
  if (IsMasked)
    --NF;
  MVT VT = Node->getOperand(2)->getSimpleValueType(0);
  unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
  SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
  SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);

  SmallVector<SDValue, 8> Operands;
  Operands.push_back(StoreVal);
  unsigned CurOp = 2 + NF;

  MVT IndexVT;
  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ true, Operands,
                             /*IsLoad=*/false, &IndexVT);

  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
         "Element count mismatch");

  RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
  unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
  if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
    report_fatal_error("The V extension does not support EEW=64 for index "
                       "values when XLEN=32");
  }
  const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
      NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
      static_cast<unsigned>(IndexLMUL));
  MachineSDNode *Store =
      CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);

  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});

  ReplaceNode(Node, Store);
}

void RISCVDAGToDAGISel::Select(SDNode *Node) {
  // If we have a custom node, we have already selected.
  if (Node->isMachineOpcode()) {
    LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
    Node->setNodeId(-1);
    return;
  }

  // Instruction Selection not handled by the auto-generated tablegen selection
  // should be handled here.
  unsigned Opcode = Node->getOpcode();
  MVT XLenVT = Subtarget->getXLenVT();
  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(0);

  switch (Opcode) {
  case ISD::Constant: {
    auto *ConstNode = cast<ConstantSDNode>(Node);
    if (VT == XLenVT && ConstNode->isZero()) {
      SDValue New =
          CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, XLenVT);
      ReplaceNode(Node, New.getNode());
      return;
    }
    int64_t Imm = ConstNode->getSExtValue();
    // If the upper XLen-16 bits are not used, try to convert this to a simm12
    // by sign extending bit 15.
    if (isUInt<16>(Imm) && isInt<12>(SignExtend64(Imm, 16)) &&
        hasAllHUsers(Node))
      Imm = SignExtend64(Imm, 16);
    // If the upper 32-bits are not used try to convert this into a simm32 by
    // sign extending bit 32.
    if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
      Imm = SignExtend64(Imm, 32);

    ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget));
    return;
  }
  case ISD::FrameIndex: {
    SDValue Imm = CurDAG->getTargetConstant(0, DL, XLenVT);
    int FI = cast<FrameIndexSDNode>(Node)->getIndex();
    SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT);
    ReplaceNode(Node, CurDAG->getMachineNode(RISCV::ADDI, DL, VT, TFI, Imm));
    return;
  }
  case ISD::SRL: {
    // Optimize (srl (and X, C2), C) ->
    //          (srli (slli X, (XLen-C3)), (XLen-C3) + C)
    // Where C2 is a mask with C3 trailing ones.
    // Taking into account that the C2 may have had lower bits unset by
    // SimplifyDemandedBits. This avoids materializing the C2 immediate.
    // This pattern occurs when type legalizing right shifts for types with
    // less than XLen bits.
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C)
      break;
    SDValue N0 = Node->getOperand(0);
    if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
        !isa<ConstantSDNode>(N0.getOperand(1)))
      break;
    unsigned ShAmt = N1C->getZExtValue();
    uint64_t Mask = N0.getConstantOperandVal(1);
    Mask |= maskTrailingOnes<uint64_t>(ShAmt);
    if (!isMask_64(Mask))
      break;
    unsigned TrailingOnes = countTrailingOnes(Mask);
    // 32 trailing ones should use srliw via tablegen pattern.
    if (TrailingOnes == 32 || ShAmt >= TrailingOnes)
      break;
    unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
    SDNode *SLLI =
        CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
                               CurDAG->getTargetConstant(LShAmt, DL, VT));
    SDNode *SRLI = CurDAG->getMachineNode(
        RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
        CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
    ReplaceNode(Node, SRLI);
    return;
  }
  case ISD::SRA: {
    // Optimize (sra (sext_inreg X, i16), C) ->
    //          (srai (slli X, (XLen-16)), (XLen-16) + C)
    // And (sra (sext_inreg X, i8), C) ->
    //     (srai (slli X, (XLen-8)), (XLen-8) + C)
    // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
    // This transform matches the code we get without Zbb. The shifts are more
    // compressible, and this can help expose CSE opportunities in the sdiv by
    // constant optimization.
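    // For example, on RV64 (sra (sext_inreg X, i16), 3) becomes
    // (srai (slli X, 48), 51).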
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C)
      break;
    SDValue N0 = Node->getOperand(0);
    if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
      break;
    unsigned ShAmt = N1C->getZExtValue();
    unsigned ExtSize =
        cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
    // ExtSize of 32 should use sraiw via tablegen pattern.
    if (ExtSize >= 32 || ShAmt >= ExtSize)
      break;
    unsigned LShAmt = Subtarget->getXLen() - ExtSize;
    SDNode *SLLI =
        CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
                               CurDAG->getTargetConstant(LShAmt, DL, VT));
    SDNode *SRAI = CurDAG->getMachineNode(
        RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
        CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
    ReplaceNode(Node, SRAI);
    return;
  }
  case ISD::AND: {
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C)
      break;

    SDValue N0 = Node->getOperand(0);

    bool LeftShift = N0.getOpcode() == ISD::SHL;
    if (!LeftShift && N0.getOpcode() != ISD::SRL)
      break;

    auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!C)
      break;
    uint64_t C2 = C->getZExtValue();
    unsigned XLen = Subtarget->getXLen();
    if (!C2 || C2 >= XLen)
      break;

    uint64_t C1 = N1C->getZExtValue();

    // Keep track of whether this is an andi, zext.h, or zext.w.
    bool ZExtOrANDI = isInt<12>(N1C->getSExtValue());
    if (C1 == UINT64_C(0xFFFF) &&
        (Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp()))
      ZExtOrANDI = true;
    if (C1 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba())
      ZExtOrANDI = true;

    // Clear irrelevant bits in the mask.
    if (LeftShift)
      C1 &= maskTrailingZeros<uint64_t>(C2);
    else
      C1 &= maskTrailingOnes<uint64_t>(XLen - C2);

    // Some transforms should only be done if the shift has a single use or
    // the AND would become (srli (slli X, 32), 32)
    bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);

    SDValue X = N0.getOperand(0);

    // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
    // with c3 leading zeros.
    if (!LeftShift && isMask_64(C1)) {
      uint64_t C3 = XLen - (64 - countLeadingZeros(C1));
      if (C2 < C3) {
        // If the number of leading zeros is C2+32 this can be SRLIW.
        if (C2 + 32 == C3) {
          SDNode *SRLIW =
              CurDAG->getMachineNode(RISCV::SRLIW, DL, XLenVT, X,
                                     CurDAG->getTargetConstant(C2, DL, XLenVT));
          ReplaceNode(Node, SRLIW);
          return;
        }

        // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32) if
        // c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
        //
        // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
        // legalized and goes through DAG combine.
        SDValue Y;
        if (C2 >= 32 && (C3 - C2) == 1 && N0.hasOneUse() &&
            selectSExti32(X, Y)) {
          SDNode *SRAIW =
              CurDAG->getMachineNode(RISCV::SRAIW, DL, XLenVT, Y,
                                     CurDAG->getTargetConstant(31, DL, XLenVT));
          SDNode *SRLIW = CurDAG->getMachineNode(
              RISCV::SRLIW, DL, XLenVT, SDValue(SRAIW, 0),
              CurDAG->getTargetConstant(C3 - 32, DL, XLenVT));
          ReplaceNode(Node, SRLIW);
          return;
        }

        // (srli (slli x, c3-c2), c3).
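        // For instance, with XLen=64, c2=2 and c1=0x00FFFFFFFFFFFFFF (c3=8),
        // (and (srl x, 2), c1) becomes (srli (slli x, 6), 8).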
        if (OneUseOrZExtW && !ZExtOrANDI) {
          SDNode *SLLI = CurDAG->getMachineNode(
              RISCV::SLLI, DL, XLenVT, X,
              CurDAG->getTargetConstant(C3 - C2, DL, XLenVT));
          SDNode *SRLI =
              CurDAG->getMachineNode(RISCV::SRLI, DL, XLenVT, SDValue(SLLI, 0),
                                     CurDAG->getTargetConstant(C3, DL, XLenVT));
          ReplaceNode(Node, SRLI);
          return;
        }
      }
    }

    // Turn (and (shl x, c2), c1) -> (srli (slli x, c2+c3), c3) if c1 is a mask
    // shifted by c2 bits with c3 leading zeros.
    if (LeftShift && isShiftedMask_64(C1)) {
      uint64_t C3 = XLen - (64 - countLeadingZeros(C1));

      if (C2 + C3 < XLen &&
          C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + C3)) << C2)) {
        // Use slli.uw when possible.
        if ((XLen - (C2 + C3)) == 32 && Subtarget->hasStdExtZba()) {
          SDNode *SLLIUW =
              CurDAG->getMachineNode(RISCV::SLLIUW, DL, XLenVT, X,
                                     CurDAG->getTargetConstant(C2, DL, XLenVT));
          ReplaceNode(Node, SLLIUW);
          return;
        }

        // (srli (slli x, c2+c3), c3)
        if (OneUseOrZExtW && !ZExtOrANDI) {
          SDNode *SLLI = CurDAG->getMachineNode(
              RISCV::SLLI, DL, XLenVT, X,
              CurDAG->getTargetConstant(C2 + C3, DL, XLenVT));
          SDNode *SRLI =
              CurDAG->getMachineNode(RISCV::SRLI, DL, XLenVT, SDValue(SLLI, 0),
                                     CurDAG->getTargetConstant(C3, DL, XLenVT));
          ReplaceNode(Node, SRLI);
          return;
        }
      }
    }

    // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
    // shifted mask with c2 leading zeros and c3 trailing zeros.
    if (!LeftShift && isShiftedMask_64(C1)) {
      uint64_t Leading = XLen - (64 - countLeadingZeros(C1));
      uint64_t C3 = countTrailingZeros(C1);
      if (Leading == C2 && C2 + C3 < XLen && OneUseOrZExtW && !ZExtOrANDI) {
        SDNode *SRLI = CurDAG->getMachineNode(
            RISCV::SRLI, DL, XLenVT, X,
            CurDAG->getTargetConstant(C2 + C3, DL, XLenVT));
        SDNode *SLLI =
            CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLI, 0),
                                   CurDAG->getTargetConstant(C3, DL, XLenVT));
        ReplaceNode(Node, SLLI);
        return;
      }
      // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
      if (Leading > 32 && (Leading - 32) == C2 && C2 + C3 < 32 &&
          OneUseOrZExtW && !ZExtOrANDI) {
        SDNode *SRLIW = CurDAG->getMachineNode(
            RISCV::SRLIW, DL, XLenVT, X,
            CurDAG->getTargetConstant(C2 + C3, DL, XLenVT));
        SDNode *SLLI =
            CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLIW, 0),
                                   CurDAG->getTargetConstant(C3, DL, XLenVT));
        ReplaceNode(Node, SLLI);
        return;
      }
    }

    // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
    // shifted mask with no leading zeros and c3 trailing zeros.
    if (LeftShift && isShiftedMask_64(C1)) {
      uint64_t Leading = XLen - (64 - countLeadingZeros(C1));
      uint64_t C3 = countTrailingZeros(C1);
      if (Leading == 0 && C2 < C3 && OneUseOrZExtW && !ZExtOrANDI) {
        SDNode *SRLI = CurDAG->getMachineNode(
            RISCV::SRLI, DL, XLenVT, X,
            CurDAG->getTargetConstant(C3 - C2, DL, XLenVT));
        SDNode *SLLI =
            CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLI, 0),
                                   CurDAG->getTargetConstant(C3, DL, XLenVT));
        ReplaceNode(Node, SLLI);
        return;
      }
      // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
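      // (When Leading + C2 == 32, every bit of x that survives the mask lies
      // in x's low 32 bits, so the narrower SRLIW suffices.)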
      if (C2 < C3 && Leading + C2 == 32 && OneUseOrZExtW && !ZExtOrANDI) {
        SDNode *SRLIW = CurDAG->getMachineNode(
            RISCV::SRLIW, DL, XLenVT, X,
            CurDAG->getTargetConstant(C3 - C2, DL, XLenVT));
        SDNode *SLLI =
            CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLIW, 0),
                                   CurDAG->getTargetConstant(C3, DL, XLenVT));
        ReplaceNode(Node, SLLI);
        return;
      }
    }

    break;
  }
  case ISD::MUL: {
    // Special case for calculating (mul (and X, C2), C1) where the full
    // product fits in XLen bits. We can shift X left by the number of leading
    // zeros in C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the
    // final product has XLen trailing zeros, putting it in the output of
    // MULHU. This can avoid materializing a constant in a register for C2.

    // RHS should be a constant.
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C || !N1C->hasOneUse())
      break;

    // LHS should be an AND with constant.
    SDValue N0 = Node->getOperand(0);
    if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
      break;

    uint64_t C2 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();

    // Constant should be a mask.
    if (!isMask_64(C2))
      break;

    // This should be the only use of the AND unless we will use
    // (SRLI (SLLI X, 32), 32). We don't use a shift pair for other AND
    // constants.
    if (!N0.hasOneUse() && C2 != UINT64_C(0xFFFFFFFF))
      break;

    // If this can be an ANDI, ZEXT.H or ZEXT.W we don't need to do this
    // optimization.
    if (isInt<12>(C2) ||
        (C2 == UINT64_C(0xFFFF) &&
         (Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp())) ||
        (C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba()))
      break;

    // We need to shift left the AND input and C1 by a total of XLen bits.

    // How far left do we need to shift the AND input?
    unsigned XLen = Subtarget->getXLen();
    unsigned LeadingZeros = XLen - (64 - countLeadingZeros(C2));

    // The constant gets shifted by the remaining amount unless that would
    // shift bits out.
    uint64_t C1 = N1C->getZExtValue();
    unsigned ConstantShift = XLen - LeadingZeros;
    if (ConstantShift > (XLen - (64 - countLeadingZeros(C1))))
      break;

    uint64_t ShiftedC1 = C1 << ConstantShift;
    // If this is RV32, we need to sign extend the constant.
    if (XLen == 32)
      ShiftedC1 = SignExtend64(ShiftedC1, 32);

    // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
    SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget);
    SDNode *SLLI =
        CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
                               CurDAG->getTargetConstant(LeadingZeros, DL, VT));
    SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
                                           SDValue(SLLI, 0), SDValue(Imm, 0));
    ReplaceNode(Node, MULHU);
    return;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo = Node->getConstantOperandVal(0);
    switch (IntNo) {
    // By default we do not custom select any intrinsic.
    default:
      break;
    case Intrinsic::riscv_vmsgeu:
    case Intrinsic::riscv_vmsge: {
      SDValue Src1 = Node->getOperand(1);
      SDValue Src2 = Node->getOperand(2);
      bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
      bool IsCmpUnsignedZero = false;
      // Only custom select scalar second operand.
      if (Src2.getValueType() != XLenVT)
        break;
      // Small constants are handled with patterns.
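      // (Immediates in [-15, 16] can be handled by the patterns, presumably as
      // vmsgt{u}.vi with c-1, whose simm5 range is [-16, 15]; only other
      // scalar operands reach this custom selection code.)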
      if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
        int64_t CVal = C->getSExtValue();
        if (CVal >= -15 && CVal <= 16) {
          if (!IsUnsigned || CVal != 0)
            break;
          IsCmpUnsignedZero = true;
        }
      }
      MVT Src1VT = Src1.getSimpleValueType();
      unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode;
      switch (RISCVTargetLowering::getLMUL(Src1VT)) {
      default:
        llvm_unreachable("Unexpected LMUL!");
#define CASE_VMSLT_VMNAND_VMSET_OPCODES(lmulenum, suffix, suffix_b)            \
  case RISCVII::VLMUL::lmulenum:                                               \
    VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix                 \
                             : RISCV::PseudoVMSLT_VX_##suffix;                 \
    VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix;                            \
    VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b;                             \
    break;
        CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F8, MF8, B1)
        CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F4, MF4, B2)
        CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F2, MF2, B4)
        CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_1, M1, B8)
        CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_2, M2, B16)
        CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_4, M4, B32)
        CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_8, M8, B64)
#undef CASE_VMSLT_VMNAND_VMSET_OPCODES
      }
      SDValue SEW = CurDAG->getTargetConstant(
          Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
      SDValue VL;
      selectVLOp(Node->getOperand(3), VL);

      // If vmsgeu with 0 immediate, expand it to vmset.
      if (IsCmpUnsignedZero) {
        ReplaceNode(Node, CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW));
        return;
      }

      // Expand to
      // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
      SDValue Cmp = SDValue(
          CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
          0);
      ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
                                               {Cmp, Cmp, VL, SEW}));
      return;
    }
    case Intrinsic::riscv_vmsgeu_mask:
    case Intrinsic::riscv_vmsge_mask: {
      SDValue Src1 = Node->getOperand(2);
      SDValue Src2 = Node->getOperand(3);
      bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
      bool IsCmpUnsignedZero = false;
      // Only custom select scalar second operand.
      if (Src2.getValueType() != XLenVT)
        break;
      // Small constants are handled with patterns.
      if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
        int64_t CVal = C->getSExtValue();
        if (CVal >= -15 && CVal <= 16) {
          if (!IsUnsigned || CVal != 0)
            break;
          IsCmpUnsignedZero = true;
        }
      }
      MVT Src1VT = Src1.getSimpleValueType();
      unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
          VMSetOpcode, VMANDOpcode;
      switch (RISCVTargetLowering::getLMUL(Src1VT)) {
      default:
        llvm_unreachable("Unexpected LMUL!");
#define CASE_VMSLT_VMSET_OPCODES(lmulenum, suffix, suffix_b)                   \
  case RISCVII::VLMUL::lmulenum:                                               \
    VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix                 \
                             : RISCV::PseudoVMSLT_VX_##suffix;                 \
    VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK      \
                                 : RISCV::PseudoVMSLT_VX_##suffix##_MASK;      \
    VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b;                             \
    break;
        CASE_VMSLT_VMSET_OPCODES(LMUL_F8, MF8, B1)
        CASE_VMSLT_VMSET_OPCODES(LMUL_F4, MF4, B2)
        CASE_VMSLT_VMSET_OPCODES(LMUL_F2, MF2, B4)
        CASE_VMSLT_VMSET_OPCODES(LMUL_1, M1, B8)
        CASE_VMSLT_VMSET_OPCODES(LMUL_2, M2, B16)
        CASE_VMSLT_VMSET_OPCODES(LMUL_4, M4, B32)
        CASE_VMSLT_VMSET_OPCODES(LMUL_8, M8, B64)
#undef CASE_VMSLT_VMSET_OPCODES
      }
      // Mask operations use the LMUL from the mask type.
      switch (RISCVTargetLowering::getLMUL(VT)) {
      default:
        llvm_unreachable("Unexpected LMUL!");
#define CASE_VMXOR_VMANDN_VMAND_OPCODES(lmulenum, suffix)                      \
  case RISCVII::VLMUL::lmulenum:                                               \
    VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix;                              \
    VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix;                            \
    VMANDOpcode = RISCV::PseudoVMAND_MM_##suffix;                              \
    break;
        CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_F8, MF8)
        CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_F4, MF4)
        CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_F2, MF2)
        CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_1, M1)
        CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_2, M2)
        CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_4, M4)
        CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_8, M8)
#undef CASE_VMXOR_VMANDN_VMAND_OPCODES
      }
      SDValue SEW = CurDAG->getTargetConstant(
          Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
      SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
      SDValue VL;
      selectVLOp(Node->getOperand(5), VL);
      SDValue MaskedOff = Node->getOperand(1);
      SDValue Mask = Node->getOperand(4);

      // If vmsgeu_mask with 0 immediate, expand it to {vmset, vmand}.
      if (IsCmpUnsignedZero) {
        SDValue VMSet =
            SDValue(CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW), 0);
        ReplaceNode(Node, CurDAG->getMachineNode(VMANDOpcode, DL, VT,
                                                 {Mask, VMSet, VL, MaskSEW}));
        return;
      }

      // If the MaskedOff value and the Mask are the same value use
      // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
      // This avoids needing to copy v0 to vd before starting the next sequence.
      if (Mask == MaskedOff) {
        SDValue Cmp = SDValue(
            CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
            0);
        ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
                                                 {Mask, Cmp, VL, MaskSEW}));
        return;
      }

      // Mask needs to be copied to V0.
      SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
                                           RISCV::V0, Mask, SDValue());
      SDValue Glue = Chain.getValue(1);
      SDValue V0 = CurDAG->getRegister(RISCV::V0, VT);

      // Otherwise use
      // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
      SDValue Cmp = SDValue(
          CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
                                 {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}),
          0);
      ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
                                               {Cmp, Mask, VL, MaskSEW}));
      return;
    }
    }
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
    switch (IntNo) {
    // By default we do not custom select any intrinsic.
    default:
      break;

    case Intrinsic::riscv_vsetvli:
    case Intrinsic::riscv_vsetvlimax: {
      if (!Subtarget->hasVInstructions())
        break;

      bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
      unsigned Offset = VLMax ? 2 : 3;
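
      // Operand layout: riscv_vsetvli is (chain, id, avl, sew, lmul), while
      // riscv_vsetvlimax omits the AVL, so SEW/LMUL start one operand earlier.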
      assert(Node->getNumOperands() == Offset + 2 &&
             "Unexpected number of operands");

      unsigned SEW =
          RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
      RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(
          Node->getConstantOperandVal(Offset + 1) & 0x7);

      unsigned VTypeI = RISCVVType::encodeVTYPE(
          VLMul, SEW, /*TailAgnostic*/ true, /*MaskAgnostic*/ false);
      SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);

      SDValue VLOperand;
      unsigned Opcode = RISCV::PseudoVSETVLI;
      if (VLMax) {
        VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
        Opcode = RISCV::PseudoVSETVLIX0;
      } else {
        VLOperand = Node->getOperand(2);

        if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
          uint64_t AVL = C->getZExtValue();
          if (isUInt<5>(AVL)) {
            SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
            ReplaceNode(
                Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL, XLenVT,
                                             MVT::Other, VLImm, VTypeIOp,
                                             /* Chain */ Node->getOperand(0)));
            return;
          }
        }
      }

      ReplaceNode(Node,
                  CurDAG->getMachineNode(Opcode, DL, XLenVT,
                                         MVT::Other, VLOperand, VTypeIOp,
                                         /* Chain */ Node->getOperand(0)));
      return;
    }
    case Intrinsic::riscv_vlseg2:
    case Intrinsic::riscv_vlseg3:
    case Intrinsic::riscv_vlseg4:
    case Intrinsic::riscv_vlseg5:
    case Intrinsic::riscv_vlseg6:
    case Intrinsic::riscv_vlseg7:
    case Intrinsic::riscv_vlseg8: {
      selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
      return;
    }
    case Intrinsic::riscv_vlseg2_mask:
    case Intrinsic::riscv_vlseg3_mask:
    case Intrinsic::riscv_vlseg4_mask:
    case Intrinsic::riscv_vlseg5_mask:
    case Intrinsic::riscv_vlseg6_mask:
    case Intrinsic::riscv_vlseg7_mask:
    case Intrinsic::riscv_vlseg8_mask: {
      selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
      return;
    }
    case Intrinsic::riscv_vlsseg2:
    case Intrinsic::riscv_vlsseg3:
    case Intrinsic::riscv_vlsseg4:
    case Intrinsic::riscv_vlsseg5:
    case Intrinsic::riscv_vlsseg6:
    case Intrinsic::riscv_vlsseg7:
    case Intrinsic::riscv_vlsseg8: {
      selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
      return;
    }
    case Intrinsic::riscv_vlsseg2_mask:
    case Intrinsic::riscv_vlsseg3_mask:
    case Intrinsic::riscv_vlsseg4_mask:
    case Intrinsic::riscv_vlsseg5_mask:
    case Intrinsic::riscv_vlsseg6_mask:
    case Intrinsic::riscv_vlsseg7_mask:
    case Intrinsic::riscv_vlsseg8_mask: {
      selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
      return;
    }
    case Intrinsic::riscv_vloxseg2:
    case Intrinsic::riscv_vloxseg3:
    case Intrinsic::riscv_vloxseg4:
    case Intrinsic::riscv_vloxseg5:
    case Intrinsic::riscv_vloxseg6:
    case Intrinsic::riscv_vloxseg7:
    case Intrinsic::riscv_vloxseg8:
      selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
      return;
    case Intrinsic::riscv_vluxseg2:
    case Intrinsic::riscv_vluxseg3:
    case Intrinsic::riscv_vluxseg4:
    case Intrinsic::riscv_vluxseg5:
    case Intrinsic::riscv_vluxseg6:
    case Intrinsic::riscv_vluxseg7:
    case Intrinsic::riscv_vluxseg8:
      selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
      return;
    case Intrinsic::riscv_vloxseg2_mask:
    case Intrinsic::riscv_vloxseg3_mask:
    case Intrinsic::riscv_vloxseg4_mask:
    case Intrinsic::riscv_vloxseg5_mask:
    case Intrinsic::riscv_vloxseg6_mask:
    case Intrinsic::riscv_vloxseg7_mask:
    case Intrinsic::riscv_vloxseg8_mask:
      selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
      return;
    case Intrinsic::riscv_vluxseg2_mask:
    case Intrinsic::riscv_vluxseg3_mask:
    case Intrinsic::riscv_vluxseg4_mask:
    case Intrinsic::riscv_vluxseg5_mask:
    case Intrinsic::riscv_vluxseg6_mask:
    case Intrinsic::riscv_vluxseg7_mask:
    case Intrinsic::riscv_vluxseg8_mask:
      selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
      return;
    case Intrinsic::riscv_vlseg8ff:
    case Intrinsic::riscv_vlseg7ff:
    case Intrinsic::riscv_vlseg6ff:
    case Intrinsic::riscv_vlseg5ff:
    case Intrinsic::riscv_vlseg4ff:
    case Intrinsic::riscv_vlseg3ff:
    case Intrinsic::riscv_vlseg2ff: {
      selectVLSEGFF(Node, /*IsMasked*/ false);
      return;
    }
    case Intrinsic::riscv_vlseg8ff_mask:
    case Intrinsic::riscv_vlseg7ff_mask:
    case Intrinsic::riscv_vlseg6ff_mask:
    case Intrinsic::riscv_vlseg5ff_mask:
    case Intrinsic::riscv_vlseg4ff_mask:
    case Intrinsic::riscv_vlseg3ff_mask:
    case Intrinsic::riscv_vlseg2ff_mask: {
      selectVLSEGFF(Node, /*IsMasked*/ true);
      return;
    }
    case Intrinsic::riscv_vloxei:
    case Intrinsic::riscv_vloxei_mask:
    case Intrinsic::riscv_vluxei:
    case Intrinsic::riscv_vluxei_mask: {
      bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
                      IntNo == Intrinsic::riscv_vluxei_mask;
      bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
                       IntNo == Intrinsic::riscv_vloxei_mask;

      MVT VT = Node->getSimpleValueType(0);
      unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());

      unsigned CurOp = 2;
      SmallVector<SDValue, 8> Operands;
      if (IsMasked)
        Operands.push_back(Node->getOperand(CurOp++));

      MVT IndexVT;
      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                                 /*IsStridedOrIndexed*/ true, Operands,
                                 /*IsLoad=*/true, &IndexVT);

      assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
             "Element count mismatch");

      RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
      RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
      unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
      if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
        report_fatal_error("The V extension does not support EEW=64 for index "
                           "values when XLEN=32");
      }
      const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
          IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
          static_cast<unsigned>(IndexLMUL));
      MachineSDNode *Load =
          CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);

      if (auto *MemOp = dyn_cast<MemSDNode>(Node))
        CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

      ReplaceNode(Node, Load);
      return;
    }
    case Intrinsic::riscv_vlm:
    case Intrinsic::riscv_vle:
    case Intrinsic::riscv_vle_mask:
    case Intrinsic::riscv_vlse:
    case Intrinsic::riscv_vlse_mask: {
      bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
                      IntNo == Intrinsic::riscv_vlse_mask;
      bool IsStrided =
          IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;

      MVT VT = Node->getSimpleValueType(0);
      unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());

      unsigned CurOp = 2;
      SmallVector<SDValue, 8> Operands;
      if (IsMasked)
        Operands.push_back(Node->getOperand(CurOp++));

      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
                                 Operands, /*IsLoad=*/true);

      RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
      const RISCV::VLEPseudo *P =
          RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW,
                              static_cast<unsigned>(LMUL));
      MachineSDNode *Load =
          CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);

      if (auto *MemOp = dyn_cast<MemSDNode>(Node))
        CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

      ReplaceNode(Node, Load);
      return;
    }
    case Intrinsic::riscv_vleff:
    case Intrinsic::riscv_vleff_mask: {
      bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;

      MVT VT = Node->getSimpleValueType(0);
      unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());

      unsigned CurOp = 2;
      SmallVector<SDValue, 7> Operands;
      if (IsMasked)
        Operands.push_back(Node->getOperand(CurOp++));

      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                                 /*IsStridedOrIndexed*/ false, Operands,
                                 /*IsLoad=*/true);

      RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
      const RISCV::VLEPseudo *P =
          RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true, Log2SEW,
                              static_cast<unsigned>(LMUL));
      MachineSDNode *Load =
          CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0),
                                 MVT::Other, MVT::Glue, Operands);
      SDNode *ReadVL = CurDAG->getMachineNode(RISCV::PseudoReadVL, DL, XLenVT,
                                              /*Glue*/ SDValue(Load, 2));

      if (auto *MemOp = dyn_cast<MemSDNode>(Node))
        CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

      ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
      ReplaceUses(SDValue(Node, 1), SDValue(ReadVL, 0)); // VL
      ReplaceUses(SDValue(Node, 2), SDValue(Load, 1));   // Chain
      CurDAG->RemoveDeadNode(Node);
      return;
    }
    }
    break;
  }
  case ISD::INTRINSIC_VOID: {
    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
    switch (IntNo) {
    case Intrinsic::riscv_vsseg2:
    case Intrinsic::riscv_vsseg3:
    case Intrinsic::riscv_vsseg4:
    case Intrinsic::riscv_vsseg5:
    case Intrinsic::riscv_vsseg6:
    case Intrinsic::riscv_vsseg7:
    case Intrinsic::riscv_vsseg8: {
      selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
      return;
    }
    case Intrinsic::riscv_vsseg2_mask:
    case Intrinsic::riscv_vsseg3_mask:
    case Intrinsic::riscv_vsseg4_mask:
    case Intrinsic::riscv_vsseg5_mask:
    case Intrinsic::riscv_vsseg6_mask:
    case Intrinsic::riscv_vsseg7_mask:
    case Intrinsic::riscv_vsseg8_mask: {
      selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
      return;
    }
    case Intrinsic::riscv_vssseg2:
    case Intrinsic::riscv_vssseg3:
    case Intrinsic::riscv_vssseg4:
    case Intrinsic::riscv_vssseg5:
    case Intrinsic::riscv_vssseg6:
    case Intrinsic::riscv_vssseg7:
    case Intrinsic::riscv_vssseg8: {
      selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
      return;
    }
    case Intrinsic::riscv_vssseg2_mask:
    case Intrinsic::riscv_vssseg3_mask:
    case Intrinsic::riscv_vssseg4_mask:
    case Intrinsic::riscv_vssseg5_mask:
    case Intrinsic::riscv_vssseg6_mask:
    case Intrinsic::riscv_vssseg7_mask:
    case Intrinsic::riscv_vssseg8_mask: {
      selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
      return;
    }
    case Intrinsic::riscv_vsoxseg2:
    case Intrinsic::riscv_vsoxseg3:
    case Intrinsic::riscv_vsoxseg4:
    case Intrinsic::riscv_vsoxseg5:
    case Intrinsic::riscv_vsoxseg6:
    case Intrinsic::riscv_vsoxseg7:
    case Intrinsic::riscv_vsoxseg8:
      selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
      return;
    case Intrinsic::riscv_vsuxseg2:
    case Intrinsic::riscv_vsuxseg3:
    case Intrinsic::riscv_vsuxseg4:
    case Intrinsic::riscv_vsuxseg5:
    case Intrinsic::riscv_vsuxseg6:
    case Intrinsic::riscv_vsuxseg7:
    case Intrinsic::riscv_vsuxseg8:
      selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
      return;
    case Intrinsic::riscv_vsoxseg2_mask:
    case Intrinsic::riscv_vsoxseg3_mask:
    case Intrinsic::riscv_vsoxseg4_mask:
    case Intrinsic::riscv_vsoxseg5_mask:
    case Intrinsic::riscv_vsoxseg6_mask:
    case Intrinsic::riscv_vsoxseg7_mask:
    case Intrinsic::riscv_vsoxseg8_mask:
      selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
      return;
    case Intrinsic::riscv_vsuxseg2_mask:
    case Intrinsic::riscv_vsuxseg3_mask:
    case Intrinsic::riscv_vsuxseg4_mask:
    case Intrinsic::riscv_vsuxseg5_mask:
    case Intrinsic::riscv_vsuxseg6_mask:
    case Intrinsic::riscv_vsuxseg7_mask:
    case Intrinsic::riscv_vsuxseg8_mask:
      selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
      return;
    case Intrinsic::riscv_vsoxei:
    case Intrinsic::riscv_vsoxei_mask:
    case Intrinsic::riscv_vsuxei:
    case Intrinsic::riscv_vsuxei_mask: {
      bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
                      IntNo == Intrinsic::riscv_vsuxei_mask;
      bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
                       IntNo == Intrinsic::riscv_vsoxei_mask;

      MVT VT = Node->getOperand(2)->getSimpleValueType(0);
      unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());

      unsigned CurOp = 2;
      SmallVector<SDValue, 8> Operands;
      Operands.push_back(Node->getOperand(CurOp++)); // Store value.

      MVT IndexVT;
      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                                 /*IsStridedOrIndexed*/ true, Operands,
                                 /*IsLoad=*/false, &IndexVT);

      assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
             "Element count mismatch");

      RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
      RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
      unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
      if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
        report_fatal_error("The V extension does not support EEW=64 for index "
                           "values when XLEN=32");
      }
      const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
          IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
          static_cast<unsigned>(IndexLMUL));
      MachineSDNode *Store =
          CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);

      if (auto *MemOp = dyn_cast<MemSDNode>(Node))
        CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});

      ReplaceNode(Node, Store);
      return;
    }
    case Intrinsic::riscv_vsm:
    case Intrinsic::riscv_vse:
    case Intrinsic::riscv_vse_mask:
    case Intrinsic::riscv_vsse:
    case Intrinsic::riscv_vsse_mask: {
      bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
                      IntNo == Intrinsic::riscv_vsse_mask;
      bool IsStrided =
          IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;

      MVT VT = Node->getOperand(2)->getSimpleValueType(0);
      unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());

      unsigned CurOp = 2;
      SmallVector<SDValue, 8> Operands;
      Operands.push_back(Node->getOperand(CurOp++)); // Store value.

      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
                                 Operands);

      RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
      const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
          IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
      MachineSDNode *Store =
          CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
      if (auto *MemOp = dyn_cast<MemSDNode>(Node))
        CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});

      ReplaceNode(Node, Store);
      return;
    }
    }
    break;
  }
  case ISD::BITCAST: {
    MVT SrcVT = Node->getOperand(0).getSimpleValueType();
    // Just drop bitcasts between vectors if both are fixed or both are
    // scalable.
    if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
        (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
      ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
      CurDAG->RemoveDeadNode(Node);
      return;
    }
    break;
  }
  case ISD::INSERT_SUBVECTOR: {
    SDValue V = Node->getOperand(0);
    SDValue SubV = Node->getOperand(1);
    SDLoc DL(SubV);
    auto Idx = Node->getConstantOperandVal(2);
    MVT SubVecVT = SubV.getSimpleValueType();

    const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
    MVT SubVecContainerVT = SubVecVT;
    // Establish the correct scalable-vector types for any fixed-length type.
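    // (For example, with a 128-bit minimum VLEN a fixed-length v4i32 would
    // live in the single-register M1 container nxv2i32; the exact container
    // depends on the subtarget.)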
    if (SubVecVT.isFixedLengthVector())
      SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
    if (VT.isFixedLengthVector())
      VT = TLI.getContainerForFixedLengthVector(VT);

    const auto *TRI = Subtarget->getRegisterInfo();
    unsigned SubRegIdx;
    std::tie(SubRegIdx, Idx) =
        RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
            VT, SubVecContainerVT, Idx, TRI);

    // If the Idx hasn't been completely eliminated then this is a subvector
    // insert which doesn't naturally align to a vector register. These must
    // be handled using instructions to manipulate the vector registers.
    if (Idx != 0)
      break;

    RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT);
    bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
                           SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
                           SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
    (void)IsSubVecPartReg; // Silence unused variable warning without asserts.
    assert((!IsSubVecPartReg || V.isUndef()) &&
           "Expecting lowering to have created legal INSERT_SUBVECTORs when "
           "the subvector is smaller than a full-sized register");

    // If we haven't set a SubRegIdx, then we must be going between
    // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
    if (SubRegIdx == RISCV::NoSubRegister) {
      unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(VT);
      assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
                 InRegClassID &&
             "Unexpected subvector extraction");
      SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
      SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
                                               DL, VT, SubV, RC);
      ReplaceNode(Node, NewNode);
      return;
    }

    SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
    ReplaceNode(Node, Insert.getNode());
    return;
  }
  case ISD::EXTRACT_SUBVECTOR: {
    SDValue V = Node->getOperand(0);
    auto Idx = Node->getConstantOperandVal(1);
    MVT InVT = V.getSimpleValueType();
    SDLoc DL(V);

    const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
    MVT SubVecContainerVT = VT;
    // Establish the correct scalable-vector types for any fixed-length type.
    if (VT.isFixedLengthVector())
      SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
    if (InVT.isFixedLengthVector())
      InVT = TLI.getContainerForFixedLengthVector(InVT);

    const auto *TRI = Subtarget->getRegisterInfo();
    unsigned SubRegIdx;
    std::tie(SubRegIdx, Idx) =
        RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
            InVT, SubVecContainerVT, Idx, TRI);

    // If the Idx hasn't been completely eliminated then this is a subvector
    // extract which doesn't naturally align to a vector register. These must
    // be handled using instructions to manipulate the vector registers.
    if (Idx != 0)
      break;

    // If we haven't set a SubRegIdx, then we must be going between
    // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
1539 if (SubRegIdx == RISCV::NoSubRegister) { 1540 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT); 1541 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) == 1542 InRegClassID && 1543 "Unexpected subvector extraction"); 1544 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT); 1545 SDNode *NewNode = 1546 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC); 1547 ReplaceNode(Node, NewNode); 1548 return; 1549 } 1550 1551 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V); 1552 ReplaceNode(Node, Extract.getNode()); 1553 return; 1554 } 1555 case ISD::SPLAT_VECTOR: 1556 case RISCVISD::VMV_S_X_VL: 1557 case RISCVISD::VFMV_S_F_VL: 1558 case RISCVISD::VMV_V_X_VL: 1559 case RISCVISD::VFMV_V_F_VL: { 1560 // Try to match splat of a scalar load to a strided load with stride of x0. 1561 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL || 1562 Node->getOpcode() == RISCVISD::VFMV_S_F_VL; 1563 if (IsScalarMove && !Node->getOperand(0).isUndef()) 1564 break; 1565 SDValue Src = IsScalarMove ? Node->getOperand(1) : Node->getOperand(0); 1566 auto *Ld = dyn_cast<LoadSDNode>(Src); 1567 if (!Ld) 1568 break; 1569 EVT MemVT = Ld->getMemoryVT(); 1570 // The memory VT should be the same size as the element type. 1571 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize()) 1572 break; 1573 if (!IsProfitableToFold(Src, Node, Node) || 1574 !IsLegalToFold(Src, Node, Node, TM.getOptLevel())) 1575 break; 1576 1577 SDValue VL; 1578 if (Node->getOpcode() == ISD::SPLAT_VECTOR) 1579 VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT); 1580 else if (IsScalarMove) { 1581 // We could deal with more VL if we update the VSETVLI insert pass to 1582 // avoid introducing more VSETVLI. 1583 if (!isOneConstant(Node->getOperand(2))) 1584 break; 1585 selectVLOp(Node->getOperand(2), VL); 1586 } else 1587 selectVLOp(Node->getOperand(1), VL); 1588 1589 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1590 SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT); 1591 1592 SDValue Operands[] = {Ld->getBasePtr(), 1593 CurDAG->getRegister(RISCV::X0, XLenVT), VL, SEW, 1594 Ld->getChain()}; 1595 1596 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1597 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo( 1598 /*IsMasked*/ false, /*IsStrided*/ true, /*FF*/ false, Log2SEW, 1599 static_cast<unsigned>(LMUL)); 1600 MachineSDNode *Load = 1601 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1602 1603 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1604 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 1605 1606 ReplaceNode(Node, Load); 1607 return; 1608 } 1609 } 1610 1611 // Select the default instruction. 1612 SelectCode(Node); 1613 } 1614 1615 bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand( 1616 const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { 1617 switch (ConstraintID) { 1618 case InlineAsm::Constraint_m: 1619 // We just support simple memory operands that have a single address 1620 // operand and need no special handling. 
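    // A source-level example (illustrative; Slot and Val are hypothetical
    // names) that reaches this path:
    //   asm volatile("sw %1, %0" : "=m"(Slot) : "r"(Val));
    // The "m" operand is presented here as a single address value, which we
    // can forward unchanged.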
1621 OutOps.push_back(Op); 1622 return false; 1623 case InlineAsm::Constraint_A: 1624 OutOps.push_back(Op); 1625 return false; 1626 default: 1627 break; 1628 } 1629 1630 return true; 1631 } 1632 1633 bool RISCVDAGToDAGISel::SelectAddrFI(SDValue Addr, SDValue &Base) { 1634 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { 1635 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT()); 1636 return true; 1637 } 1638 return false; 1639 } 1640 1641 bool RISCVDAGToDAGISel::SelectBaseAddr(SDValue Addr, SDValue &Base) { 1642 // If this is FrameIndex, select it directly. Otherwise just let it get 1643 // selected to a register independently. 1644 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) 1645 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT()); 1646 else 1647 Base = Addr; 1648 return true; 1649 } 1650 1651 bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth, 1652 SDValue &ShAmt) { 1653 // Shift instructions on RISCV only read the lower 5 or 6 bits of the shift 1654 // amount. If there is an AND on the shift amount, we can bypass it if it 1655 // doesn't affect any of those bits. 1656 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) { 1657 const APInt &AndMask = N->getConstantOperandAPInt(1); 1658 1659 // Since the max shift amount is a power of 2 we can subtract 1 to make a 1660 // mask that covers the bits needed to represent all shift amounts. 1661 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!"); 1662 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1); 1663 1664 if (ShMask.isSubsetOf(AndMask)) { 1665 ShAmt = N.getOperand(0); 1666 return true; 1667 } 1668 1669 // SimplifyDemandedBits may have optimized the mask so try restoring any 1670 // bits that are known zero. 1671 KnownBits Known = CurDAG->computeKnownBits(N->getOperand(0)); 1672 if (ShMask.isSubsetOf(AndMask | Known.Zero)) { 1673 ShAmt = N.getOperand(0); 1674 return true; 1675 } 1676 } 1677 1678 ShAmt = N; 1679 return true; 1680 } 1681 1682 bool RISCVDAGToDAGISel::selectSExti32(SDValue N, SDValue &Val) { 1683 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG && 1684 cast<VTSDNode>(N.getOperand(1))->getVT() == MVT::i32) { 1685 Val = N.getOperand(0); 1686 return true; 1687 } 1688 MVT VT = N.getSimpleValueType(); 1689 if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - 32)) { 1690 Val = N; 1691 return true; 1692 } 1693 1694 return false; 1695 } 1696 1697 bool RISCVDAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) { 1698 if (N.getOpcode() == ISD::AND) { 1699 auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); 1700 if (C && C->getZExtValue() == UINT64_C(0xFFFFFFFF)) { 1701 Val = N.getOperand(0); 1702 return true; 1703 } 1704 } 1705 MVT VT = N.getSimpleValueType(); 1706 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), 32); 1707 if (CurDAG->MaskedValueIsZero(N, Mask)) { 1708 Val = N; 1709 return true; 1710 } 1711 1712 return false; 1713 } 1714 1715 // Return true if all users of this SDNode* only consume the lower \p Bits. 1716 // This can be used to form W instructions for add/sub/mul/shl even when the 1717 // root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if 1718 // SimplifyDemandedBits has made it so some users see a sext_inreg and some 1719 // don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave 1720 // the add/sub/mul/shl to become non-W instructions. 
By checking the users we
1721 // may be able to use a W instruction and CSE with the other instruction if
1722 // this has happened. We could try to detect that the CSE opportunity exists
1723 // before doing this, but that would be more complicated.
1724 // TODO: Does this need to look through AND/OR/XOR to their users to find more
1725 // opportunities?
1726 bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const {
1727 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
1728 Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
1729 Node->getOpcode() == ISD::SRL ||
1730 Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
1731 isa<ConstantSDNode>(Node)) &&
1732 "Unexpected opcode");
1733
1734 for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
1735 SDNode *User = *UI;
1736 // Users of this node should have already been instruction selected.
1737 if (!User->isMachineOpcode())
1738 return false;
1739
1740 // TODO: Add more opcodes?
1741 switch (User->getMachineOpcode()) {
1742 default:
1743 return false;
1744 case RISCV::ADDW:
1745 case RISCV::ADDIW:
1746 case RISCV::SUBW:
1747 case RISCV::MULW:
1748 case RISCV::SLLW:
1749 case RISCV::SLLIW:
1750 case RISCV::SRAW:
1751 case RISCV::SRAIW:
1752 case RISCV::SRLW:
1753 case RISCV::SRLIW:
1754 case RISCV::DIVW:
1755 case RISCV::DIVUW:
1756 case RISCV::REMW:
1757 case RISCV::REMUW:
1758 case RISCV::ROLW:
1759 case RISCV::RORW:
1760 case RISCV::RORIW:
1761 case RISCV::CLZW:
1762 case RISCV::CTZW:
1763 case RISCV::CPOPW:
1764 case RISCV::SLLIUW:
1765 case RISCV::FCVT_H_W:
1766 case RISCV::FCVT_H_WU:
1767 case RISCV::FCVT_S_W:
1768 case RISCV::FCVT_S_WU:
1769 case RISCV::FCVT_D_W:
1770 case RISCV::FCVT_D_WU:
1771 if (Bits < 32)
1772 return false;
1773 break;
1774 case RISCV::SLLI:
1775 // SLLI only uses the lower (XLen - ShAmt) bits.
1776 if (Bits < Subtarget->getXLen() - User->getConstantOperandVal(1))
1777 return false;
1778 break;
1779 case RISCV::ANDI:
1780 if (Bits < (64 - countLeadingZeros(User->getConstantOperandVal(1))))
1781 return false;
1782 break;
1783 case RISCV::SEXTB:
1784 if (Bits < 8)
1785 return false;
1786 break;
1787 case RISCV::SEXTH:
1788 case RISCV::ZEXTH_RV32:
1789 case RISCV::ZEXTH_RV64:
1790 if (Bits < 16)
1791 return false;
1792 break;
1793 case RISCV::ADDUW:
1794 case RISCV::SH1ADDUW:
1795 case RISCV::SH2ADDUW:
1796 case RISCV::SH3ADDUW:
1797 // The first operand to add.uw/shXadd.uw is implicitly zero extended from
1798 // 32 bits.
1799 if (UI.getOperandNo() != 0 || Bits < 32)
1800 return false;
1801 break;
1802 case RISCV::SB:
1803 if (UI.getOperandNo() != 0 || Bits < 8)
1804 return false;
1805 break;
1806 case RISCV::SH:
1807 if (UI.getOperandNo() != 0 || Bits < 16)
1808 return false;
1809 break;
1810 case RISCV::SW:
1811 if (UI.getOperandNo() != 0 || Bits < 32)
1812 return false;
1813 break;
1814 }
1815 }
1816
1817 return true;
1818 }
1819
1820 // Select VL as a 5-bit immediate or a value that will become a register. This
1821 // allows us to choose between VSETIVLI or VSETVLI later.
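// For example (illustrative): a constant AVL such as 4 fits VSETIVLI's 5-bit
// unsigned immediate, while an AVL computed at run time (or VLMAX, requested
// via the all-ones sentinel and later encoded with x0) has to go through the
// register form VSETVLI.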
1822 bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) { 1823 auto *C = dyn_cast<ConstantSDNode>(N); 1824 if (C && (isUInt<5>(C->getZExtValue()) || 1825 C->getSExtValue() == RISCV::VLMaxSentinel)) 1826 VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N), 1827 N->getValueType(0)); 1828 else 1829 VL = N; 1830 1831 return true; 1832 } 1833 1834 bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) { 1835 if (N.getOpcode() != ISD::SPLAT_VECTOR && 1836 N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64 && 1837 N.getOpcode() != RISCVISD::VMV_V_X_VL) 1838 return false; 1839 SplatVal = N.getOperand(0); 1840 return true; 1841 } 1842 1843 using ValidateFn = bool (*)(int64_t); 1844 1845 static bool selectVSplatSimmHelper(SDValue N, SDValue &SplatVal, 1846 SelectionDAG &DAG, 1847 const RISCVSubtarget &Subtarget, 1848 ValidateFn ValidateImm) { 1849 if ((N.getOpcode() != ISD::SPLAT_VECTOR && 1850 N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64 && 1851 N.getOpcode() != RISCVISD::VMV_V_X_VL) || 1852 !isa<ConstantSDNode>(N.getOperand(0))) 1853 return false; 1854 1855 int64_t SplatImm = cast<ConstantSDNode>(N.getOperand(0))->getSExtValue(); 1856 1857 // ISD::SPLAT_VECTOR, RISCVISD::SPLAT_VECTOR_I64 and RISCVISD::VMV_V_X_VL 1858 // share semantics when the operand type is wider than the resulting vector 1859 // element type: an implicit truncation first takes place. Therefore, perform 1860 // a manual truncation/sign-extension in order to ignore any truncated bits 1861 // and catch any zero-extended immediate. 1862 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first 1863 // sign-extending to (XLenVT -1). 1864 MVT XLenVT = Subtarget.getXLenVT(); 1865 assert(XLenVT == N.getOperand(0).getSimpleValueType() && 1866 "Unexpected splat operand type"); 1867 MVT EltVT = N.getSimpleValueType().getVectorElementType(); 1868 if (EltVT.bitsLT(XLenVT)) 1869 SplatImm = SignExtend64(SplatImm, EltVT.getSizeInBits()); 1870 1871 if (!ValidateImm(SplatImm)) 1872 return false; 1873 1874 SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), XLenVT); 1875 return true; 1876 } 1877 1878 bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) { 1879 return selectVSplatSimmHelper(N, SplatVal, *CurDAG, *Subtarget, 1880 [](int64_t Imm) { return isInt<5>(Imm); }); 1881 } 1882 1883 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) { 1884 return selectVSplatSimmHelper( 1885 N, SplatVal, *CurDAG, *Subtarget, 1886 [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; }); 1887 } 1888 1889 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N, 1890 SDValue &SplatVal) { 1891 return selectVSplatSimmHelper( 1892 N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) { 1893 return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16); 1894 }); 1895 } 1896 1897 bool RISCVDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &SplatVal) { 1898 if ((N.getOpcode() != ISD::SPLAT_VECTOR && 1899 N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64 && 1900 N.getOpcode() != RISCVISD::VMV_V_X_VL) || 1901 !isa<ConstantSDNode>(N.getOperand(0))) 1902 return false; 1903 1904 int64_t SplatImm = cast<ConstantSDNode>(N.getOperand(0))->getSExtValue(); 1905 1906 if (!isUInt<5>(SplatImm)) 1907 return false; 1908 1909 SplatVal = 1910 CurDAG->getTargetConstant(SplatImm, SDLoc(N), Subtarget->getXLenVT()); 1911 1912 return true; 1913 } 1914 1915 bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width, 1916 SDValue &Imm) { 1917 if (auto *C = dyn_cast<ConstantSDNode>(N)) { 
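    // Interpret the constant at the requested element width before the simm5
    // check; e.g. (illustrative) with Width == 8, a splat constant of 0xFF
    // sign-extends to -1, which is a valid simm5 even though the raw 64-bit
    // value is not.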
1918 int64_t ImmVal = SignExtend64(C->getSExtValue(), Width); 1919 1920 if (!isInt<5>(ImmVal)) 1921 return false; 1922 1923 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT()); 1924 return true; 1925 } 1926 1927 return false; 1928 } 1929 1930 // Merge an ADDI into the offset of a load/store instruction where possible. 1931 // (load (addi base, off1), off2) -> (load base, off1+off2) 1932 // (store val, (addi base, off1), off2) -> (store val, base, off1+off2) 1933 // This is possible when off1+off2 fits a 12-bit immediate. 1934 bool RISCVDAGToDAGISel::doPeepholeLoadStoreADDI(SDNode *N) { 1935 int OffsetOpIdx; 1936 int BaseOpIdx; 1937 1938 // Only attempt this optimisation for I-type loads and S-type stores. 1939 switch (N->getMachineOpcode()) { 1940 default: 1941 return false; 1942 case RISCV::LB: 1943 case RISCV::LH: 1944 case RISCV::LW: 1945 case RISCV::LBU: 1946 case RISCV::LHU: 1947 case RISCV::LWU: 1948 case RISCV::LD: 1949 case RISCV::FLH: 1950 case RISCV::FLW: 1951 case RISCV::FLD: 1952 BaseOpIdx = 0; 1953 OffsetOpIdx = 1; 1954 break; 1955 case RISCV::SB: 1956 case RISCV::SH: 1957 case RISCV::SW: 1958 case RISCV::SD: 1959 case RISCV::FSH: 1960 case RISCV::FSW: 1961 case RISCV::FSD: 1962 BaseOpIdx = 1; 1963 OffsetOpIdx = 2; 1964 break; 1965 } 1966 1967 if (!isa<ConstantSDNode>(N->getOperand(OffsetOpIdx))) 1968 return false; 1969 1970 SDValue Base = N->getOperand(BaseOpIdx); 1971 1972 // If the base is an ADDI, we can merge it in to the load/store. 1973 if (!Base.isMachineOpcode() || Base.getMachineOpcode() != RISCV::ADDI) 1974 return false; 1975 1976 SDValue ImmOperand = Base.getOperand(1); 1977 uint64_t Offset2 = N->getConstantOperandVal(OffsetOpIdx); 1978 1979 if (auto *Const = dyn_cast<ConstantSDNode>(ImmOperand)) { 1980 int64_t Offset1 = Const->getSExtValue(); 1981 int64_t CombinedOffset = Offset1 + Offset2; 1982 if (!isInt<12>(CombinedOffset)) 1983 return false; 1984 ImmOperand = CurDAG->getTargetConstant(CombinedOffset, SDLoc(ImmOperand), 1985 ImmOperand.getValueType()); 1986 } else if (auto *GA = dyn_cast<GlobalAddressSDNode>(ImmOperand)) { 1987 // If the off1 in (addi base, off1) is a global variable's address (its 1988 // low part, really), then we can rely on the alignment of that variable 1989 // to provide a margin of safety before off1 can overflow the 12 bits. 1990 // Check if off2 falls within that margin; if so off1+off2 can't overflow. 1991 const DataLayout &DL = CurDAG->getDataLayout(); 1992 Align Alignment = GA->getGlobal()->getPointerAlignment(DL); 1993 if (Offset2 != 0 && Alignment <= Offset2) 1994 return false; 1995 int64_t Offset1 = GA->getOffset(); 1996 int64_t CombinedOffset = Offset1 + Offset2; 1997 ImmOperand = CurDAG->getTargetGlobalAddress( 1998 GA->getGlobal(), SDLoc(ImmOperand), ImmOperand.getValueType(), 1999 CombinedOffset, GA->getTargetFlags()); 2000 } else if (auto *CP = dyn_cast<ConstantPoolSDNode>(ImmOperand)) { 2001 // Ditto. 
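    // As with the global-address case above, the entry's alignment bounds how
    // far off2 may reach: e.g. (illustrative) an 8-byte-aligned constant-pool
    // entry tolerates off2 values up to 7 without risking 12-bit overflow of
    // the combined offset.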
2002 Align Alignment = CP->getAlign();
2003 if (Offset2 != 0 && Alignment <= Offset2)
2004 return false;
2005 int64_t Offset1 = CP->getOffset();
2006 int64_t CombinedOffset = Offset1 + Offset2;
2007 ImmOperand = CurDAG->getTargetConstantPool(
2008 CP->getConstVal(), ImmOperand.getValueType(), CP->getAlign(),
2009 CombinedOffset, CP->getTargetFlags());
2010 } else {
2011 return false;
2012 }
2013
2014 LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
2015 LLVM_DEBUG(Base->dump(CurDAG));
2016 LLVM_DEBUG(dbgs() << "\nN: ");
2017 LLVM_DEBUG(N->dump(CurDAG));
2018 LLVM_DEBUG(dbgs() << "\n");
2019
2020 // Modify the offset operand of the load/store.
2021 if (BaseOpIdx == 0) // Load
2022 CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand,
2023 N->getOperand(2));
2024 else // Store
2025 CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0),
2026 ImmOperand, N->getOperand(3));
2027
2028 return true;
2029 }
2030
2031 // Try to remove sext.w if the input is a W instruction or can be made into
2032 // a W instruction cheaply.
2033 bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
2034 // Look for the sext.w pattern, addiw rd, rs1, 0.
2035 if (N->getMachineOpcode() != RISCV::ADDIW ||
2036 !isNullConstant(N->getOperand(1)))
2037 return false;
2038
2039 SDValue N0 = N->getOperand(0);
2040 if (!N0.isMachineOpcode())
2041 return false;
2042
2043 switch (N0.getMachineOpcode()) {
2044 default:
2045 break;
2046 case RISCV::ADD:
2047 case RISCV::ADDI:
2048 case RISCV::SUB:
2049 case RISCV::MUL:
2050 case RISCV::SLLI: {
2051 // Convert sext.w+add/sub/mul to their W instructions. This will create
2052 // a new independent instruction. This improves latency.
2053 unsigned Opc;
2054 switch (N0.getMachineOpcode()) {
2055 default:
2056 llvm_unreachable("Unexpected opcode!");
2057 case RISCV::ADD: Opc = RISCV::ADDW; break;
2058 case RISCV::ADDI: Opc = RISCV::ADDIW; break;
2059 case RISCV::SUB: Opc = RISCV::SUBW; break;
2060 case RISCV::MUL: Opc = RISCV::MULW; break;
2061 case RISCV::SLLI: Opc = RISCV::SLLIW; break;
2062 }
2063
2064 SDValue N00 = N0.getOperand(0);
2065 SDValue N01 = N0.getOperand(1);
2066
2067 // Shift amount needs to be uimm5.
2068 if (N0.getMachineOpcode() == RISCV::SLLI &&
2069 !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
2070 break;
2071
2072 SDNode *Result =
2073 CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
2074 N00, N01);
2075 ReplaceUses(N, Result);
2076 return true;
2077 }
2078 case RISCV::ADDW:
2079 case RISCV::ADDIW:
2080 case RISCV::SUBW:
2081 case RISCV::MULW:
2082 case RISCV::SLLIW:
2083 // Result is already sign-extended; just remove the sext.w.
2084 // NOTE: We only handle the nodes that are selected with hasAllWUsers.
2085 ReplaceUses(N, N0.getNode());
2086 return true;
2087 }
2088
2089 return false;
2090 }
2091
2092 // This pass converts a legalized DAG into a RISCV-specific DAG, ready
2093 // for instruction scheduling.
2094 FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM) {
2095 return new RISCVDAGToDAGISel(TM);
2096 }
2097