//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISCV ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the RISCV target.
//
//===----------------------------------------------------------------------===//

#include "RISCVISelDAGToDAG.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCVISelLowering.h"
#include "RISCVMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-isel"

namespace llvm {
namespace RISCV {
#define GET_RISCVVSSEGTable_IMPL
#define GET_RISCVVLSEGTable_IMPL
#define GET_RISCVVLXSEGTable_IMPL
#define GET_RISCVVSXSEGTable_IMPL
#define GET_RISCVVLETable_IMPL
#define GET_RISCVVSETable_IMPL
#define GET_RISCVVLXTable_IMPL
#define GET_RISCVVSXTable_IMPL
#define GET_RISCVMaskedPseudosTable_IMPL
#include "RISCVGenSearchableTables.inc"
} // namespace RISCV
} // namespace llvm

void RISCVDAGToDAGISel::PreprocessISelDAG() {
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
                                       E = CurDAG->allnodes_end();
       I != E;) {
    SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.

    // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
    // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
    if (N->getOpcode() == ISD::SPLAT_VECTOR) {
      MVT VT = N->getSimpleValueType(0);
      unsigned Opc =
          VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
      SDLoc DL(N);
      SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
      SDValue Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT),
                                       N->getOperand(0), VL);

      --I;
      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
      ++I;
      CurDAG->DeleteNode(N);
      continue;
    }

    // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
    // load. Done after lowering and combining so that we have a chance to
    // optimize this to VMV_V_X_VL when the upper bits aren't needed.
    if (N->getOpcode() != RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL)
      continue;

    assert(N->getNumOperands() == 4 && "Unexpected number of operands");
    MVT VT = N->getSimpleValueType(0);
    SDValue Passthru = N->getOperand(0);
    SDValue Lo = N->getOperand(1);
    SDValue Hi = N->getOperand(2);
    SDValue VL = N->getOperand(3);
    assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
           Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
           "Unexpected VTs!");
    MachineFunction &MF = CurDAG->getMachineFunction();
    RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
    SDLoc DL(N);

    // We use the same frame index we use for moving two i32s into 64-bit FPR.
    // This is an analogous operation.
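    // The two scalar stores below write Lo and Hi into that stack slot, and
    // the stride-0 vlse then broadcasts the reassembled 64-bit value to every
    // element of the result vector.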
    int FI = FuncInfo->getMoveF64FrameIndex(MF);
    MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
    const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
    SDValue StackSlot =
        CurDAG->getFrameIndex(FI, TLI.getPointerTy(CurDAG->getDataLayout()));

    SDValue Chain = CurDAG->getEntryNode();
    Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));

    SDValue OffsetSlot =
        CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), DL);
    Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
                          Align(8));

    Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);

    SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
    SDValue IntID =
        CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
    SDValue Ops[] = {Chain,
                     IntID,
                     Passthru,
                     StackSlot,
                     CurDAG->getRegister(RISCV::X0, MVT::i64),
                     VL};

    SDValue Result = CurDAG->getMemIntrinsicNode(
        ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MVT::i64, MPI, Align(8),
        MachineMemOperand::MOLoad);

    // We're about to replace all uses of the SPLAT_VECTOR_SPLIT_I64 with the
    // vlse we created. This will cause general havoc on the DAG because
    // anything below the conversion could be folded into other existing nodes.
    // To avoid invalidating 'I', back it up to the convert node.
    --I;
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);

    // Now that we did that, the node is dead. Increment the iterator to the
    // next node to process, then delete N.
    ++I;
    CurDAG->DeleteNode(N);
  }
}

void RISCVDAGToDAGISel::PostprocessISelDAG() {
  HandleSDNode Dummy(CurDAG->getRoot());
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    // Skip dead nodes and any non-machine opcodes.
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    MadeChange |= doPeepholeSExtW(N);
    MadeChange |= doPeepholeLoadStoreADDI(N);
    MadeChange |= doPeepholeMaskedRVV(N);
  }

  CurDAG->setRoot(Dummy.getValue());

  if (MadeChange)
    CurDAG->RemoveDeadNodes();
}

// Returns true if N is a MachineSDNode that has a reg and simm12 memory
// operand. The indices of the base pointer and offset are returned in
// BaseOpIdx and OffsetOpIdx.
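// Only the scalar integer and floating-point loads/stores with a simm12
// immediate offset are recognized; any other machine opcode returns false.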
static bool hasMemOffset(SDNode *N, unsigned &BaseOpIdx,
                         unsigned &OffsetOpIdx) {
  switch (N->getMachineOpcode()) {
  case RISCV::LB:
  case RISCV::LH:
  case RISCV::LW:
  case RISCV::LBU:
  case RISCV::LHU:
  case RISCV::LWU:
  case RISCV::LD:
  case RISCV::FLH:
  case RISCV::FLW:
  case RISCV::FLD:
    BaseOpIdx = 0;
    OffsetOpIdx = 1;
    return true;
  case RISCV::SB:
  case RISCV::SH:
  case RISCV::SW:
  case RISCV::SD:
  case RISCV::FSH:
  case RISCV::FSW:
  case RISCV::FSD:
    BaseOpIdx = 1;
    OffsetOpIdx = 2;
    return true;
  }

  return false;
}

static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
                         int64_t Imm, const RISCVSubtarget &Subtarget) {
  MVT XLenVT = Subtarget.getXLenVT();
  RISCVMatInt::InstSeq Seq =
      RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());

  SDNode *Result = nullptr;
  SDValue SrcReg = CurDAG->getRegister(RISCV::X0, XLenVT);
  for (RISCVMatInt::Inst &Inst : Seq) {
    SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, XLenVT);
    switch (Inst.getOpndKind()) {
    case RISCVMatInt::Imm:
      Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SDImm);
      break;
    case RISCVMatInt::RegX0:
      Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg,
                                      CurDAG->getRegister(RISCV::X0, XLenVT));
      break;
    case RISCVMatInt::RegReg:
      Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SrcReg);
      break;
    case RISCVMatInt::RegImm:
      Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SDImm);
      break;
    }

    // Only the first instruction has X0 as its source.
    SrcReg = SDValue(Result, 0);
  }

  return Result;
}

static SDValue createTupleImpl(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
                               unsigned RegClassID, unsigned SubReg0) {
  assert(Regs.size() >= 2 && Regs.size() <= 8);

  SDLoc DL(Regs[0]);
  SmallVector<SDValue, 8> Ops;

  Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32));

  for (unsigned I = 0; I < Regs.size(); ++I) {
    Ops.push_back(Regs[I]);
    Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32));
  }
  SDNode *N =
      CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
  return SDValue(N, 0);
}

static SDValue createM1Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
                             unsigned NF) {
  static const unsigned RegClassIDs[] = {
      RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID,
      RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID,
      RISCV::VRN8M1RegClassID};

  return createTupleImpl(CurDAG, Regs, RegClassIDs[NF - 2], RISCV::sub_vrm1_0);
}

static SDValue createM2Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
                             unsigned NF) {
  static const unsigned RegClassIDs[] = {RISCV::VRN2M2RegClassID,
                                         RISCV::VRN3M2RegClassID,
                                         RISCV::VRN4M2RegClassID};

  return createTupleImpl(CurDAG, Regs, RegClassIDs[NF - 2], RISCV::sub_vrm2_0);
}

static SDValue createM4Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
                             unsigned NF) {
  return createTupleImpl(CurDAG, Regs, RISCV::VRN2M4RegClassID,
                         RISCV::sub_vrm4_0);
}

static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
                           unsigned NF, RISCVII::VLMUL LMUL) {
  switch (LMUL) {
  default:
    llvm_unreachable("Invalid LMUL.");
  case RISCVII::VLMUL::LMUL_F8:
  case RISCVII::VLMUL::LMUL_F4:
  case RISCVII::VLMUL::LMUL_F2:
  case RISCVII::VLMUL::LMUL_1:
    return createM1Tuple(CurDAG, Regs, NF);
  case RISCVII::VLMUL::LMUL_2:
    return createM2Tuple(CurDAG, Regs, NF);
  case RISCVII::VLMUL::LMUL_4:
    return createM4Tuple(CurDAG, Regs, NF);
  }
}

void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
    SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
    bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
    bool IsLoad, MVT *IndexVT) {
  SDValue Chain = Node->getOperand(0);
  SDValue Glue;

  SDValue Base;
  SelectBaseAddr(Node->getOperand(CurOp++), Base);
  Operands.push_back(Base); // Base pointer.

  if (IsStridedOrIndexed) {
    Operands.push_back(Node->getOperand(CurOp++)); // Index.
    if (IndexVT)
      *IndexVT = Operands.back()->getSimpleValueType(0);
  }

  if (IsMasked) {
    // Mask needs to be copied to V0.
    SDValue Mask = Node->getOperand(CurOp++);
    Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());
    Glue = Chain.getValue(1);
    Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));
  }
  SDValue VL;
  selectVLOp(Node->getOperand(CurOp++), VL);
  Operands.push_back(VL);

  MVT XLenVT = Subtarget->getXLenVT();
  SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
  Operands.push_back(SEWOp);

  // Masked load has the tail policy argument.
  if (IsMasked && IsLoad) {
    // Policy must be a constant.
    uint64_t Policy = Node->getConstantOperandVal(CurOp++);
    SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
    Operands.push_back(PolicyOp);
  }

  Operands.push_back(Chain); // Chain.
  if (Glue)
    Operands.push_back(Glue);
}

static bool isAllUndef(ArrayRef<SDValue> Values) {
  return llvm::all_of(Values, [](SDValue V) { return V->isUndef(); });
}

void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked,
                                    bool IsStrided) {
  SDLoc DL(Node);
  unsigned NF = Node->getNumValues() - 1;
  MVT VT = Node->getSimpleValueType(0);
  unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  unsigned CurOp = 2;
  SmallVector<SDValue, 8> Operands;

  SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
                               Node->op_begin() + CurOp + NF);
  bool IsTU = IsMasked || !isAllUndef(Regs);
  if (IsTU) {
    SDValue Merge = createTuple(*CurDAG, Regs, NF, LMUL);
    Operands.push_back(Merge);
  }
  CurOp += NF;

  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
                             Operands, /*IsLoad=*/true);

  const RISCV::VLSEGPseudo *P =
      RISCV::getVLSEGPseudo(NF, IsMasked, IsTU, IsStrided, /*FF*/ false,
                            Log2SEW, static_cast<unsigned>(LMUL));
  MachineSDNode *Load =
      CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);

  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

  SDValue SuperReg = SDValue(Load, 0);
  for (unsigned I = 0; I < NF; ++I) {
    unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
    ReplaceUses(SDValue(Node, I),
                CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
  }

  ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
  CurDAG->RemoveDeadNode(Node);
}

void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) {
  SDLoc DL(Node);
  unsigned NF = Node->getNumValues() - 2; // Do not count VL and Chain.
  MVT VT = Node->getSimpleValueType(0);
  MVT XLenVT = Subtarget->getXLenVT();
  unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  unsigned CurOp = 2;
  SmallVector<SDValue, 7> Operands;

  SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
                               Node->op_begin() + CurOp + NF);
  bool IsTU = IsMasked || !isAllUndef(Regs);
  if (IsTU) {
    SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
    Operands.push_back(MaskedOff);
  }
  CurOp += NF;

  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ false, Operands,
                             /*IsLoad=*/true);

  const RISCV::VLSEGPseudo *P =
      RISCV::getVLSEGPseudo(NF, IsMasked, IsTU, /*Strided*/ false, /*FF*/ true,
                            Log2SEW, static_cast<unsigned>(LMUL));
  MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
                                               XLenVT, MVT::Other, Operands);

  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

  SDValue SuperReg = SDValue(Load, 0);
  for (unsigned I = 0; I < NF; ++I) {
    unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
    ReplaceUses(SDValue(Node, I),
                CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
  }

  ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));     // VL
  ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 2)); // Chain
  CurDAG->RemoveDeadNode(Node);
}

void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked,
                                     bool IsOrdered) {
  SDLoc DL(Node);
  unsigned NF = Node->getNumValues() - 1;
  MVT VT = Node->getSimpleValueType(0);
  unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  unsigned CurOp = 2;
  SmallVector<SDValue, 8> Operands;

  SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
                               Node->op_begin() + CurOp + NF);
  bool IsTU = IsMasked || !isAllUndef(Regs);
  if (IsTU) {
    SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
    Operands.push_back(MaskedOff);
  }
  CurOp += NF;

  MVT IndexVT;
  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ true, Operands,
                             /*IsLoad=*/true, &IndexVT);

  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
         "Element count mismatch");

  RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
  unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
  if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
    report_fatal_error("The V extension does not support EEW=64 for index "
                       "values when XLEN=32");
  }
  const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
      NF, IsMasked, IsTU, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
      static_cast<unsigned>(IndexLMUL));
  MachineSDNode *Load =
      CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);

  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

  SDValue SuperReg = SDValue(Load, 0);
  for (unsigned I = 0; I < NF; ++I) {
    unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
    ReplaceUses(SDValue(Node, I),
                CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
  }

  ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
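  // Every result of the original intrinsic (the NF vector values and the
  // chain) has been rewired to the new pseudo, so the node is now dead.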
  CurDAG->RemoveDeadNode(Node);
}

void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, bool IsMasked,
                                    bool IsStrided) {
  SDLoc DL(Node);
  unsigned NF = Node->getNumOperands() - 4;
  if (IsStrided)
    NF--;
  if (IsMasked)
    NF--;
  MVT VT = Node->getOperand(2)->getSimpleValueType(0);
  unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
  SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
  SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);

  SmallVector<SDValue, 8> Operands;
  Operands.push_back(StoreVal);
  unsigned CurOp = 2 + NF;

  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
                             Operands);

  const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
      NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
  MachineSDNode *Store =
      CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);

  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});

  ReplaceNode(Node, Store);
}

void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked,
                                     bool IsOrdered) {
  SDLoc DL(Node);
  unsigned NF = Node->getNumOperands() - 5;
  if (IsMasked)
    --NF;
  MVT VT = Node->getOperand(2)->getSimpleValueType(0);
  unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
  SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
  SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);

  SmallVector<SDValue, 8> Operands;
  Operands.push_back(StoreVal);
  unsigned CurOp = 2 + NF;

  MVT IndexVT;
  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ true, Operands,
                             /*IsLoad=*/false, &IndexVT);

  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
         "Element count mismatch");

  RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
  unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
  if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
    report_fatal_error("The V extension does not support EEW=64 for index "
                       "values when XLEN=32");
  }
  const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
      NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
      static_cast<unsigned>(IndexLMUL));
  MachineSDNode *Store =
      CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);

  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});

  ReplaceNode(Node, Store);
}

void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
  if (!Subtarget->hasVInstructions())
    return;

  assert((Node->getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN) &&
         "Unexpected opcode");

  SDLoc DL(Node);
  MVT XLenVT = Subtarget->getXLenVT();

  bool HasChain = Node->getOpcode() == ISD::INTRINSIC_W_CHAIN;
  unsigned IntNoOffset = HasChain ? 1 : 0;
  unsigned IntNo = Node->getConstantOperandVal(IntNoOffset);

  assert((IntNo == Intrinsic::riscv_vsetvli ||
          IntNo == Intrinsic::riscv_vsetvlimax ||
          IntNo == Intrinsic::riscv_vsetvli_opt ||
          IntNo == Intrinsic::riscv_vsetvlimax_opt) &&
         "Unexpected vsetvli intrinsic");

  bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax ||
               IntNo == Intrinsic::riscv_vsetvlimax_opt;
  unsigned Offset = IntNoOffset + (VLMax ? 1 : 2);

  assert(Node->getNumOperands() == Offset + 2 &&
         "Unexpected number of operands");

  unsigned SEW =
      RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
  RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(
      Node->getConstantOperandVal(Offset + 1) & 0x7);

  unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
                                            /*MaskAgnostic*/ false);
  SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);

  SmallVector<EVT, 2> VTs = {XLenVT};
  if (HasChain)
    VTs.push_back(MVT::Other);

  SDValue VLOperand;
  unsigned Opcode = RISCV::PseudoVSETVLI;
  if (VLMax) {
    VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
    Opcode = RISCV::PseudoVSETVLIX0;
  } else {
    VLOperand = Node->getOperand(IntNoOffset + 1);

    if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
      uint64_t AVL = C->getZExtValue();
      if (isUInt<5>(AVL)) {
        SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
        SmallVector<SDValue, 3> Ops = {VLImm, VTypeIOp};
        if (HasChain)
          Ops.push_back(Node->getOperand(0));
        ReplaceNode(
            Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL, VTs, Ops));
        return;
      }
    }
  }

  SmallVector<SDValue, 3> Ops = {VLOperand, VTypeIOp};
  if (HasChain)
    Ops.push_back(Node->getOperand(0));

  ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, VTs, Ops));
}

void RISCVDAGToDAGISel::Select(SDNode *Node) {
  // If we have a custom node, we have already selected.
  if (Node->isMachineOpcode()) {
    LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
    Node->setNodeId(-1);
    return;
  }

  // Instruction Selection not handled by the auto-generated tablegen selection
  // should be handled here.
  unsigned Opcode = Node->getOpcode();
  MVT XLenVT = Subtarget->getXLenVT();
  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(0);

  switch (Opcode) {
  case ISD::Constant: {
    auto *ConstNode = cast<ConstantSDNode>(Node);
    if (VT == XLenVT && ConstNode->isZero()) {
      SDValue New =
          CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, XLenVT);
      ReplaceNode(Node, New.getNode());
      return;
    }
    int64_t Imm = ConstNode->getSExtValue();
    // If the upper XLen-16 bits are not used, try to convert this to a simm12
    // by sign extending bit 15.
    if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&
        hasAllHUsers(Node))
      Imm = SignExtend64<16>(Imm);
    // If the upper 32 bits are not used, try to convert this into a simm32 by
    // sign extending bit 31.
    if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
      Imm = SignExtend64<32>(Imm);

    ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget));
    return;
  }
  case ISD::ADD: {
    // Try to select ADD + immediate used as memory addresses to
    // (ADDI (ADD X, Imm-Lo12), Lo12) if it will allow the ADDI to be removed
    // by doPeepholeLoadStoreADDI.

    // The RHS (operand 1) should be a constant immediate.
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C)
      break;

    int64_t Offset = N1C->getSExtValue();
    int64_t Lo12 = SignExtend64<12>(Offset);

    // Don't do this if the lower 12 bits are 0 or we could use ADDI directly.
    if (Lo12 == 0 || isInt<12>(Offset))
      break;

    // Don't do this if we can use a pair of ADDIs.
    if (isInt<12>(Offset / 2) && isInt<12>(Offset - Offset / 2))
      break;

    bool AllPointerUses = true;
    for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
      SDNode *User = *UI;

      // Is this user a memory instruction with a reg+simm12 addressing mode
      // that uses this ADD as its base pointer?
      unsigned BaseOpIdx, OffsetOpIdx;
      if (!User->isMachineOpcode() ||
          !hasMemOffset(User, BaseOpIdx, OffsetOpIdx) ||
          UI.getOperandNo() != BaseOpIdx) {
        AllPointerUses = false;
        break;
      }

      // If the memory instruction already has an offset, make sure the
      // combined offset is foldable.
      int64_t MemOffs =
          cast<ConstantSDNode>(User->getOperand(OffsetOpIdx))->getSExtValue();
      MemOffs += Lo12;
      if (!isInt<12>(MemOffs)) {
        AllPointerUses = false;
        break;
      }
    }

    if (!AllPointerUses)
      break;

    Offset -= Lo12;
    // Restore sign bits for RV32.
    if (!Subtarget->is64Bit())
      Offset = SignExtend64<32>(Offset);

    // Emit (ADDI (ADD X, Hi), Lo)
    SDNode *Imm = selectImm(CurDAG, DL, VT, Offset, *Subtarget);
    SDNode *ADD = CurDAG->getMachineNode(RISCV::ADD, DL, VT,
                                         Node->getOperand(0), SDValue(Imm, 0));
    SDNode *ADDI =
        CurDAG->getMachineNode(RISCV::ADDI, DL, VT, SDValue(ADD, 0),
                               CurDAG->getTargetConstant(Lo12, DL, VT));
    ReplaceNode(Node, ADDI);
    return;
  }
  case ISD::SRL: {
    // Optimize (srl (and X, C2), C) ->
    //          (srli (slli X, (XLen-C3)), (XLen-C3) + C)
    // Where C2 is a mask with C3 trailing ones.
    // Taking into account that C2 may have had lower bits unset by
    // SimplifyDemandedBits. This avoids materializing the C2 immediate.
    // This pattern occurs when type legalizing right shifts for types with
    // less than XLen bits.
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C)
      break;
    SDValue N0 = Node->getOperand(0);
    if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
        !isa<ConstantSDNode>(N0.getOperand(1)))
      break;
    unsigned ShAmt = N1C->getZExtValue();
    uint64_t Mask = N0.getConstantOperandVal(1);
    Mask |= maskTrailingOnes<uint64_t>(ShAmt);
    if (!isMask_64(Mask))
      break;
    unsigned TrailingOnes = countTrailingOnes(Mask);
    // 32 trailing ones should use srliw via tablegen pattern.
    if (TrailingOnes == 32 || ShAmt >= TrailingOnes)
      break;
    unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
    SDNode *SLLI =
        CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
                               CurDAG->getTargetConstant(LShAmt, DL, VT));
    SDNode *SRLI = CurDAG->getMachineNode(
        RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
        CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
    ReplaceNode(Node, SRLI);
    return;
  }
  case ISD::SRA: {
    // Optimize (sra (sext_inreg X, i16), C) ->
    //          (srai (slli X, (XLen-16)), (XLen-16) + C)
    // And (sra (sext_inreg X, i8), C) ->
    //     (srai (slli X, (XLen-8)), (XLen-8) + C)
    // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
    // This transform matches the code we get without Zbb. The shifts are more
    // compressible, and this can help expose CSE opportunities in the sdiv by
    // constant optimization.
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C)
      break;
    SDValue N0 = Node->getOperand(0);
    if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
      break;
    unsigned ShAmt = N1C->getZExtValue();
    unsigned ExtSize =
        cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
    // ExtSize of 32 should use sraiw via tablegen pattern.
    if (ExtSize >= 32 || ShAmt >= ExtSize)
      break;
    unsigned LShAmt = Subtarget->getXLen() - ExtSize;
    SDNode *SLLI =
        CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
                               CurDAG->getTargetConstant(LShAmt, DL, VT));
    SDNode *SRAI = CurDAG->getMachineNode(
        RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
        CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
    ReplaceNode(Node, SRAI);
    return;
  }
  case ISD::AND: {
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C)
      break;

    SDValue N0 = Node->getOperand(0);

    bool LeftShift = N0.getOpcode() == ISD::SHL;
    if (!LeftShift && N0.getOpcode() != ISD::SRL)
      break;

    auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!C)
      break;
    uint64_t C2 = C->getZExtValue();
    unsigned XLen = Subtarget->getXLen();
    if (!C2 || C2 >= XLen)
      break;

    uint64_t C1 = N1C->getZExtValue();

    // Keep track of whether this is a c.andi. If we can't use c.andi, the
    // shift pair might offer more compression opportunities.
    // TODO: We could check for C extension here, but we don't have many lit
    // tests with the C extension enabled so not checking gets better coverage.
    // TODO: What if ANDI is faster than the shift pair?
    bool IsCANDI = isInt<6>(N1C->getSExtValue());

    // Clear irrelevant bits in the mask.
    if (LeftShift)
      C1 &= maskTrailingZeros<uint64_t>(C2);
    else
      C1 &= maskTrailingOnes<uint64_t>(XLen - C2);

    // Some transforms should only be done if the shift has a single use or
    // the AND would become (srli (slli X, 32), 32).
    bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);

    SDValue X = N0.getOperand(0);

    // Turn (and (srl x, c2), c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
    // with c3 leading zeros.
    if (!LeftShift && isMask_64(C1)) {
      uint64_t C3 = XLen - (64 - countLeadingZeros(C1));
      if (C2 < C3) {
        // If the number of leading zeros is C2+32 this can be SRLIW.
        if (C2 + 32 == C3) {
          SDNode *SRLIW =
              CurDAG->getMachineNode(RISCV::SRLIW, DL, XLenVT, X,
                                     CurDAG->getTargetConstant(C2, DL, XLenVT));
          ReplaceNode(Node, SRLIW);
          return;
        }

        // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32) if
        // c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
        //
        // This pattern occurs when (i32 (srl (sra X, 31), c3 - 32)) is type
        // legalized and goes through DAG combine.
        if (C2 >= 32 && (C3 - C2) == 1 && N0.hasOneUse() &&
            X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
            cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
          SDNode *SRAIW =
              CurDAG->getMachineNode(RISCV::SRAIW, DL, XLenVT, X.getOperand(0),
                                     CurDAG->getTargetConstant(31, DL, XLenVT));
          SDNode *SRLIW = CurDAG->getMachineNode(
              RISCV::SRLIW, DL, XLenVT, SDValue(SRAIW, 0),
              CurDAG->getTargetConstant(C3 - 32, DL, XLenVT));
          ReplaceNode(Node, SRLIW);
          return;
        }

        // (srli (slli x, c3-c2), c3).
        // Skip if we could use (zext.w (sraiw X, C2)).
        bool Skip = Subtarget->hasStdExtZba() && C3 == 32 &&
                    X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
                    cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
        // Also Skip if we can use bexti.
        Skip |= Subtarget->hasStdExtZbs() && C3 == XLen - 1;
        if (OneUseOrZExtW && !Skip) {
          SDNode *SLLI = CurDAG->getMachineNode(
              RISCV::SLLI, DL, XLenVT, X,
              CurDAG->getTargetConstant(C3 - C2, DL, XLenVT));
          SDNode *SRLI =
              CurDAG->getMachineNode(RISCV::SRLI, DL, XLenVT, SDValue(SLLI, 0),
                                     CurDAG->getTargetConstant(C3, DL, XLenVT));
          ReplaceNode(Node, SRLI);
          return;
        }
      }
    }

    // Turn (and (shl x, c2), c1) -> (srli (slli x, c2+c3), c3) if c1 is a mask
    // shifted by c2 bits with c3 leading zeros.
    if (LeftShift && isShiftedMask_64(C1)) {
      uint64_t C3 = XLen - (64 - countLeadingZeros(C1));

      if (C2 + C3 < XLen &&
          C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + C3)) << C2)) {
        // Use slli.uw when possible.
        if ((XLen - (C2 + C3)) == 32 && Subtarget->hasStdExtZba()) {
          SDNode *SLLI_UW =
              CurDAG->getMachineNode(RISCV::SLLI_UW, DL, XLenVT, X,
                                     CurDAG->getTargetConstant(C2, DL, XLenVT));
          ReplaceNode(Node, SLLI_UW);
          return;
        }

        // (srli (slli x, c2+c3), c3)
        if (OneUseOrZExtW && !IsCANDI) {
          SDNode *SLLI = CurDAG->getMachineNode(
              RISCV::SLLI, DL, XLenVT, X,
              CurDAG->getTargetConstant(C2 + C3, DL, XLenVT));
          SDNode *SRLI =
              CurDAG->getMachineNode(RISCV::SRLI, DL, XLenVT, SDValue(SLLI, 0),
                                     CurDAG->getTargetConstant(C3, DL, XLenVT));
          ReplaceNode(Node, SRLI);
          return;
        }
      }
    }

    // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
    // shifted mask with c2 leading zeros and c3 trailing zeros.
    if (!LeftShift && isShiftedMask_64(C1)) {
      uint64_t Leading = XLen - (64 - countLeadingZeros(C1));
      uint64_t C3 = countTrailingZeros(C1);
      if (Leading == C2 && C2 + C3 < XLen && OneUseOrZExtW && !IsCANDI) {
        unsigned SrliOpc = RISCV::SRLI;
        // If the input is zexti32 we should use SRLIW.
        if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
            X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
          SrliOpc = RISCV::SRLIW;
          X = X.getOperand(0);
        }
        SDNode *SRLI = CurDAG->getMachineNode(
            SrliOpc, DL, XLenVT, X,
            CurDAG->getTargetConstant(C2 + C3, DL, XLenVT));
        SDNode *SLLI =
            CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLI, 0),
                                   CurDAG->getTargetConstant(C3, DL, XLenVT));
        ReplaceNode(Node, SLLI);
        return;
      }
      // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
      if (Leading > 32 && (Leading - 32) == C2 && C2 + C3 < 32 &&
          OneUseOrZExtW && !IsCANDI) {
        SDNode *SRLIW = CurDAG->getMachineNode(
            RISCV::SRLIW, DL, XLenVT, X,
            CurDAG->getTargetConstant(C2 + C3, DL, XLenVT));
        SDNode *SLLI =
            CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLIW, 0),
                                   CurDAG->getTargetConstant(C3, DL, XLenVT));
        ReplaceNode(Node, SLLI);
        return;
      }
    }

    // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
    // shifted mask with no leading zeros and c3 trailing zeros.
    if (LeftShift && isShiftedMask_64(C1)) {
      uint64_t Leading = XLen - (64 - countLeadingZeros(C1));
      uint64_t C3 = countTrailingZeros(C1);
      if (Leading == 0 && C2 < C3 && OneUseOrZExtW && !IsCANDI) {
        SDNode *SRLI = CurDAG->getMachineNode(
            RISCV::SRLI, DL, XLenVT, X,
            CurDAG->getTargetConstant(C3 - C2, DL, XLenVT));
        SDNode *SLLI =
            CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLI, 0),
                                   CurDAG->getTargetConstant(C3, DL, XLenVT));
        ReplaceNode(Node, SLLI);
        return;
      }
      // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
      if (C2 < C3 && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
        SDNode *SRLIW = CurDAG->getMachineNode(
            RISCV::SRLIW, DL, XLenVT, X,
            CurDAG->getTargetConstant(C3 - C2, DL, XLenVT));
        SDNode *SLLI =
            CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLIW, 0),
                                   CurDAG->getTargetConstant(C3, DL, XLenVT));
        ReplaceNode(Node, SLLI);
        return;
      }
    }

    break;
  }
  case ISD::MUL: {
    // Special case for calculating (mul (and X, C2), C1) where the full
    // product fits in XLen bits. We can shift X left by the number of leading
    // zeros in C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the
    // final product has XLen trailing zeros, putting it in the output of
    // MULHU. This can avoid materializing a constant in a register for C2.

    // RHS should be a constant.
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C || !N1C->hasOneUse())
      break;

    // LHS should be an AND with constant.
    SDValue N0 = Node->getOperand(0);
    if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
      break;

    uint64_t C2 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();

    // Constant should be a mask.
    if (!isMask_64(C2))
      break;

    // This should be the only use of the AND unless we will use
    // (SRLI (SLLI X, 32), 32). We don't use a shift pair for other AND
    // constants.
    if (!N0.hasOneUse() && C2 != UINT64_C(0xFFFFFFFF))
      break;

    // If this can be an ANDI, ZEXT.H or ZEXT.W we don't need to do this
    // optimization.
    if (isInt<12>(C2) ||
        (C2 == UINT64_C(0xFFFF) &&
         (Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp())) ||
        (C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba()))
      break;

    // We need to shift left the AND input and C1 by a total of XLen bits.

    // How far left do we need to shift the AND input?
    unsigned XLen = Subtarget->getXLen();
    unsigned LeadingZeros = XLen - (64 - countLeadingZeros(C2));

    // The constant gets shifted by the remaining amount unless that would
    // shift bits out.
    uint64_t C1 = N1C->getZExtValue();
    unsigned ConstantShift = XLen - LeadingZeros;
    if (ConstantShift > (XLen - (64 - countLeadingZeros(C1))))
      break;

    uint64_t ShiftedC1 = C1 << ConstantShift;
    // If this is RV32, we need to sign extend the constant.
    if (XLen == 32)
      ShiftedC1 = SignExtend64<32>(ShiftedC1);

    // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
    SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget);
    SDNode *SLLI =
        CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
                               CurDAG->getTargetConstant(LeadingZeros, DL, VT));
    SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
                                           SDValue(SLLI, 0), SDValue(Imm, 0));
    ReplaceNode(Node, MULHU);
    return;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo = Node->getConstantOperandVal(0);
    switch (IntNo) {
    // By default we do not custom select any intrinsic.
    default:
      break;
    case Intrinsic::riscv_vmsgeu:
    case Intrinsic::riscv_vmsge: {
      SDValue Src1 = Node->getOperand(1);
      SDValue Src2 = Node->getOperand(2);
      bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
      bool IsCmpUnsignedZero = false;
      // Only custom select scalar second operand.
      if (Src2.getValueType() != XLenVT)
        break;
      // Small constants are handled with patterns.
      if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
        int64_t CVal = C->getSExtValue();
        if (CVal >= -15 && CVal <= 16) {
          if (!IsUnsigned || CVal != 0)
            break;
          IsCmpUnsignedZero = true;
        }
      }
      MVT Src1VT = Src1.getSimpleValueType();
      unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode;
      switch (RISCVTargetLowering::getLMUL(Src1VT)) {
      default:
        llvm_unreachable("Unexpected LMUL!");
#define CASE_VMSLT_VMNAND_VMSET_OPCODES(lmulenum, suffix, suffix_b)            \
  case RISCVII::VLMUL::lmulenum:                                               \
    VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix                 \
                             : RISCV::PseudoVMSLT_VX_##suffix;                 \
    VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix;                            \
    VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b;                             \
    break;
        CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F8, MF8, B1)
        CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F4, MF4, B2)
        CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F2, MF2, B4)
        CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_1, M1, B8)
        CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_2, M2, B16)
        CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_4, M4, B32)
        CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_8, M8, B64)
#undef CASE_VMSLT_VMNAND_VMSET_OPCODES
      }
      SDValue SEW = CurDAG->getTargetConstant(
          Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
      SDValue VL;
      selectVLOp(Node->getOperand(3), VL);

      // If vmsgeu with 0 immediate, expand it to vmset.
      if (IsCmpUnsignedZero) {
        ReplaceNode(Node, CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW));
        return;
      }

      // Expand to
      // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
      SDValue Cmp = SDValue(
          CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
          0);
      ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
                                               {Cmp, Cmp, VL, SEW}));
      return;
    }
    case Intrinsic::riscv_vmsgeu_mask:
    case Intrinsic::riscv_vmsge_mask: {
      SDValue Src1 = Node->getOperand(2);
      SDValue Src2 = Node->getOperand(3);
      bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
      bool IsCmpUnsignedZero = false;
      // Only custom select scalar second operand.
      if (Src2.getValueType() != XLenVT)
        break;
      // Small constants are handled with patterns.
      if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
        int64_t CVal = C->getSExtValue();
        if (CVal >= -15 && CVal <= 16) {
          if (!IsUnsigned || CVal != 0)
            break;
          IsCmpUnsignedZero = true;
        }
      }
      MVT Src1VT = Src1.getSimpleValueType();
      unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
          VMOROpcode;
      switch (RISCVTargetLowering::getLMUL(Src1VT)) {
      default:
        llvm_unreachable("Unexpected LMUL!");
#define CASE_VMSLT_OPCODES(lmulenum, suffix, suffix_b)                         \
  case RISCVII::VLMUL::lmulenum:                                               \
    VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix                 \
                             : RISCV::PseudoVMSLT_VX_##suffix;                 \
    VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK      \
                                 : RISCV::PseudoVMSLT_VX_##suffix##_MASK;      \
    break;
        CASE_VMSLT_OPCODES(LMUL_F8, MF8, B1)
        CASE_VMSLT_OPCODES(LMUL_F4, MF4, B2)
        CASE_VMSLT_OPCODES(LMUL_F2, MF2, B4)
        CASE_VMSLT_OPCODES(LMUL_1, M1, B8)
        CASE_VMSLT_OPCODES(LMUL_2, M2, B16)
        CASE_VMSLT_OPCODES(LMUL_4, M4, B32)
        CASE_VMSLT_OPCODES(LMUL_8, M8, B64)
#undef CASE_VMSLT_OPCODES
      }
      // Mask operations use the LMUL from the mask type.
      switch (RISCVTargetLowering::getLMUL(VT)) {
      default:
        llvm_unreachable("Unexpected LMUL!");
#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix)                       \
  case RISCVII::VLMUL::lmulenum:                                               \
    VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix;                              \
    VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix;                            \
    VMOROpcode = RISCV::PseudoVMOR_MM_##suffix;                                \
    break;
        CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, MF8)
        CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, MF4)
        CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, MF2)
        CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, M1)
        CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, M2)
        CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, M4)
        CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, M8)
#undef CASE_VMXOR_VMANDN_VMOR_OPCODES
      }
      SDValue SEW = CurDAG->getTargetConstant(
          Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
      SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
      SDValue VL;
      selectVLOp(Node->getOperand(5), VL);
      SDValue MaskedOff = Node->getOperand(1);
      SDValue Mask = Node->getOperand(4);

      // If vmsgeu_mask with 0 immediate, expand it to vmor mask, maskedoff.
      if (IsCmpUnsignedZero) {
        // We don't need vmor if the MaskedOff and the Mask are the same
        // value.
        if (Mask == MaskedOff) {
          ReplaceUses(Node, Mask.getNode());
          return;
        }
        ReplaceNode(Node,
                    CurDAG->getMachineNode(VMOROpcode, DL, VT,
                                           {Mask, MaskedOff, VL, MaskSEW}));
        return;
      }

      // If the MaskedOff value and the Mask are the same value use
      // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
      // This avoids needing to copy v0 to vd before starting the next sequence.
      if (Mask == MaskedOff) {
        SDValue Cmp = SDValue(
            CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
            0);
        ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
                                                 {Mask, Cmp, VL, MaskSEW}));
        return;
      }

      // Mask needs to be copied to V0.
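      // The glue output of the CopyToReg ties the V0 copy to the masked
      // compare pseudo created below, so nothing can clobber V0 in between.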
      SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
                                           RISCV::V0, Mask, SDValue());
      SDValue Glue = Chain.getValue(1);
      SDValue V0 = CurDAG->getRegister(RISCV::V0, VT);

      // Otherwise use
      // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
      // The result is mask undisturbed.
      // We use the same instructions to emulate mask agnostic behavior, because
      // the agnostic result can be either undisturbed or all 1.
      SDValue Cmp = SDValue(
          CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
                                 {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}),
          0);
      // vmxor.mm vd, vd, v0 is used to update active value.
      ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
                                               {Cmp, Mask, VL, MaskSEW}));
      return;
    }
    case Intrinsic::riscv_vsetvli_opt:
    case Intrinsic::riscv_vsetvlimax_opt:
      return selectVSETVLI(Node);
    }
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
    switch (IntNo) {
    // By default we do not custom select any intrinsic.
    default:
      break;
    case Intrinsic::riscv_vsetvli:
    case Intrinsic::riscv_vsetvlimax:
      return selectVSETVLI(Node);
    case Intrinsic::riscv_vlseg2:
    case Intrinsic::riscv_vlseg3:
    case Intrinsic::riscv_vlseg4:
    case Intrinsic::riscv_vlseg5:
    case Intrinsic::riscv_vlseg6:
    case Intrinsic::riscv_vlseg7:
    case Intrinsic::riscv_vlseg8: {
      selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
      return;
    }
    case Intrinsic::riscv_vlseg2_mask:
    case Intrinsic::riscv_vlseg3_mask:
    case Intrinsic::riscv_vlseg4_mask:
    case Intrinsic::riscv_vlseg5_mask:
    case Intrinsic::riscv_vlseg6_mask:
    case Intrinsic::riscv_vlseg7_mask:
    case Intrinsic::riscv_vlseg8_mask: {
      selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
      return;
    }
    case Intrinsic::riscv_vlsseg2:
    case Intrinsic::riscv_vlsseg3:
    case Intrinsic::riscv_vlsseg4:
    case Intrinsic::riscv_vlsseg5:
    case Intrinsic::riscv_vlsseg6:
    case Intrinsic::riscv_vlsseg7:
    case Intrinsic::riscv_vlsseg8: {
      selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
      return;
    }
    case Intrinsic::riscv_vlsseg2_mask:
    case Intrinsic::riscv_vlsseg3_mask:
    case Intrinsic::riscv_vlsseg4_mask:
    case Intrinsic::riscv_vlsseg5_mask:
    case Intrinsic::riscv_vlsseg6_mask:
    case Intrinsic::riscv_vlsseg7_mask:
    case Intrinsic::riscv_vlsseg8_mask: {
      selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
      return;
    }
    case Intrinsic::riscv_vloxseg2:
    case Intrinsic::riscv_vloxseg3:
    case Intrinsic::riscv_vloxseg4:
    case Intrinsic::riscv_vloxseg5:
    case Intrinsic::riscv_vloxseg6:
    case Intrinsic::riscv_vloxseg7:
    case Intrinsic::riscv_vloxseg8:
      selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
      return;
    case Intrinsic::riscv_vluxseg2:
    case Intrinsic::riscv_vluxseg3:
    case Intrinsic::riscv_vluxseg4:
    case Intrinsic::riscv_vluxseg5:
    case Intrinsic::riscv_vluxseg6:
    case Intrinsic::riscv_vluxseg7:
    case Intrinsic::riscv_vluxseg8:
      selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
      return;
    case Intrinsic::riscv_vloxseg2_mask:
    case Intrinsic::riscv_vloxseg3_mask:
    case Intrinsic::riscv_vloxseg4_mask:
    case Intrinsic::riscv_vloxseg5_mask:
    case Intrinsic::riscv_vloxseg6_mask:
    case Intrinsic::riscv_vloxseg7_mask:
    case Intrinsic::riscv_vloxseg8_mask:
      selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
      return;
    case Intrinsic::riscv_vluxseg2_mask:
    case Intrinsic::riscv_vluxseg3_mask:
    case Intrinsic::riscv_vluxseg4_mask:
    case Intrinsic::riscv_vluxseg5_mask:
    case Intrinsic::riscv_vluxseg6_mask:
    case Intrinsic::riscv_vluxseg7_mask:
    case Intrinsic::riscv_vluxseg8_mask:
      selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
      return;
    case Intrinsic::riscv_vlseg8ff:
    case Intrinsic::riscv_vlseg7ff:
    case Intrinsic::riscv_vlseg6ff:
    case Intrinsic::riscv_vlseg5ff:
    case Intrinsic::riscv_vlseg4ff:
    case Intrinsic::riscv_vlseg3ff:
    case Intrinsic::riscv_vlseg2ff: {
      selectVLSEGFF(Node, /*IsMasked*/ false);
      return;
    }
    case Intrinsic::riscv_vlseg8ff_mask:
    case Intrinsic::riscv_vlseg7ff_mask:
    case Intrinsic::riscv_vlseg6ff_mask:
    case Intrinsic::riscv_vlseg5ff_mask:
    case Intrinsic::riscv_vlseg4ff_mask:
    case Intrinsic::riscv_vlseg3ff_mask:
    case Intrinsic::riscv_vlseg2ff_mask: {
      selectVLSEGFF(Node, /*IsMasked*/ true);
      return;
    }
    case Intrinsic::riscv_vloxei:
    case Intrinsic::riscv_vloxei_mask:
    case Intrinsic::riscv_vluxei:
    case Intrinsic::riscv_vluxei_mask: {
      bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
                      IntNo == Intrinsic::riscv_vluxei_mask;
      bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
                       IntNo == Intrinsic::riscv_vloxei_mask;

      MVT VT = Node->getSimpleValueType(0);
      unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());

      unsigned CurOp = 2;
      // Masked intrinsics only have TU versions of the pseudo instructions.
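      // TU (tail-undisturbed) pseudos carry an explicit merge operand; the
      // tail-agnostic forms do not, so an undef passthru is simply skipped.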
      bool IsTU = IsMasked || (!IsMasked && !Node->getOperand(CurOp).isUndef());
      SmallVector<SDValue, 8> Operands;
      if (IsTU)
        Operands.push_back(Node->getOperand(CurOp++));
      else
        // Skip the undef passthru operand for nomask TA version pseudo.
        CurOp++;

      MVT IndexVT;
      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                                 /*IsStridedOrIndexed*/ true, Operands,
                                 /*IsLoad=*/true, &IndexVT);

      assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
             "Element count mismatch");

      RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
      RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
      unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
      if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
        report_fatal_error("The V extension does not support EEW=64 for index "
                           "values when XLEN=32");
      }
      const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
          IsMasked, IsTU, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
          static_cast<unsigned>(IndexLMUL));
      MachineSDNode *Load =
          CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);

      if (auto *MemOp = dyn_cast<MemSDNode>(Node))
        CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

      ReplaceNode(Node, Load);
      return;
    }
    case Intrinsic::riscv_vlm:
    case Intrinsic::riscv_vle:
    case Intrinsic::riscv_vle_mask:
    case Intrinsic::riscv_vlse:
    case Intrinsic::riscv_vlse_mask: {
      bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
                      IntNo == Intrinsic::riscv_vlse_mask;
      bool IsStrided =
          IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;

      MVT VT = Node->getSimpleValueType(0);
      unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());

      unsigned CurOp = 2;
      // The riscv_vlm intrinsic is always tail agnostic and has no passthru
      // operand.
      bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
      // Masked intrinsics only have TU versions of the pseudo instructions.
      bool IsTU =
          HasPassthruOperand &&
          ((!IsMasked && !Node->getOperand(CurOp).isUndef()) || IsMasked);
      SmallVector<SDValue, 8> Operands;
      if (IsTU)
        Operands.push_back(Node->getOperand(CurOp++));
      else if (HasPassthruOperand)
        // Skip the undef passthru operand for nomask TA version pseudo.
        CurOp++;

      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
                                 Operands, /*IsLoad=*/true);

      RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
      const RISCV::VLEPseudo *P =
          RISCV::getVLEPseudo(IsMasked, IsTU, IsStrided, /*FF*/ false, Log2SEW,
                              static_cast<unsigned>(LMUL));
      MachineSDNode *Load =
          CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);

      if (auto *MemOp = dyn_cast<MemSDNode>(Node))
        CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

      ReplaceNode(Node, Load);
      return;
    }
    case Intrinsic::riscv_vleff:
    case Intrinsic::riscv_vleff_mask: {
      bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;

      MVT VT = Node->getSimpleValueType(0);
      unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());

      unsigned CurOp = 2;
      // Masked intrinsics only have TU versions of the pseudo instructions.
      bool IsTU = IsMasked || (!IsMasked && !Node->getOperand(CurOp).isUndef());
      SmallVector<SDValue, 7> Operands;
      if (IsTU)
        Operands.push_back(Node->getOperand(CurOp++));
      else
        // Skip the undef passthru operand for nomask TA version pseudo.
        CurOp++;

      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                                 /*IsStridedOrIndexed*/ false, Operands,
                                 /*IsLoad=*/true);

      RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
      const RISCV::VLEPseudo *P =
          RISCV::getVLEPseudo(IsMasked, IsTU, /*Strided*/ false, /*FF*/ true,
                              Log2SEW, static_cast<unsigned>(LMUL));
      MachineSDNode *Load = CurDAG->getMachineNode(
          P->Pseudo, DL, Node->getValueType(0), XLenVT, MVT::Other, Operands);
      if (auto *MemOp = dyn_cast<MemSDNode>(Node))
        CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

      ReplaceNode(Node, Load);
      return;
    }
    }
    break;
  }
  case ISD::INTRINSIC_VOID: {
    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
    switch (IntNo) {
    case Intrinsic::riscv_vsseg2:
    case Intrinsic::riscv_vsseg3:
    case Intrinsic::riscv_vsseg4:
    case Intrinsic::riscv_vsseg5:
    case Intrinsic::riscv_vsseg6:
    case Intrinsic::riscv_vsseg7:
    case Intrinsic::riscv_vsseg8: {
      selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
      return;
    }
    case Intrinsic::riscv_vsseg2_mask:
    case Intrinsic::riscv_vsseg3_mask:
    case Intrinsic::riscv_vsseg4_mask:
    case Intrinsic::riscv_vsseg5_mask:
    case Intrinsic::riscv_vsseg6_mask:
    case Intrinsic::riscv_vsseg7_mask:
    case Intrinsic::riscv_vsseg8_mask: {
      selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
      return;
    }
    case Intrinsic::riscv_vssseg2:
    case Intrinsic::riscv_vssseg3:
    case Intrinsic::riscv_vssseg4:
    case Intrinsic::riscv_vssseg5:
    case Intrinsic::riscv_vssseg6:
    case Intrinsic::riscv_vssseg7:
    case Intrinsic::riscv_vssseg8: {
      selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
      return;
    }
    case Intrinsic::riscv_vssseg2_mask:
    case Intrinsic::riscv_vssseg3_mask:
    case Intrinsic::riscv_vssseg4_mask:
    case Intrinsic::riscv_vssseg5_mask:
    case Intrinsic::riscv_vssseg6_mask:
    case Intrinsic::riscv_vssseg7_mask:
    case Intrinsic::riscv_vssseg8_mask: {
      selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
      return;
    }
    case Intrinsic::riscv_vsoxseg2:
    case Intrinsic::riscv_vsoxseg3:
    case Intrinsic::riscv_vsoxseg4:
    case Intrinsic::riscv_vsoxseg5:
    case Intrinsic::riscv_vsoxseg6:
    case Intrinsic::riscv_vsoxseg7:
    case Intrinsic::riscv_vsoxseg8:
      selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
      return;
    case Intrinsic::riscv_vsuxseg2:
    case Intrinsic::riscv_vsuxseg3:
    case Intrinsic::riscv_vsuxseg4:
    case Intrinsic::riscv_vsuxseg5:
    case Intrinsic::riscv_vsuxseg6:
    case Intrinsic::riscv_vsuxseg7:
    case Intrinsic::riscv_vsuxseg8:
      selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
      return;
    case Intrinsic::riscv_vsoxseg2_mask:
    case Intrinsic::riscv_vsoxseg3_mask:
    case Intrinsic::riscv_vsoxseg4_mask:
    case Intrinsic::riscv_vsoxseg5_mask:
    case Intrinsic::riscv_vsoxseg6_mask:
    case Intrinsic::riscv_vsoxseg7_mask:
    case Intrinsic::riscv_vsoxseg8_mask:
      selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
      return;
    case Intrinsic::riscv_vsuxseg2_mask:
    case Intrinsic::riscv_vsuxseg3_mask:
    case Intrinsic::riscv_vsuxseg4_mask:
    case Intrinsic::riscv_vsuxseg5_mask:
    case Intrinsic::riscv_vsuxseg6_mask:
    case Intrinsic::riscv_vsuxseg7_mask:
    case Intrinsic::riscv_vsuxseg8_mask:
      selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
      return;
    case Intrinsic::riscv_vsoxei:
    case Intrinsic::riscv_vsoxei_mask:
    case Intrinsic::riscv_vsuxei:
    case Intrinsic::riscv_vsuxei_mask: {
      bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
                      IntNo == Intrinsic::riscv_vsuxei_mask;
      bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
                       IntNo == Intrinsic::riscv_vsoxei_mask;

      MVT VT = Node->getOperand(2)->getSimpleValueType(0);
      unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());

      unsigned CurOp = 2;
      SmallVector<SDValue, 8> Operands;
      Operands.push_back(Node->getOperand(CurOp++)); // Store value.

      MVT IndexVT;
      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                                 /*IsStridedOrIndexed*/ true, Operands,
                                 /*IsLoad=*/false, &IndexVT);

      assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
             "Element count mismatch");

      RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
      RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
      unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
      if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
        report_fatal_error("The V extension does not support EEW=64 for index "
                           "values when XLEN=32");
      }
      const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
          IsMasked, /*TU*/ false, IsOrdered, IndexLog2EEW,
          static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
      MachineSDNode *Store =
          CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);

      if (auto *MemOp = dyn_cast<MemSDNode>(Node))
        CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});

      ReplaceNode(Node, Store);
      return;
    }
    case Intrinsic::riscv_vsm:
    case Intrinsic::riscv_vse:
    case Intrinsic::riscv_vse_mask:
    case Intrinsic::riscv_vsse:
    case Intrinsic::riscv_vsse_mask: {
      bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
                      IntNo == Intrinsic::riscv_vsse_mask;
      bool IsStrided =
          IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;

      MVT VT = Node->getOperand(2)->getSimpleValueType(0);
      unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());

      unsigned CurOp = 2;
      SmallVector<SDValue, 8> Operands;
      Operands.push_back(Node->getOperand(CurOp++)); // Store value.

      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
                                 Operands);

      RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
      const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
          IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
      MachineSDNode *Store =
          CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
      if (auto *MemOp = dyn_cast<MemSDNode>(Node))
        CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});

      ReplaceNode(Node, Store);
      return;
    }
    }
    break;
  }
  case ISD::BITCAST: {
    MVT SrcVT = Node->getOperand(0).getSimpleValueType();
    // Just drop bitcasts between vectors if both are fixed or both are
    // scalable.
    if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
        (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
      ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
      CurDAG->RemoveDeadNode(Node);
      return;
    }
    break;
  }
  case ISD::INSERT_SUBVECTOR: {
    SDValue V = Node->getOperand(0);
    SDValue SubV = Node->getOperand(1);
    SDLoc DL(SubV);
    auto Idx = Node->getConstantOperandVal(2);
    MVT SubVecVT = SubV.getSimpleValueType();

    const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
    MVT SubVecContainerVT = SubVecVT;
    // Establish the correct scalable-vector types for any fixed-length type.
    if (SubVecVT.isFixedLengthVector())
      SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
    if (VT.isFixedLengthVector())
      VT = TLI.getContainerForFixedLengthVector(VT);

    const auto *TRI = Subtarget->getRegisterInfo();
    unsigned SubRegIdx;
    std::tie(SubRegIdx, Idx) =
        RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
            VT, SubVecContainerVT, Idx, TRI);

    // If the Idx hasn't been completely eliminated then this is a subvector
    // insert which doesn't naturally align to a vector register. These must
    // be handled using instructions to manipulate the vector registers.
    if (Idx != 0)
      break;

    RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT);
    bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
                           SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
                           SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
    (void)IsSubVecPartReg; // Silence unused variable warning without asserts.
    assert((!IsSubVecPartReg || V.isUndef()) &&
           "Expecting lowering to have created legal INSERT_SUBVECTORs when "
           "the subvector is smaller than a full-sized register");

    // If we haven't set a SubRegIdx, then we must be going between
    // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
    if (SubRegIdx == RISCV::NoSubRegister) {
      unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(VT);
      assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
                 InRegClassID &&
             "Unexpected subvector insertion");
      SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
      SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
                                               DL, VT, SubV, RC);
      ReplaceNode(Node, NewNode);
      return;
    }

    SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
    ReplaceNode(Node, Insert.getNode());
    return;
  }
  case ISD::EXTRACT_SUBVECTOR: {
    SDValue V = Node->getOperand(0);
    auto Idx = Node->getConstantOperandVal(1);
    MVT InVT = V.getSimpleValueType();
    SDLoc DL(V);

    const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
    MVT SubVecContainerVT = VT;
    // Establish the correct scalable-vector types for any fixed-length type.
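    // Rough illustration of the path below (assumed example): extracting
    // nxv1i64 at index 1 from an nxv2i64 LMUL=2 register group should
    // decompose to SubRegIdx == RISCV::sub_vrm1_1 with a residual Idx of 0,
    // so the extract ends up as a plain subregister copy.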
1686 if (VT.isFixedLengthVector()) 1687 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT); 1688 if (InVT.isFixedLengthVector()) 1689 InVT = TLI.getContainerForFixedLengthVector(InVT); 1690 1691 const auto *TRI = Subtarget->getRegisterInfo(); 1692 unsigned SubRegIdx; 1693 std::tie(SubRegIdx, Idx) = 1694 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 1695 InVT, SubVecContainerVT, Idx, TRI); 1696 1697 // If the Idx hasn't been completely eliminated then this is a subvector 1698 // extract which doesn't naturally align to a vector register. These must 1699 // be handled using instructions to manipulate the vector registers. 1700 if (Idx != 0) 1701 break; 1702 1703 // If we haven't set a SubRegIdx, then we must be going between 1704 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy. 1705 if (SubRegIdx == RISCV::NoSubRegister) { 1706 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT); 1707 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) == 1708 InRegClassID && 1709 "Unexpected subvector extraction"); 1710 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT); 1711 SDNode *NewNode = 1712 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC); 1713 ReplaceNode(Node, NewNode); 1714 return; 1715 } 1716 1717 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V); 1718 ReplaceNode(Node, Extract.getNode()); 1719 return; 1720 } 1721 case ISD::SPLAT_VECTOR: 1722 case RISCVISD::VMV_S_X_VL: 1723 case RISCVISD::VFMV_S_F_VL: 1724 case RISCVISD::VMV_V_X_VL: 1725 case RISCVISD::VFMV_V_F_VL: { 1726 // Try to match splat of a scalar load to a strided load with stride of x0. 1727 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL || 1728 Node->getOpcode() == RISCVISD::VFMV_S_F_VL; 1729 bool HasPassthruOperand = Node->getOpcode() != ISD::SPLAT_VECTOR; 1730 if (HasPassthruOperand && !Node->getOperand(0).isUndef()) 1731 break; 1732 SDValue Src = HasPassthruOperand ? Node->getOperand(1) : Node->getOperand(0); 1733 auto *Ld = dyn_cast<LoadSDNode>(Src); 1734 if (!Ld) 1735 break; 1736 EVT MemVT = Ld->getMemoryVT(); 1737 // The memory VT should be the same size as the element type. 1738 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize()) 1739 break; 1740 if (!IsProfitableToFold(Src, Node, Node) || 1741 !IsLegalToFold(Src, Node, Node, TM.getOptLevel())) 1742 break; 1743 1744 SDValue VL; 1745 if (Node->getOpcode() == ISD::SPLAT_VECTOR) 1746 VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT); 1747 else if (IsScalarMove) { 1748 // We could deal with more VL if we update the VSETVLI insert pass to 1749 // avoid introducing more VSETVLI. 
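      // For the single-element case handled here, the splat/move of a loaded
      // scalar becomes a stride-0 load below; roughly (illustrative assembly
      // only):
      //   vsetivli zero, 1, e64, m1, ta, mu
      //   vlse64.v  v8, (a0), zero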
1750 if (!isOneConstant(Node->getOperand(2))) 1751 break; 1752 selectVLOp(Node->getOperand(2), VL); 1753 } else 1754 selectVLOp(Node->getOperand(2), VL); 1755 1756 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1757 SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT); 1758 1759 SDValue Operands[] = {Ld->getBasePtr(), 1760 CurDAG->getRegister(RISCV::X0, XLenVT), VL, SEW, 1761 Ld->getChain()}; 1762 1763 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1764 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo( 1765 /*IsMasked*/ false, /*IsTU*/ false, /*IsStrided*/ true, /*FF*/ false, 1766 Log2SEW, static_cast<unsigned>(LMUL)); 1767 MachineSDNode *Load = 1768 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1769 1770 CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()}); 1771 1772 ReplaceNode(Node, Load); 1773 return; 1774 } 1775 } 1776 1777 // Select the default instruction. 1778 SelectCode(Node); 1779 } 1780 1781 bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand( 1782 const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { 1783 switch (ConstraintID) { 1784 case InlineAsm::Constraint_m: 1785 // We just support simple memory operands that have a single address 1786 // operand and need no special handling. 1787 OutOps.push_back(Op); 1788 return false; 1789 case InlineAsm::Constraint_A: 1790 OutOps.push_back(Op); 1791 return false; 1792 default: 1793 break; 1794 } 1795 1796 return true; 1797 } 1798 1799 // Select a frame index and an optional immediate offset from an ADD or OR. 1800 bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base, 1801 SDValue &Offset) { 1802 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { 1803 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT()); 1804 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT()); 1805 return true; 1806 } 1807 1808 if (!CurDAG->isBaseWithConstantOffset(Addr)) 1809 return false; 1810 1811 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) { 1812 auto *CN = cast<ConstantSDNode>(Addr.getOperand(1)); 1813 if (isInt<12>(CN->getSExtValue())) { 1814 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), 1815 Subtarget->getXLenVT()); 1816 Offset = CurDAG->getTargetConstant(CN->getSExtValue(), SDLoc(Addr), 1817 Subtarget->getXLenVT()); 1818 return true; 1819 } 1820 } 1821 1822 return false; 1823 } 1824 1825 bool RISCVDAGToDAGISel::SelectBaseAddr(SDValue Addr, SDValue &Base) { 1826 // If this is FrameIndex, select it directly. Otherwise just let it get 1827 // selected to a register independently. 
1828 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) 1829 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT()); 1830 else 1831 Base = Addr; 1832 return true; 1833 } 1834 1835 bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base, 1836 SDValue &Offset) { 1837 if (CurDAG->isBaseWithConstantOffset(Addr)) { 1838 auto *CN = cast<ConstantSDNode>(Addr.getOperand(1)); 1839 if (isInt<12>(CN->getSExtValue())) { 1840 Offset = CurDAG->getTargetConstant(CN->getSExtValue(), SDLoc(Addr), 1841 Subtarget->getXLenVT()); 1842 return SelectBaseAddr(Addr.getOperand(0), Base); 1843 } 1844 } 1845 1846 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT()); 1847 return SelectBaseAddr(Addr, Base); 1848 } 1849 1850 bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth, 1851 SDValue &ShAmt) { 1852 // Shift instructions on RISCV only read the lower 5 or 6 bits of the shift 1853 // amount. If there is an AND on the shift amount, we can bypass it if it 1854 // doesn't affect any of those bits. 1855 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) { 1856 const APInt &AndMask = N->getConstantOperandAPInt(1); 1857 1858 // Since the max shift amount is a power of 2 we can subtract 1 to make a 1859 // mask that covers the bits needed to represent all shift amounts. 1860 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!"); 1861 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1); 1862 1863 if (ShMask.isSubsetOf(AndMask)) { 1864 ShAmt = N.getOperand(0); 1865 return true; 1866 } 1867 1868 // SimplifyDemandedBits may have optimized the mask so try restoring any 1869 // bits that are known zero. 1870 KnownBits Known = CurDAG->computeKnownBits(N->getOperand(0)); 1871 if (ShMask.isSubsetOf(AndMask | Known.Zero)) { 1872 ShAmt = N.getOperand(0); 1873 return true; 1874 } 1875 } else if (N.getOpcode() == ISD::SUB && 1876 isa<ConstantSDNode>(N.getOperand(0))) { 1877 uint64_t Imm = N.getConstantOperandVal(0); 1878 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to 1879 // generate a NEG instead of a SUB of a constant. 1880 if (Imm != 0 && Imm % ShiftWidth == 0) { 1881 SDLoc DL(N); 1882 EVT VT = N.getValueType(); 1883 SDValue Zero = 1884 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT); 1885 unsigned NegOpc = VT == MVT::i64 ? 
RISCV::SUBW : RISCV::SUB; 1886 MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero, 1887 N.getOperand(1)); 1888 ShAmt = SDValue(Neg, 0); 1889 return true; 1890 } 1891 } 1892 1893 ShAmt = N; 1894 return true; 1895 } 1896 1897 bool RISCVDAGToDAGISel::selectSExti32(SDValue N, SDValue &Val) { 1898 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG && 1899 cast<VTSDNode>(N.getOperand(1))->getVT() == MVT::i32) { 1900 Val = N.getOperand(0); 1901 return true; 1902 } 1903 MVT VT = N.getSimpleValueType(); 1904 if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - 32)) { 1905 Val = N; 1906 return true; 1907 } 1908 1909 return false; 1910 } 1911 1912 bool RISCVDAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) { 1913 if (N.getOpcode() == ISD::AND) { 1914 auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); 1915 if (C && C->getZExtValue() == UINT64_C(0xFFFFFFFF)) { 1916 Val = N.getOperand(0); 1917 return true; 1918 } 1919 } 1920 MVT VT = N.getSimpleValueType(); 1921 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), 32); 1922 if (CurDAG->MaskedValueIsZero(N, Mask)) { 1923 Val = N; 1924 return true; 1925 } 1926 1927 return false; 1928 } 1929 1930 // Return true if all users of this SDNode* only consume the lower \p Bits. 1931 // This can be used to form W instructions for add/sub/mul/shl even when the 1932 // root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if 1933 // SimplifyDemandedBits has made it so some users see a sext_inreg and some 1934 // don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave 1935 // the add/sub/mul/shl to become non-W instructions. By checking the users we 1936 // may be able to use a W instruction and CSE with the other instruction if 1937 // this has happened. We could try to detect that the CSE opportunity exists 1938 // before doing this, but that would be more complicated. 1939 // TODO: Does this need to look through AND/OR/XOR to their users to find more 1940 // opportunities. 1941 bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const { 1942 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB || 1943 Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL || 1944 Node->getOpcode() == ISD::SRL || 1945 Node->getOpcode() == ISD::SIGN_EXTEND_INREG || 1946 Node->getOpcode() == RISCVISD::GREV || 1947 Node->getOpcode() == RISCVISD::GORC || 1948 isa<ConstantSDNode>(Node)) && 1949 "Unexpected opcode"); 1950 1951 for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) { 1952 SDNode *User = *UI; 1953 // Users of this node should have already been instruction selected 1954 if (!User->isMachineOpcode()) 1955 return false; 1956 1957 // TODO: Add more opcodes? 
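    // Sketch of the idea (assumed example): if an i64 ADD feeds only an SW
    // store, the store reads just the low 32 bits of operand 0, so a query
    // with Bits == 32 succeeds and the ADD can be selected as ADDW, possibly
    // CSEing with an existing ADDW of the same operands.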
    switch (User->getMachineOpcode()) {
    default:
      return false;
    case RISCV::ADDW:
    case RISCV::ADDIW:
    case RISCV::SUBW:
    case RISCV::MULW:
    case RISCV::SLLW:
    case RISCV::SLLIW:
    case RISCV::SRAW:
    case RISCV::SRAIW:
    case RISCV::SRLW:
    case RISCV::SRLIW:
    case RISCV::DIVW:
    case RISCV::DIVUW:
    case RISCV::REMW:
    case RISCV::REMUW:
    case RISCV::ROLW:
    case RISCV::RORW:
    case RISCV::RORIW:
    case RISCV::CLZW:
    case RISCV::CTZW:
    case RISCV::CPOPW:
    case RISCV::SLLI_UW:
    case RISCV::FMV_W_X:
    case RISCV::FCVT_H_W:
    case RISCV::FCVT_H_WU:
    case RISCV::FCVT_S_W:
    case RISCV::FCVT_S_WU:
    case RISCV::FCVT_D_W:
    case RISCV::FCVT_D_WU:
      if (Bits < 32)
        return false;
      break;
    case RISCV::SLLI:
      // SLLI only uses the lower (XLen - ShAmt) bits.
      if (Bits < Subtarget->getXLen() - User->getConstantOperandVal(1))
        return false;
      break;
    case RISCV::ANDI:
      if (Bits < (64 - countLeadingZeros(User->getConstantOperandVal(1))))
        return false;
      break;
    case RISCV::SEXT_B:
      if (Bits < 8)
        return false;
      break;
    case RISCV::SEXT_H:
    case RISCV::FMV_H_X:
    case RISCV::ZEXT_H_RV32:
    case RISCV::ZEXT_H_RV64:
      if (Bits < 16)
        return false;
      break;
    case RISCV::ADD_UW:
    case RISCV::SH1ADD_UW:
    case RISCV::SH2ADD_UW:
    case RISCV::SH3ADD_UW:
      // The first operand to add.uw/shXadd.uw is implicitly zero extended from
      // 32 bits.
      if (UI.getOperandNo() != 0 || Bits < 32)
        return false;
      break;
    case RISCV::SB:
      if (UI.getOperandNo() != 0 || Bits < 8)
        return false;
      break;
    case RISCV::SH:
      if (UI.getOperandNo() != 0 || Bits < 16)
        return false;
      break;
    case RISCV::SW:
      if (UI.getOperandNo() != 0 || Bits < 32)
        return false;
      break;
    }
  }

  return true;
}

// Select VL as a 5-bit immediate or a value that will become a register. This
// allows us to choose between VSETIVLI and VSETVLI later.
bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
  auto *C = dyn_cast<ConstantSDNode>(N);
  if (C && isUInt<5>(C->getZExtValue())) {
    VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
                                   N->getValueType(0));
  } else if (C && C->isAllOnesValue()) {
    // Treat all ones as VLMax.
    VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
                                   N->getValueType(0));
  } else if (isa<RegisterSDNode>(N) &&
             cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
    // All our VL operands use an operand that allows GPRNoX0 or an immediate
    // as the register class. Convert X0 to a special immediate to pass the
    // MachineVerifier. This is recognized specially by the vsetvli insertion
    // pass.
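    // Illustrative summary: a small constant VL (e.g. 4) is kept as an
    // immediate so the insertion pass can later use vsetivli, while X0 and
    // all-ones both become VLMaxSentinel, which requests the VLMAX form of
    // vsetvli.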
2056 VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N), 2057 N->getValueType(0)); 2058 } else { 2059 VL = N; 2060 } 2061 2062 return true; 2063 } 2064 2065 bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) { 2066 if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef()) 2067 return false; 2068 SplatVal = N.getOperand(1); 2069 return true; 2070 } 2071 2072 using ValidateFn = bool (*)(int64_t); 2073 2074 static bool selectVSplatSimmHelper(SDValue N, SDValue &SplatVal, 2075 SelectionDAG &DAG, 2076 const RISCVSubtarget &Subtarget, 2077 ValidateFn ValidateImm) { 2078 if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() || 2079 !isa<ConstantSDNode>(N.getOperand(1))) 2080 return false; 2081 2082 int64_t SplatImm = 2083 cast<ConstantSDNode>(N.getOperand(1))->getSExtValue(); 2084 2085 // The semantics of RISCVISD::VMV_V_X_VL is that when the operand 2086 // type is wider than the resulting vector element type: an implicit 2087 // truncation first takes place. Therefore, perform a manual 2088 // truncation/sign-extension in order to ignore any truncated bits and catch 2089 // any zero-extended immediate. 2090 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first 2091 // sign-extending to (XLenVT -1). 2092 MVT XLenVT = Subtarget.getXLenVT(); 2093 assert(XLenVT == N.getOperand(1).getSimpleValueType() && 2094 "Unexpected splat operand type"); 2095 MVT EltVT = N.getSimpleValueType().getVectorElementType(); 2096 if (EltVT.bitsLT(XLenVT)) 2097 SplatImm = SignExtend64(SplatImm, EltVT.getSizeInBits()); 2098 2099 if (!ValidateImm(SplatImm)) 2100 return false; 2101 2102 SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), XLenVT); 2103 return true; 2104 } 2105 2106 bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) { 2107 return selectVSplatSimmHelper(N, SplatVal, *CurDAG, *Subtarget, 2108 [](int64_t Imm) { return isInt<5>(Imm); }); 2109 } 2110 2111 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) { 2112 return selectVSplatSimmHelper( 2113 N, SplatVal, *CurDAG, *Subtarget, 2114 [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; }); 2115 } 2116 2117 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N, 2118 SDValue &SplatVal) { 2119 return selectVSplatSimmHelper( 2120 N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) { 2121 return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16); 2122 }); 2123 } 2124 2125 bool RISCVDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &SplatVal) { 2126 if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() || 2127 !isa<ConstantSDNode>(N.getOperand(1))) 2128 return false; 2129 2130 int64_t SplatImm = 2131 cast<ConstantSDNode>(N.getOperand(1))->getSExtValue(); 2132 2133 if (!isUInt<5>(SplatImm)) 2134 return false; 2135 2136 SplatVal = 2137 CurDAG->getTargetConstant(SplatImm, SDLoc(N), Subtarget->getXLenVT()); 2138 2139 return true; 2140 } 2141 2142 bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width, 2143 SDValue &Imm) { 2144 if (auto *C = dyn_cast<ConstantSDNode>(N)) { 2145 int64_t ImmVal = SignExtend64(C->getSExtValue(), Width); 2146 2147 if (!isInt<5>(ImmVal)) 2148 return false; 2149 2150 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT()); 2151 return true; 2152 } 2153 2154 return false; 2155 } 2156 2157 // Merge an ADDI into the offset of a load/store instruction where possible. 
2158 // (load (addi base, off1), off2) -> (load base, off1+off2) 2159 // (store val, (addi base, off1), off2) -> (store val, base, off1+off2) 2160 // (load (add base, (addi src, off1)), off2) 2161 // -> (load (add base, src), off1+off2) 2162 // (store val, (add base, (addi src, off1)), off2) 2163 // -> (store val, (add base, src), off1+off2) 2164 // This is possible when off1+off2 fits a 12-bit immediate. 2165 bool RISCVDAGToDAGISel::doPeepholeLoadStoreADDI(SDNode *N) { 2166 unsigned OffsetOpIdx, BaseOpIdx; 2167 if (!hasMemOffset(N, BaseOpIdx, OffsetOpIdx)) 2168 return false; 2169 2170 if (!isa<ConstantSDNode>(N->getOperand(OffsetOpIdx))) 2171 return false; 2172 2173 SDValue Base = N->getOperand(BaseOpIdx); 2174 2175 if (!Base.isMachineOpcode()) 2176 return false; 2177 2178 if (Base.getMachineOpcode() == RISCV::ADDI) { 2179 // If the base is an ADDI, we can merge it in to the load/store. 2180 } else if (Base.getMachineOpcode() == RISCV::ADDIW && 2181 isa<ConstantSDNode>(Base.getOperand(1)) && 2182 Base.getOperand(0).isMachineOpcode() && 2183 Base.getOperand(0).getMachineOpcode() == RISCV::LUI && 2184 isa<ConstantSDNode>(Base.getOperand(0).getOperand(0))) { 2185 // ADDIW can be merged if it's part of LUI+ADDIW constant materialization 2186 // and LUI+ADDI would have produced the same result. This is true for all 2187 // simm32 values except 0x7ffff800-0x7fffffff. 2188 int64_t Offset = 2189 SignExtend64<32>(Base.getOperand(0).getConstantOperandVal(0) << 12); 2190 Offset += cast<ConstantSDNode>(Base.getOperand(1))->getSExtValue(); 2191 if (!isInt<32>(Offset)) 2192 return false; 2193 } else 2194 return false; 2195 2196 SDValue ImmOperand = Base.getOperand(1); 2197 uint64_t Offset2 = N->getConstantOperandVal(OffsetOpIdx); 2198 2199 if (auto *Const = dyn_cast<ConstantSDNode>(ImmOperand)) { 2200 int64_t Offset1 = Const->getSExtValue(); 2201 int64_t CombinedOffset = Offset1 + Offset2; 2202 if (!isInt<12>(CombinedOffset)) 2203 return false; 2204 ImmOperand = CurDAG->getTargetConstant(CombinedOffset, SDLoc(ImmOperand), 2205 ImmOperand.getValueType()); 2206 } else if (auto *GA = dyn_cast<GlobalAddressSDNode>(ImmOperand)) { 2207 // If the off1 in (addi base, off1) is a global variable's address (its 2208 // low part, really), then we can rely on the alignment of that variable 2209 // to provide a margin of safety before off1 can overflow the 12 bits. 2210 // Check if off2 falls within that margin; if so off1+off2 can't overflow. 2211 const DataLayout &DL = CurDAG->getDataLayout(); 2212 Align Alignment = GA->getGlobal()->getPointerAlignment(DL); 2213 if (Offset2 != 0 && Alignment <= Offset2) 2214 return false; 2215 int64_t Offset1 = GA->getOffset(); 2216 int64_t CombinedOffset = Offset1 + Offset2; 2217 ImmOperand = CurDAG->getTargetGlobalAddress( 2218 GA->getGlobal(), SDLoc(ImmOperand), ImmOperand.getValueType(), 2219 CombinedOffset, GA->getTargetFlags()); 2220 } else if (auto *CP = dyn_cast<ConstantPoolSDNode>(ImmOperand)) { 2221 // Ditto. 
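    // Worked example of the margin check (assumed numbers): for an 8-byte
    // aligned symbol the low-part offset off1 is a multiple of 8 and thus at
    // most 2040, so adding any off2 in [1, 7] stays within the simm12 limit
    // of 2047.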
2222 Align Alignment = CP->getAlign(); 2223 if (Offset2 != 0 && Alignment <= Offset2) 2224 return false; 2225 int64_t Offset1 = CP->getOffset(); 2226 int64_t CombinedOffset = Offset1 + Offset2; 2227 ImmOperand = CurDAG->getTargetConstantPool( 2228 CP->getConstVal(), ImmOperand.getValueType(), CP->getAlign(), 2229 CombinedOffset, CP->getTargetFlags()); 2230 } else { 2231 return false; 2232 } 2233 2234 LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: "); 2235 LLVM_DEBUG(Base->dump(CurDAG)); 2236 LLVM_DEBUG(dbgs() << "\nN: "); 2237 LLVM_DEBUG(N->dump(CurDAG)); 2238 LLVM_DEBUG(dbgs() << "\n"); 2239 2240 // Modify the offset operand of the load/store. 2241 if (BaseOpIdx == 0) { // Load 2242 N = CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand, 2243 N->getOperand(2)); 2244 } else { // Store 2245 N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0), 2246 ImmOperand, N->getOperand(3)); 2247 } 2248 2249 return true; 2250 } 2251 2252 // Try to remove sext.w if the input is a W instruction or can be made into 2253 // a W instruction cheaply. 2254 bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) { 2255 // Look for the sext.w pattern, addiw rd, rs1, 0. 2256 if (N->getMachineOpcode() != RISCV::ADDIW || 2257 !isNullConstant(N->getOperand(1))) 2258 return false; 2259 2260 SDValue N0 = N->getOperand(0); 2261 if (!N0.isMachineOpcode()) 2262 return false; 2263 2264 switch (N0.getMachineOpcode()) { 2265 default: 2266 break; 2267 case RISCV::ADD: 2268 case RISCV::ADDI: 2269 case RISCV::SUB: 2270 case RISCV::MUL: 2271 case RISCV::SLLI: { 2272 // Convert sext.w+add/sub/mul to their W instructions. This will create 2273 // a new independent instruction. This improves latency. 2274 unsigned Opc; 2275 switch (N0.getMachineOpcode()) { 2276 default: 2277 llvm_unreachable("Unexpected opcode!"); 2278 case RISCV::ADD: Opc = RISCV::ADDW; break; 2279 case RISCV::ADDI: Opc = RISCV::ADDIW; break; 2280 case RISCV::SUB: Opc = RISCV::SUBW; break; 2281 case RISCV::MUL: Opc = RISCV::MULW; break; 2282 case RISCV::SLLI: Opc = RISCV::SLLIW; break; 2283 } 2284 2285 SDValue N00 = N0.getOperand(0); 2286 SDValue N01 = N0.getOperand(1); 2287 2288 // Shift amount needs to be uimm5. 2289 if (N0.getMachineOpcode() == RISCV::SLLI && 2290 !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue())) 2291 break; 2292 2293 SDNode *Result = 2294 CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), 2295 N00, N01); 2296 ReplaceUses(N, Result); 2297 return true; 2298 } 2299 case RISCV::ADDW: 2300 case RISCV::ADDIW: 2301 case RISCV::SUBW: 2302 case RISCV::MULW: 2303 case RISCV::SLLIW: 2304 case RISCV::GREVIW: 2305 case RISCV::GORCIW: 2306 // Result is already sign extended just remove the sext.w. 2307 // NOTE: We only handle the nodes that are selected with hasAllWUsers. 2308 ReplaceUses(N, N0.getNode()); 2309 return true; 2310 } 2311 2312 return false; 2313 } 2314 2315 // Optimize masked RVV pseudo instructions with a known all-ones mask to their 2316 // corresponding "unmasked" pseudo versions. The mask we're interested in will 2317 // take the form of a V0 physical register operand, with a glued 2318 // register-setting instruction. 2319 bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) { 2320 const RISCV::RISCVMaskedPseudoInfo *I = 2321 RISCV::getMaskedPseudoInfo(N->getMachineOpcode()); 2322 if (!I) 2323 return false; 2324 2325 unsigned MaskOpIdx = I->MaskOpIdx; 2326 2327 // Check that we're using V0 as a mask register. 
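  // For instance (illustrative pseudo names): a PseudoVADD_VV_M1_MASK takes
  // its mask in V0, glued to the instruction that defines V0; when that
  // definition is a PseudoVMSET_M_B8 all-ones mask, the node can be rewritten
  // below as the unmasked PseudoVADD_VV_M1.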
2328 if (!isa<RegisterSDNode>(N->getOperand(MaskOpIdx)) || 2329 cast<RegisterSDNode>(N->getOperand(MaskOpIdx))->getReg() != RISCV::V0) 2330 return false; 2331 2332 // The glued user defines V0. 2333 const auto *Glued = N->getGluedNode(); 2334 2335 if (!Glued || Glued->getOpcode() != ISD::CopyToReg) 2336 return false; 2337 2338 // Check that we're defining V0 as a mask register. 2339 if (!isa<RegisterSDNode>(Glued->getOperand(1)) || 2340 cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0) 2341 return false; 2342 2343 // Check the instruction defining V0; it needs to be a VMSET pseudo. 2344 SDValue MaskSetter = Glued->getOperand(2); 2345 2346 const auto IsVMSet = [](unsigned Opc) { 2347 return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 || 2348 Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 || 2349 Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 || 2350 Opc == RISCV::PseudoVMSET_M_B8; 2351 }; 2352 2353 // TODO: Check that the VMSET is the expected bitwidth? The pseudo has 2354 // undefined behaviour if it's the wrong bitwidth, so we could choose to 2355 // assume that it's all-ones? Same applies to its VL. 2356 if (!MaskSetter->isMachineOpcode() || !IsVMSet(MaskSetter.getMachineOpcode())) 2357 return false; 2358 2359 // Retrieve the tail policy operand index, if any. 2360 Optional<unsigned> TailPolicyOpIdx; 2361 const RISCVInstrInfo &TII = *Subtarget->getInstrInfo(); 2362 const MCInstrDesc &MaskedMCID = TII.get(N->getMachineOpcode()); 2363 2364 bool IsTA = true; 2365 if (RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags)) { 2366 // The last operand of the pseudo is the policy op, but we might have a 2367 // Glue operand last. We might also have a chain. 2368 TailPolicyOpIdx = N->getNumOperands() - 1; 2369 if (N->getOperand(*TailPolicyOpIdx).getValueType() == MVT::Glue) 2370 (*TailPolicyOpIdx)--; 2371 if (N->getOperand(*TailPolicyOpIdx).getValueType() == MVT::Other) 2372 (*TailPolicyOpIdx)--; 2373 2374 if (!(N->getConstantOperandVal(*TailPolicyOpIdx) & 2375 RISCVII::TAIL_AGNOSTIC)) { 2376 // Keep the true-masked instruction when there is no unmasked TU 2377 // instruction 2378 if (I->UnmaskedTUPseudo == I->MaskedPseudo && !N->getOperand(0).isUndef()) 2379 return false; 2380 // We can't use TA if the tie-operand is not IMPLICIT_DEF 2381 if (!N->getOperand(0).isUndef()) 2382 IsTA = false; 2383 } 2384 } 2385 2386 if (IsTA) { 2387 uint64_t TSFlags = TII.get(I->UnmaskedPseudo).TSFlags; 2388 2389 // Check that we're dropping the merge operand, the mask operand, and any 2390 // policy operand when we transform to this unmasked pseudo. 2391 assert(!RISCVII::hasMergeOp(TSFlags) && RISCVII::hasDummyMaskOp(TSFlags) && 2392 !RISCVII::hasVecPolicyOp(TSFlags) && 2393 "Unexpected pseudo to transform to"); 2394 (void)TSFlags; 2395 } else { 2396 uint64_t TSFlags = TII.get(I->UnmaskedTUPseudo).TSFlags; 2397 2398 // Check that we're dropping the mask operand, and any policy operand 2399 // when we transform to this unmasked tu pseudo. 2400 assert(RISCVII::hasMergeOp(TSFlags) && RISCVII::hasDummyMaskOp(TSFlags) && 2401 !RISCVII::hasVecPolicyOp(TSFlags) && 2402 "Unexpected pseudo to transform to"); 2403 (void)TSFlags; 2404 } 2405 2406 unsigned Opc = IsTA ? I->UnmaskedPseudo : I->UnmaskedTUPseudo; 2407 SmallVector<SDValue, 8> Ops; 2408 // Skip the merge operand at index 0 if IsTA 2409 for (unsigned I = IsTA, E = N->getNumOperands(); I != E; I++) { 2410 // Skip the mask, the policy, and the Glue. 
2411 SDValue Op = N->getOperand(I); 2412 if (I == MaskOpIdx || I == TailPolicyOpIdx || 2413 Op.getValueType() == MVT::Glue) 2414 continue; 2415 Ops.push_back(Op); 2416 } 2417 2418 // Transitively apply any node glued to our new node. 2419 if (auto *TGlued = Glued->getGluedNode()) 2420 Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1)); 2421 2422 SDNode *Result = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops); 2423 ReplaceUses(N, Result); 2424 2425 return true; 2426 } 2427 2428 // This pass converts a legalized DAG into a RISCV-specific DAG, ready 2429 // for instruction scheduling. 2430 FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM, 2431 CodeGenOpt::Level OptLevel) { 2432 return new RISCVDAGToDAGISel(TM, OptLevel); 2433 } 2434
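
// Usage sketch, for orientation only (the exact call site lives in
// RISCVTargetMachine.cpp and may differ between releases): the target's pass
// configuration is expected to instantiate this selector roughly as
//
//   bool RISCVPassConfig::addInstSelector() {
//     addPass(createRISCVISelDag(getRISCVTargetMachine(), getOptLevel()));
//     return false;
//   }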