1 //===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISCV ------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines an instruction selector for the RISCV target. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "RISCVISelDAGToDAG.h" 14 #include "MCTargetDesc/RISCVMCTargetDesc.h" 15 #include "MCTargetDesc/RISCVMatInt.h" 16 #include "RISCVISelLowering.h" 17 #include "RISCVMachineFunctionInfo.h" 18 #include "llvm/CodeGen/MachineFrameInfo.h" 19 #include "llvm/IR/IntrinsicsRISCV.h" 20 #include "llvm/Support/Alignment.h" 21 #include "llvm/Support/Debug.h" 22 #include "llvm/Support/KnownBits.h" 23 #include "llvm/Support/MathExtras.h" 24 #include "llvm/Support/raw_ostream.h" 25 26 using namespace llvm; 27 28 #define DEBUG_TYPE "riscv-isel" 29 30 namespace llvm { 31 namespace RISCV { 32 #define GET_RISCVVSSEGTable_IMPL 33 #define GET_RISCVVLSEGTable_IMPL 34 #define GET_RISCVVLXSEGTable_IMPL 35 #define GET_RISCVVSXSEGTable_IMPL 36 #define GET_RISCVVLETable_IMPL 37 #define GET_RISCVVSETable_IMPL 38 #define GET_RISCVVLXTable_IMPL 39 #define GET_RISCVVSXTable_IMPL 40 #define GET_RISCVMaskedPseudosTable_IMPL 41 #include "RISCVGenSearchableTables.inc" 42 } // namespace RISCV 43 } // namespace llvm 44 45 void RISCVDAGToDAGISel::PreprocessISelDAG() { 46 for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), 47 E = CurDAG->allnodes_end(); 48 I != E;) { 49 SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues. 50 51 // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point 52 // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden. 53 if (N->getOpcode() == ISD::SPLAT_VECTOR) { 54 MVT VT = N->getSimpleValueType(0); 55 unsigned Opc = 56 VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL; 57 SDLoc DL(N); 58 SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT()); 59 SDValue Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), 60 N->getOperand(0), VL); 61 62 --I; 63 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); 64 ++I; 65 CurDAG->DeleteNode(N); 66 continue; 67 } 68 69 // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector 70 // load. Done after lowering and combining so that we have a chance to 71 // optimize this to VMV_V_X_VL when the upper bits aren't needed. 72 if (N->getOpcode() != RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) 73 continue; 74 75 assert(N->getNumOperands() == 4 && "Unexpected number of operands"); 76 MVT VT = N->getSimpleValueType(0); 77 SDValue Passthru = N->getOperand(0); 78 SDValue Lo = N->getOperand(1); 79 SDValue Hi = N->getOperand(2); 80 SDValue VL = N->getOperand(3); 81 assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() && 82 Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 && 83 "Unexpected VTs!"); 84 MachineFunction &MF = CurDAG->getMachineFunction(); 85 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>(); 86 SDLoc DL(N); 87 88 // We use the same frame index we use for moving two i32s into 64-bit FPR. 89 // This is an analogous operation. 90 int FI = FuncInfo->getMoveF64FrameIndex(MF); 91 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 92 const TargetLowering &TLI = CurDAG->getTargetLoweringInfo(); 93 SDValue StackSlot = 94 CurDAG->getFrameIndex(FI, TLI.getPointerTy(CurDAG->getDataLayout())); 95 96 SDValue Chain = CurDAG->getEntryNode(); 97 Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8)); 98 99 SDValue OffsetSlot = 100 CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), DL); 101 Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4), 102 Align(8)); 103 104 Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); 105 106 SDVTList VTs = CurDAG->getVTList({VT, MVT::Other}); 107 SDValue IntID = 108 CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64); 109 SDValue Ops[] = {Chain, 110 IntID, 111 Passthru, 112 StackSlot, 113 CurDAG->getRegister(RISCV::X0, MVT::i64), 114 VL}; 115 116 SDValue Result = CurDAG->getMemIntrinsicNode( 117 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MVT::i64, MPI, Align(8), 118 MachineMemOperand::MOLoad); 119 120 // We're about to replace all uses of the SPLAT_VECTOR_SPLIT_I64 with the 121 // vlse we created. This will cause general havok on the dag because 122 // anything below the conversion could be folded into other existing nodes. 123 // To avoid invalidating 'I', back it up to the convert node. 124 --I; 125 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); 126 127 // Now that we did that, the node is dead. Increment the iterator to the 128 // next node to process, then delete N. 129 ++I; 130 CurDAG->DeleteNode(N); 131 } 132 } 133 134 void RISCVDAGToDAGISel::PostprocessISelDAG() { 135 HandleSDNode Dummy(CurDAG->getRoot()); 136 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); 137 138 bool MadeChange = false; 139 while (Position != CurDAG->allnodes_begin()) { 140 SDNode *N = &*--Position; 141 // Skip dead nodes and any non-machine opcodes. 142 if (N->use_empty() || !N->isMachineOpcode()) 143 continue; 144 145 MadeChange |= doPeepholeSExtW(N); 146 MadeChange |= doPeepholeLoadStoreADDI(N); 147 MadeChange |= doPeepholeMaskedRVV(N); 148 } 149 150 CurDAG->setRoot(Dummy.getValue()); 151 152 if (MadeChange) 153 CurDAG->RemoveDeadNodes(); 154 } 155 156 // Returns true if N is a MachineSDNode that has a reg and simm12 memory 157 // operand. The indices of the base pointer and offset are returned in BaseOpIdx 158 // and OffsetOpIdx. 159 static bool hasMemOffset(SDNode *N, unsigned &BaseOpIdx, 160 unsigned &OffsetOpIdx) { 161 switch (N->getMachineOpcode()) { 162 case RISCV::LB: 163 case RISCV::LH: 164 case RISCV::LW: 165 case RISCV::LBU: 166 case RISCV::LHU: 167 case RISCV::LWU: 168 case RISCV::LD: 169 case RISCV::FLH: 170 case RISCV::FLW: 171 case RISCV::FLD: 172 BaseOpIdx = 0; 173 OffsetOpIdx = 1; 174 return true; 175 case RISCV::SB: 176 case RISCV::SH: 177 case RISCV::SW: 178 case RISCV::SD: 179 case RISCV::FSH: 180 case RISCV::FSW: 181 case RISCV::FSD: 182 BaseOpIdx = 1; 183 OffsetOpIdx = 2; 184 return true; 185 } 186 187 return false; 188 } 189 190 static SDNode *selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, 191 RISCVMatInt::InstSeq &Seq) { 192 SDNode *Result = nullptr; 193 SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT); 194 for (RISCVMatInt::Inst &Inst : Seq) { 195 SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, VT); 196 switch (Inst.getOpndKind()) { 197 case RISCVMatInt::Imm: 198 Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SDImm); 199 break; 200 case RISCVMatInt::RegX0: 201 Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SrcReg, 202 CurDAG->getRegister(RISCV::X0, VT)); 203 break; 204 case RISCVMatInt::RegReg: 205 Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SrcReg, SrcReg); 206 break; 207 case RISCVMatInt::RegImm: 208 Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SrcReg, SDImm); 209 break; 210 } 211 212 // Only the first instruction has X0 as its source. 213 SrcReg = SDValue(Result, 0); 214 } 215 216 return Result; 217 } 218 219 static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, 220 int64_t Imm, const RISCVSubtarget &Subtarget) { 221 RISCVMatInt::InstSeq Seq = 222 RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits()); 223 224 return selectImmSeq(CurDAG, DL, VT, Seq); 225 } 226 227 static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, 228 unsigned NF, RISCVII::VLMUL LMUL) { 229 static const unsigned M1TupleRegClassIDs[] = { 230 RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID, 231 RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID, 232 RISCV::VRN8M1RegClassID}; 233 static const unsigned M2TupleRegClassIDs[] = {RISCV::VRN2M2RegClassID, 234 RISCV::VRN3M2RegClassID, 235 RISCV::VRN4M2RegClassID}; 236 237 assert(Regs.size() >= 2 && Regs.size() <= 8); 238 239 unsigned RegClassID; 240 unsigned SubReg0; 241 switch (LMUL) { 242 default: 243 llvm_unreachable("Invalid LMUL."); 244 case RISCVII::VLMUL::LMUL_F8: 245 case RISCVII::VLMUL::LMUL_F4: 246 case RISCVII::VLMUL::LMUL_F2: 247 case RISCVII::VLMUL::LMUL_1: 248 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7, 249 "Unexpected subreg numbering"); 250 SubReg0 = RISCV::sub_vrm1_0; 251 RegClassID = M1TupleRegClassIDs[NF - 2]; 252 break; 253 case RISCVII::VLMUL::LMUL_2: 254 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3, 255 "Unexpected subreg numbering"); 256 SubReg0 = RISCV::sub_vrm2_0; 257 RegClassID = M2TupleRegClassIDs[NF - 2]; 258 break; 259 case RISCVII::VLMUL::LMUL_4: 260 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1, 261 "Unexpected subreg numbering"); 262 SubReg0 = RISCV::sub_vrm4_0; 263 RegClassID = RISCV::VRN2M4RegClassID; 264 break; 265 } 266 267 SDLoc DL(Regs[0]); 268 SmallVector<SDValue, 8> Ops; 269 270 Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32)); 271 272 for (unsigned I = 0; I < Regs.size(); ++I) { 273 Ops.push_back(Regs[I]); 274 Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32)); 275 } 276 SDNode *N = 277 CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops); 278 return SDValue(N, 0); 279 } 280 281 void RISCVDAGToDAGISel::addVectorLoadStoreOperands( 282 SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp, 283 bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands, 284 bool IsLoad, MVT *IndexVT) { 285 SDValue Chain = Node->getOperand(0); 286 SDValue Glue; 287 288 Operands.push_back(Node->getOperand(CurOp++)); // Base pointer. 289 290 if (IsStridedOrIndexed) { 291 Operands.push_back(Node->getOperand(CurOp++)); // Index. 292 if (IndexVT) 293 *IndexVT = Operands.back()->getSimpleValueType(0); 294 } 295 296 if (IsMasked) { 297 // Mask needs to be copied to V0. 298 SDValue Mask = Node->getOperand(CurOp++); 299 Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue()); 300 Glue = Chain.getValue(1); 301 Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType())); 302 } 303 SDValue VL; 304 selectVLOp(Node->getOperand(CurOp++), VL); 305 Operands.push_back(VL); 306 307 MVT XLenVT = Subtarget->getXLenVT(); 308 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT); 309 Operands.push_back(SEWOp); 310 311 // Masked load has the tail policy argument. 312 if (IsMasked && IsLoad) { 313 // Policy must be a constant. 314 uint64_t Policy = Node->getConstantOperandVal(CurOp++); 315 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT); 316 Operands.push_back(PolicyOp); 317 } 318 319 Operands.push_back(Chain); // Chain. 320 if (Glue) 321 Operands.push_back(Glue); 322 } 323 324 static bool isAllUndef(ArrayRef<SDValue> Values) { 325 return llvm::all_of(Values, [](SDValue V) { return V->isUndef(); }); 326 } 327 328 void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked, 329 bool IsStrided) { 330 SDLoc DL(Node); 331 unsigned NF = Node->getNumValues() - 1; 332 MVT VT = Node->getSimpleValueType(0); 333 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 334 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 335 336 unsigned CurOp = 2; 337 SmallVector<SDValue, 8> Operands; 338 339 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp, 340 Node->op_begin() + CurOp + NF); 341 bool IsTU = IsMasked || !isAllUndef(Regs); 342 if (IsTU) { 343 SDValue Merge = createTuple(*CurDAG, Regs, NF, LMUL); 344 Operands.push_back(Merge); 345 } 346 CurOp += NF; 347 348 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 349 Operands, /*IsLoad=*/true); 350 351 const RISCV::VLSEGPseudo *P = 352 RISCV::getVLSEGPseudo(NF, IsMasked, IsTU, IsStrided, /*FF*/ false, Log2SEW, 353 static_cast<unsigned>(LMUL)); 354 MachineSDNode *Load = 355 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands); 356 357 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 358 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 359 360 SDValue SuperReg = SDValue(Load, 0); 361 for (unsigned I = 0; I < NF; ++I) { 362 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I); 363 ReplaceUses(SDValue(Node, I), 364 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg)); 365 } 366 367 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); 368 CurDAG->RemoveDeadNode(Node); 369 } 370 371 void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) { 372 SDLoc DL(Node); 373 unsigned NF = Node->getNumValues() - 2; // Do not count VL and Chain. 374 MVT VT = Node->getSimpleValueType(0); 375 MVT XLenVT = Subtarget->getXLenVT(); 376 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 377 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 378 379 unsigned CurOp = 2; 380 SmallVector<SDValue, 7> Operands; 381 382 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp, 383 Node->op_begin() + CurOp + NF); 384 bool IsTU = IsMasked || !isAllUndef(Regs); 385 if (IsTU) { 386 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL); 387 Operands.push_back(MaskedOff); 388 } 389 CurOp += NF; 390 391 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 392 /*IsStridedOrIndexed*/ false, Operands, 393 /*IsLoad=*/true); 394 395 const RISCV::VLSEGPseudo *P = 396 RISCV::getVLSEGPseudo(NF, IsMasked, IsTU, /*Strided*/ false, /*FF*/ true, 397 Log2SEW, static_cast<unsigned>(LMUL)); 398 MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, 399 XLenVT, MVT::Other, Operands); 400 401 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 402 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 403 404 SDValue SuperReg = SDValue(Load, 0); 405 for (unsigned I = 0; I < NF; ++I) { 406 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I); 407 ReplaceUses(SDValue(Node, I), 408 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg)); 409 } 410 411 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); // VL 412 ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 2)); // Chain 413 CurDAG->RemoveDeadNode(Node); 414 } 415 416 void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked, 417 bool IsOrdered) { 418 SDLoc DL(Node); 419 unsigned NF = Node->getNumValues() - 1; 420 MVT VT = Node->getSimpleValueType(0); 421 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 422 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 423 424 unsigned CurOp = 2; 425 SmallVector<SDValue, 8> Operands; 426 427 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp, 428 Node->op_begin() + CurOp + NF); 429 bool IsTU = IsMasked || !isAllUndef(Regs); 430 if (IsTU) { 431 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL); 432 Operands.push_back(MaskedOff); 433 } 434 CurOp += NF; 435 436 MVT IndexVT; 437 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 438 /*IsStridedOrIndexed*/ true, Operands, 439 /*IsLoad=*/true, &IndexVT); 440 441 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 442 "Element count mismatch"); 443 444 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 445 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 446 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 447 report_fatal_error("The V extension does not support EEW=64 for index " 448 "values when XLEN=32"); 449 } 450 const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo( 451 NF, IsMasked, IsTU, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), 452 static_cast<unsigned>(IndexLMUL)); 453 MachineSDNode *Load = 454 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands); 455 456 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 457 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 458 459 SDValue SuperReg = SDValue(Load, 0); 460 for (unsigned I = 0; I < NF; ++I) { 461 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I); 462 ReplaceUses(SDValue(Node, I), 463 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg)); 464 } 465 466 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); 467 CurDAG->RemoveDeadNode(Node); 468 } 469 470 void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, bool IsMasked, 471 bool IsStrided) { 472 SDLoc DL(Node); 473 unsigned NF = Node->getNumOperands() - 4; 474 if (IsStrided) 475 NF--; 476 if (IsMasked) 477 NF--; 478 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 479 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 480 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 481 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF); 482 SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL); 483 484 SmallVector<SDValue, 8> Operands; 485 Operands.push_back(StoreVal); 486 unsigned CurOp = 2 + NF; 487 488 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 489 Operands); 490 491 const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo( 492 NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL)); 493 MachineSDNode *Store = 494 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands); 495 496 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 497 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 498 499 ReplaceNode(Node, Store); 500 } 501 502 void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked, 503 bool IsOrdered) { 504 SDLoc DL(Node); 505 unsigned NF = Node->getNumOperands() - 5; 506 if (IsMasked) 507 --NF; 508 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 509 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 510 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 511 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF); 512 SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL); 513 514 SmallVector<SDValue, 8> Operands; 515 Operands.push_back(StoreVal); 516 unsigned CurOp = 2 + NF; 517 518 MVT IndexVT; 519 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 520 /*IsStridedOrIndexed*/ true, Operands, 521 /*IsLoad=*/false, &IndexVT); 522 523 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 524 "Element count mismatch"); 525 526 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 527 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 528 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 529 report_fatal_error("The V extension does not support EEW=64 for index " 530 "values when XLEN=32"); 531 } 532 const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo( 533 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), 534 static_cast<unsigned>(IndexLMUL)); 535 MachineSDNode *Store = 536 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands); 537 538 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 539 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 540 541 ReplaceNode(Node, Store); 542 } 543 544 void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) { 545 if (!Subtarget->hasVInstructions()) 546 return; 547 548 assert((Node->getOpcode() == ISD::INTRINSIC_W_CHAIN || 549 Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN) && 550 "Unexpected opcode"); 551 552 SDLoc DL(Node); 553 MVT XLenVT = Subtarget->getXLenVT(); 554 555 bool HasChain = Node->getOpcode() == ISD::INTRINSIC_W_CHAIN; 556 unsigned IntNoOffset = HasChain ? 1 : 0; 557 unsigned IntNo = Node->getConstantOperandVal(IntNoOffset); 558 559 assert((IntNo == Intrinsic::riscv_vsetvli || 560 IntNo == Intrinsic::riscv_vsetvlimax || 561 IntNo == Intrinsic::riscv_vsetvli_opt || 562 IntNo == Intrinsic::riscv_vsetvlimax_opt) && 563 "Unexpected vsetvli intrinsic"); 564 565 bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax || 566 IntNo == Intrinsic::riscv_vsetvlimax_opt; 567 unsigned Offset = IntNoOffset + (VLMax ? 1 : 2); 568 569 assert(Node->getNumOperands() == Offset + 2 && 570 "Unexpected number of operands"); 571 572 unsigned SEW = 573 RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7); 574 RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>( 575 Node->getConstantOperandVal(Offset + 1) & 0x7); 576 577 unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true, 578 /*MaskAgnostic*/ false); 579 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT); 580 581 SmallVector<EVT, 2> VTs = {XLenVT}; 582 if (HasChain) 583 VTs.push_back(MVT::Other); 584 585 SDValue VLOperand; 586 unsigned Opcode = RISCV::PseudoVSETVLI; 587 if (VLMax) { 588 VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT); 589 Opcode = RISCV::PseudoVSETVLIX0; 590 } else { 591 VLOperand = Node->getOperand(IntNoOffset + 1); 592 593 if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) { 594 uint64_t AVL = C->getZExtValue(); 595 if (isUInt<5>(AVL)) { 596 SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT); 597 SmallVector<SDValue, 3> Ops = {VLImm, VTypeIOp}; 598 if (HasChain) 599 Ops.push_back(Node->getOperand(0)); 600 ReplaceNode( 601 Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL, VTs, Ops)); 602 return; 603 } 604 } 605 } 606 607 SmallVector<SDValue, 3> Ops = {VLOperand, VTypeIOp}; 608 if (HasChain) 609 Ops.push_back(Node->getOperand(0)); 610 611 ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, VTs, Ops)); 612 } 613 614 void RISCVDAGToDAGISel::Select(SDNode *Node) { 615 // If we have a custom node, we have already selected. 616 if (Node->isMachineOpcode()) { 617 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n"); 618 Node->setNodeId(-1); 619 return; 620 } 621 622 // Instruction Selection not handled by the auto-generated tablegen selection 623 // should be handled here. 624 unsigned Opcode = Node->getOpcode(); 625 MVT XLenVT = Subtarget->getXLenVT(); 626 SDLoc DL(Node); 627 MVT VT = Node->getSimpleValueType(0); 628 629 switch (Opcode) { 630 case ISD::Constant: { 631 auto *ConstNode = cast<ConstantSDNode>(Node); 632 if (VT == XLenVT && ConstNode->isZero()) { 633 SDValue New = 634 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, XLenVT); 635 ReplaceNode(Node, New.getNode()); 636 return; 637 } 638 int64_t Imm = ConstNode->getSExtValue(); 639 // If the upper XLen-16 bits are not used, try to convert this to a simm12 640 // by sign extending bit 15. 641 if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) && 642 hasAllHUsers(Node)) 643 Imm = SignExtend64<16>(Imm); 644 // If the upper 32-bits are not used try to convert this into a simm32 by 645 // sign extending bit 32. 646 if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node)) 647 Imm = SignExtend64<32>(Imm); 648 649 ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget)); 650 return; 651 } 652 case ISD::ADD: { 653 // Try to select ADD + immediate used as memory addresses to 654 // (ADDI (ADD X, Imm-Lo12), Lo12) if it will allow the ADDI to be removed by 655 // doPeepholeLoadStoreADDI. 656 657 // LHS should be an immediate. 658 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 659 if (!N1C) 660 break; 661 662 int64_t Offset = N1C->getSExtValue(); 663 int64_t Lo12 = SignExtend64<12>(Offset); 664 665 // Don't do this if the lower 12 bits are 0 or we could use ADDI directly. 666 if (Lo12 == 0 || isInt<12>(Offset)) 667 break; 668 669 // Don't do this if we can use a pair of ADDIs. 670 if (isInt<12>(Offset / 2) && isInt<12>(Offset - Offset / 2)) 671 break; 672 673 RISCVMatInt::InstSeq Seq = 674 RISCVMatInt::generateInstSeq(Offset, Subtarget->getFeatureBits()); 675 676 Offset -= Lo12; 677 // Restore sign bits for RV32. 678 if (!Subtarget->is64Bit()) 679 Offset = SignExtend64<32>(Offset); 680 681 // We can fold if the last operation is an ADDI or its an ADDIW that could 682 // be treated as an ADDI. 683 if (Seq.back().Opc != RISCV::ADDI && 684 !(Seq.back().Opc == RISCV::ADDIW && isInt<32>(Offset))) 685 break; 686 assert(Seq.back().Imm == Lo12 && "Expected immediate to match Lo12"); 687 // Drop the last operation. 688 Seq.pop_back(); 689 assert(!Seq.empty() && "Expected more instructions in sequence"); 690 691 bool AllPointerUses = true; 692 for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) { 693 SDNode *User = *UI; 694 695 // Is this user a memory instruction that uses a register and immediate 696 // that has this ADD as its pointer. 697 unsigned BaseOpIdx, OffsetOpIdx; 698 if (!User->isMachineOpcode() || 699 !hasMemOffset(User, BaseOpIdx, OffsetOpIdx) || 700 UI.getOperandNo() != BaseOpIdx) { 701 AllPointerUses = false; 702 break; 703 } 704 705 // If the memory instruction already has an offset, make sure the combined 706 // offset is foldable. 707 int64_t MemOffs = 708 cast<ConstantSDNode>(User->getOperand(OffsetOpIdx))->getSExtValue(); 709 MemOffs += Lo12; 710 if (!isInt<12>(MemOffs)) { 711 AllPointerUses = false; 712 break; 713 } 714 } 715 716 if (!AllPointerUses) 717 break; 718 719 // Emit (ADDI (ADD X, Hi), Lo) 720 SDNode *Imm = selectImmSeq(CurDAG, DL, VT, Seq); 721 SDNode *ADD = CurDAG->getMachineNode(RISCV::ADD, DL, VT, 722 Node->getOperand(0), SDValue(Imm, 0)); 723 SDNode *ADDI = 724 CurDAG->getMachineNode(RISCV::ADDI, DL, VT, SDValue(ADD, 0), 725 CurDAG->getTargetConstant(Lo12, DL, VT)); 726 ReplaceNode(Node, ADDI); 727 return; 728 } 729 case ISD::SHL: { 730 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 731 if (!N1C) 732 break; 733 SDValue N0 = Node->getOperand(0); 734 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() || 735 !isa<ConstantSDNode>(N0.getOperand(1))) 736 break; 737 unsigned ShAmt = N1C->getZExtValue(); 738 uint64_t Mask = N0.getConstantOperandVal(1); 739 740 // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C) where C2 has 741 // 32 leading zeros and C3 trailing zeros. 742 if (ShAmt <= 32 && isShiftedMask_64(Mask)) { 743 unsigned XLen = Subtarget->getXLen(); 744 unsigned LeadingZeros = XLen - (64 - countLeadingZeros(Mask)); 745 unsigned TrailingZeros = countTrailingZeros(Mask); 746 if (TrailingZeros > 0 && LeadingZeros == 32) { 747 SDNode *SRLIW = CurDAG->getMachineNode( 748 RISCV::SRLIW, DL, VT, N0->getOperand(0), 749 CurDAG->getTargetConstant(TrailingZeros, DL, VT)); 750 SDNode *SLLI = CurDAG->getMachineNode( 751 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0), 752 CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT)); 753 ReplaceNode(Node, SLLI); 754 return; 755 } 756 } 757 break; 758 } 759 case ISD::SRL: { 760 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 761 if (!N1C) 762 break; 763 SDValue N0 = Node->getOperand(0); 764 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() || 765 !isa<ConstantSDNode>(N0.getOperand(1))) 766 break; 767 unsigned ShAmt = N1C->getZExtValue(); 768 uint64_t Mask = N0.getConstantOperandVal(1); 769 770 // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has 771 // 32 leading zeros and C3 trailing zeros. 772 if (isShiftedMask_64(Mask)) { 773 unsigned XLen = Subtarget->getXLen(); 774 unsigned LeadingZeros = XLen - (64 - countLeadingZeros(Mask)); 775 unsigned TrailingZeros = countTrailingZeros(Mask); 776 if (LeadingZeros == 32 && TrailingZeros > ShAmt) { 777 SDNode *SRLIW = CurDAG->getMachineNode( 778 RISCV::SRLIW, DL, VT, N0->getOperand(0), 779 CurDAG->getTargetConstant(TrailingZeros, DL, VT)); 780 SDNode *SLLI = CurDAG->getMachineNode( 781 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0), 782 CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT)); 783 ReplaceNode(Node, SLLI); 784 return; 785 } 786 } 787 788 // Optimize (srl (and X, C2), C) -> 789 // (srli (slli X, (XLen-C3), (XLen-C3) + C) 790 // Where C2 is a mask with C3 trailing ones. 791 // Taking into account that the C2 may have had lower bits unset by 792 // SimplifyDemandedBits. This avoids materializing the C2 immediate. 793 // This pattern occurs when type legalizing right shifts for types with 794 // less than XLen bits. 795 Mask |= maskTrailingOnes<uint64_t>(ShAmt); 796 if (!isMask_64(Mask)) 797 break; 798 unsigned TrailingOnes = countTrailingOnes(Mask); 799 // 32 trailing ones should use srliw via tablegen pattern. 800 if (TrailingOnes == 32 || ShAmt >= TrailingOnes) 801 break; 802 unsigned LShAmt = Subtarget->getXLen() - TrailingOnes; 803 SDNode *SLLI = 804 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0), 805 CurDAG->getTargetConstant(LShAmt, DL, VT)); 806 SDNode *SRLI = CurDAG->getMachineNode( 807 RISCV::SRLI, DL, VT, SDValue(SLLI, 0), 808 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT)); 809 ReplaceNode(Node, SRLI); 810 return; 811 } 812 case ISD::SRA: { 813 // Optimize (sra (sext_inreg X, i16), C) -> 814 // (srai (slli X, (XLen-16), (XLen-16) + C) 815 // And (sra (sext_inreg X, i8), C) -> 816 // (srai (slli X, (XLen-8), (XLen-8) + C) 817 // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal. 818 // This transform matches the code we get without Zbb. The shifts are more 819 // compressible, and this can help expose CSE opportunities in the sdiv by 820 // constant optimization. 821 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 822 if (!N1C) 823 break; 824 SDValue N0 = Node->getOperand(0); 825 if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse()) 826 break; 827 unsigned ShAmt = N1C->getZExtValue(); 828 unsigned ExtSize = 829 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits(); 830 // ExtSize of 32 should use sraiw via tablegen pattern. 831 if (ExtSize >= 32 || ShAmt >= ExtSize) 832 break; 833 unsigned LShAmt = Subtarget->getXLen() - ExtSize; 834 SDNode *SLLI = 835 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0), 836 CurDAG->getTargetConstant(LShAmt, DL, VT)); 837 SDNode *SRAI = CurDAG->getMachineNode( 838 RISCV::SRAI, DL, VT, SDValue(SLLI, 0), 839 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT)); 840 ReplaceNode(Node, SRAI); 841 return; 842 } 843 case ISD::AND: { 844 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 845 if (!N1C) 846 break; 847 848 SDValue N0 = Node->getOperand(0); 849 850 bool LeftShift = N0.getOpcode() == ISD::SHL; 851 if (!LeftShift && N0.getOpcode() != ISD::SRL) 852 break; 853 854 auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 855 if (!C) 856 break; 857 unsigned C2 = C->getZExtValue(); 858 unsigned XLen = Subtarget->getXLen(); 859 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!"); 860 861 uint64_t C1 = N1C->getZExtValue(); 862 863 // Keep track of whether this is a c.andi. If we can't use c.andi, the 864 // shift pair might offer more compression opportunities. 865 // TODO: We could check for C extension here, but we don't have many lit 866 // tests with the C extension enabled so not checking gets better coverage. 867 // TODO: What if ANDI faster than shift? 868 bool IsCANDI = isInt<6>(N1C->getSExtValue()); 869 870 // Clear irrelevant bits in the mask. 871 if (LeftShift) 872 C1 &= maskTrailingZeros<uint64_t>(C2); 873 else 874 C1 &= maskTrailingOnes<uint64_t>(XLen - C2); 875 876 // Some transforms should only be done if the shift has a single use or 877 // the AND would become (srli (slli X, 32), 32) 878 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF); 879 880 SDValue X = N0.getOperand(0); 881 882 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask 883 // with c3 leading zeros. 884 if (!LeftShift && isMask_64(C1)) { 885 unsigned Leading = XLen - (64 - countLeadingZeros(C1)); 886 if (C2 < Leading) { 887 // If the number of leading zeros is C2+32 this can be SRLIW. 888 if (C2 + 32 == Leading) { 889 SDNode *SRLIW = CurDAG->getMachineNode( 890 RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT)); 891 ReplaceNode(Node, SRLIW); 892 return; 893 } 894 895 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32) if 896 // c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1. 897 // 898 // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type 899 // legalized and goes through DAG combine. 900 if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() && 901 X.getOpcode() == ISD::SIGN_EXTEND_INREG && 902 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) { 903 SDNode *SRAIW = 904 CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0), 905 CurDAG->getTargetConstant(31, DL, VT)); 906 SDNode *SRLIW = CurDAG->getMachineNode( 907 RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0), 908 CurDAG->getTargetConstant(Leading - 32, DL, VT)); 909 ReplaceNode(Node, SRLIW); 910 return; 911 } 912 913 // (srli (slli x, c3-c2), c3). 914 // Skip if we could use (zext.w (sraiw X, C2)). 915 bool Skip = Subtarget->hasStdExtZba() && Leading == 32 && 916 X.getOpcode() == ISD::SIGN_EXTEND_INREG && 917 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32; 918 // Also Skip if we can use bexti. 919 Skip |= Subtarget->hasStdExtZbs() && Leading == XLen - 1; 920 if (OneUseOrZExtW && !Skip) { 921 SDNode *SLLI = CurDAG->getMachineNode( 922 RISCV::SLLI, DL, VT, X, 923 CurDAG->getTargetConstant(Leading - C2, DL, VT)); 924 SDNode *SRLI = CurDAG->getMachineNode( 925 RISCV::SRLI, DL, VT, SDValue(SLLI, 0), 926 CurDAG->getTargetConstant(Leading, DL, VT)); 927 ReplaceNode(Node, SRLI); 928 return; 929 } 930 } 931 } 932 933 // Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask 934 // shifted by c2 bits with c3 leading zeros. 935 if (LeftShift && isShiftedMask_64(C1)) { 936 unsigned Leading = XLen - (64 - countLeadingZeros(C1)); 937 938 if (C2 + Leading < XLen && 939 C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) { 940 // Use slli.uw when possible. 941 if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) { 942 SDNode *SLLI_UW = CurDAG->getMachineNode( 943 RISCV::SLLI_UW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT)); 944 ReplaceNode(Node, SLLI_UW); 945 return; 946 } 947 948 // (srli (slli c2+c3), c3) 949 if (OneUseOrZExtW && !IsCANDI) { 950 SDNode *SLLI = CurDAG->getMachineNode( 951 RISCV::SLLI, DL, VT, X, 952 CurDAG->getTargetConstant(C2 + Leading, DL, VT)); 953 SDNode *SRLI = CurDAG->getMachineNode( 954 RISCV::SRLI, DL, VT, SDValue(SLLI, 0), 955 CurDAG->getTargetConstant(Leading, DL, VT)); 956 ReplaceNode(Node, SRLI); 957 return; 958 } 959 } 960 } 961 962 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a 963 // shifted mask with c2 leading zeros and c3 trailing zeros. 964 if (!LeftShift && isShiftedMask_64(C1)) { 965 unsigned Leading = XLen - (64 - countLeadingZeros(C1)); 966 unsigned Trailing = countTrailingZeros(C1); 967 if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW && !IsCANDI) { 968 unsigned SrliOpc = RISCV::SRLI; 969 // If the input is zexti32 we should use SRLIW. 970 if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) && 971 X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) { 972 SrliOpc = RISCV::SRLIW; 973 X = X.getOperand(0); 974 } 975 SDNode *SRLI = CurDAG->getMachineNode( 976 SrliOpc, DL, VT, X, 977 CurDAG->getTargetConstant(C2 + Trailing, DL, VT)); 978 SDNode *SLLI = 979 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLI, 0), 980 CurDAG->getTargetConstant(Trailing, DL, VT)); 981 ReplaceNode(Node, SLLI); 982 return; 983 } 984 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI. 985 if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 && 986 OneUseOrZExtW && !IsCANDI) { 987 SDNode *SRLIW = CurDAG->getMachineNode( 988 RISCV::SRLIW, DL, VT, X, 989 CurDAG->getTargetConstant(C2 + Trailing, DL, VT)); 990 SDNode *SLLI = 991 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLIW, 0), 992 CurDAG->getTargetConstant(Trailing, DL, VT)); 993 ReplaceNode(Node, SLLI); 994 return; 995 } 996 } 997 998 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a 999 // shifted mask with no leading zeros and c3 trailing zeros. 1000 if (LeftShift && isShiftedMask_64(C1)) { 1001 unsigned Leading = XLen - (64 - countLeadingZeros(C1)); 1002 unsigned Trailing = countTrailingZeros(C1); 1003 if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) { 1004 SDNode *SRLI = CurDAG->getMachineNode( 1005 RISCV::SRLI, DL, VT, X, 1006 CurDAG->getTargetConstant(Trailing - C2, DL, VT)); 1007 SDNode *SLLI = 1008 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLI, 0), 1009 CurDAG->getTargetConstant(Trailing, DL, VT)); 1010 ReplaceNode(Node, SLLI); 1011 return; 1012 } 1013 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI. 1014 if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) { 1015 SDNode *SRLIW = CurDAG->getMachineNode( 1016 RISCV::SRLIW, DL, VT, X, 1017 CurDAG->getTargetConstant(Trailing - C2, DL, VT)); 1018 SDNode *SLLI = 1019 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLIW, 0), 1020 CurDAG->getTargetConstant(Trailing, DL, VT)); 1021 ReplaceNode(Node, SLLI); 1022 return; 1023 } 1024 } 1025 1026 break; 1027 } 1028 case ISD::MUL: { 1029 // Special case for calculating (mul (and X, C2), C1) where the full product 1030 // fits in XLen bits. We can shift X left by the number of leading zeros in 1031 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final 1032 // product has XLen trailing zeros, putting it in the output of MULHU. This 1033 // can avoid materializing a constant in a register for C2. 1034 1035 // RHS should be a constant. 1036 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 1037 if (!N1C || !N1C->hasOneUse()) 1038 break; 1039 1040 // LHS should be an AND with constant. 1041 SDValue N0 = Node->getOperand(0); 1042 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1))) 1043 break; 1044 1045 uint64_t C2 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); 1046 1047 // Constant should be a mask. 1048 if (!isMask_64(C2)) 1049 break; 1050 1051 // This should be the only use of the AND unless we will use 1052 // (SRLI (SLLI X, 32), 32). We don't use a shift pair for other AND 1053 // constants. 1054 if (!N0.hasOneUse() && C2 != UINT64_C(0xFFFFFFFF)) 1055 break; 1056 1057 // If this can be an ANDI, ZEXT.H or ZEXT.W we don't need to do this 1058 // optimization. 1059 if (isInt<12>(C2) || 1060 (C2 == UINT64_C(0xFFFF) && 1061 (Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp())) || 1062 (C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba())) 1063 break; 1064 1065 // We need to shift left the AND input and C1 by a total of XLen bits. 1066 1067 // How far left do we need to shift the AND input? 1068 unsigned XLen = Subtarget->getXLen(); 1069 unsigned LeadingZeros = XLen - (64 - countLeadingZeros(C2)); 1070 1071 // The constant gets shifted by the remaining amount unless that would 1072 // shift bits out. 1073 uint64_t C1 = N1C->getZExtValue(); 1074 unsigned ConstantShift = XLen - LeadingZeros; 1075 if (ConstantShift > (XLen - (64 - countLeadingZeros(C1)))) 1076 break; 1077 1078 uint64_t ShiftedC1 = C1 << ConstantShift; 1079 // If this RV32, we need to sign extend the constant. 1080 if (XLen == 32) 1081 ShiftedC1 = SignExtend64<32>(ShiftedC1); 1082 1083 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))). 1084 SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget); 1085 SDNode *SLLI = 1086 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0), 1087 CurDAG->getTargetConstant(LeadingZeros, DL, VT)); 1088 SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT, 1089 SDValue(SLLI, 0), SDValue(Imm, 0)); 1090 ReplaceNode(Node, MULHU); 1091 return; 1092 } 1093 case ISD::INTRINSIC_WO_CHAIN: { 1094 unsigned IntNo = Node->getConstantOperandVal(0); 1095 switch (IntNo) { 1096 // By default we do not custom select any intrinsic. 1097 default: 1098 break; 1099 case Intrinsic::riscv_vmsgeu: 1100 case Intrinsic::riscv_vmsge: { 1101 SDValue Src1 = Node->getOperand(1); 1102 SDValue Src2 = Node->getOperand(2); 1103 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu; 1104 bool IsCmpUnsignedZero = false; 1105 // Only custom select scalar second operand. 1106 if (Src2.getValueType() != XLenVT) 1107 break; 1108 // Small constants are handled with patterns. 1109 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) { 1110 int64_t CVal = C->getSExtValue(); 1111 if (CVal >= -15 && CVal <= 16) { 1112 if (!IsUnsigned || CVal != 0) 1113 break; 1114 IsCmpUnsignedZero = true; 1115 } 1116 } 1117 MVT Src1VT = Src1.getSimpleValueType(); 1118 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode; 1119 switch (RISCVTargetLowering::getLMUL(Src1VT)) { 1120 default: 1121 llvm_unreachable("Unexpected LMUL!"); 1122 #define CASE_VMSLT_VMNAND_VMSET_OPCODES(lmulenum, suffix, suffix_b) \ 1123 case RISCVII::VLMUL::lmulenum: \ 1124 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \ 1125 : RISCV::PseudoVMSLT_VX_##suffix; \ 1126 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \ 1127 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b; \ 1128 break; 1129 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F8, MF8, B1) 1130 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F4, MF4, B2) 1131 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F2, MF2, B4) 1132 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_1, M1, B8) 1133 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_2, M2, B16) 1134 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_4, M4, B32) 1135 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_8, M8, B64) 1136 #undef CASE_VMSLT_VMNAND_VMSET_OPCODES 1137 } 1138 SDValue SEW = CurDAG->getTargetConstant( 1139 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT); 1140 SDValue VL; 1141 selectVLOp(Node->getOperand(3), VL); 1142 1143 // If vmsgeu with 0 immediate, expand it to vmset. 1144 if (IsCmpUnsignedZero) { 1145 ReplaceNode(Node, CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW)); 1146 return; 1147 } 1148 1149 // Expand to 1150 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd 1151 SDValue Cmp = SDValue( 1152 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}), 1153 0); 1154 ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT, 1155 {Cmp, Cmp, VL, SEW})); 1156 return; 1157 } 1158 case Intrinsic::riscv_vmsgeu_mask: 1159 case Intrinsic::riscv_vmsge_mask: { 1160 SDValue Src1 = Node->getOperand(2); 1161 SDValue Src2 = Node->getOperand(3); 1162 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask; 1163 bool IsCmpUnsignedZero = false; 1164 // Only custom select scalar second operand. 1165 if (Src2.getValueType() != XLenVT) 1166 break; 1167 // Small constants are handled with patterns. 1168 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) { 1169 int64_t CVal = C->getSExtValue(); 1170 if (CVal >= -15 && CVal <= 16) { 1171 if (!IsUnsigned || CVal != 0) 1172 break; 1173 IsCmpUnsignedZero = true; 1174 } 1175 } 1176 MVT Src1VT = Src1.getSimpleValueType(); 1177 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode, 1178 VMOROpcode; 1179 switch (RISCVTargetLowering::getLMUL(Src1VT)) { 1180 default: 1181 llvm_unreachable("Unexpected LMUL!"); 1182 #define CASE_VMSLT_OPCODES(lmulenum, suffix, suffix_b) \ 1183 case RISCVII::VLMUL::lmulenum: \ 1184 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \ 1185 : RISCV::PseudoVMSLT_VX_##suffix; \ 1186 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \ 1187 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \ 1188 break; 1189 CASE_VMSLT_OPCODES(LMUL_F8, MF8, B1) 1190 CASE_VMSLT_OPCODES(LMUL_F4, MF4, B2) 1191 CASE_VMSLT_OPCODES(LMUL_F2, MF2, B4) 1192 CASE_VMSLT_OPCODES(LMUL_1, M1, B8) 1193 CASE_VMSLT_OPCODES(LMUL_2, M2, B16) 1194 CASE_VMSLT_OPCODES(LMUL_4, M4, B32) 1195 CASE_VMSLT_OPCODES(LMUL_8, M8, B64) 1196 #undef CASE_VMSLT_OPCODES 1197 } 1198 // Mask operations use the LMUL from the mask type. 1199 switch (RISCVTargetLowering::getLMUL(VT)) { 1200 default: 1201 llvm_unreachable("Unexpected LMUL!"); 1202 #define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \ 1203 case RISCVII::VLMUL::lmulenum: \ 1204 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \ 1205 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \ 1206 VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \ 1207 break; 1208 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, MF8) 1209 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, MF4) 1210 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, MF2) 1211 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, M1) 1212 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, M2) 1213 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, M4) 1214 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, M8) 1215 #undef CASE_VMXOR_VMANDN_VMOR_OPCODES 1216 } 1217 SDValue SEW = CurDAG->getTargetConstant( 1218 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT); 1219 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT); 1220 SDValue VL; 1221 selectVLOp(Node->getOperand(5), VL); 1222 SDValue MaskedOff = Node->getOperand(1); 1223 SDValue Mask = Node->getOperand(4); 1224 1225 // If vmsgeu_mask with 0 immediate, expand it to vmor mask, maskedoff. 1226 if (IsCmpUnsignedZero) { 1227 // We don't need vmor if the MaskedOff and the Mask are the same 1228 // value. 1229 if (Mask == MaskedOff) { 1230 ReplaceUses(Node, Mask.getNode()); 1231 return; 1232 } 1233 ReplaceNode(Node, 1234 CurDAG->getMachineNode(VMOROpcode, DL, VT, 1235 {Mask, MaskedOff, VL, MaskSEW})); 1236 return; 1237 } 1238 1239 // If the MaskedOff value and the Mask are the same value use 1240 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt 1241 // This avoids needing to copy v0 to vd before starting the next sequence. 1242 if (Mask == MaskedOff) { 1243 SDValue Cmp = SDValue( 1244 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}), 1245 0); 1246 ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT, 1247 {Mask, Cmp, VL, MaskSEW})); 1248 return; 1249 } 1250 1251 // Mask needs to be copied to V0. 1252 SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, 1253 RISCV::V0, Mask, SDValue()); 1254 SDValue Glue = Chain.getValue(1); 1255 SDValue V0 = CurDAG->getRegister(RISCV::V0, VT); 1256 1257 // Otherwise use 1258 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0 1259 // The result is mask undisturbed. 1260 // We use the same instructions to emulate mask agnostic behavior, because 1261 // the agnostic result can be either undisturbed or all 1. 1262 SDValue Cmp = SDValue( 1263 CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT, 1264 {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}), 1265 0); 1266 // vmxor.mm vd, vd, v0 is used to update active value. 1267 ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT, 1268 {Cmp, Mask, VL, MaskSEW})); 1269 return; 1270 } 1271 case Intrinsic::riscv_vsetvli_opt: 1272 case Intrinsic::riscv_vsetvlimax_opt: 1273 return selectVSETVLI(Node); 1274 } 1275 break; 1276 } 1277 case ISD::INTRINSIC_W_CHAIN: { 1278 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 1279 switch (IntNo) { 1280 // By default we do not custom select any intrinsic. 1281 default: 1282 break; 1283 case Intrinsic::riscv_vsetvli: 1284 case Intrinsic::riscv_vsetvlimax: 1285 return selectVSETVLI(Node); 1286 case Intrinsic::riscv_vlseg2: 1287 case Intrinsic::riscv_vlseg3: 1288 case Intrinsic::riscv_vlseg4: 1289 case Intrinsic::riscv_vlseg5: 1290 case Intrinsic::riscv_vlseg6: 1291 case Intrinsic::riscv_vlseg7: 1292 case Intrinsic::riscv_vlseg8: { 1293 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false); 1294 return; 1295 } 1296 case Intrinsic::riscv_vlseg2_mask: 1297 case Intrinsic::riscv_vlseg3_mask: 1298 case Intrinsic::riscv_vlseg4_mask: 1299 case Intrinsic::riscv_vlseg5_mask: 1300 case Intrinsic::riscv_vlseg6_mask: 1301 case Intrinsic::riscv_vlseg7_mask: 1302 case Intrinsic::riscv_vlseg8_mask: { 1303 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false); 1304 return; 1305 } 1306 case Intrinsic::riscv_vlsseg2: 1307 case Intrinsic::riscv_vlsseg3: 1308 case Intrinsic::riscv_vlsseg4: 1309 case Intrinsic::riscv_vlsseg5: 1310 case Intrinsic::riscv_vlsseg6: 1311 case Intrinsic::riscv_vlsseg7: 1312 case Intrinsic::riscv_vlsseg8: { 1313 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true); 1314 return; 1315 } 1316 case Intrinsic::riscv_vlsseg2_mask: 1317 case Intrinsic::riscv_vlsseg3_mask: 1318 case Intrinsic::riscv_vlsseg4_mask: 1319 case Intrinsic::riscv_vlsseg5_mask: 1320 case Intrinsic::riscv_vlsseg6_mask: 1321 case Intrinsic::riscv_vlsseg7_mask: 1322 case Intrinsic::riscv_vlsseg8_mask: { 1323 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true); 1324 return; 1325 } 1326 case Intrinsic::riscv_vloxseg2: 1327 case Intrinsic::riscv_vloxseg3: 1328 case Intrinsic::riscv_vloxseg4: 1329 case Intrinsic::riscv_vloxseg5: 1330 case Intrinsic::riscv_vloxseg6: 1331 case Intrinsic::riscv_vloxseg7: 1332 case Intrinsic::riscv_vloxseg8: 1333 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true); 1334 return; 1335 case Intrinsic::riscv_vluxseg2: 1336 case Intrinsic::riscv_vluxseg3: 1337 case Intrinsic::riscv_vluxseg4: 1338 case Intrinsic::riscv_vluxseg5: 1339 case Intrinsic::riscv_vluxseg6: 1340 case Intrinsic::riscv_vluxseg7: 1341 case Intrinsic::riscv_vluxseg8: 1342 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false); 1343 return; 1344 case Intrinsic::riscv_vloxseg2_mask: 1345 case Intrinsic::riscv_vloxseg3_mask: 1346 case Intrinsic::riscv_vloxseg4_mask: 1347 case Intrinsic::riscv_vloxseg5_mask: 1348 case Intrinsic::riscv_vloxseg6_mask: 1349 case Intrinsic::riscv_vloxseg7_mask: 1350 case Intrinsic::riscv_vloxseg8_mask: 1351 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true); 1352 return; 1353 case Intrinsic::riscv_vluxseg2_mask: 1354 case Intrinsic::riscv_vluxseg3_mask: 1355 case Intrinsic::riscv_vluxseg4_mask: 1356 case Intrinsic::riscv_vluxseg5_mask: 1357 case Intrinsic::riscv_vluxseg6_mask: 1358 case Intrinsic::riscv_vluxseg7_mask: 1359 case Intrinsic::riscv_vluxseg8_mask: 1360 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false); 1361 return; 1362 case Intrinsic::riscv_vlseg8ff: 1363 case Intrinsic::riscv_vlseg7ff: 1364 case Intrinsic::riscv_vlseg6ff: 1365 case Intrinsic::riscv_vlseg5ff: 1366 case Intrinsic::riscv_vlseg4ff: 1367 case Intrinsic::riscv_vlseg3ff: 1368 case Intrinsic::riscv_vlseg2ff: { 1369 selectVLSEGFF(Node, /*IsMasked*/ false); 1370 return; 1371 } 1372 case Intrinsic::riscv_vlseg8ff_mask: 1373 case Intrinsic::riscv_vlseg7ff_mask: 1374 case Intrinsic::riscv_vlseg6ff_mask: 1375 case Intrinsic::riscv_vlseg5ff_mask: 1376 case Intrinsic::riscv_vlseg4ff_mask: 1377 case Intrinsic::riscv_vlseg3ff_mask: 1378 case Intrinsic::riscv_vlseg2ff_mask: { 1379 selectVLSEGFF(Node, /*IsMasked*/ true); 1380 return; 1381 } 1382 case Intrinsic::riscv_vloxei: 1383 case Intrinsic::riscv_vloxei_mask: 1384 case Intrinsic::riscv_vluxei: 1385 case Intrinsic::riscv_vluxei_mask: { 1386 bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask || 1387 IntNo == Intrinsic::riscv_vluxei_mask; 1388 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei || 1389 IntNo == Intrinsic::riscv_vloxei_mask; 1390 1391 MVT VT = Node->getSimpleValueType(0); 1392 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1393 1394 unsigned CurOp = 2; 1395 // Masked intrinsic only have TU version pseduo instructions. 1396 bool IsTU = IsMasked || !Node->getOperand(CurOp).isUndef(); 1397 SmallVector<SDValue, 8> Operands; 1398 if (IsTU) 1399 Operands.push_back(Node->getOperand(CurOp++)); 1400 else 1401 // Skip the undef passthru operand for nomask TA version pseudo 1402 CurOp++; 1403 1404 MVT IndexVT; 1405 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 1406 /*IsStridedOrIndexed*/ true, Operands, 1407 /*IsLoad=*/true, &IndexVT); 1408 1409 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 1410 "Element count mismatch"); 1411 1412 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1413 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 1414 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 1415 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 1416 report_fatal_error("The V extension does not support EEW=64 for index " 1417 "values when XLEN=32"); 1418 } 1419 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo( 1420 IsMasked, IsTU, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), 1421 static_cast<unsigned>(IndexLMUL)); 1422 MachineSDNode *Load = 1423 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1424 1425 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1426 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 1427 1428 ReplaceNode(Node, Load); 1429 return; 1430 } 1431 case Intrinsic::riscv_vlm: 1432 case Intrinsic::riscv_vle: 1433 case Intrinsic::riscv_vle_mask: 1434 case Intrinsic::riscv_vlse: 1435 case Intrinsic::riscv_vlse_mask: { 1436 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask || 1437 IntNo == Intrinsic::riscv_vlse_mask; 1438 bool IsStrided = 1439 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask; 1440 1441 MVT VT = Node->getSimpleValueType(0); 1442 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1443 1444 unsigned CurOp = 2; 1445 // The riscv_vlm intrinsic are always tail agnostic and no passthru operand. 1446 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm; 1447 // Masked intrinsic only have TU version pseduo instructions. 1448 bool IsTU = HasPassthruOperand && 1449 (IsMasked || !Node->getOperand(CurOp).isUndef()); 1450 SmallVector<SDValue, 8> Operands; 1451 if (IsTU) 1452 Operands.push_back(Node->getOperand(CurOp++)); 1453 else if (HasPassthruOperand) 1454 // Skip the undef passthru operand for nomask TA version pseudo 1455 CurOp++; 1456 1457 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 1458 Operands, /*IsLoad=*/true); 1459 1460 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1461 const RISCV::VLEPseudo *P = 1462 RISCV::getVLEPseudo(IsMasked, IsTU, IsStrided, /*FF*/ false, Log2SEW, 1463 static_cast<unsigned>(LMUL)); 1464 MachineSDNode *Load = 1465 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1466 1467 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1468 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 1469 1470 ReplaceNode(Node, Load); 1471 return; 1472 } 1473 case Intrinsic::riscv_vleff: 1474 case Intrinsic::riscv_vleff_mask: { 1475 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask; 1476 1477 MVT VT = Node->getSimpleValueType(0); 1478 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1479 1480 unsigned CurOp = 2; 1481 // Masked intrinsic only have TU version pseduo instructions. 1482 bool IsTU = IsMasked || !Node->getOperand(CurOp).isUndef(); 1483 SmallVector<SDValue, 7> Operands; 1484 if (IsTU) 1485 Operands.push_back(Node->getOperand(CurOp++)); 1486 else 1487 // Skip the undef passthru operand for nomask TA version pseudo 1488 CurOp++; 1489 1490 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 1491 /*IsStridedOrIndexed*/ false, Operands, 1492 /*IsLoad=*/true); 1493 1494 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1495 const RISCV::VLEPseudo *P = 1496 RISCV::getVLEPseudo(IsMasked, IsTU, /*Strided*/ false, /*FF*/ true, 1497 Log2SEW, static_cast<unsigned>(LMUL)); 1498 MachineSDNode *Load = CurDAG->getMachineNode( 1499 P->Pseudo, DL, Node->getVTList(), Operands); 1500 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1501 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 1502 1503 ReplaceNode(Node, Load); 1504 return; 1505 } 1506 } 1507 break; 1508 } 1509 case ISD::INTRINSIC_VOID: { 1510 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 1511 switch (IntNo) { 1512 case Intrinsic::riscv_vsseg2: 1513 case Intrinsic::riscv_vsseg3: 1514 case Intrinsic::riscv_vsseg4: 1515 case Intrinsic::riscv_vsseg5: 1516 case Intrinsic::riscv_vsseg6: 1517 case Intrinsic::riscv_vsseg7: 1518 case Intrinsic::riscv_vsseg8: { 1519 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false); 1520 return; 1521 } 1522 case Intrinsic::riscv_vsseg2_mask: 1523 case Intrinsic::riscv_vsseg3_mask: 1524 case Intrinsic::riscv_vsseg4_mask: 1525 case Intrinsic::riscv_vsseg5_mask: 1526 case Intrinsic::riscv_vsseg6_mask: 1527 case Intrinsic::riscv_vsseg7_mask: 1528 case Intrinsic::riscv_vsseg8_mask: { 1529 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false); 1530 return; 1531 } 1532 case Intrinsic::riscv_vssseg2: 1533 case Intrinsic::riscv_vssseg3: 1534 case Intrinsic::riscv_vssseg4: 1535 case Intrinsic::riscv_vssseg5: 1536 case Intrinsic::riscv_vssseg6: 1537 case Intrinsic::riscv_vssseg7: 1538 case Intrinsic::riscv_vssseg8: { 1539 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true); 1540 return; 1541 } 1542 case Intrinsic::riscv_vssseg2_mask: 1543 case Intrinsic::riscv_vssseg3_mask: 1544 case Intrinsic::riscv_vssseg4_mask: 1545 case Intrinsic::riscv_vssseg5_mask: 1546 case Intrinsic::riscv_vssseg6_mask: 1547 case Intrinsic::riscv_vssseg7_mask: 1548 case Intrinsic::riscv_vssseg8_mask: { 1549 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true); 1550 return; 1551 } 1552 case Intrinsic::riscv_vsoxseg2: 1553 case Intrinsic::riscv_vsoxseg3: 1554 case Intrinsic::riscv_vsoxseg4: 1555 case Intrinsic::riscv_vsoxseg5: 1556 case Intrinsic::riscv_vsoxseg6: 1557 case Intrinsic::riscv_vsoxseg7: 1558 case Intrinsic::riscv_vsoxseg8: 1559 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true); 1560 return; 1561 case Intrinsic::riscv_vsuxseg2: 1562 case Intrinsic::riscv_vsuxseg3: 1563 case Intrinsic::riscv_vsuxseg4: 1564 case Intrinsic::riscv_vsuxseg5: 1565 case Intrinsic::riscv_vsuxseg6: 1566 case Intrinsic::riscv_vsuxseg7: 1567 case Intrinsic::riscv_vsuxseg8: 1568 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false); 1569 return; 1570 case Intrinsic::riscv_vsoxseg2_mask: 1571 case Intrinsic::riscv_vsoxseg3_mask: 1572 case Intrinsic::riscv_vsoxseg4_mask: 1573 case Intrinsic::riscv_vsoxseg5_mask: 1574 case Intrinsic::riscv_vsoxseg6_mask: 1575 case Intrinsic::riscv_vsoxseg7_mask: 1576 case Intrinsic::riscv_vsoxseg8_mask: 1577 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true); 1578 return; 1579 case Intrinsic::riscv_vsuxseg2_mask: 1580 case Intrinsic::riscv_vsuxseg3_mask: 1581 case Intrinsic::riscv_vsuxseg4_mask: 1582 case Intrinsic::riscv_vsuxseg5_mask: 1583 case Intrinsic::riscv_vsuxseg6_mask: 1584 case Intrinsic::riscv_vsuxseg7_mask: 1585 case Intrinsic::riscv_vsuxseg8_mask: 1586 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false); 1587 return; 1588 case Intrinsic::riscv_vsoxei: 1589 case Intrinsic::riscv_vsoxei_mask: 1590 case Intrinsic::riscv_vsuxei: 1591 case Intrinsic::riscv_vsuxei_mask: { 1592 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask || 1593 IntNo == Intrinsic::riscv_vsuxei_mask; 1594 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei || 1595 IntNo == Intrinsic::riscv_vsoxei_mask; 1596 1597 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 1598 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1599 1600 unsigned CurOp = 2; 1601 SmallVector<SDValue, 8> Operands; 1602 Operands.push_back(Node->getOperand(CurOp++)); // Store value. 1603 1604 MVT IndexVT; 1605 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 1606 /*IsStridedOrIndexed*/ true, Operands, 1607 /*IsLoad=*/false, &IndexVT); 1608 1609 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 1610 "Element count mismatch"); 1611 1612 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1613 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 1614 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 1615 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 1616 report_fatal_error("The V extension does not support EEW=64 for index " 1617 "values when XLEN=32"); 1618 } 1619 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo( 1620 IsMasked, /*TU*/ false, IsOrdered, IndexLog2EEW, 1621 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL)); 1622 MachineSDNode *Store = 1623 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1624 1625 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1626 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 1627 1628 ReplaceNode(Node, Store); 1629 return; 1630 } 1631 case Intrinsic::riscv_vsm: 1632 case Intrinsic::riscv_vse: 1633 case Intrinsic::riscv_vse_mask: 1634 case Intrinsic::riscv_vsse: 1635 case Intrinsic::riscv_vsse_mask: { 1636 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask || 1637 IntNo == Intrinsic::riscv_vsse_mask; 1638 bool IsStrided = 1639 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask; 1640 1641 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 1642 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1643 1644 unsigned CurOp = 2; 1645 SmallVector<SDValue, 8> Operands; 1646 Operands.push_back(Node->getOperand(CurOp++)); // Store value. 1647 1648 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 1649 Operands); 1650 1651 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1652 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo( 1653 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL)); 1654 MachineSDNode *Store = 1655 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1656 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1657 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 1658 1659 ReplaceNode(Node, Store); 1660 return; 1661 } 1662 } 1663 break; 1664 } 1665 case ISD::BITCAST: { 1666 MVT SrcVT = Node->getOperand(0).getSimpleValueType(); 1667 // Just drop bitcasts between vectors if both are fixed or both are 1668 // scalable. 1669 if ((VT.isScalableVector() && SrcVT.isScalableVector()) || 1670 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) { 1671 ReplaceUses(SDValue(Node, 0), Node->getOperand(0)); 1672 CurDAG->RemoveDeadNode(Node); 1673 return; 1674 } 1675 break; 1676 } 1677 case ISD::INSERT_SUBVECTOR: { 1678 SDValue V = Node->getOperand(0); 1679 SDValue SubV = Node->getOperand(1); 1680 SDLoc DL(SubV); 1681 auto Idx = Node->getConstantOperandVal(2); 1682 MVT SubVecVT = SubV.getSimpleValueType(); 1683 1684 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering(); 1685 MVT SubVecContainerVT = SubVecVT; 1686 // Establish the correct scalable-vector types for any fixed-length type. 1687 if (SubVecVT.isFixedLengthVector()) 1688 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT); 1689 if (VT.isFixedLengthVector()) 1690 VT = TLI.getContainerForFixedLengthVector(VT); 1691 1692 const auto *TRI = Subtarget->getRegisterInfo(); 1693 unsigned SubRegIdx; 1694 std::tie(SubRegIdx, Idx) = 1695 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 1696 VT, SubVecContainerVT, Idx, TRI); 1697 1698 // If the Idx hasn't been completely eliminated then this is a subvector 1699 // insert which doesn't naturally align to a vector register. These must 1700 // be handled using instructions to manipulate the vector registers. 1701 if (Idx != 0) 1702 break; 1703 1704 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT); 1705 bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 || 1706 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 || 1707 SubVecLMUL == RISCVII::VLMUL::LMUL_F8; 1708 (void)IsSubVecPartReg; // Silence unused variable warning without asserts. 1709 assert((!IsSubVecPartReg || V.isUndef()) && 1710 "Expecting lowering to have created legal INSERT_SUBVECTORs when " 1711 "the subvector is smaller than a full-sized register"); 1712 1713 // If we haven't set a SubRegIdx, then we must be going between 1714 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy. 1715 if (SubRegIdx == RISCV::NoSubRegister) { 1716 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(VT); 1717 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) == 1718 InRegClassID && 1719 "Unexpected subvector extraction"); 1720 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT); 1721 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, 1722 DL, VT, SubV, RC); 1723 ReplaceNode(Node, NewNode); 1724 return; 1725 } 1726 1727 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV); 1728 ReplaceNode(Node, Insert.getNode()); 1729 return; 1730 } 1731 case ISD::EXTRACT_SUBVECTOR: { 1732 SDValue V = Node->getOperand(0); 1733 auto Idx = Node->getConstantOperandVal(1); 1734 MVT InVT = V.getSimpleValueType(); 1735 SDLoc DL(V); 1736 1737 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering(); 1738 MVT SubVecContainerVT = VT; 1739 // Establish the correct scalable-vector types for any fixed-length type. 1740 if (VT.isFixedLengthVector()) 1741 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT); 1742 if (InVT.isFixedLengthVector()) 1743 InVT = TLI.getContainerForFixedLengthVector(InVT); 1744 1745 const auto *TRI = Subtarget->getRegisterInfo(); 1746 unsigned SubRegIdx; 1747 std::tie(SubRegIdx, Idx) = 1748 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 1749 InVT, SubVecContainerVT, Idx, TRI); 1750 1751 // If the Idx hasn't been completely eliminated then this is a subvector 1752 // extract which doesn't naturally align to a vector register. These must 1753 // be handled using instructions to manipulate the vector registers. 1754 if (Idx != 0) 1755 break; 1756 1757 // If we haven't set a SubRegIdx, then we must be going between 1758 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy. 1759 if (SubRegIdx == RISCV::NoSubRegister) { 1760 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT); 1761 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) == 1762 InRegClassID && 1763 "Unexpected subvector extraction"); 1764 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT); 1765 SDNode *NewNode = 1766 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC); 1767 ReplaceNode(Node, NewNode); 1768 return; 1769 } 1770 1771 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V); 1772 ReplaceNode(Node, Extract.getNode()); 1773 return; 1774 } 1775 case ISD::SPLAT_VECTOR: 1776 case RISCVISD::VMV_S_X_VL: 1777 case RISCVISD::VFMV_S_F_VL: 1778 case RISCVISD::VMV_V_X_VL: 1779 case RISCVISD::VFMV_V_F_VL: { 1780 // Try to match splat of a scalar load to a strided load with stride of x0. 1781 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL || 1782 Node->getOpcode() == RISCVISD::VFMV_S_F_VL; 1783 bool HasPassthruOperand = Node->getOpcode() != ISD::SPLAT_VECTOR; 1784 if (HasPassthruOperand && !Node->getOperand(0).isUndef()) 1785 break; 1786 SDValue Src = HasPassthruOperand ? Node->getOperand(1) : Node->getOperand(0); 1787 auto *Ld = dyn_cast<LoadSDNode>(Src); 1788 if (!Ld) 1789 break; 1790 EVT MemVT = Ld->getMemoryVT(); 1791 // The memory VT should be the same size as the element type. 1792 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize()) 1793 break; 1794 if (!IsProfitableToFold(Src, Node, Node) || 1795 !IsLegalToFold(Src, Node, Node, TM.getOptLevel())) 1796 break; 1797 1798 SDValue VL; 1799 if (Node->getOpcode() == ISD::SPLAT_VECTOR) 1800 VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT); 1801 else if (IsScalarMove) { 1802 // We could deal with more VL if we update the VSETVLI insert pass to 1803 // avoid introducing more VSETVLI. 1804 if (!isOneConstant(Node->getOperand(2))) 1805 break; 1806 selectVLOp(Node->getOperand(2), VL); 1807 } else 1808 selectVLOp(Node->getOperand(2), VL); 1809 1810 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1811 SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT); 1812 1813 SDValue Operands[] = {Ld->getBasePtr(), 1814 CurDAG->getRegister(RISCV::X0, XLenVT), VL, SEW, 1815 Ld->getChain()}; 1816 1817 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1818 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo( 1819 /*IsMasked*/ false, /*IsTU*/ false, /*IsStrided*/ true, /*FF*/ false, 1820 Log2SEW, static_cast<unsigned>(LMUL)); 1821 MachineSDNode *Load = 1822 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1823 1824 CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()}); 1825 1826 ReplaceNode(Node, Load); 1827 return; 1828 } 1829 } 1830 1831 // Select the default instruction. 1832 SelectCode(Node); 1833 } 1834 1835 bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand( 1836 const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { 1837 switch (ConstraintID) { 1838 case InlineAsm::Constraint_m: 1839 // We just support simple memory operands that have a single address 1840 // operand and need no special handling. 1841 OutOps.push_back(Op); 1842 return false; 1843 case InlineAsm::Constraint_A: 1844 OutOps.push_back(Op); 1845 return false; 1846 default: 1847 break; 1848 } 1849 1850 return true; 1851 } 1852 1853 bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base, 1854 SDValue &Offset) { 1855 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { 1856 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT()); 1857 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT()); 1858 return true; 1859 } 1860 1861 return false; 1862 } 1863 1864 // Select a frame index and an optional immediate offset from an ADD or OR. 1865 bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base, 1866 SDValue &Offset) { 1867 if (SelectAddrFrameIndex(Addr, Base, Offset)) 1868 return true; 1869 1870 if (!CurDAG->isBaseWithConstantOffset(Addr)) 1871 return false; 1872 1873 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) { 1874 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue(); 1875 if (isInt<12>(CVal)) { 1876 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), 1877 Subtarget->getXLenVT()); 1878 Offset = CurDAG->getTargetConstant(CVal, SDLoc(Addr), 1879 Subtarget->getXLenVT()); 1880 return true; 1881 } 1882 } 1883 1884 return false; 1885 } 1886 1887 bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base, 1888 SDValue &Offset) { 1889 if (SelectAddrFrameIndex(Addr, Base, Offset)) 1890 return true; 1891 1892 SDLoc DL(Addr); 1893 MVT VT = Addr.getSimpleValueType(); 1894 1895 if (Addr.getOpcode() == RISCVISD::ADD_LO) { 1896 Base = Addr.getOperand(0); 1897 Offset = Addr.getOperand(1); 1898 return true; 1899 } 1900 1901 if (CurDAG->isBaseWithConstantOffset(Addr)) { 1902 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue(); 1903 if (isInt<12>(CVal)) { 1904 Base = Addr.getOperand(0); 1905 if (Base.getOpcode() == RISCVISD::ADD_LO) { 1906 SDValue LoOperand = Base.getOperand(1); 1907 if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) { 1908 // If the Lo in (ADD_LO hi, lo) is a global variable's address 1909 // (its low part, really), then we can rely on the alignment of that 1910 // variable to provide a margin of safety before low part can overflow 1911 // the 12 bits of the load/store offset. Check if CVal falls within 1912 // that margin; if so (low part + CVal) can't overflow. 1913 const DataLayout &DL = CurDAG->getDataLayout(); 1914 Align Alignment = commonAlignment( 1915 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset()); 1916 if (CVal == 0 || Alignment > CVal) { 1917 int64_t CombinedOffset = CVal + GA->getOffset(); 1918 Base = Base.getOperand(0); 1919 Offset = CurDAG->getTargetGlobalAddress( 1920 GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(), 1921 CombinedOffset, GA->getTargetFlags()); 1922 return true; 1923 } 1924 } 1925 } 1926 1927 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base)) 1928 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT); 1929 Offset = CurDAG->getTargetConstant(CVal, DL, VT); 1930 return true; 1931 } 1932 } 1933 1934 // Handle ADD with large immediates. 1935 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) { 1936 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue(); 1937 assert(!isInt<12>(CVal) && "simm12 not already handled?"); 1938 1939 if (isInt<12>(CVal / 2) && isInt<12>(CVal - CVal / 2)) { 1940 // We can use an ADDI for part of the offset and fold the rest into the 1941 // load/store. This mirrors the AddiPair PatFrag in RISCVInstrInfo.td. 1942 int64_t Adj = CVal < 0 ? -2048 : 2047; 1943 Base = SDValue( 1944 CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0), 1945 CurDAG->getTargetConstant(Adj, DL, VT)), 1946 0); 1947 Offset = CurDAG->getTargetConstant(CVal - Adj, DL, VT); 1948 return true; 1949 } 1950 } 1951 1952 Base = Addr; 1953 Offset = CurDAG->getTargetConstant(0, DL, VT); 1954 return true; 1955 } 1956 1957 bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth, 1958 SDValue &ShAmt) { 1959 // Shift instructions on RISCV only read the lower 5 or 6 bits of the shift 1960 // amount. If there is an AND on the shift amount, we can bypass it if it 1961 // doesn't affect any of those bits. 1962 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) { 1963 const APInt &AndMask = N->getConstantOperandAPInt(1); 1964 1965 // Since the max shift amount is a power of 2 we can subtract 1 to make a 1966 // mask that covers the bits needed to represent all shift amounts. 1967 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!"); 1968 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1); 1969 1970 if (ShMask.isSubsetOf(AndMask)) { 1971 ShAmt = N.getOperand(0); 1972 return true; 1973 } 1974 1975 // SimplifyDemandedBits may have optimized the mask so try restoring any 1976 // bits that are known zero. 1977 KnownBits Known = CurDAG->computeKnownBits(N->getOperand(0)); 1978 if (ShMask.isSubsetOf(AndMask | Known.Zero)) { 1979 ShAmt = N.getOperand(0); 1980 return true; 1981 } 1982 } else if (N.getOpcode() == ISD::SUB && 1983 isa<ConstantSDNode>(N.getOperand(0))) { 1984 uint64_t Imm = N.getConstantOperandVal(0); 1985 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to 1986 // generate a NEG instead of a SUB of a constant. 1987 if (Imm != 0 && Imm % ShiftWidth == 0) { 1988 SDLoc DL(N); 1989 EVT VT = N.getValueType(); 1990 SDValue Zero = CurDAG->getRegister(RISCV::X0, VT); 1991 unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB; 1992 MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero, 1993 N.getOperand(1)); 1994 ShAmt = SDValue(Neg, 0); 1995 return true; 1996 } 1997 } 1998 1999 ShAmt = N; 2000 return true; 2001 } 2002 2003 bool RISCVDAGToDAGISel::selectSExti32(SDValue N, SDValue &Val) { 2004 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG && 2005 cast<VTSDNode>(N.getOperand(1))->getVT() == MVT::i32) { 2006 Val = N.getOperand(0); 2007 return true; 2008 } 2009 MVT VT = N.getSimpleValueType(); 2010 if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - 32)) { 2011 Val = N; 2012 return true; 2013 } 2014 2015 return false; 2016 } 2017 2018 bool RISCVDAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) { 2019 if (N.getOpcode() == ISD::AND) { 2020 auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); 2021 if (C && C->getZExtValue() == UINT64_C(0xFFFFFFFF)) { 2022 Val = N.getOperand(0); 2023 return true; 2024 } 2025 } 2026 MVT VT = N.getSimpleValueType(); 2027 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), 32); 2028 if (CurDAG->MaskedValueIsZero(N, Mask)) { 2029 Val = N; 2030 return true; 2031 } 2032 2033 return false; 2034 } 2035 2036 /// Look for various patterns that can be done with a SHL that can be folded 2037 /// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which 2038 /// SHXADD we are trying to match. 2039 bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt, 2040 SDValue &Val) { 2041 bool LeftShift = N.getOpcode() == ISD::SHL; 2042 if ((LeftShift || N.getOpcode() == ISD::SRL) && 2043 isa<ConstantSDNode>(N.getOperand(1))) { 2044 unsigned C1 = N.getConstantOperandVal(1); 2045 SDValue N0 = N.getOperand(0); 2046 if (N0.getOpcode() == ISD::AND && N0.hasOneUse() && 2047 isa<ConstantSDNode>(N0.getOperand(1))) { 2048 uint64_t Mask = N0.getConstantOperandVal(1); 2049 if (isShiftedMask_64(Mask)) { 2050 unsigned XLen = Subtarget->getXLen(); 2051 unsigned Leading = XLen - (64 - countLeadingZeros(Mask)); 2052 unsigned Trailing = countTrailingZeros(Mask); 2053 // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and 2054 // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD. 2055 if (LeftShift && Leading == 32 && Trailing > 0 && 2056 (Trailing + C1) == ShAmt) { 2057 SDLoc DL(N); 2058 EVT VT = N.getValueType(); 2059 Val = SDValue(CurDAG->getMachineNode( 2060 RISCV::SRLIW, DL, VT, N0.getOperand(0), 2061 CurDAG->getTargetConstant(Trailing, DL, VT)), 2062 0); 2063 return true; 2064 } 2065 // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and 2066 // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD. 2067 if (!LeftShift && Leading == 32 && Trailing > C1 && 2068 (Trailing - C1) == ShAmt) { 2069 SDLoc DL(N); 2070 EVT VT = N.getValueType(); 2071 Val = SDValue(CurDAG->getMachineNode( 2072 RISCV::SRLIW, DL, VT, N0.getOperand(0), 2073 CurDAG->getTargetConstant(Trailing, DL, VT)), 2074 0); 2075 return true; 2076 } 2077 } 2078 } 2079 } 2080 2081 return false; 2082 } 2083 2084 // Return true if all users of this SDNode* only consume the lower \p Bits. 2085 // This can be used to form W instructions for add/sub/mul/shl even when the 2086 // root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if 2087 // SimplifyDemandedBits has made it so some users see a sext_inreg and some 2088 // don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave 2089 // the add/sub/mul/shl to become non-W instructions. By checking the users we 2090 // may be able to use a W instruction and CSE with the other instruction if 2091 // this has happened. We could try to detect that the CSE opportunity exists 2092 // before doing this, but that would be more complicated. 2093 // TODO: Does this need to look through AND/OR/XOR to their users to find more 2094 // opportunities. 2095 bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const { 2096 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB || 2097 Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL || 2098 Node->getOpcode() == ISD::SRL || 2099 Node->getOpcode() == ISD::SIGN_EXTEND_INREG || 2100 Node->getOpcode() == RISCVISD::GREV || 2101 Node->getOpcode() == RISCVISD::GORC || 2102 isa<ConstantSDNode>(Node)) && 2103 "Unexpected opcode"); 2104 2105 for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) { 2106 SDNode *User = *UI; 2107 // Users of this node should have already been instruction selected 2108 if (!User->isMachineOpcode()) 2109 return false; 2110 2111 // TODO: Add more opcodes? 2112 switch (User->getMachineOpcode()) { 2113 default: 2114 return false; 2115 case RISCV::ADDW: 2116 case RISCV::ADDIW: 2117 case RISCV::SUBW: 2118 case RISCV::MULW: 2119 case RISCV::SLLW: 2120 case RISCV::SLLIW: 2121 case RISCV::SRAW: 2122 case RISCV::SRAIW: 2123 case RISCV::SRLW: 2124 case RISCV::SRLIW: 2125 case RISCV::DIVW: 2126 case RISCV::DIVUW: 2127 case RISCV::REMW: 2128 case RISCV::REMUW: 2129 case RISCV::ROLW: 2130 case RISCV::RORW: 2131 case RISCV::RORIW: 2132 case RISCV::CLZW: 2133 case RISCV::CTZW: 2134 case RISCV::CPOPW: 2135 case RISCV::SLLI_UW: 2136 case RISCV::FMV_W_X: 2137 case RISCV::FCVT_H_W: 2138 case RISCV::FCVT_H_WU: 2139 case RISCV::FCVT_S_W: 2140 case RISCV::FCVT_S_WU: 2141 case RISCV::FCVT_D_W: 2142 case RISCV::FCVT_D_WU: 2143 if (Bits < 32) 2144 return false; 2145 break; 2146 case RISCV::SLLI: 2147 // SLLI only uses the lower (XLen - ShAmt) bits. 2148 if (Bits < Subtarget->getXLen() - User->getConstantOperandVal(1)) 2149 return false; 2150 break; 2151 case RISCV::ANDI: 2152 if (Bits < (64 - countLeadingZeros(User->getConstantOperandVal(1)))) 2153 return false; 2154 break; 2155 case RISCV::SEXT_B: 2156 if (Bits < 8) 2157 return false; 2158 break; 2159 case RISCV::SEXT_H: 2160 case RISCV::FMV_H_X: 2161 case RISCV::ZEXT_H_RV32: 2162 case RISCV::ZEXT_H_RV64: 2163 if (Bits < 16) 2164 return false; 2165 break; 2166 case RISCV::ADD_UW: 2167 case RISCV::SH1ADD_UW: 2168 case RISCV::SH2ADD_UW: 2169 case RISCV::SH3ADD_UW: 2170 // The first operand to add.uw/shXadd.uw is implicitly zero extended from 2171 // 32 bits. 2172 if (UI.getOperandNo() != 0 || Bits < 32) 2173 return false; 2174 break; 2175 case RISCV::SB: 2176 if (UI.getOperandNo() != 0 || Bits < 8) 2177 return false; 2178 break; 2179 case RISCV::SH: 2180 if (UI.getOperandNo() != 0 || Bits < 16) 2181 return false; 2182 break; 2183 case RISCV::SW: 2184 if (UI.getOperandNo() != 0 || Bits < 32) 2185 return false; 2186 break; 2187 } 2188 } 2189 2190 return true; 2191 } 2192 2193 // Select VL as a 5 bit immediate or a value that will become a register. This 2194 // allows us to choose betwen VSETIVLI or VSETVLI later. 2195 bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) { 2196 auto *C = dyn_cast<ConstantSDNode>(N); 2197 if (C && isUInt<5>(C->getZExtValue())) { 2198 VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N), 2199 N->getValueType(0)); 2200 } else if (C && C->isAllOnesValue()) { 2201 // Treat all ones as VLMax. 2202 VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N), 2203 N->getValueType(0)); 2204 } else if (isa<RegisterSDNode>(N) && 2205 cast<RegisterSDNode>(N)->getReg() == RISCV::X0) { 2206 // All our VL operands use an operand that allows GPRNoX0 or an immediate 2207 // as the register class. Convert X0 to a special immediate to pass the 2208 // MachineVerifier. This is recognized specially by the vsetvli insertion 2209 // pass. 2210 VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N), 2211 N->getValueType(0)); 2212 } else { 2213 VL = N; 2214 } 2215 2216 return true; 2217 } 2218 2219 bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) { 2220 if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef()) 2221 return false; 2222 SplatVal = N.getOperand(1); 2223 return true; 2224 } 2225 2226 using ValidateFn = bool (*)(int64_t); 2227 2228 static bool selectVSplatSimmHelper(SDValue N, SDValue &SplatVal, 2229 SelectionDAG &DAG, 2230 const RISCVSubtarget &Subtarget, 2231 ValidateFn ValidateImm) { 2232 if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() || 2233 !isa<ConstantSDNode>(N.getOperand(1))) 2234 return false; 2235 2236 int64_t SplatImm = 2237 cast<ConstantSDNode>(N.getOperand(1))->getSExtValue(); 2238 2239 // The semantics of RISCVISD::VMV_V_X_VL is that when the operand 2240 // type is wider than the resulting vector element type: an implicit 2241 // truncation first takes place. Therefore, perform a manual 2242 // truncation/sign-extension in order to ignore any truncated bits and catch 2243 // any zero-extended immediate. 2244 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first 2245 // sign-extending to (XLenVT -1). 2246 MVT XLenVT = Subtarget.getXLenVT(); 2247 assert(XLenVT == N.getOperand(1).getSimpleValueType() && 2248 "Unexpected splat operand type"); 2249 MVT EltVT = N.getSimpleValueType().getVectorElementType(); 2250 if (EltVT.bitsLT(XLenVT)) 2251 SplatImm = SignExtend64(SplatImm, EltVT.getSizeInBits()); 2252 2253 if (!ValidateImm(SplatImm)) 2254 return false; 2255 2256 SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), XLenVT); 2257 return true; 2258 } 2259 2260 bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) { 2261 return selectVSplatSimmHelper(N, SplatVal, *CurDAG, *Subtarget, 2262 [](int64_t Imm) { return isInt<5>(Imm); }); 2263 } 2264 2265 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) { 2266 return selectVSplatSimmHelper( 2267 N, SplatVal, *CurDAG, *Subtarget, 2268 [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; }); 2269 } 2270 2271 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N, 2272 SDValue &SplatVal) { 2273 return selectVSplatSimmHelper( 2274 N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) { 2275 return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16); 2276 }); 2277 } 2278 2279 bool RISCVDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &SplatVal) { 2280 if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() || 2281 !isa<ConstantSDNode>(N.getOperand(1))) 2282 return false; 2283 2284 int64_t SplatImm = 2285 cast<ConstantSDNode>(N.getOperand(1))->getSExtValue(); 2286 2287 if (!isUInt<5>(SplatImm)) 2288 return false; 2289 2290 SplatVal = 2291 CurDAG->getTargetConstant(SplatImm, SDLoc(N), Subtarget->getXLenVT()); 2292 2293 return true; 2294 } 2295 2296 bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width, 2297 SDValue &Imm) { 2298 if (auto *C = dyn_cast<ConstantSDNode>(N)) { 2299 int64_t ImmVal = SignExtend64(C->getSExtValue(), Width); 2300 2301 if (!isInt<5>(ImmVal)) 2302 return false; 2303 2304 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT()); 2305 return true; 2306 } 2307 2308 return false; 2309 } 2310 2311 // Merge an ADDI into the offset of a load/store instruction where possible. 2312 // (load (addi base, off1), off2) -> (load base, off1+off2) 2313 // (store val, (addi base, off1), off2) -> (store val, base, off1+off2) 2314 // (load (add base, (addi src, off1)), off2) 2315 // -> (load (add base, src), off1+off2) 2316 // (store val, (add base, (addi src, off1)), off2) 2317 // -> (store val, (add base, src), off1+off2) 2318 // This is possible when off1+off2 fits a 12-bit immediate. 2319 bool RISCVDAGToDAGISel::doPeepholeLoadStoreADDI(SDNode *N) { 2320 unsigned OffsetOpIdx, BaseOpIdx; 2321 if (!hasMemOffset(N, BaseOpIdx, OffsetOpIdx)) 2322 return false; 2323 2324 if (!isa<ConstantSDNode>(N->getOperand(OffsetOpIdx))) 2325 return false; 2326 2327 SDValue Base = N->getOperand(BaseOpIdx); 2328 2329 if (!Base.isMachineOpcode()) 2330 return false; 2331 2332 if (Base.getMachineOpcode() == RISCV::ADDI) { 2333 // If the base is an ADDI, we can merge it in to the load/store. 2334 } else if (Base.getMachineOpcode() == RISCV::ADDIW && 2335 isa<ConstantSDNode>(Base.getOperand(1)) && 2336 Base.getOperand(0).isMachineOpcode() && 2337 Base.getOperand(0).getMachineOpcode() == RISCV::LUI && 2338 isa<ConstantSDNode>(Base.getOperand(0).getOperand(0))) { 2339 // ADDIW can be merged if it's part of LUI+ADDIW constant materialization 2340 // and LUI+ADDI would have produced the same result. This is true for all 2341 // simm32 values except 0x7ffff800-0x7fffffff. 2342 int64_t Offset = 2343 SignExtend64<32>(Base.getOperand(0).getConstantOperandVal(0) << 12); 2344 Offset += cast<ConstantSDNode>(Base.getOperand(1))->getSExtValue(); 2345 if (!isInt<32>(Offset)) 2346 return false; 2347 } else 2348 return false; 2349 2350 SDValue ImmOperand = Base.getOperand(1); 2351 uint64_t Offset2 = N->getConstantOperandVal(OffsetOpIdx); 2352 2353 if (auto *Const = dyn_cast<ConstantSDNode>(ImmOperand)) { 2354 int64_t Offset1 = Const->getSExtValue(); 2355 int64_t CombinedOffset = Offset1 + Offset2; 2356 if (!isInt<12>(CombinedOffset)) 2357 return false; 2358 ImmOperand = CurDAG->getTargetConstant(CombinedOffset, SDLoc(ImmOperand), 2359 ImmOperand.getValueType()); 2360 } else if (auto *GA = dyn_cast<GlobalAddressSDNode>(ImmOperand)) { 2361 // If the off1 in (addi base, off1) is a global variable's address (its 2362 // low part, really), then we can rely on the alignment of that variable 2363 // to provide a margin of safety before off1 can overflow the 12 bits. 2364 // Check if off2 falls within that margin; if so off1+off2 can't overflow. 2365 const DataLayout &DL = CurDAG->getDataLayout(); 2366 Align Alignment = commonAlignment(GA->getGlobal()->getPointerAlignment(DL), 2367 GA->getOffset()); 2368 if (Offset2 != 0 && Alignment <= Offset2) 2369 return false; 2370 int64_t Offset1 = GA->getOffset(); 2371 int64_t CombinedOffset = Offset1 + Offset2; 2372 ImmOperand = CurDAG->getTargetGlobalAddress( 2373 GA->getGlobal(), SDLoc(ImmOperand), ImmOperand.getValueType(), 2374 CombinedOffset, GA->getTargetFlags()); 2375 } else if (auto *CP = dyn_cast<ConstantPoolSDNode>(ImmOperand)) { 2376 // Ditto. 2377 Align Alignment = commonAlignment(CP->getAlign(), CP->getOffset()); 2378 if (Offset2 != 0 && Alignment <= Offset2) 2379 return false; 2380 int64_t Offset1 = CP->getOffset(); 2381 int64_t CombinedOffset = Offset1 + Offset2; 2382 ImmOperand = CurDAG->getTargetConstantPool( 2383 CP->getConstVal(), ImmOperand.getValueType(), CP->getAlign(), 2384 CombinedOffset, CP->getTargetFlags()); 2385 } else { 2386 return false; 2387 } 2388 2389 LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: "); 2390 LLVM_DEBUG(Base->dump(CurDAG)); 2391 LLVM_DEBUG(dbgs() << "\nN: "); 2392 LLVM_DEBUG(N->dump(CurDAG)); 2393 LLVM_DEBUG(dbgs() << "\n"); 2394 2395 // Modify the offset operand of the load/store. 2396 if (BaseOpIdx == 0) { // Load 2397 N = CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand, 2398 N->getOperand(2)); 2399 } else { // Store 2400 N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0), 2401 ImmOperand, N->getOperand(3)); 2402 } 2403 2404 return true; 2405 } 2406 2407 // Try to remove sext.w if the input is a W instruction or can be made into 2408 // a W instruction cheaply. 2409 bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) { 2410 // Look for the sext.w pattern, addiw rd, rs1, 0. 2411 if (N->getMachineOpcode() != RISCV::ADDIW || 2412 !isNullConstant(N->getOperand(1))) 2413 return false; 2414 2415 SDValue N0 = N->getOperand(0); 2416 if (!N0.isMachineOpcode()) 2417 return false; 2418 2419 switch (N0.getMachineOpcode()) { 2420 default: 2421 break; 2422 case RISCV::ADD: 2423 case RISCV::ADDI: 2424 case RISCV::SUB: 2425 case RISCV::MUL: 2426 case RISCV::SLLI: { 2427 // Convert sext.w+add/sub/mul to their W instructions. This will create 2428 // a new independent instruction. This improves latency. 2429 unsigned Opc; 2430 switch (N0.getMachineOpcode()) { 2431 default: 2432 llvm_unreachable("Unexpected opcode!"); 2433 case RISCV::ADD: Opc = RISCV::ADDW; break; 2434 case RISCV::ADDI: Opc = RISCV::ADDIW; break; 2435 case RISCV::SUB: Opc = RISCV::SUBW; break; 2436 case RISCV::MUL: Opc = RISCV::MULW; break; 2437 case RISCV::SLLI: Opc = RISCV::SLLIW; break; 2438 } 2439 2440 SDValue N00 = N0.getOperand(0); 2441 SDValue N01 = N0.getOperand(1); 2442 2443 // Shift amount needs to be uimm5. 2444 if (N0.getMachineOpcode() == RISCV::SLLI && 2445 !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue())) 2446 break; 2447 2448 SDNode *Result = 2449 CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), 2450 N00, N01); 2451 ReplaceUses(N, Result); 2452 return true; 2453 } 2454 case RISCV::ADDW: 2455 case RISCV::ADDIW: 2456 case RISCV::SUBW: 2457 case RISCV::MULW: 2458 case RISCV::SLLIW: 2459 case RISCV::GREVIW: 2460 case RISCV::GORCIW: 2461 // Result is already sign extended just remove the sext.w. 2462 // NOTE: We only handle the nodes that are selected with hasAllWUsers. 2463 ReplaceUses(N, N0.getNode()); 2464 return true; 2465 } 2466 2467 return false; 2468 } 2469 2470 // Optimize masked RVV pseudo instructions with a known all-ones mask to their 2471 // corresponding "unmasked" pseudo versions. The mask we're interested in will 2472 // take the form of a V0 physical register operand, with a glued 2473 // register-setting instruction. 2474 bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) { 2475 const RISCV::RISCVMaskedPseudoInfo *I = 2476 RISCV::getMaskedPseudoInfo(N->getMachineOpcode()); 2477 if (!I) 2478 return false; 2479 2480 unsigned MaskOpIdx = I->MaskOpIdx; 2481 2482 // Check that we're using V0 as a mask register. 2483 if (!isa<RegisterSDNode>(N->getOperand(MaskOpIdx)) || 2484 cast<RegisterSDNode>(N->getOperand(MaskOpIdx))->getReg() != RISCV::V0) 2485 return false; 2486 2487 // The glued user defines V0. 2488 const auto *Glued = N->getGluedNode(); 2489 2490 if (!Glued || Glued->getOpcode() != ISD::CopyToReg) 2491 return false; 2492 2493 // Check that we're defining V0 as a mask register. 2494 if (!isa<RegisterSDNode>(Glued->getOperand(1)) || 2495 cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0) 2496 return false; 2497 2498 // Check the instruction defining V0; it needs to be a VMSET pseudo. 2499 SDValue MaskSetter = Glued->getOperand(2); 2500 2501 const auto IsVMSet = [](unsigned Opc) { 2502 return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 || 2503 Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 || 2504 Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 || 2505 Opc == RISCV::PseudoVMSET_M_B8; 2506 }; 2507 2508 // TODO: Check that the VMSET is the expected bitwidth? The pseudo has 2509 // undefined behaviour if it's the wrong bitwidth, so we could choose to 2510 // assume that it's all-ones? Same applies to its VL. 2511 if (!MaskSetter->isMachineOpcode() || !IsVMSet(MaskSetter.getMachineOpcode())) 2512 return false; 2513 2514 // Retrieve the tail policy operand index, if any. 2515 Optional<unsigned> TailPolicyOpIdx; 2516 const RISCVInstrInfo &TII = *Subtarget->getInstrInfo(); 2517 const MCInstrDesc &MaskedMCID = TII.get(N->getMachineOpcode()); 2518 2519 bool IsTA = true; 2520 if (RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags)) { 2521 // The last operand of the pseudo is the policy op, but we might have a 2522 // Glue operand last. We might also have a chain. 2523 TailPolicyOpIdx = N->getNumOperands() - 1; 2524 if (N->getOperand(*TailPolicyOpIdx).getValueType() == MVT::Glue) 2525 (*TailPolicyOpIdx)--; 2526 if (N->getOperand(*TailPolicyOpIdx).getValueType() == MVT::Other) 2527 (*TailPolicyOpIdx)--; 2528 2529 if (!(N->getConstantOperandVal(*TailPolicyOpIdx) & 2530 RISCVII::TAIL_AGNOSTIC)) { 2531 // Keep the true-masked instruction when there is no unmasked TU 2532 // instruction 2533 if (I->UnmaskedTUPseudo == I->MaskedPseudo && !N->getOperand(0).isUndef()) 2534 return false; 2535 // We can't use TA if the tie-operand is not IMPLICIT_DEF 2536 if (!N->getOperand(0).isUndef()) 2537 IsTA = false; 2538 } 2539 } 2540 2541 unsigned Opc = IsTA ? I->UnmaskedPseudo : I->UnmaskedTUPseudo; 2542 2543 // Check that we're dropping the mask operand and any policy operand 2544 // when we transform to this unmasked pseudo. Additionally, if this insturtion 2545 // is tail agnostic, the unmasked instruction should not have a merge op. 2546 uint64_t TSFlags = TII.get(Opc).TSFlags; 2547 assert((IsTA != RISCVII::hasMergeOp(TSFlags)) && 2548 RISCVII::hasDummyMaskOp(TSFlags) && 2549 !RISCVII::hasVecPolicyOp(TSFlags) && 2550 "Unexpected pseudo to transform to"); 2551 (void)TSFlags; 2552 2553 SmallVector<SDValue, 8> Ops; 2554 // Skip the merge operand at index 0 if IsTA 2555 for (unsigned I = IsTA, E = N->getNumOperands(); I != E; I++) { 2556 // Skip the mask, the policy, and the Glue. 2557 SDValue Op = N->getOperand(I); 2558 if (I == MaskOpIdx || I == TailPolicyOpIdx || 2559 Op.getValueType() == MVT::Glue) 2560 continue; 2561 Ops.push_back(Op); 2562 } 2563 2564 // Transitively apply any node glued to our new node. 2565 if (auto *TGlued = Glued->getGluedNode()) 2566 Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1)); 2567 2568 SDNode *Result = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops); 2569 ReplaceUses(N, Result); 2570 2571 return true; 2572 } 2573 2574 // This pass converts a legalized DAG into a RISCV-specific DAG, ready 2575 // for instruction scheduling. 2576 FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM, 2577 CodeGenOpt::Level OptLevel) { 2578 return new RISCVDAGToDAGISel(TM, OptLevel); 2579 } 2580