//===-- RISCVISelDAG.cpp - A dag to dag inst selector for RISCV ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the RISCV target.
//
//===----------------------------------------------------------------------===//

#include "RISCVISelDAGToDAG.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCVISelLowering.h"
#include "RISCVMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-isel"

namespace llvm {
namespace RISCV {
// Instantiate the tablegen-generated searchable tables that map vector
// load/store configurations (NF, masked, strided, SEW, LMUL, ...) to the
// corresponding pseudo opcodes.
#define GET_RISCVVSSEGTable_IMPL
#define GET_RISCVVLSEGTable_IMPL
#define GET_RISCVVLXSEGTable_IMPL
#define GET_RISCVVSXSEGTable_IMPL
#define GET_RISCVVLETable_IMPL
#define GET_RISCVVSETable_IMPL
#define GET_RISCVVLXTable_IMPL
#define GET_RISCVVSXTable_IMPL
#define GET_RISCVMaskedPseudosTable_IMPL
#include "RISCVGenSearchableTables.inc"
} // namespace RISCV
} // namespace llvm

// Rewrite certain target-independent nodes into RISCV-specific forms before
// instruction selection runs, so the tablegen patterns have fewer cases to
// handle.
void RISCVDAGToDAGISel::PreprocessISelDAG() {
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
                                       E = CurDAG->allnodes_end();
       I != E;) {
    SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.

    // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
    // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
    if (N->getOpcode() == ISD::SPLAT_VECTOR) {
      MVT VT = N->getSimpleValueType(0);
      unsigned Opc =
          VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
      SDLoc DL(N);
      // X0 as the VL operand selects VLMAX.
      SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
      SDValue Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT),
                                       N->getOperand(0), VL);

      // Back the iterator onto N so RAUW cannot invalidate it, then step past
      // N again before deleting it.
      --I;
      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
      ++I;
      CurDAG->DeleteNode(N);
      continue;
    }

    // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
    // load. Done after lowering and combining so that we have a chance to
    // optimize this to VMV_V_X_VL when the upper bits aren't needed.
    if (N->getOpcode() != RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL)
      continue;

    assert(N->getNumOperands() == 4 && "Unexpected number of operands");
    MVT VT = N->getSimpleValueType(0);
    SDValue Passthru = N->getOperand(0);
    SDValue Lo = N->getOperand(1);
    SDValue Hi = N->getOperand(2);
    SDValue VL = N->getOperand(3);
    assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
           Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
           "Unexpected VTs!");
    MachineFunction &MF = CurDAG->getMachineFunction();
    RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
    SDLoc DL(N);

    // We use the same frame index we use for moving two i32s into 64-bit FPR.
    // This is an analogous operation.
    int FI = FuncInfo->getMoveF64FrameIndex(MF);
    MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
    const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
    SDValue StackSlot =
        CurDAG->getFrameIndex(FI, TLI.getPointerTy(CurDAG->getDataLayout()));

    SDValue Chain = CurDAG->getEntryNode();
    Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));

    SDValue OffsetSlot =
        CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), DL);
    Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
                          Align(8));

    Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);

    // Re-load the stored i64 as a stride-0 (broadcast) vector load via the
    // riscv_vlse intrinsic; X0 is the zero stride.
    SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
    SDValue IntID =
        CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
    SDValue Ops[] = {Chain,
                     IntID,
                     Passthru,
                     StackSlot,
                     CurDAG->getRegister(RISCV::X0, MVT::i64),
                     VL};

    SDValue Result = CurDAG->getMemIntrinsicNode(
        ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MVT::i64, MPI, Align(8),
        MachineMemOperand::MOLoad);

    // We're about to replace all uses of the SPLAT_VECTOR_SPLIT_I64 with the
    // vlse we created. This will cause general havok on the dag because
    // anything below the conversion could be folded into other existing nodes.
    // To avoid invalidating 'I', back it up to the convert node.
    --I;
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);

    // Now that we did that, the node is dead. Increment the iterator to the
    // next node to process, then delete N.
    ++I;
    CurDAG->DeleteNode(N);
  }
}

// Run target-specific peepholes over the machine DAG after selection.
void RISCVDAGToDAGISel::PostprocessISelDAG() {
  // Keep the root alive across peepholes that may replace it.
  HandleSDNode Dummy(CurDAG->getRoot());
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    // Skip dead nodes and any non-machine opcodes.
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    MadeChange |= doPeepholeSExtW(N);
    MadeChange |= doPeepholeLoadStoreADDI(N);
    MadeChange |= doPeepholeMaskedRVV(N);
  }

  CurDAG->setRoot(Dummy.getValue());

  if (MadeChange)
    CurDAG->RemoveDeadNodes();
}

// Returns true if N is a MachineSDNode that has a reg and simm12 memory
// operand. The indices of the base pointer and offset are returned in BaseOpIdx
// and OffsetOpIdx.
static bool hasMemOffset(SDNode *N, unsigned &BaseOpIdx,
                         unsigned &OffsetOpIdx) {
  switch (N->getMachineOpcode()) {
  case RISCV::LB:
  case RISCV::LH:
  case RISCV::LW:
  case RISCV::LBU:
  case RISCV::LHU:
  case RISCV::LWU:
  case RISCV::LD:
  case RISCV::FLH:
  case RISCV::FLW:
  case RISCV::FLD:
    // Loads: operands are (base, offset).
    BaseOpIdx = 0;
    OffsetOpIdx = 1;
    return true;
  case RISCV::SB:
  case RISCV::SH:
  case RISCV::SW:
  case RISCV::SD:
  case RISCV::FSH:
  case RISCV::FSW:
  case RISCV::FSD:
    // Stores: operands are (value, base, offset).
    BaseOpIdx = 1;
    OffsetOpIdx = 2;
    return true;
  }

  return false;
}

// Materialize Imm by loading it from the constant pool instead of building it
// with an instruction sequence. Only used for i64 immediates.
static SDNode *selectImmWithConstantPool(SelectionDAG *CurDAG, const SDLoc &DL,
                                         const MVT VT, int64_t Imm,
                                         const RISCVSubtarget &Subtarget) {
  assert(VT == MVT::i64 && "Expecting MVT::i64");
  const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(CurDAG->getConstantPool(
      ConstantInt::get(EVT(VT).getTypeForEVT(*CurDAG->getContext()), Imm), VT));
  SDValue Addr = TLI->getAddr(CP, *CurDAG);
  SDValue Offset = CurDAG->getTargetConstant(0, DL, VT);
  // Since there is no data race, the chain can be the entry node.
  SDNode *Load = CurDAG->getMachineNode(RISCV::LD, DL, VT, Addr, Offset,
                                        CurDAG->getEntryNode());
  MachineFunction &MF = CurDAG->getMachineFunction();
  MachineMemOperand *MemOp = MF.getMachineMemOperand(
      MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
      LLT(VT), CP->getAlign());
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Load), {MemOp});
  return Load;
}

// Materialize the immediate Imm using the instruction sequence computed by
// RISCVMatInt, or fall back to a constant-pool load when that sequence
// exceeds the subtarget's cost threshold.
static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
                         int64_t Imm, const RISCVSubtarget &Subtarget) {
  MVT XLenVT = Subtarget.getXLenVT();
  RISCVMatInt::InstSeq Seq =
      RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());

  // If Imm is expensive to build, then we put it into constant pool.
  if (Subtarget.useConstantPoolForLargeInts() &&
      Seq.size() > Subtarget.getMaxBuildIntsCost())
    return selectImmWithConstantPool(CurDAG, DL, VT, Imm, Subtarget);

  SDNode *Result = nullptr;
  SDValue SrcReg = CurDAG->getRegister(RISCV::X0, XLenVT);
  for (RISCVMatInt::Inst &Inst : Seq) {
    SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, XLenVT);
    switch (Inst.getOpndKind()) {
    case RISCVMatInt::Imm:
      // Instruction takes only an immediate (e.g. LUI).
      Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SDImm);
      break;
    case RISCVMatInt::RegX0:
      Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg,
                                      CurDAG->getRegister(RISCV::X0, XLenVT));
      break;
    case RISCVMatInt::RegReg:
      Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SrcReg);
      break;
    case RISCVMatInt::RegImm:
      Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SDImm);
      break;
    }

    // Only the first instruction has X0 as its source.
    SrcReg = SDValue(Result, 0);
  }

  return Result;
}

// Build a REG_SEQUENCE combining the NF vector registers in Regs into one
// tuple register of class RegClassID, assigning consecutive subregister
// indices starting at SubReg0.
static SDValue createTupleImpl(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
                               unsigned RegClassID, unsigned SubReg0) {
  assert(Regs.size() >= 2 && Regs.size() <= 8);

  SDLoc DL(Regs[0]);
  SmallVector<SDValue, 8> Ops;

  Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32));

  for (unsigned I = 0; I < Regs.size(); ++I) {
    Ops.push_back(Regs[I]);
    Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32));
  }
  SDNode *N =
      CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
  return SDValue(N, 0);
}

// Tuple of NF M1 (or fractional-LMUL) registers; NF may be 2..8.
static SDValue createM1Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
                             unsigned NF) {
  static const unsigned RegClassIDs[] = {
      RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID,
      RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID,
      RISCV::VRN8M1RegClassID};

  return createTupleImpl(CurDAG, Regs, RegClassIDs[NF - 2], RISCV::sub_vrm1_0);
}

// Tuple of NF M2 registers; NF may be 2..4.
static SDValue createM2Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
                             unsigned NF) {
  static const unsigned RegClassIDs[] = {RISCV::VRN2M2RegClassID,
                                         RISCV::VRN3M2RegClassID,
                                         RISCV::VRN4M2RegClassID};

  return createTupleImpl(CurDAG, Regs, RegClassIDs[NF - 2], RISCV::sub_vrm2_0);
}

// Tuple of M4 registers; only NF=2 fits in the 32-register vector file.
static SDValue createM4Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
                             unsigned NF) {
  return createTupleImpl(CurDAG, Regs, RISCV::VRN2M4RegClassID,
                         RISCV::sub_vrm4_0);
}

// Dispatch on LMUL to the matching tuple register class builder.
static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
                           unsigned NF, RISCVII::VLMUL LMUL) {
  switch (LMUL) {
  default:
    llvm_unreachable("Invalid LMUL.");
  case RISCVII::VLMUL::LMUL_F8:
  case RISCVII::VLMUL::LMUL_F4:
  case RISCVII::VLMUL::LMUL_F2:
  case RISCVII::VLMUL::LMUL_1:
    // Fractional LMULs still occupy a whole M1 register.
    return createM1Tuple(CurDAG, Regs, NF);
  case RISCVII::VLMUL::LMUL_2:
    return createM2Tuple(CurDAG, Regs, NF);
  case RISCVII::VLMUL::LMUL_4:
    return createM4Tuple(CurDAG, Regs, NF);
  }
}

// Append the common trailing operands of a vector load/store pseudo to
// Operands, reading intrinsic operands starting at CurOp:
// base, [stride/index,] [mask in V0,] VL, SEW, [policy (masked loads),]
// chain [+ glue from the V0 copy].
void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
    SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
    bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
    bool IsLoad, MVT *IndexVT) {
  SDValue Chain = Node->getOperand(0);
  SDValue Glue;

  SDValue Base;
  SelectBaseAddr(Node->getOperand(CurOp++), Base);
  Operands.push_back(Base); // Base pointer.

  if (IsStridedOrIndexed) {
    Operands.push_back(Node->getOperand(CurOp++)); // Index.
    if (IndexVT)
      *IndexVT = Operands.back()->getSimpleValueType(0);
  }

  if (IsMasked) {
    // Mask needs to be copied to V0.
    SDValue Mask = Node->getOperand(CurOp++);
    Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());
    Glue = Chain.getValue(1);
    Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));
  }
  SDValue VL;
  selectVLOp(Node->getOperand(CurOp++), VL);
  Operands.push_back(VL);

  MVT XLenVT = Subtarget->getXLenVT();
  SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
  Operands.push_back(SEWOp);

  // Masked load has the tail policy argument.
  if (IsMasked && IsLoad) {
    // Policy must be a constant.
    uint64_t Policy = Node->getConstantOperandVal(CurOp++);
    SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
    Operands.push_back(PolicyOp);
  }

  Operands.push_back(Chain); // Chain.
  // The glue ties the V0 mask copy to the memory pseudo.
  if (Glue)
    Operands.push_back(Glue);
}

// Returns true if every value in Values is an UNDEF.
static bool isAllUndef(ArrayRef<SDValue> Values) {
  return llvm::all_of(Values, [](SDValue V) { return V->isUndef(); });
}

// Select a (possibly strided) segment-load intrinsic to a VLSEG pseudo and
// split the tuple result into the node's NF individual vector results.
void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked,
                                    bool IsStrided) {
  SDLoc DL(Node);
  // Last value is the chain.
  unsigned NF = Node->getNumValues() - 1;
  MVT VT = Node->getSimpleValueType(0);
  unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  // Skip chain and intrinsic-id operands.
  unsigned CurOp = 2;
  SmallVector<SDValue, 8> Operands;

  SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
                               Node->op_begin() + CurOp + NF);
  // Tail-undisturbed if masked or any merge operand is defined; then the
  // merge values are passed as one tuple operand.
  bool IsTU = IsMasked || !isAllUndef(Regs);
  if (IsTU) {
    SDValue Merge = createTuple(*CurDAG, Regs, NF, LMUL);
    Operands.push_back(Merge);
  }
  CurOp += NF;

  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
                             Operands, /*IsLoad=*/true);

  const RISCV::VLSEGPseudo *P =
      RISCV::getVLSEGPseudo(NF, IsMasked, IsTU, IsStrided, /*FF*/ false, Log2SEW,
                            static_cast<unsigned>(LMUL));
  MachineSDNode *Load =
      CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);

  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

  // Extract each field of the tuple result via subregisters.
  SDValue SuperReg = SDValue(Load, 0);
  for (unsigned I = 0; I < NF; ++I) {
    unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
    ReplaceUses(SDValue(Node, I),
                CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
  }

  ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
  CurDAG->RemoveDeadNode(Node);
}

// Select a fault-only-first segment load. In addition to NF results and the
// chain, it produces the updated VL, which is read back with PseudoReadVL.
void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) {
  SDLoc DL(Node);
  unsigned NF = Node->getNumValues() - 2; // Do not count VL and Chain.
  MVT VT = Node->getSimpleValueType(0);
  MVT XLenVT = Subtarget->getXLenVT();
  unsigned SEW = VT.getScalarSizeInBits();
  unsigned Log2SEW = Log2_32(SEW);
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  // Skip chain and intrinsic-id operands.
  unsigned CurOp = 2;
  SmallVector<SDValue, 7> Operands;

  SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
                               Node->op_begin() + CurOp + NF);
  // Tail-undisturbed if masked or any merge operand is defined.
  bool IsTU = IsMasked || !isAllUndef(Regs);
  if (IsTU) {
    SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
    Operands.push_back(MaskedOff);
  }
  CurOp += NF;

  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ false, Operands,
                             /*IsLoad=*/true);

  const RISCV::VLSEGPseudo *P =
      RISCV::getVLSEGPseudo(NF, IsMasked, IsTU, /*Strided*/ false, /*FF*/ true,
                            Log2SEW, static_cast<unsigned>(LMUL));
  MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
                                               MVT::Other, MVT::Glue, Operands);
  bool TailAgnostic = true;
  bool MaskAgnostic = false;
  if (IsMasked) {
    // The policy operand is always last on the masked intrinsic.
    uint64_t Policy = Node->getConstantOperandVal(Node->getNumOperands() - 1);
    TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC;
    MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
  }
  // Read the VL produced by the fault-only-first load; PseudoReadVL is glued
  // to the load so nothing can change VL in between.
  unsigned VType =
      RISCVVType::encodeVTYPE(LMUL, SEW, TailAgnostic, MaskAgnostic);
  SDValue VTypeOp = CurDAG->getTargetConstant(VType, DL, XLenVT);
  SDNode *ReadVL = CurDAG->getMachineNode(RISCV::PseudoReadVL, DL, XLenVT,
                                          VTypeOp, /*Glue*/ SDValue(Load, 2));

  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

  // Extract each field of the tuple result via subregisters.
  SDValue SuperReg = SDValue(Load, 0);
  for (unsigned I = 0; I < NF; ++I) {
    unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
    ReplaceUses(SDValue(Node, I),
                CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
  }

  ReplaceUses(SDValue(Node, NF), SDValue(ReadVL, 0));   // VL
  ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 1)); // Chain
  CurDAG->RemoveDeadNode(Node);
}

// Select an indexed (ordered or unordered) segment load to a VLXSEG pseudo.
void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked,
                                     bool IsOrdered) {
  SDLoc DL(Node);
  // Last value is the chain.
  unsigned NF = Node->getNumValues() - 1;
  MVT VT = Node->getSimpleValueType(0);
  unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  // Skip chain and intrinsic-id operands.
  unsigned CurOp = 2;
  SmallVector<SDValue, 8> Operands;

  SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
                               Node->op_begin() + CurOp + NF);
  bool IsTU = IsMasked || !isAllUndef(Regs);
  if (IsTU) {
    SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
    Operands.push_back(MaskedOff);
  }
  CurOp += NF;

  MVT IndexVT;
  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ true, Operands,
                             /*IsLoad=*/true, &IndexVT);

  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
         "Element count mismatch");

  RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
  unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
  if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
    report_fatal_error("The V extension does not support EEW=64 for index "
                       "values when XLEN=32");
  }
  const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
      NF, IsMasked, IsTU, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
      static_cast<unsigned>(IndexLMUL));
  MachineSDNode *Load =
      CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);

  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

  // Extract each field of the tuple result via subregisters.
  SDValue SuperReg = SDValue(Load, 0);
  for (unsigned I = 0; I < NF; ++I) {
    unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
    ReplaceUses(SDValue(Node, I),
                CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
  }

  ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
  CurDAG->RemoveDeadNode(Node);
}

// Select a (possibly strided) segment store to a VSSEG pseudo.
void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, bool IsMasked,
                                    bool IsStrided) {
  SDLoc DL(Node);
  // Operands: chain, intrinsic id, NF values, base, [stride,] [mask,] VL.
  unsigned NF = Node->getNumOperands() - 4;
  if (IsStrided)
    NF--;
  if (IsMasked)
    NF--;
  MVT VT = Node->getOperand(2)->getSimpleValueType(0);
  unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
  SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
  SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);

  SmallVector<SDValue, 8> Operands;
  Operands.push_back(StoreVal);
  unsigned CurOp = 2 + NF;

  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
                             Operands);

  const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
      NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
  MachineSDNode *Store =
      CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);

  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});

  ReplaceNode(Node, Store);
}

// Select an indexed (ordered or unordered) segment store to a VSXSEG pseudo.
void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked,
                                     bool IsOrdered) {
  SDLoc DL(Node);
  // Operands: chain, intrinsic id, NF values, base, index, [mask,] VL.
  unsigned NF = Node->getNumOperands() - 5;
  if (IsMasked)
    --NF;
  MVT VT = Node->getOperand(2)->getSimpleValueType(0);
  unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
  SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
  SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);

  SmallVector<SDValue, 8> Operands;
  Operands.push_back(StoreVal);
  unsigned CurOp = 2 + NF;

  MVT IndexVT;
  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ true, Operands,
                             /*IsLoad=*/false, &IndexVT);

  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
         "Element count mismatch");

  RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
  unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
  if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
    report_fatal_error("The V extension does not support EEW=64 for index "
                       "values when XLEN=32");
  }
  const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
      NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
      static_cast<unsigned>(IndexLMUL));
  MachineSDNode *Store =
      CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);

  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});

  ReplaceNode(Node, Store);
}

// Select the vsetvli/vsetvlimax (and their _opt variants) intrinsics to
// PseudoVSETVLI / PseudoVSETIVLI / PseudoVSETVLIX0 with an encoded VTYPE.
void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
  if (!Subtarget->hasVInstructions())
    return;

  assert((Node->getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN) &&
         "Unexpected opcode");

  SDLoc DL(Node);
  MVT XLenVT = Subtarget->getXLenVT();

  // Chained intrinsics put the intrinsic id at operand 1 (after the chain).
  bool HasChain = Node->getOpcode() == ISD::INTRINSIC_W_CHAIN;
  unsigned IntNoOffset = HasChain ? 1 : 0;
  unsigned IntNo = Node->getConstantOperandVal(IntNoOffset);

  assert((IntNo == Intrinsic::riscv_vsetvli ||
          IntNo == Intrinsic::riscv_vsetvlimax ||
          IntNo == Intrinsic::riscv_vsetvli_opt ||
          IntNo == Intrinsic::riscv_vsetvlimax_opt) &&
         "Unexpected vsetvli intrinsic");

  bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax ||
               IntNo == Intrinsic::riscv_vsetvlimax_opt;
  // vsetvlimax has no AVL operand, so SEW/LMUL come one operand earlier.
  unsigned Offset = IntNoOffset + (VLMax ? 1 : 2);

  assert(Node->getNumOperands() == Offset + 2 &&
         "Unexpected number of operands");

  unsigned SEW =
      RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
  RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(
      Node->getConstantOperandVal(Offset + 1) & 0x7);

  unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
                                            /*MaskAgnostic*/ false);
  SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);

  SmallVector<EVT, 2> VTs = {XLenVT};
  if (HasChain)
    VTs.push_back(MVT::Other);

  SDValue VLOperand;
  unsigned Opcode = RISCV::PseudoVSETVLI;
  if (VLMax) {
    // AVL of X0 (with a non-X0 destination) requests VLMAX.
    VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
    Opcode = RISCV::PseudoVSETVLIX0;
  } else {
    VLOperand = Node->getOperand(IntNoOffset + 1);

    if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
      uint64_t AVL = C->getZExtValue();
      if (isUInt<5>(AVL)) {
        // Small constant AVL fits vsetivli's uimm5 immediate.
        SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
        SmallVector<SDValue, 3> Ops = {VLImm, VTypeIOp};
        if (HasChain)
          Ops.push_back(Node->getOperand(0));
        ReplaceNode(
            Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL, VTs, Ops));
        return;
      }
    }
  }

  SmallVector<SDValue, 3> Ops = {VLOperand, VTypeIOp};
  if (HasChain)
    Ops.push_back(Node->getOperand(0));

  ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, VTs, Ops));
}

void RISCVDAGToDAGISel::Select(SDNode *Node) {
  // If we have a custom node, we have already selected.
  if (Node->isMachineOpcode()) {
    LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
    Node->setNodeId(-1);
    return;
  }

  // Instruction Selection not handled by the auto-generated tablegen selection
  // should be handled here.
  unsigned Opcode = Node->getOpcode();
  MVT XLenVT = Subtarget->getXLenVT();
  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(0);

  switch (Opcode) {
  case ISD::Constant: {
    auto *ConstNode = cast<ConstantSDNode>(Node);
    // Zero is always available in X0.
    if (VT == XLenVT && ConstNode->isZero()) {
      SDValue New =
          CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, XLenVT);
      ReplaceNode(Node, New.getNode());
      return;
    }
    int64_t Imm = ConstNode->getSExtValue();
    // If the upper XLen-16 bits are not used, try to convert this to a simm12
    // by sign extending bit 15.
    if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&
        hasAllHUsers(Node))
      Imm = SignExtend64<16>(Imm);
    // If the upper 32-bits are not used try to convert this into a simm32 by
    // sign extending bit 32.
    if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
      Imm = SignExtend64<32>(Imm);

    ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget));
    return;
  }
  case ISD::ADD: {
    // Try to select ADD + immediate used as memory addresses to
    // (ADDI (ADD X, Imm-Lo12), Lo12) if it will allow the ADDI to be removed by
    // doPeepholeLoadStoreADDI.

    // LHS should be an immediate.
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C)
      break;

    int64_t Offset = N1C->getSExtValue();
    int64_t Lo12 = SignExtend64<12>(Offset);

    // Don't do this if the lower 12 bits are 0 or we could use ADDI directly.
    if (Lo12 == 0 || isInt<12>(Offset))
      break;

    // Don't do this if we can use a pair of ADDIs.
    if (isInt<12>(Offset / 2) && isInt<12>(Offset - Offset / 2))
      break;

    bool AllPointerUses = true;
    for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
      SDNode *User = *UI;

      // Is this user a memory instruction that uses a register and immediate
      // that has this ADD as its pointer.
      unsigned BaseOpIdx, OffsetOpIdx;
      if (!User->isMachineOpcode() ||
          !hasMemOffset(User, BaseOpIdx, OffsetOpIdx) ||
          UI.getOperandNo() != BaseOpIdx) {
        AllPointerUses = false;
        break;
      }

      // If the memory instruction already has an offset, make sure the combined
      // offset is foldable.
      int64_t MemOffs =
          cast<ConstantSDNode>(User->getOperand(OffsetOpIdx))->getSExtValue();
      MemOffs += Lo12;
      if (!isInt<12>(MemOffs)) {
        AllPointerUses = false;
        break;
      }
    }

    if (!AllPointerUses)
      break;

    Offset -= Lo12;
    // Restore sign bits for RV32.
    if (!Subtarget->is64Bit())
      Offset = SignExtend64<32>(Offset);

    // Emit (ADDI (ADD X, Hi), Lo)
    SDNode *Imm = selectImm(CurDAG, DL, VT, Offset, *Subtarget);
    SDNode *ADD = CurDAG->getMachineNode(RISCV::ADD, DL, VT,
                                         Node->getOperand(0), SDValue(Imm, 0));
    SDNode *ADDI =
        CurDAG->getMachineNode(RISCV::ADDI, DL, VT, SDValue(ADD, 0),
                               CurDAG->getTargetConstant(Lo12, DL, VT));
    ReplaceNode(Node, ADDI);
    return;
  }
  case ISD::SRL: {
    // Optimize (srl (and X, C2), C) ->
    //          (srli (slli X, (XLen-C3), (XLen-C3) + C)
    // Where C2 is a mask with C3 trailing ones.
    // Taking into account that the C2 may have had lower bits unset by
    // SimplifyDemandedBits. This avoids materializing the C2 immediate.
    // This pattern occurs when type legalizing right shifts for types with
    // less than XLen bits.
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C)
      break;
    SDValue N0 = Node->getOperand(0);
    if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
        !isa<ConstantSDNode>(N0.getOperand(1)))
      break;
    unsigned ShAmt = N1C->getZExtValue();
    uint64_t Mask = N0.getConstantOperandVal(1);
    // Re-fill any low mask bits cleared by SimplifyDemandedBits; the shift
    // discards them anyway.
    Mask |= maskTrailingOnes<uint64_t>(ShAmt);
    if (!isMask_64(Mask))
      break;
    unsigned TrailingOnes = countTrailingOnes(Mask);
    // 32 trailing ones should use srliw via tablegen pattern.
    if (TrailingOnes == 32 || ShAmt >= TrailingOnes)
      break;
    unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
    SDNode *SLLI =
        CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
                               CurDAG->getTargetConstant(LShAmt, DL, VT));
    SDNode *SRLI = CurDAG->getMachineNode(
        RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
        CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
    ReplaceNode(Node, SRLI);
    return;
  }
  case ISD::SRA: {
    // Optimize (sra (sext_inreg X, i16), C) ->
    //          (srai (slli X, (XLen-16), (XLen-16) + C)
    // And      (sra (sext_inreg X, i8), C) ->
    //          (srai (slli X, (XLen-8), (XLen-8) + C)
    // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
    // This transform matches the code we get without Zbb. The shifts are more
    // compressible, and this can help expose CSE opportunities in the sdiv by
    // constant optimization.
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C)
      break;
    SDValue N0 = Node->getOperand(0);
    if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
      break;
    unsigned ShAmt = N1C->getZExtValue();
    unsigned ExtSize =
        cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
    // ExtSize of 32 should use sraiw via tablegen pattern.
    if (ExtSize >= 32 || ShAmt >= ExtSize)
      break;
    unsigned LShAmt = Subtarget->getXLen() - ExtSize;
    SDNode *SLLI =
        CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
                               CurDAG->getTargetConstant(LShAmt, DL, VT));
    SDNode *SRAI = CurDAG->getMachineNode(
        RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
        CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
    ReplaceNode(Node, SRAI);
    return;
  }
  case ISD::AND: {
    // Several (and (shift X, C2), C1) -> shift-pair rewrites that avoid
    // materializing the mask constant C1 in a register.
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C)
      break;

    SDValue N0 = Node->getOperand(0);

    bool LeftShift = N0.getOpcode() == ISD::SHL;
    if (!LeftShift && N0.getOpcode() != ISD::SRL)
      break;

    auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!C)
      break;
    uint64_t C2 = C->getZExtValue();
    unsigned XLen = Subtarget->getXLen();
    if (!C2 || C2 >= XLen)
      break;

    uint64_t C1 = N1C->getZExtValue();

    // Keep track of whether this is a c.andi. If we can't use c.andi, the
    // shift pair might offer more compression opportunities.
    // TODO: We could check for C extension here, but we don't have many lit
    // tests with the C extension enabled so not checking gets better coverage.
    // TODO: What if ANDI faster than shift?
    bool IsCANDI = isInt<6>(N1C->getSExtValue());

    // Clear irrelevant bits in the mask.
    if (LeftShift)
      C1 &= maskTrailingZeros<uint64_t>(C2);
    else
      C1 &= maskTrailingOnes<uint64_t>(XLen - C2);

    // Some transforms should only be done if the shift has a single use or
    // the AND would become (srli (slli X, 32), 32)
    bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);

    SDValue X = N0.getOperand(0);

    // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
    // with c3 leading zeros.
    if (!LeftShift && isMask_64(C1)) {
      uint64_t C3 = XLen - (64 - countLeadingZeros(C1));
      if (C2 < C3) {
        // If the number of leading zeros is C2+32 this can be SRLIW.
        if (C2 + 32 == C3) {
          SDNode *SRLIW =
              CurDAG->getMachineNode(RISCV::SRLIW, DL, XLenVT, X,
                                     CurDAG->getTargetConstant(C2, DL, XLenVT));
          ReplaceNode(Node, SRLIW);
          return;
        }

        // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32) if
        // c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
        //
        // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
        // legalized and goes through DAG combine.
        if (C2 >= 32 && (C3 - C2) == 1 && N0.hasOneUse() &&
            X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
            cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
          SDNode *SRAIW =
              CurDAG->getMachineNode(RISCV::SRAIW, DL, XLenVT, X.getOperand(0),
                                     CurDAG->getTargetConstant(31, DL, XLenVT));
          SDNode *SRLIW = CurDAG->getMachineNode(
              RISCV::SRLIW, DL, XLenVT, SDValue(SRAIW, 0),
              CurDAG->getTargetConstant(C3 - 32, DL, XLenVT));
          ReplaceNode(Node, SRLIW);
          return;
        }

        // (srli (slli x, c3-c2), c3).
        // Skip if we could use (zext.w (sraiw X, C2)).
        bool Skip = Subtarget->hasStdExtZba() && C3 == 32 &&
                    X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
                    cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
        // Also Skip if we can use bexti.
        Skip |= Subtarget->hasStdExtZbs() && C3 == XLen - 1;
        if (OneUseOrZExtW && !Skip) {
          SDNode *SLLI = CurDAG->getMachineNode(
              RISCV::SLLI, DL, XLenVT, X,
              CurDAG->getTargetConstant(C3 - C2, DL, XLenVT));
          SDNode *SRLI =
              CurDAG->getMachineNode(RISCV::SRLI, DL, XLenVT, SDValue(SLLI, 0),
                                     CurDAG->getTargetConstant(C3, DL, XLenVT));
          ReplaceNode(Node, SRLI);
          return;
        }
      }
    }

    // Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask
    // shifted by c2 bits with c3 leading zeros.
    if (LeftShift && isShiftedMask_64(C1)) {
      uint64_t C3 = XLen - (64 - countLeadingZeros(C1));

      if (C2 + C3 < XLen &&
          C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + C3)) << C2)) {
        // Use slli.uw when possible.
        if ((XLen - (C2 + C3)) == 32 && Subtarget->hasStdExtZba()) {
          SDNode *SLLI_UW =
              CurDAG->getMachineNode(RISCV::SLLI_UW, DL, XLenVT, X,
                                     CurDAG->getTargetConstant(C2, DL, XLenVT));
          ReplaceNode(Node, SLLI_UW);
          return;
        }

        // (srli (slli c2+c3), c3)
        if (OneUseOrZExtW && !IsCANDI) {
          SDNode *SLLI = CurDAG->getMachineNode(
              RISCV::SLLI, DL, XLenVT, X,
              CurDAG->getTargetConstant(C2 + C3, DL, XLenVT));
          SDNode *SRLI =
              CurDAG->getMachineNode(RISCV::SRLI, DL, XLenVT, SDValue(SLLI, 0),
                                     CurDAG->getTargetConstant(C3, DL, XLenVT));
          ReplaceNode(Node, SRLI);
          return;
        }
      }
    }

    // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
    // shifted mask with c2 leading zeros and c3 trailing zeros.
    if (!LeftShift && isShiftedMask_64(C1)) {
      uint64_t Leading = XLen - (64 - countLeadingZeros(C1));
      uint64_t C3 = countTrailingZeros(C1);
      if (Leading == C2 && C2 + C3 < XLen && OneUseOrZExtW && !IsCANDI) {
        unsigned SrliOpc = RISCV::SRLI;
        // If the input is zexti32 we should use SRLIW.
        if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
            X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
          SrliOpc = RISCV::SRLIW;
          X = X.getOperand(0);
        }
        SDNode *SRLI = CurDAG->getMachineNode(
            SrliOpc, DL, XLenVT, X,
            CurDAG->getTargetConstant(C2 + C3, DL, XLenVT));
        SDNode *SLLI =
            CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLI, 0),
                                   CurDAG->getTargetConstant(C3, DL, XLenVT));
        ReplaceNode(Node, SLLI);
        return;
      }
      // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
      if (Leading > 32 && (Leading - 32) == C2 && C2 + C3 < 32 &&
          OneUseOrZExtW && !IsCANDI) {
        SDNode *SRLIW = CurDAG->getMachineNode(
            RISCV::SRLIW, DL, XLenVT, X,
            CurDAG->getTargetConstant(C2 + C3, DL, XLenVT));
        SDNode *SLLI =
            CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLIW, 0),
                                   CurDAG->getTargetConstant(C3, DL, XLenVT));
        ReplaceNode(Node, SLLI);
        return;
      }
    }

    // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
    // shifted mask with no leading zeros and c3 trailing zeros.
    if (LeftShift && isShiftedMask_64(C1)) {
      uint64_t Leading = XLen - (64 - countLeadingZeros(C1));
      uint64_t C3 = countTrailingZeros(C1);
      if (Leading == 0 && C2 < C3 && OneUseOrZExtW && !IsCANDI) {
        SDNode *SRLI = CurDAG->getMachineNode(
            RISCV::SRLI, DL, XLenVT, X,
            CurDAG->getTargetConstant(C3 - C2, DL, XLenVT));
        SDNode *SLLI =
            CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLI, 0),
                                   CurDAG->getTargetConstant(C3, DL, XLenVT));
        ReplaceNode(Node, SLLI);
        return;
      }
      // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
      if (C2 < C3 && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
        SDNode *SRLIW = CurDAG->getMachineNode(
            RISCV::SRLIW, DL, XLenVT, X,
            CurDAG->getTargetConstant(C3 - C2, DL, XLenVT));
        SDNode *SLLI =
            CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLIW, 0),
                                   CurDAG->getTargetConstant(C3, DL, XLenVT));
        ReplaceNode(Node, SLLI);
        return;
      }
    }

    break;
  }
  case ISD::MUL: {
    // Special case for calculating (mul (and X, C2), C1) where the full product
    // fits in XLen bits. We can shift X left by the number of leading zeros in
    // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
    // product has XLen trailing zeros, putting it in the output of MULHU. This
    // can avoid materializing a constant in a register for C2.
1017 1018 // RHS should be a constant. 1019 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 1020 if (!N1C || !N1C->hasOneUse()) 1021 break; 1022 1023 // LHS should be an AND with constant. 1024 SDValue N0 = Node->getOperand(0); 1025 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1))) 1026 break; 1027 1028 uint64_t C2 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); 1029 1030 // Constant should be a mask. 1031 if (!isMask_64(C2)) 1032 break; 1033 1034 // This should be the only use of the AND unless we will use 1035 // (SRLI (SLLI X, 32), 32). We don't use a shift pair for other AND 1036 // constants. 1037 if (!N0.hasOneUse() && C2 != UINT64_C(0xFFFFFFFF)) 1038 break; 1039 1040 // If this can be an ANDI, ZEXT.H or ZEXT.W we don't need to do this 1041 // optimization. 1042 if (isInt<12>(C2) || 1043 (C2 == UINT64_C(0xFFFF) && 1044 (Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp())) || 1045 (C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba())) 1046 break; 1047 1048 // We need to shift left the AND input and C1 by a total of XLen bits. 1049 1050 // How far left do we need to shift the AND input? 1051 unsigned XLen = Subtarget->getXLen(); 1052 unsigned LeadingZeros = XLen - (64 - countLeadingZeros(C2)); 1053 1054 // The constant gets shifted by the remaining amount unless that would 1055 // shift bits out. 1056 uint64_t C1 = N1C->getZExtValue(); 1057 unsigned ConstantShift = XLen - LeadingZeros; 1058 if (ConstantShift > (XLen - (64 - countLeadingZeros(C1)))) 1059 break; 1060 1061 uint64_t ShiftedC1 = C1 << ConstantShift; 1062 // If this RV32, we need to sign extend the constant. 1063 if (XLen == 32) 1064 ShiftedC1 = SignExtend64<32>(ShiftedC1); 1065 1066 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))). 
1067 SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget); 1068 SDNode *SLLI = 1069 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0), 1070 CurDAG->getTargetConstant(LeadingZeros, DL, VT)); 1071 SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT, 1072 SDValue(SLLI, 0), SDValue(Imm, 0)); 1073 ReplaceNode(Node, MULHU); 1074 return; 1075 } 1076 case ISD::INTRINSIC_WO_CHAIN: { 1077 unsigned IntNo = Node->getConstantOperandVal(0); 1078 switch (IntNo) { 1079 // By default we do not custom select any intrinsic. 1080 default: 1081 break; 1082 case Intrinsic::riscv_vmsgeu: 1083 case Intrinsic::riscv_vmsge: { 1084 SDValue Src1 = Node->getOperand(1); 1085 SDValue Src2 = Node->getOperand(2); 1086 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu; 1087 bool IsCmpUnsignedZero = false; 1088 // Only custom select scalar second operand. 1089 if (Src2.getValueType() != XLenVT) 1090 break; 1091 // Small constants are handled with patterns. 1092 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) { 1093 int64_t CVal = C->getSExtValue(); 1094 if (CVal >= -15 && CVal <= 16) { 1095 if (!IsUnsigned || CVal != 0) 1096 break; 1097 IsCmpUnsignedZero = true; 1098 } 1099 } 1100 MVT Src1VT = Src1.getSimpleValueType(); 1101 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode; 1102 switch (RISCVTargetLowering::getLMUL(Src1VT)) { 1103 default: 1104 llvm_unreachable("Unexpected LMUL!"); 1105 #define CASE_VMSLT_VMNAND_VMSET_OPCODES(lmulenum, suffix, suffix_b) \ 1106 case RISCVII::VLMUL::lmulenum: \ 1107 VMSLTOpcode = IsUnsigned ? 
RISCV::PseudoVMSLTU_VX_##suffix \ 1108 : RISCV::PseudoVMSLT_VX_##suffix; \ 1109 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \ 1110 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b; \ 1111 break; 1112 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F8, MF8, B1) 1113 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F4, MF4, B2) 1114 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F2, MF2, B4) 1115 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_1, M1, B8) 1116 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_2, M2, B16) 1117 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_4, M4, B32) 1118 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_8, M8, B64) 1119 #undef CASE_VMSLT_VMNAND_VMSET_OPCODES 1120 } 1121 SDValue SEW = CurDAG->getTargetConstant( 1122 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT); 1123 SDValue VL; 1124 selectVLOp(Node->getOperand(3), VL); 1125 1126 // If vmsgeu with 0 immediate, expand it to vmset. 1127 if (IsCmpUnsignedZero) { 1128 ReplaceNode(Node, CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW)); 1129 return; 1130 } 1131 1132 // Expand to 1133 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd 1134 SDValue Cmp = SDValue( 1135 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}), 1136 0); 1137 ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT, 1138 {Cmp, Cmp, VL, SEW})); 1139 return; 1140 } 1141 case Intrinsic::riscv_vmsgeu_mask: 1142 case Intrinsic::riscv_vmsge_mask: { 1143 SDValue Src1 = Node->getOperand(2); 1144 SDValue Src2 = Node->getOperand(3); 1145 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask; 1146 bool IsCmpUnsignedZero = false; 1147 // Only custom select scalar second operand. 1148 if (Src2.getValueType() != XLenVT) 1149 break; 1150 // Small constants are handled with patterns. 
1151 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) { 1152 int64_t CVal = C->getSExtValue(); 1153 if (CVal >= -15 && CVal <= 16) { 1154 if (!IsUnsigned || CVal != 0) 1155 break; 1156 IsCmpUnsignedZero = true; 1157 } 1158 } 1159 MVT Src1VT = Src1.getSimpleValueType(); 1160 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode, 1161 VMOROpcode; 1162 switch (RISCVTargetLowering::getLMUL(Src1VT)) { 1163 default: 1164 llvm_unreachable("Unexpected LMUL!"); 1165 #define CASE_VMSLT_OPCODES(lmulenum, suffix, suffix_b) \ 1166 case RISCVII::VLMUL::lmulenum: \ 1167 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \ 1168 : RISCV::PseudoVMSLT_VX_##suffix; \ 1169 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \ 1170 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \ 1171 break; 1172 CASE_VMSLT_OPCODES(LMUL_F8, MF8, B1) 1173 CASE_VMSLT_OPCODES(LMUL_F4, MF4, B2) 1174 CASE_VMSLT_OPCODES(LMUL_F2, MF2, B4) 1175 CASE_VMSLT_OPCODES(LMUL_1, M1, B8) 1176 CASE_VMSLT_OPCODES(LMUL_2, M2, B16) 1177 CASE_VMSLT_OPCODES(LMUL_4, M4, B32) 1178 CASE_VMSLT_OPCODES(LMUL_8, M8, B64) 1179 #undef CASE_VMSLT_OPCODES 1180 } 1181 // Mask operations use the LMUL from the mask type. 
1182 switch (RISCVTargetLowering::getLMUL(VT)) { 1183 default: 1184 llvm_unreachable("Unexpected LMUL!"); 1185 #define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \ 1186 case RISCVII::VLMUL::lmulenum: \ 1187 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \ 1188 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \ 1189 VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \ 1190 break; 1191 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, MF8) 1192 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, MF4) 1193 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, MF2) 1194 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, M1) 1195 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, M2) 1196 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, M4) 1197 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, M8) 1198 #undef CASE_VMXOR_VMANDN_VMOR_OPCODES 1199 } 1200 SDValue SEW = CurDAG->getTargetConstant( 1201 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT); 1202 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT); 1203 SDValue VL; 1204 selectVLOp(Node->getOperand(5), VL); 1205 SDValue MaskedOff = Node->getOperand(1); 1206 SDValue Mask = Node->getOperand(4); 1207 1208 // If vmsgeu_mask with 0 immediate, expand it to vmor mask, maskedoff. 1209 if (IsCmpUnsignedZero) { 1210 // We don't need vmor if the MaskedOff and the Mask are the same 1211 // value. 1212 if (Mask == MaskedOff) { 1213 ReplaceUses(Node, Mask.getNode()); 1214 return; 1215 } 1216 ReplaceNode(Node, 1217 CurDAG->getMachineNode(VMOROpcode, DL, VT, 1218 {Mask, MaskedOff, VL, MaskSEW})); 1219 return; 1220 } 1221 1222 // If the MaskedOff value and the Mask are the same value use 1223 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt 1224 // This avoids needing to copy v0 to vd before starting the next sequence. 
1225 if (Mask == MaskedOff) { 1226 SDValue Cmp = SDValue( 1227 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}), 1228 0); 1229 ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT, 1230 {Mask, Cmp, VL, MaskSEW})); 1231 return; 1232 } 1233 1234 // Mask needs to be copied to V0. 1235 SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, 1236 RISCV::V0, Mask, SDValue()); 1237 SDValue Glue = Chain.getValue(1); 1238 SDValue V0 = CurDAG->getRegister(RISCV::V0, VT); 1239 1240 // Otherwise use 1241 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0 1242 // The result is mask undisturbed. 1243 // We use the same instructions to emulate mask agnostic behavior, because 1244 // the agnostic result can be either undisturbed or all 1. 1245 SDValue Cmp = SDValue( 1246 CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT, 1247 {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}), 1248 0); 1249 // vmxor.mm vd, vd, v0 is used to update active value. 1250 ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT, 1251 {Cmp, Mask, VL, MaskSEW})); 1252 return; 1253 } 1254 case Intrinsic::riscv_vsetvli_opt: 1255 case Intrinsic::riscv_vsetvlimax_opt: 1256 return selectVSETVLI(Node); 1257 } 1258 break; 1259 } 1260 case ISD::INTRINSIC_W_CHAIN: { 1261 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 1262 switch (IntNo) { 1263 // By default we do not custom select any intrinsic. 
1264 default: 1265 break; 1266 case Intrinsic::riscv_vsetvli: 1267 case Intrinsic::riscv_vsetvlimax: 1268 return selectVSETVLI(Node); 1269 case Intrinsic::riscv_vlseg2: 1270 case Intrinsic::riscv_vlseg3: 1271 case Intrinsic::riscv_vlseg4: 1272 case Intrinsic::riscv_vlseg5: 1273 case Intrinsic::riscv_vlseg6: 1274 case Intrinsic::riscv_vlseg7: 1275 case Intrinsic::riscv_vlseg8: { 1276 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false); 1277 return; 1278 } 1279 case Intrinsic::riscv_vlseg2_mask: 1280 case Intrinsic::riscv_vlseg3_mask: 1281 case Intrinsic::riscv_vlseg4_mask: 1282 case Intrinsic::riscv_vlseg5_mask: 1283 case Intrinsic::riscv_vlseg6_mask: 1284 case Intrinsic::riscv_vlseg7_mask: 1285 case Intrinsic::riscv_vlseg8_mask: { 1286 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false); 1287 return; 1288 } 1289 case Intrinsic::riscv_vlsseg2: 1290 case Intrinsic::riscv_vlsseg3: 1291 case Intrinsic::riscv_vlsseg4: 1292 case Intrinsic::riscv_vlsseg5: 1293 case Intrinsic::riscv_vlsseg6: 1294 case Intrinsic::riscv_vlsseg7: 1295 case Intrinsic::riscv_vlsseg8: { 1296 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true); 1297 return; 1298 } 1299 case Intrinsic::riscv_vlsseg2_mask: 1300 case Intrinsic::riscv_vlsseg3_mask: 1301 case Intrinsic::riscv_vlsseg4_mask: 1302 case Intrinsic::riscv_vlsseg5_mask: 1303 case Intrinsic::riscv_vlsseg6_mask: 1304 case Intrinsic::riscv_vlsseg7_mask: 1305 case Intrinsic::riscv_vlsseg8_mask: { 1306 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true); 1307 return; 1308 } 1309 case Intrinsic::riscv_vloxseg2: 1310 case Intrinsic::riscv_vloxseg3: 1311 case Intrinsic::riscv_vloxseg4: 1312 case Intrinsic::riscv_vloxseg5: 1313 case Intrinsic::riscv_vloxseg6: 1314 case Intrinsic::riscv_vloxseg7: 1315 case Intrinsic::riscv_vloxseg8: 1316 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true); 1317 return; 1318 case Intrinsic::riscv_vluxseg2: 1319 case Intrinsic::riscv_vluxseg3: 1320 case Intrinsic::riscv_vluxseg4: 1321 case 
Intrinsic::riscv_vluxseg5: 1322 case Intrinsic::riscv_vluxseg6: 1323 case Intrinsic::riscv_vluxseg7: 1324 case Intrinsic::riscv_vluxseg8: 1325 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false); 1326 return; 1327 case Intrinsic::riscv_vloxseg2_mask: 1328 case Intrinsic::riscv_vloxseg3_mask: 1329 case Intrinsic::riscv_vloxseg4_mask: 1330 case Intrinsic::riscv_vloxseg5_mask: 1331 case Intrinsic::riscv_vloxseg6_mask: 1332 case Intrinsic::riscv_vloxseg7_mask: 1333 case Intrinsic::riscv_vloxseg8_mask: 1334 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true); 1335 return; 1336 case Intrinsic::riscv_vluxseg2_mask: 1337 case Intrinsic::riscv_vluxseg3_mask: 1338 case Intrinsic::riscv_vluxseg4_mask: 1339 case Intrinsic::riscv_vluxseg5_mask: 1340 case Intrinsic::riscv_vluxseg6_mask: 1341 case Intrinsic::riscv_vluxseg7_mask: 1342 case Intrinsic::riscv_vluxseg8_mask: 1343 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false); 1344 return; 1345 case Intrinsic::riscv_vlseg8ff: 1346 case Intrinsic::riscv_vlseg7ff: 1347 case Intrinsic::riscv_vlseg6ff: 1348 case Intrinsic::riscv_vlseg5ff: 1349 case Intrinsic::riscv_vlseg4ff: 1350 case Intrinsic::riscv_vlseg3ff: 1351 case Intrinsic::riscv_vlseg2ff: { 1352 selectVLSEGFF(Node, /*IsMasked*/ false); 1353 return; 1354 } 1355 case Intrinsic::riscv_vlseg8ff_mask: 1356 case Intrinsic::riscv_vlseg7ff_mask: 1357 case Intrinsic::riscv_vlseg6ff_mask: 1358 case Intrinsic::riscv_vlseg5ff_mask: 1359 case Intrinsic::riscv_vlseg4ff_mask: 1360 case Intrinsic::riscv_vlseg3ff_mask: 1361 case Intrinsic::riscv_vlseg2ff_mask: { 1362 selectVLSEGFF(Node, /*IsMasked*/ true); 1363 return; 1364 } 1365 case Intrinsic::riscv_vloxei: 1366 case Intrinsic::riscv_vloxei_mask: 1367 case Intrinsic::riscv_vluxei: 1368 case Intrinsic::riscv_vluxei_mask: { 1369 bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask || 1370 IntNo == Intrinsic::riscv_vluxei_mask; 1371 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei || 1372 IntNo == 
Intrinsic::riscv_vloxei_mask; 1373 1374 MVT VT = Node->getSimpleValueType(0); 1375 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1376 1377 unsigned CurOp = 2; 1378 // Masked intrinsic only have TU version pseduo instructions. 1379 bool IsTU = IsMasked || (!IsMasked && !Node->getOperand(CurOp).isUndef()); 1380 SmallVector<SDValue, 8> Operands; 1381 if (IsTU) 1382 Operands.push_back(Node->getOperand(CurOp++)); 1383 else 1384 // Skip the undef passthru operand for nomask TA version pseudo 1385 CurOp++; 1386 1387 MVT IndexVT; 1388 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 1389 /*IsStridedOrIndexed*/ true, Operands, 1390 /*IsLoad=*/true, &IndexVT); 1391 1392 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 1393 "Element count mismatch"); 1394 1395 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1396 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 1397 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 1398 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 1399 report_fatal_error("The V extension does not support EEW=64 for index " 1400 "values when XLEN=32"); 1401 } 1402 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo( 1403 IsMasked, IsTU, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), 1404 static_cast<unsigned>(IndexLMUL)); 1405 MachineSDNode *Load = 1406 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1407 1408 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1409 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 1410 1411 ReplaceNode(Node, Load); 1412 return; 1413 } 1414 case Intrinsic::riscv_vlm: 1415 case Intrinsic::riscv_vle: 1416 case Intrinsic::riscv_vle_mask: 1417 case Intrinsic::riscv_vlse: 1418 case Intrinsic::riscv_vlse_mask: { 1419 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask || 1420 IntNo == Intrinsic::riscv_vlse_mask; 1421 bool IsStrided = 1422 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask; 1423 1424 MVT 
VT = Node->getSimpleValueType(0); 1425 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1426 1427 unsigned CurOp = 2; 1428 // The riscv_vlm intrinsic are always tail agnostic and no passthru operand. 1429 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm; 1430 // Masked intrinsic only have TU version pseduo instructions. 1431 bool IsTU = 1432 HasPassthruOperand && 1433 ((!IsMasked && !Node->getOperand(CurOp).isUndef()) || IsMasked); 1434 SmallVector<SDValue, 8> Operands; 1435 if (IsTU) 1436 Operands.push_back(Node->getOperand(CurOp++)); 1437 else if (HasPassthruOperand) 1438 // Skip the undef passthru operand for nomask TA version pseudo 1439 CurOp++; 1440 1441 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 1442 Operands, /*IsLoad=*/true); 1443 1444 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1445 const RISCV::VLEPseudo *P = 1446 RISCV::getVLEPseudo(IsMasked, IsTU, IsStrided, /*FF*/ false, Log2SEW, 1447 static_cast<unsigned>(LMUL)); 1448 MachineSDNode *Load = 1449 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1450 1451 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1452 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 1453 1454 ReplaceNode(Node, Load); 1455 return; 1456 } 1457 case Intrinsic::riscv_vleff: 1458 case Intrinsic::riscv_vleff_mask: { 1459 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask; 1460 1461 MVT VT = Node->getSimpleValueType(0); 1462 unsigned SEW = VT.getScalarSizeInBits(); 1463 unsigned Log2SEW = Log2_32(SEW); 1464 1465 unsigned CurOp = 2; 1466 // Masked intrinsic only have TU version pseduo instructions. 
1467 bool IsTU = IsMasked || (!IsMasked && !Node->getOperand(CurOp).isUndef()); 1468 SmallVector<SDValue, 7> Operands; 1469 if (IsTU) 1470 Operands.push_back(Node->getOperand(CurOp++)); 1471 else 1472 // Skip the undef passthru operand for nomask TA version pseudo 1473 CurOp++; 1474 1475 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 1476 /*IsStridedOrIndexed*/ false, Operands, 1477 /*IsLoad=*/true); 1478 1479 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1480 const RISCV::VLEPseudo *P = 1481 RISCV::getVLEPseudo(IsMasked, IsTU, /*Strided*/ false, /*FF*/ true, 1482 Log2SEW, static_cast<unsigned>(LMUL)); 1483 MachineSDNode *Load = 1484 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), 1485 MVT::Other, MVT::Glue, Operands); 1486 bool TailAgnostic = !IsTU; 1487 bool MaskAgnostic = false; 1488 if (IsMasked) { 1489 uint64_t Policy = 1490 Node->getConstantOperandVal(Node->getNumOperands() - 1); 1491 TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC; 1492 MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC; 1493 } 1494 unsigned VType = 1495 RISCVVType::encodeVTYPE(LMUL, SEW, TailAgnostic, MaskAgnostic); 1496 SDValue VTypeOp = CurDAG->getTargetConstant(VType, DL, XLenVT); 1497 SDNode *ReadVL = 1498 CurDAG->getMachineNode(RISCV::PseudoReadVL, DL, XLenVT, VTypeOp, 1499 /*Glue*/ SDValue(Load, 2)); 1500 1501 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1502 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 1503 1504 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0)); 1505 ReplaceUses(SDValue(Node, 1), SDValue(ReadVL, 0)); // VL 1506 ReplaceUses(SDValue(Node, 2), SDValue(Load, 1)); // Chain 1507 CurDAG->RemoveDeadNode(Node); 1508 return; 1509 } 1510 } 1511 break; 1512 } 1513 case ISD::INTRINSIC_VOID: { 1514 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 1515 switch (IntNo) { 1516 case Intrinsic::riscv_vsseg2: 1517 case Intrinsic::riscv_vsseg3: 1518 case Intrinsic::riscv_vsseg4: 1519 case Intrinsic::riscv_vsseg5: 1520 
case Intrinsic::riscv_vsseg6: 1521 case Intrinsic::riscv_vsseg7: 1522 case Intrinsic::riscv_vsseg8: { 1523 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false); 1524 return; 1525 } 1526 case Intrinsic::riscv_vsseg2_mask: 1527 case Intrinsic::riscv_vsseg3_mask: 1528 case Intrinsic::riscv_vsseg4_mask: 1529 case Intrinsic::riscv_vsseg5_mask: 1530 case Intrinsic::riscv_vsseg6_mask: 1531 case Intrinsic::riscv_vsseg7_mask: 1532 case Intrinsic::riscv_vsseg8_mask: { 1533 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false); 1534 return; 1535 } 1536 case Intrinsic::riscv_vssseg2: 1537 case Intrinsic::riscv_vssseg3: 1538 case Intrinsic::riscv_vssseg4: 1539 case Intrinsic::riscv_vssseg5: 1540 case Intrinsic::riscv_vssseg6: 1541 case Intrinsic::riscv_vssseg7: 1542 case Intrinsic::riscv_vssseg8: { 1543 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true); 1544 return; 1545 } 1546 case Intrinsic::riscv_vssseg2_mask: 1547 case Intrinsic::riscv_vssseg3_mask: 1548 case Intrinsic::riscv_vssseg4_mask: 1549 case Intrinsic::riscv_vssseg5_mask: 1550 case Intrinsic::riscv_vssseg6_mask: 1551 case Intrinsic::riscv_vssseg7_mask: 1552 case Intrinsic::riscv_vssseg8_mask: { 1553 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true); 1554 return; 1555 } 1556 case Intrinsic::riscv_vsoxseg2: 1557 case Intrinsic::riscv_vsoxseg3: 1558 case Intrinsic::riscv_vsoxseg4: 1559 case Intrinsic::riscv_vsoxseg5: 1560 case Intrinsic::riscv_vsoxseg6: 1561 case Intrinsic::riscv_vsoxseg7: 1562 case Intrinsic::riscv_vsoxseg8: 1563 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true); 1564 return; 1565 case Intrinsic::riscv_vsuxseg2: 1566 case Intrinsic::riscv_vsuxseg3: 1567 case Intrinsic::riscv_vsuxseg4: 1568 case Intrinsic::riscv_vsuxseg5: 1569 case Intrinsic::riscv_vsuxseg6: 1570 case Intrinsic::riscv_vsuxseg7: 1571 case Intrinsic::riscv_vsuxseg8: 1572 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false); 1573 return; 1574 case Intrinsic::riscv_vsoxseg2_mask: 1575 case 
Intrinsic::riscv_vsoxseg3_mask: 1576 case Intrinsic::riscv_vsoxseg4_mask: 1577 case Intrinsic::riscv_vsoxseg5_mask: 1578 case Intrinsic::riscv_vsoxseg6_mask: 1579 case Intrinsic::riscv_vsoxseg7_mask: 1580 case Intrinsic::riscv_vsoxseg8_mask: 1581 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true); 1582 return; 1583 case Intrinsic::riscv_vsuxseg2_mask: 1584 case Intrinsic::riscv_vsuxseg3_mask: 1585 case Intrinsic::riscv_vsuxseg4_mask: 1586 case Intrinsic::riscv_vsuxseg5_mask: 1587 case Intrinsic::riscv_vsuxseg6_mask: 1588 case Intrinsic::riscv_vsuxseg7_mask: 1589 case Intrinsic::riscv_vsuxseg8_mask: 1590 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false); 1591 return; 1592 case Intrinsic::riscv_vsoxei: 1593 case Intrinsic::riscv_vsoxei_mask: 1594 case Intrinsic::riscv_vsuxei: 1595 case Intrinsic::riscv_vsuxei_mask: { 1596 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask || 1597 IntNo == Intrinsic::riscv_vsuxei_mask; 1598 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei || 1599 IntNo == Intrinsic::riscv_vsoxei_mask; 1600 1601 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 1602 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1603 1604 unsigned CurOp = 2; 1605 SmallVector<SDValue, 8> Operands; 1606 Operands.push_back(Node->getOperand(CurOp++)); // Store value. 
1607 1608 MVT IndexVT; 1609 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 1610 /*IsStridedOrIndexed*/ true, Operands, 1611 /*IsLoad=*/false, &IndexVT); 1612 1613 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 1614 "Element count mismatch"); 1615 1616 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1617 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 1618 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 1619 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 1620 report_fatal_error("The V extension does not support EEW=64 for index " 1621 "values when XLEN=32"); 1622 } 1623 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo( 1624 IsMasked, /*TU*/ false, IsOrdered, IndexLog2EEW, 1625 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL)); 1626 MachineSDNode *Store = 1627 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1628 1629 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1630 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 1631 1632 ReplaceNode(Node, Store); 1633 return; 1634 } 1635 case Intrinsic::riscv_vsm: 1636 case Intrinsic::riscv_vse: 1637 case Intrinsic::riscv_vse_mask: 1638 case Intrinsic::riscv_vsse: 1639 case Intrinsic::riscv_vsse_mask: { 1640 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask || 1641 IntNo == Intrinsic::riscv_vsse_mask; 1642 bool IsStrided = 1643 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask; 1644 1645 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 1646 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1647 1648 unsigned CurOp = 2; 1649 SmallVector<SDValue, 8> Operands; 1650 Operands.push_back(Node->getOperand(CurOp++)); // Store value. 
1651 1652 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 1653 Operands); 1654 1655 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1656 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo( 1657 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL)); 1658 MachineSDNode *Store = 1659 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1660 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1661 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 1662 1663 ReplaceNode(Node, Store); 1664 return; 1665 } 1666 } 1667 break; 1668 } 1669 case ISD::BITCAST: { 1670 MVT SrcVT = Node->getOperand(0).getSimpleValueType(); 1671 // Just drop bitcasts between vectors if both are fixed or both are 1672 // scalable. 1673 if ((VT.isScalableVector() && SrcVT.isScalableVector()) || 1674 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) { 1675 ReplaceUses(SDValue(Node, 0), Node->getOperand(0)); 1676 CurDAG->RemoveDeadNode(Node); 1677 return; 1678 } 1679 break; 1680 } 1681 case ISD::INSERT_SUBVECTOR: { 1682 SDValue V = Node->getOperand(0); 1683 SDValue SubV = Node->getOperand(1); 1684 SDLoc DL(SubV); 1685 auto Idx = Node->getConstantOperandVal(2); 1686 MVT SubVecVT = SubV.getSimpleValueType(); 1687 1688 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering(); 1689 MVT SubVecContainerVT = SubVecVT; 1690 // Establish the correct scalable-vector types for any fixed-length type. 
1691 if (SubVecVT.isFixedLengthVector()) 1692 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT); 1693 if (VT.isFixedLengthVector()) 1694 VT = TLI.getContainerForFixedLengthVector(VT); 1695 1696 const auto *TRI = Subtarget->getRegisterInfo(); 1697 unsigned SubRegIdx; 1698 std::tie(SubRegIdx, Idx) = 1699 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 1700 VT, SubVecContainerVT, Idx, TRI); 1701 1702 // If the Idx hasn't been completely eliminated then this is a subvector 1703 // insert which doesn't naturally align to a vector register. These must 1704 // be handled using instructions to manipulate the vector registers. 1705 if (Idx != 0) 1706 break; 1707 1708 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT); 1709 bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 || 1710 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 || 1711 SubVecLMUL == RISCVII::VLMUL::LMUL_F8; 1712 (void)IsSubVecPartReg; // Silence unused variable warning without asserts. 1713 assert((!IsSubVecPartReg || V.isUndef()) && 1714 "Expecting lowering to have created legal INSERT_SUBVECTORs when " 1715 "the subvector is smaller than a full-sized register"); 1716 1717 // If we haven't set a SubRegIdx, then we must be going between 1718 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy. 
1719 if (SubRegIdx == RISCV::NoSubRegister) { 1720 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(VT); 1721 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) == 1722 InRegClassID && 1723 "Unexpected subvector extraction"); 1724 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT); 1725 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, 1726 DL, VT, SubV, RC); 1727 ReplaceNode(Node, NewNode); 1728 return; 1729 } 1730 1731 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV); 1732 ReplaceNode(Node, Insert.getNode()); 1733 return; 1734 } 1735 case ISD::EXTRACT_SUBVECTOR: { 1736 SDValue V = Node->getOperand(0); 1737 auto Idx = Node->getConstantOperandVal(1); 1738 MVT InVT = V.getSimpleValueType(); 1739 SDLoc DL(V); 1740 1741 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering(); 1742 MVT SubVecContainerVT = VT; 1743 // Establish the correct scalable-vector types for any fixed-length type. 1744 if (VT.isFixedLengthVector()) 1745 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT); 1746 if (InVT.isFixedLengthVector()) 1747 InVT = TLI.getContainerForFixedLengthVector(InVT); 1748 1749 const auto *TRI = Subtarget->getRegisterInfo(); 1750 unsigned SubRegIdx; 1751 std::tie(SubRegIdx, Idx) = 1752 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 1753 InVT, SubVecContainerVT, Idx, TRI); 1754 1755 // If the Idx hasn't been completely eliminated then this is a subvector 1756 // extract which doesn't naturally align to a vector register. These must 1757 // be handled using instructions to manipulate the vector registers. 1758 if (Idx != 0) 1759 break; 1760 1761 // If we haven't set a SubRegIdx, then we must be going between 1762 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy. 
1763 if (SubRegIdx == RISCV::NoSubRegister) { 1764 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT); 1765 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) == 1766 InRegClassID && 1767 "Unexpected subvector extraction"); 1768 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT); 1769 SDNode *NewNode = 1770 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC); 1771 ReplaceNode(Node, NewNode); 1772 return; 1773 } 1774 1775 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V); 1776 ReplaceNode(Node, Extract.getNode()); 1777 return; 1778 } 1779 case ISD::SPLAT_VECTOR: 1780 case RISCVISD::VMV_S_X_VL: 1781 case RISCVISD::VFMV_S_F_VL: 1782 case RISCVISD::VMV_V_X_VL: 1783 case RISCVISD::VFMV_V_F_VL: { 1784 // Try to match splat of a scalar load to a strided load with stride of x0. 1785 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL || 1786 Node->getOpcode() == RISCVISD::VFMV_S_F_VL; 1787 bool HasPassthruOperand = Node->getOpcode() != ISD::SPLAT_VECTOR; 1788 if (HasPassthruOperand && !Node->getOperand(0).isUndef()) 1789 break; 1790 SDValue Src = HasPassthruOperand ? Node->getOperand(1) : Node->getOperand(0); 1791 auto *Ld = dyn_cast<LoadSDNode>(Src); 1792 if (!Ld) 1793 break; 1794 EVT MemVT = Ld->getMemoryVT(); 1795 // The memory VT should be the same size as the element type. 1796 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize()) 1797 break; 1798 if (!IsProfitableToFold(Src, Node, Node) || 1799 !IsLegalToFold(Src, Node, Node, TM.getOptLevel())) 1800 break; 1801 1802 SDValue VL; 1803 if (Node->getOpcode() == ISD::SPLAT_VECTOR) 1804 VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT); 1805 else if (IsScalarMove) { 1806 // We could deal with more VL if we update the VSETVLI insert pass to 1807 // avoid introducing more VSETVLI. 
1808 if (!isOneConstant(Node->getOperand(2))) 1809 break; 1810 selectVLOp(Node->getOperand(2), VL); 1811 } else 1812 selectVLOp(Node->getOperand(2), VL); 1813 1814 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1815 SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT); 1816 1817 SDValue Operands[] = {Ld->getBasePtr(), 1818 CurDAG->getRegister(RISCV::X0, XLenVT), VL, SEW, 1819 Ld->getChain()}; 1820 1821 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1822 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo( 1823 /*IsMasked*/ false, /*IsTU*/ false, /*IsStrided*/ true, /*FF*/ false, 1824 Log2SEW, static_cast<unsigned>(LMUL)); 1825 MachineSDNode *Load = 1826 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1827 1828 CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()}); 1829 1830 ReplaceNode(Node, Load); 1831 return; 1832 } 1833 } 1834 1835 // Select the default instruction. 1836 SelectCode(Node); 1837 } 1838 1839 bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand( 1840 const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { 1841 switch (ConstraintID) { 1842 case InlineAsm::Constraint_m: 1843 // We just support simple memory operands that have a single address 1844 // operand and need no special handling. 1845 OutOps.push_back(Op); 1846 return false; 1847 case InlineAsm::Constraint_A: 1848 OutOps.push_back(Op); 1849 return false; 1850 default: 1851 break; 1852 } 1853 1854 return true; 1855 } 1856 1857 // Select a frame index and an optional immediate offset from an ADD or OR. 
1858 bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base, 1859 SDValue &Offset) { 1860 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { 1861 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT()); 1862 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT()); 1863 return true; 1864 } 1865 1866 if (!CurDAG->isBaseWithConstantOffset(Addr)) 1867 return false; 1868 1869 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) { 1870 auto *CN = cast<ConstantSDNode>(Addr.getOperand(1)); 1871 if (isInt<12>(CN->getSExtValue())) { 1872 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), 1873 Subtarget->getXLenVT()); 1874 Offset = CurDAG->getTargetConstant(CN->getSExtValue(), SDLoc(Addr), 1875 Subtarget->getXLenVT()); 1876 return true; 1877 } 1878 } 1879 1880 return false; 1881 } 1882 1883 bool RISCVDAGToDAGISel::SelectBaseAddr(SDValue Addr, SDValue &Base) { 1884 // If this is FrameIndex, select it directly. Otherwise just let it get 1885 // selected to a register independently. 
1886 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) 1887 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT()); 1888 else 1889 Base = Addr; 1890 return true; 1891 } 1892 1893 bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base, 1894 SDValue &Offset) { 1895 if (CurDAG->isBaseWithConstantOffset(Addr)) { 1896 auto *CN = cast<ConstantSDNode>(Addr.getOperand(1)); 1897 if (isInt<12>(CN->getSExtValue())) { 1898 Offset = CurDAG->getTargetConstant(CN->getSExtValue(), SDLoc(Addr), 1899 Subtarget->getXLenVT()); 1900 return SelectBaseAddr(Addr.getOperand(0), Base); 1901 } 1902 } 1903 1904 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT()); 1905 return SelectBaseAddr(Addr, Base); 1906 } 1907 1908 bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth, 1909 SDValue &ShAmt) { 1910 // Shift instructions on RISCV only read the lower 5 or 6 bits of the shift 1911 // amount. If there is an AND on the shift amount, we can bypass it if it 1912 // doesn't affect any of those bits. 1913 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) { 1914 const APInt &AndMask = N->getConstantOperandAPInt(1); 1915 1916 // Since the max shift amount is a power of 2 we can subtract 1 to make a 1917 // mask that covers the bits needed to represent all shift amounts. 1918 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!"); 1919 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1); 1920 1921 if (ShMask.isSubsetOf(AndMask)) { 1922 ShAmt = N.getOperand(0); 1923 return true; 1924 } 1925 1926 // SimplifyDemandedBits may have optimized the mask so try restoring any 1927 // bits that are known zero. 
1928 KnownBits Known = CurDAG->computeKnownBits(N->getOperand(0)); 1929 if (ShMask.isSubsetOf(AndMask | Known.Zero)) { 1930 ShAmt = N.getOperand(0); 1931 return true; 1932 } 1933 } else if (N.getOpcode() == ISD::SUB && 1934 isa<ConstantSDNode>(N.getOperand(0))) { 1935 uint64_t Imm = N.getConstantOperandVal(0); 1936 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to 1937 // generate a NEG instead of a SUB of a constant. 1938 if (Imm != 0 && Imm % ShiftWidth == 0) { 1939 SDLoc DL(N); 1940 EVT VT = N.getValueType(); 1941 SDValue Zero = 1942 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT); 1943 unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB; 1944 MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero, 1945 N.getOperand(1)); 1946 ShAmt = SDValue(Neg, 0); 1947 return true; 1948 } 1949 } 1950 1951 ShAmt = N; 1952 return true; 1953 } 1954 1955 bool RISCVDAGToDAGISel::selectSExti32(SDValue N, SDValue &Val) { 1956 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG && 1957 cast<VTSDNode>(N.getOperand(1))->getVT() == MVT::i32) { 1958 Val = N.getOperand(0); 1959 return true; 1960 } 1961 MVT VT = N.getSimpleValueType(); 1962 if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - 32)) { 1963 Val = N; 1964 return true; 1965 } 1966 1967 return false; 1968 } 1969 1970 bool RISCVDAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) { 1971 if (N.getOpcode() == ISD::AND) { 1972 auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); 1973 if (C && C->getZExtValue() == UINT64_C(0xFFFFFFFF)) { 1974 Val = N.getOperand(0); 1975 return true; 1976 } 1977 } 1978 MVT VT = N.getSimpleValueType(); 1979 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), 32); 1980 if (CurDAG->MaskedValueIsZero(N, Mask)) { 1981 Val = N; 1982 return true; 1983 } 1984 1985 return false; 1986 } 1987 1988 // Return true if all users of this SDNode* only consume the lower \p Bits. 
// This can be used to form W instructions for add/sub/mul/shl even when the
// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
// SimplifyDemandedBits has made it so some users see a sext_inreg and some
// don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
// the add/sub/mul/shl to become non-W instructions. By checking the users we
// may be able to use a W instruction and CSE with the other instruction if
// this has happened. We could try to detect that the CSE opportunity exists
// before doing this, but that would be more complicated.
// TODO: Does this need to look through AND/OR/XOR to their users to find more
// opportunities?
bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const {
  assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
          Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
          Node->getOpcode() == ISD::SRL ||
          Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
          Node->getOpcode() == RISCVISD::GREV ||
          Node->getOpcode() == RISCVISD::GORC ||
          isa<ConstantSDNode>(Node)) &&
         "Unexpected opcode");

  for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
    SDNode *User = *UI;
    // Users of this node should have already been instruction selected
    if (!User->isMachineOpcode())
      return false;

    // TODO: Add more opcodes?
    switch (User->getMachineOpcode()) {
    default:
      return false;
    // These instructions only read the low 32 bits of their register inputs.
    case RISCV::ADDW:
    case RISCV::ADDIW:
    case RISCV::SUBW:
    case RISCV::MULW:
    case RISCV::SLLW:
    case RISCV::SLLIW:
    case RISCV::SRAW:
    case RISCV::SRAIW:
    case RISCV::SRLW:
    case RISCV::SRLIW:
    case RISCV::DIVW:
    case RISCV::DIVUW:
    case RISCV::REMW:
    case RISCV::REMUW:
    case RISCV::ROLW:
    case RISCV::RORW:
    case RISCV::RORIW:
    case RISCV::CLZW:
    case RISCV::CTZW:
    case RISCV::CPOPW:
    case RISCV::SLLI_UW:
    case RISCV::FMV_W_X:
    case RISCV::FCVT_H_W:
    case RISCV::FCVT_H_WU:
    case RISCV::FCVT_S_W:
    case RISCV::FCVT_S_WU:
    case RISCV::FCVT_D_W:
    case RISCV::FCVT_D_WU:
      if (Bits < 32)
        return false;
      break;
    case RISCV::SLLI:
      // SLLI only uses the lower (XLen - ShAmt) bits.
      if (Bits < Subtarget->getXLen() - User->getConstantOperandVal(1))
        return false;
      break;
    case RISCV::ANDI:
      // ANDI only reads input bits covered by its immediate mask.
      if (Bits < (64 - countLeadingZeros(User->getConstantOperandVal(1))))
        return false;
      break;
    case RISCV::SEXT_B:
      if (Bits < 8)
        return false;
      break;
    case RISCV::SEXT_H:
    case RISCV::FMV_H_X:
    case RISCV::ZEXT_H_RV32:
    case RISCV::ZEXT_H_RV64:
      if (Bits < 16)
        return false;
      break;
    case RISCV::ADD_UW:
    case RISCV::SH1ADD_UW:
    case RISCV::SH2ADD_UW:
    case RISCV::SH3ADD_UW:
      // The first operand to add.uw/shXadd.uw is implicitly zero extended from
      // 32 bits.
      if (UI.getOperandNo() != 0 || Bits < 32)
        return false;
      break;
    // Stores only consume the stored bits, and only through the value
    // operand (operand 0).
    case RISCV::SB:
      if (UI.getOperandNo() != 0 || Bits < 8)
        return false;
      break;
    case RISCV::SH:
      if (UI.getOperandNo() != 0 || Bits < 16)
        return false;
      break;
    case RISCV::SW:
      if (UI.getOperandNo() != 0 || Bits < 32)
        return false;
      break;
    }
  }

  return true;
}

// Select VL as a 5 bit immediate or a value that will become a register. This
// allows us to choose between VSETIVLI or VSETVLI later.
bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
  auto *C = dyn_cast<ConstantSDNode>(N);
  if (C && isUInt<5>(C->getZExtValue())) {
    // Small constant VLs become immediates (candidates for VSETIVLI).
    VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
                                   N->getValueType(0));
  } else if (C && C->isAllOnesValue()) {
    // Treat all ones as VLMax.
    VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
                                   N->getValueType(0));
  } else if (isa<RegisterSDNode>(N) &&
             cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
    // All our VL operands use an operand that allows GPRNoX0 or an immediate
    // as the register class. Convert X0 to a special immediate to pass the
    // MachineVerifier. This is recognized specially by the vsetvli insertion
    // pass.
    VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
                                   N->getValueType(0));
  } else {
    VL = N;
  }

  // Always matches; the work above is only canonicalization.
  return true;
}

// Match a scalar splat (VMV_V_X_VL with an undef passthru) and return its
// splatted scalar operand.
bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
  if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef())
    return false;
  SplatVal = N.getOperand(1);
  return true;
}

using ValidateFn = bool (*)(int64_t);

// Shared implementation for the selectVSplatSimm5* predicates: match a
// constant splat whose immediate, after truncation/sign-extension to the
// vector element type, passes \p ValidateImm.
static bool selectVSplatSimmHelper(SDValue N, SDValue &SplatVal,
                                   SelectionDAG &DAG,
                                   const RISCVSubtarget &Subtarget,
                                   ValidateFn ValidateImm) {
  if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() ||
      !isa<ConstantSDNode>(N.getOperand(1)))
    return false;

  int64_t SplatImm =
      cast<ConstantSDNode>(N.getOperand(1))->getSExtValue();

  // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
  // type is wider than the resulting vector element type: an implicit
  // truncation first takes place. Therefore, perform a manual
  // truncation/sign-extension in order to ignore any truncated bits and catch
  // any zero-extended immediate.
  // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
  // sign-extending to (XLenVT -1).
  MVT XLenVT = Subtarget.getXLenVT();
  assert(XLenVT == N.getOperand(1).getSimpleValueType() &&
         "Unexpected splat operand type");
  MVT EltVT = N.getSimpleValueType().getVectorElementType();
  if (EltVT.bitsLT(XLenVT))
    SplatImm = SignExtend64(SplatImm, EltVT.getSizeInBits());

  if (!ValidateImm(SplatImm))
    return false;

  SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), XLenVT);
  return true;
}

// Match a constant splat in the simm5 range [-16, 15].
bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
  return selectVSplatSimmHelper(N, SplatVal, *CurDAG, *Subtarget,
                                [](int64_t Imm) { return isInt<5>(Imm); });
}

// Match a constant splat in [-15, 16], i.e. a simm5 after subtracting one.
bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
  return selectVSplatSimmHelper(
      N, SplatVal, *CurDAG, *Subtarget,
      [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; });
}

// Same as selectVSplatSimm5Plus1, but additionally rejects zero.
bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
                                                      SDValue &SplatVal) {
  return selectVSplatSimmHelper(
      N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) {
        return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);
      });
}

// Match a constant splat in the uimm5 range [0, 31]. Note: unlike
// selectVSplatSimmHelper, no truncation to the element type is applied here.
bool RISCVDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &SplatVal) {
  if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() ||
      !isa<ConstantSDNode>(N.getOperand(1)))
    return false;

  int64_t SplatImm =
      cast<ConstantSDNode>(N.getOperand(1))->getSExtValue();

  if (!isUInt<5>(SplatImm))
    return false;

  SplatVal =
      CurDAG->getTargetConstant(SplatImm, SDLoc(N), Subtarget->getXLenVT());

  return true;
}

// Match a constant that, after sign-extension from \p Width bits, fits in a
// simm5 immediate.
bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
                                       SDValue &Imm) {
  if (auto *C = dyn_cast<ConstantSDNode>(N)) {
    int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);

    if (!isInt<5>(ImmVal))
      return false;

    Imm =
        CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT());
    return true;
  }

  return false;
}

// Merge an ADDI into the offset of a load/store instruction where possible.
// (load (addi base, off1), off2) -> (load base, off1+off2)
// (store val, (addi base, off1), off2) -> (store val, base, off1+off2)
// (load (add base, (addi src, off1)), off2)
//    -> (load (add base, src), off1+off2)
// (store val, (add base, (addi src, off1)), off2)
//    -> (store val, (add base, src), off1+off2)
// This is possible when off1+off2 fits a 12-bit immediate.
bool RISCVDAGToDAGISel::doPeepholeLoadStoreADDI(SDNode *N) {
  unsigned OffsetOpIdx, BaseOpIdx;
  if (!hasMemOffset(N, BaseOpIdx, OffsetOpIdx))
    return false;

  // The existing offset must already be a constant to combine with.
  if (!isa<ConstantSDNode>(N->getOperand(OffsetOpIdx)))
    return false;

  SDValue Base = N->getOperand(BaseOpIdx);

  // The base must itself be an already-selected machine instruction.
  if (!Base.isMachineOpcode())
    return false;

  if (Base.getMachineOpcode() == RISCV::ADDI) {
    // If the base is an ADDI, we can merge it in to the load/store.
  } else if (Base.getMachineOpcode() == RISCV::ADDIW &&
             isa<ConstantSDNode>(Base.getOperand(1)) &&
             Base.getOperand(0).isMachineOpcode() &&
             Base.getOperand(0).getMachineOpcode() == RISCV::LUI &&
             isa<ConstantSDNode>(Base.getOperand(0).getOperand(0))) {
    // ADDIW can be merged if it's part of LUI+ADDIW constant materialization
    // and LUI+ADDI would have produced the same result. This is true for all
    // simm32 values except 0x7ffff800-0x7fffffff.
    int64_t Offset =
        SignExtend64<32>(Base.getOperand(0).getConstantOperandVal(0) << 12);
    Offset += cast<ConstantSDNode>(Base.getOperand(1))->getSExtValue();
    if (!isInt<32>(Offset))
      return false;
  } else
    return false;

  SDValue ImmOperand = Base.getOperand(1);
  uint64_t Offset2 = N->getConstantOperandVal(OffsetOpIdx);

  if (auto *Const = dyn_cast<ConstantSDNode>(ImmOperand)) {
    // Plain constant addend: combine and re-check the simm12 range.
    int64_t Offset1 = Const->getSExtValue();
    int64_t CombinedOffset = Offset1 + Offset2;
    if (!isInt<12>(CombinedOffset))
      return false;
    ImmOperand = CurDAG->getTargetConstant(CombinedOffset, SDLoc(ImmOperand),
                                           ImmOperand.getValueType());
  } else if (auto *GA = dyn_cast<GlobalAddressSDNode>(ImmOperand)) {
    // If the off1 in (addi base, off1) is a global variable's address (its
    // low part, really), then we can rely on the alignment of that variable
    // to provide a margin of safety before off1 can overflow the 12 bits.
    // Check if off2 falls within that margin; if so off1+off2 can't overflow.
    const DataLayout &DL = CurDAG->getDataLayout();
    Align Alignment = GA->getGlobal()->getPointerAlignment(DL);
    if (Offset2 != 0 && Alignment <= Offset2)
      return false;
    int64_t Offset1 = GA->getOffset();
    int64_t CombinedOffset = Offset1 + Offset2;
    ImmOperand = CurDAG->getTargetGlobalAddress(
        GA->getGlobal(), SDLoc(ImmOperand), ImmOperand.getValueType(),
        CombinedOffset, GA->getTargetFlags());
  } else if (auto *CP = dyn_cast<ConstantPoolSDNode>(ImmOperand)) {
    // Ditto.
    Align Alignment = CP->getAlign();
    if (Offset2 != 0 && Alignment <= Offset2)
      return false;
    int64_t Offset1 = CP->getOffset();
    int64_t CombinedOffset = Offset1 + Offset2;
    ImmOperand = CurDAG->getTargetConstantPool(
        CP->getConstVal(), ImmOperand.getValueType(), CP->getAlign(),
        CombinedOffset, CP->getTargetFlags());
  } else {
    return false;
  }

  LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
  LLVM_DEBUG(Base->dump(CurDAG));
  LLVM_DEBUG(dbgs() << "\nN: ");
  LLVM_DEBUG(N->dump(CurDAG));
  LLVM_DEBUG(dbgs() << "\n");

  // Modify the offset operand of the load/store.
  if (BaseOpIdx == 0) { // Load
    N = CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand,
                                   N->getOperand(2));
  } else { // Store
    N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0),
                                   ImmOperand, N->getOperand(3));
  }

  return true;
}

// Try to remove sext.w if the input is a W instruction or can be made into
// a W instruction cheaply.
bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
  // Look for the sext.w pattern, addiw rd, rs1, 0.
  if (N->getMachineOpcode() != RISCV::ADDIW ||
      !isNullConstant(N->getOperand(1)))
    return false;

  SDValue N0 = N->getOperand(0);
  if (!N0.isMachineOpcode())
    return false;

  switch (N0.getMachineOpcode()) {
  default:
    break;
  case RISCV::ADD:
  case RISCV::ADDI:
  case RISCV::SUB:
  case RISCV::MUL:
  case RISCV::SLLI: {
    // Convert sext.w+add/sub/mul to their W instructions. This will create
    // a new independent instruction. This improves latency.
    unsigned Opc;
    switch (N0.getMachineOpcode()) {
    default:
      llvm_unreachable("Unexpected opcode!");
    case RISCV::ADD: Opc = RISCV::ADDW; break;
    case RISCV::ADDI: Opc = RISCV::ADDIW; break;
    case RISCV::SUB: Opc = RISCV::SUBW; break;
    case RISCV::MUL: Opc = RISCV::MULW; break;
    case RISCV::SLLI: Opc = RISCV::SLLIW; break;
    }

    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);

    // Shift amount needs to be uimm5.
    if (N0.getMachineOpcode() == RISCV::SLLI &&
        !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
      break;

    SDNode *Result =
        CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
                               N00, N01);
    ReplaceUses(N, Result);
    return true;
  }
  case RISCV::ADDW:
  case RISCV::ADDIW:
  case RISCV::SUBW:
  case RISCV::MULW:
  case RISCV::SLLIW:
  case RISCV::GREVIW:
  case RISCV::GORCIW:
    // Result is already sign extended just remove the sext.w.
    // NOTE: We only handle the nodes that are selected with hasAllWUsers.
    ReplaceUses(N, N0.getNode());
    return true;
  }

  return false;
}

// Optimize masked RVV pseudo instructions with a known all-ones mask to their
// corresponding "unmasked" pseudo versions. The mask we're interested in will
// take the form of a V0 physical register operand, with a glued
// register-setting instruction.
bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) {
  // Look this pseudo up in the tablegen-generated masked-pseudo table; only
  // listed pseudos have an unmasked counterpart.
  const RISCV::RISCVMaskedPseudoInfo *I =
      RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
  if (!I)
    return false;

  unsigned MaskOpIdx = I->MaskOpIdx;

  // Check that we're using V0 as a mask register.
  if (!isa<RegisterSDNode>(N->getOperand(MaskOpIdx)) ||
      cast<RegisterSDNode>(N->getOperand(MaskOpIdx))->getReg() != RISCV::V0)
    return false;

  // The glued user defines V0.
  const auto *Glued = N->getGluedNode();

  if (!Glued || Glued->getOpcode() != ISD::CopyToReg)
    return false;

  // Check that we're defining V0 as a mask register.
  if (!isa<RegisterSDNode>(Glued->getOperand(1)) ||
      cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0)
    return false;

  // Check the instruction defining V0; it needs to be a VMSET pseudo.
  SDValue MaskSetter = Glued->getOperand(2);

  const auto IsVMSet = [](unsigned Opc) {
    return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
           Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
           Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
           Opc == RISCV::PseudoVMSET_M_B8;
  };

  // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
  // undefined behaviour if it's the wrong bitwidth, so we could choose to
  // assume that it's all-ones? Same applies to its VL.
  if (!MaskSetter->isMachineOpcode() || !IsVMSet(MaskSetter.getMachineOpcode()))
    return false;

  // Retrieve the tail policy operand index, if any.
  Optional<unsigned> TailPolicyOpIdx;
  const RISCVInstrInfo *TII = static_cast<const RISCVInstrInfo *>(
      CurDAG->getSubtarget().getInstrInfo());

  const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());

  bool IsTA = true;
  if (RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags)) {
    // The last operand of the pseudo is the policy op, but we might have a
    // Glue operand last. We might also have a chain.
    TailPolicyOpIdx = N->getNumOperands() - 1;
    if (N->getOperand(*TailPolicyOpIdx).getValueType() == MVT::Glue)
      (*TailPolicyOpIdx)--;
    if (N->getOperand(*TailPolicyOpIdx).getValueType() == MVT::Other)
      (*TailPolicyOpIdx)--;

    if (!(N->getConstantOperandVal(*TailPolicyOpIdx) &
          RISCVII::TAIL_AGNOSTIC)) {
      // Keep the true-masked instruction when there is no unmasked TU
      // instruction
      if (I->UnmaskedTUPseudo == I->MaskedPseudo && !N->getOperand(0).isUndef())
        return false;
      // We can't use TA if the tie-operand is not IMPLICIT_DEF
      if (!N->getOperand(0).isUndef())
        IsTA = false;
    }
  }

  if (IsTA) {
    uint64_t TSFlags = TII->get(I->UnmaskedPseudo).TSFlags;

    // Check that we're dropping the merge operand, the mask operand, and any
    // policy operand when we transform to this unmasked pseudo.
    assert(!RISCVII::hasMergeOp(TSFlags) && RISCVII::hasDummyMaskOp(TSFlags) &&
           !RISCVII::hasVecPolicyOp(TSFlags) &&
           "Unexpected pseudo to transform to");
    (void)TSFlags;
  } else {
    uint64_t TSFlags = TII->get(I->UnmaskedTUPseudo).TSFlags;

    // Check that we're dropping the mask operand, and any policy operand
    // when we transform to this unmasked tu pseudo.
    assert(RISCVII::hasMergeOp(TSFlags) && RISCVII::hasDummyMaskOp(TSFlags) &&
           !RISCVII::hasVecPolicyOp(TSFlags) &&
           "Unexpected pseudo to transform to");
    (void)TSFlags;
  }

  unsigned Opc = IsTA ? I->UnmaskedPseudo : I->UnmaskedTUPseudo;
  SmallVector<SDValue, 8> Ops;
  // Skip the merge operand at index 0 if IsTA
  for (unsigned I = IsTA, E = N->getNumOperands(); I != E; I++) {
    // Skip the mask, the policy, and the Glue.
    SDValue Op = N->getOperand(I);
    if (I == MaskOpIdx || I == TailPolicyOpIdx ||
        Op.getValueType() == MVT::Glue)
      continue;
    Ops.push_back(Op);
  }

  // Transitively apply any node glued to our new node.
  if (auto *TGlued = Glued->getGluedNode())
    Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));

  SDNode *Result = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
  ReplaceUses(N, Result);

  return true;
}

// This pass converts a legalized DAG into a RISCV-specific DAG, ready
// for instruction scheduling.
FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
                                       CodeGenOpt::Level OptLevel) {
  return new RISCVDAGToDAGISel(TM, OptLevel);
}