1 //===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISCV ------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines an instruction selector for the RISCV target. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "RISCVISelDAGToDAG.h" 14 #include "MCTargetDesc/RISCVMCTargetDesc.h" 15 #include "MCTargetDesc/RISCVMatInt.h" 16 #include "RISCVISelLowering.h" 17 #include "RISCVMachineFunctionInfo.h" 18 #include "llvm/CodeGen/MachineFrameInfo.h" 19 #include "llvm/IR/IntrinsicsRISCV.h" 20 #include "llvm/Support/Alignment.h" 21 #include "llvm/Support/Debug.h" 22 #include "llvm/Support/KnownBits.h" 23 #include "llvm/Support/MathExtras.h" 24 #include "llvm/Support/raw_ostream.h" 25 26 using namespace llvm; 27 28 #define DEBUG_TYPE "riscv-isel" 29 30 namespace llvm { 31 namespace RISCV { 32 #define GET_RISCVVSSEGTable_IMPL 33 #define GET_RISCVVLSEGTable_IMPL 34 #define GET_RISCVVLXSEGTable_IMPL 35 #define GET_RISCVVSXSEGTable_IMPL 36 #define GET_RISCVVLETable_IMPL 37 #define GET_RISCVVSETable_IMPL 38 #define GET_RISCVVLXTable_IMPL 39 #define GET_RISCVVSXTable_IMPL 40 #define GET_RISCVMaskedPseudosTable_IMPL 41 #include "RISCVGenSearchableTables.inc" 42 } // namespace RISCV 43 } // namespace llvm 44 45 void RISCVDAGToDAGISel::PreprocessISelDAG() { 46 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); 47 48 bool MadeChange = false; 49 while (Position != CurDAG->allnodes_begin()) { 50 SDNode *N = &*--Position; 51 if (N->use_empty()) 52 continue; 53 54 SDValue Result; 55 switch (N->getOpcode()) { 56 case ISD::SPLAT_VECTOR: { 57 // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point 58 // SPLAT_VECTOR to 
VFMV_V_F_VL to reduce isel burden. 59 MVT VT = N->getSimpleValueType(0); 60 unsigned Opc = 61 VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL; 62 SDLoc DL(N); 63 SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT()); 64 Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), 65 N->getOperand(0), VL); 66 break; 67 } 68 case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: { 69 // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector 70 // load. Done after lowering and combining so that we have a chance to 71 // optimize this to VMV_V_X_VL when the upper bits aren't needed. 72 assert(N->getNumOperands() == 4 && "Unexpected number of operands"); 73 MVT VT = N->getSimpleValueType(0); 74 SDValue Passthru = N->getOperand(0); 75 SDValue Lo = N->getOperand(1); 76 SDValue Hi = N->getOperand(2); 77 SDValue VL = N->getOperand(3); 78 assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() && 79 Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 && 80 "Unexpected VTs!"); 81 MachineFunction &MF = CurDAG->getMachineFunction(); 82 RISCVMachineFunctionInfo *FuncInfo = 83 MF.getInfo<RISCVMachineFunctionInfo>(); 84 SDLoc DL(N); 85 86 // We use the same frame index we use for moving two i32s into 64-bit FPR. 87 // This is an analogous operation. 
88 int FI = FuncInfo->getMoveF64FrameIndex(MF); 89 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 90 const TargetLowering &TLI = CurDAG->getTargetLoweringInfo(); 91 SDValue StackSlot = 92 CurDAG->getFrameIndex(FI, TLI.getPointerTy(CurDAG->getDataLayout())); 93 94 SDValue Chain = CurDAG->getEntryNode(); 95 Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8)); 96 97 SDValue OffsetSlot = 98 CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), DL); 99 Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4), 100 Align(8)); 101 102 Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); 103 104 SDVTList VTs = CurDAG->getVTList({VT, MVT::Other}); 105 SDValue IntID = 106 CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64); 107 SDValue Ops[] = {Chain, 108 IntID, 109 Passthru, 110 StackSlot, 111 CurDAG->getRegister(RISCV::X0, MVT::i64), 112 VL}; 113 114 Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, 115 MVT::i64, MPI, Align(8), 116 MachineMemOperand::MOLoad); 117 break; 118 } 119 } 120 121 if (Result) { 122 LLVM_DEBUG(dbgs() << "RISCV DAG preprocessing replacing:\nOld: "); 123 LLVM_DEBUG(N->dump(CurDAG)); 124 LLVM_DEBUG(dbgs() << "\nNew: "); 125 LLVM_DEBUG(Result->dump(CurDAG)); 126 LLVM_DEBUG(dbgs() << "\n"); 127 128 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); 129 MadeChange = true; 130 } 131 } 132 133 if (MadeChange) 134 CurDAG->RemoveDeadNodes(); 135 } 136 137 void RISCVDAGToDAGISel::PostprocessISelDAG() { 138 HandleSDNode Dummy(CurDAG->getRoot()); 139 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); 140 141 bool MadeChange = false; 142 while (Position != CurDAG->allnodes_begin()) { 143 SDNode *N = &*--Position; 144 // Skip dead nodes and any non-machine opcodes. 
145 if (N->use_empty() || !N->isMachineOpcode()) 146 continue; 147 148 MadeChange |= doPeepholeSExtW(N); 149 MadeChange |= doPeepholeLoadStoreADDI(N); 150 MadeChange |= doPeepholeMaskedRVV(N); 151 } 152 153 CurDAG->setRoot(Dummy.getValue()); 154 155 if (MadeChange) 156 CurDAG->RemoveDeadNodes(); 157 } 158 159 // Returns true if N is a MachineSDNode that has a reg and simm12 memory 160 // operand. The indices of the base pointer and offset are returned in BaseOpIdx 161 // and OffsetOpIdx. 162 static bool hasMemOffset(SDNode *N, unsigned &BaseOpIdx, 163 unsigned &OffsetOpIdx) { 164 switch (N->getMachineOpcode()) { 165 case RISCV::LB: 166 case RISCV::LH: 167 case RISCV::LW: 168 case RISCV::LBU: 169 case RISCV::LHU: 170 case RISCV::LWU: 171 case RISCV::LD: 172 case RISCV::FLH: 173 case RISCV::FLW: 174 case RISCV::FLD: 175 BaseOpIdx = 0; 176 OffsetOpIdx = 1; 177 return true; 178 case RISCV::SB: 179 case RISCV::SH: 180 case RISCV::SW: 181 case RISCV::SD: 182 case RISCV::FSH: 183 case RISCV::FSW: 184 case RISCV::FSD: 185 BaseOpIdx = 1; 186 OffsetOpIdx = 2; 187 return true; 188 } 189 190 return false; 191 } 192 193 static SDNode *selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, 194 RISCVMatInt::InstSeq &Seq) { 195 SDNode *Result = nullptr; 196 SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT); 197 for (RISCVMatInt::Inst &Inst : Seq) { 198 SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, VT); 199 switch (Inst.getOpndKind()) { 200 case RISCVMatInt::Imm: 201 Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SDImm); 202 break; 203 case RISCVMatInt::RegX0: 204 Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SrcReg, 205 CurDAG->getRegister(RISCV::X0, VT)); 206 break; 207 case RISCVMatInt::RegReg: 208 Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SrcReg, SrcReg); 209 break; 210 case RISCVMatInt::RegImm: 211 Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SrcReg, SDImm); 212 break; 213 } 214 215 // Only the first instruction has X0 as its source. 
216 SrcReg = SDValue(Result, 0); 217 } 218 219 return Result; 220 } 221 222 static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, 223 int64_t Imm, const RISCVSubtarget &Subtarget) { 224 RISCVMatInt::InstSeq Seq = 225 RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits()); 226 227 return selectImmSeq(CurDAG, DL, VT, Seq); 228 } 229 230 static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, 231 unsigned NF, RISCVII::VLMUL LMUL) { 232 static const unsigned M1TupleRegClassIDs[] = { 233 RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID, 234 RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID, 235 RISCV::VRN8M1RegClassID}; 236 static const unsigned M2TupleRegClassIDs[] = {RISCV::VRN2M2RegClassID, 237 RISCV::VRN3M2RegClassID, 238 RISCV::VRN4M2RegClassID}; 239 240 assert(Regs.size() >= 2 && Regs.size() <= 8); 241 242 unsigned RegClassID; 243 unsigned SubReg0; 244 switch (LMUL) { 245 default: 246 llvm_unreachable("Invalid LMUL."); 247 case RISCVII::VLMUL::LMUL_F8: 248 case RISCVII::VLMUL::LMUL_F4: 249 case RISCVII::VLMUL::LMUL_F2: 250 case RISCVII::VLMUL::LMUL_1: 251 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7, 252 "Unexpected subreg numbering"); 253 SubReg0 = RISCV::sub_vrm1_0; 254 RegClassID = M1TupleRegClassIDs[NF - 2]; 255 break; 256 case RISCVII::VLMUL::LMUL_2: 257 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3, 258 "Unexpected subreg numbering"); 259 SubReg0 = RISCV::sub_vrm2_0; 260 RegClassID = M2TupleRegClassIDs[NF - 2]; 261 break; 262 case RISCVII::VLMUL::LMUL_4: 263 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1, 264 "Unexpected subreg numbering"); 265 SubReg0 = RISCV::sub_vrm4_0; 266 RegClassID = RISCV::VRN2M4RegClassID; 267 break; 268 } 269 270 SDLoc DL(Regs[0]); 271 SmallVector<SDValue, 8> Ops; 272 273 Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32)); 274 275 for (unsigned I = 0; I < Regs.size(); ++I) { 276 
Ops.push_back(Regs[I]); 277 Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32)); 278 } 279 SDNode *N = 280 CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops); 281 return SDValue(N, 0); 282 } 283 284 void RISCVDAGToDAGISel::addVectorLoadStoreOperands( 285 SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp, 286 bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands, 287 bool IsLoad, MVT *IndexVT) { 288 SDValue Chain = Node->getOperand(0); 289 SDValue Glue; 290 291 Operands.push_back(Node->getOperand(CurOp++)); // Base pointer. 292 293 if (IsStridedOrIndexed) { 294 Operands.push_back(Node->getOperand(CurOp++)); // Index. 295 if (IndexVT) 296 *IndexVT = Operands.back()->getSimpleValueType(0); 297 } 298 299 if (IsMasked) { 300 // Mask needs to be copied to V0. 301 SDValue Mask = Node->getOperand(CurOp++); 302 Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue()); 303 Glue = Chain.getValue(1); 304 Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType())); 305 } 306 SDValue VL; 307 selectVLOp(Node->getOperand(CurOp++), VL); 308 Operands.push_back(VL); 309 310 MVT XLenVT = Subtarget->getXLenVT(); 311 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT); 312 Operands.push_back(SEWOp); 313 314 // Masked load has the tail policy argument. 315 if (IsMasked && IsLoad) { 316 // Policy must be a constant. 317 uint64_t Policy = Node->getConstantOperandVal(CurOp++); 318 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT); 319 Operands.push_back(PolicyOp); 320 } 321 322 Operands.push_back(Chain); // Chain. 
323 if (Glue) 324 Operands.push_back(Glue); 325 } 326 327 static bool isAllUndef(ArrayRef<SDValue> Values) { 328 return llvm::all_of(Values, [](SDValue V) { return V->isUndef(); }); 329 } 330 331 void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked, 332 bool IsStrided) { 333 SDLoc DL(Node); 334 unsigned NF = Node->getNumValues() - 1; 335 MVT VT = Node->getSimpleValueType(0); 336 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 337 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 338 339 unsigned CurOp = 2; 340 SmallVector<SDValue, 8> Operands; 341 342 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp, 343 Node->op_begin() + CurOp + NF); 344 bool IsTU = IsMasked || !isAllUndef(Regs); 345 if (IsTU) { 346 SDValue Merge = createTuple(*CurDAG, Regs, NF, LMUL); 347 Operands.push_back(Merge); 348 } 349 CurOp += NF; 350 351 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 352 Operands, /*IsLoad=*/true); 353 354 const RISCV::VLSEGPseudo *P = 355 RISCV::getVLSEGPseudo(NF, IsMasked, IsTU, IsStrided, /*FF*/ false, Log2SEW, 356 static_cast<unsigned>(LMUL)); 357 MachineSDNode *Load = 358 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands); 359 360 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 361 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 362 363 SDValue SuperReg = SDValue(Load, 0); 364 for (unsigned I = 0; I < NF; ++I) { 365 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I); 366 ReplaceUses(SDValue(Node, I), 367 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg)); 368 } 369 370 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); 371 CurDAG->RemoveDeadNode(Node); 372 } 373 374 void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) { 375 SDLoc DL(Node); 376 unsigned NF = Node->getNumValues() - 2; // Do not count VL and Chain. 
377 MVT VT = Node->getSimpleValueType(0); 378 MVT XLenVT = Subtarget->getXLenVT(); 379 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 380 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 381 382 unsigned CurOp = 2; 383 SmallVector<SDValue, 7> Operands; 384 385 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp, 386 Node->op_begin() + CurOp + NF); 387 bool IsTU = IsMasked || !isAllUndef(Regs); 388 if (IsTU) { 389 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL); 390 Operands.push_back(MaskedOff); 391 } 392 CurOp += NF; 393 394 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 395 /*IsStridedOrIndexed*/ false, Operands, 396 /*IsLoad=*/true); 397 398 const RISCV::VLSEGPseudo *P = 399 RISCV::getVLSEGPseudo(NF, IsMasked, IsTU, /*Strided*/ false, /*FF*/ true, 400 Log2SEW, static_cast<unsigned>(LMUL)); 401 MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, 402 XLenVT, MVT::Other, Operands); 403 404 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 405 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 406 407 SDValue SuperReg = SDValue(Load, 0); 408 for (unsigned I = 0; I < NF; ++I) { 409 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I); 410 ReplaceUses(SDValue(Node, I), 411 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg)); 412 } 413 414 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); // VL 415 ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 2)); // Chain 416 CurDAG->RemoveDeadNode(Node); 417 } 418 419 void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked, 420 bool IsOrdered) { 421 SDLoc DL(Node); 422 unsigned NF = Node->getNumValues() - 1; 423 MVT VT = Node->getSimpleValueType(0); 424 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 425 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 426 427 unsigned CurOp = 2; 428 SmallVector<SDValue, 8> Operands; 429 430 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp, 431 Node->op_begin() + CurOp + NF); 432 
bool IsTU = IsMasked || !isAllUndef(Regs); 433 if (IsTU) { 434 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL); 435 Operands.push_back(MaskedOff); 436 } 437 CurOp += NF; 438 439 MVT IndexVT; 440 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 441 /*IsStridedOrIndexed*/ true, Operands, 442 /*IsLoad=*/true, &IndexVT); 443 444 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 445 "Element count mismatch"); 446 447 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 448 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 449 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 450 report_fatal_error("The V extension does not support EEW=64 for index " 451 "values when XLEN=32"); 452 } 453 const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo( 454 NF, IsMasked, IsTU, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), 455 static_cast<unsigned>(IndexLMUL)); 456 MachineSDNode *Load = 457 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands); 458 459 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 460 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 461 462 SDValue SuperReg = SDValue(Load, 0); 463 for (unsigned I = 0; I < NF; ++I) { 464 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I); 465 ReplaceUses(SDValue(Node, I), 466 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg)); 467 } 468 469 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); 470 CurDAG->RemoveDeadNode(Node); 471 } 472 473 void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, bool IsMasked, 474 bool IsStrided) { 475 SDLoc DL(Node); 476 unsigned NF = Node->getNumOperands() - 4; 477 if (IsStrided) 478 NF--; 479 if (IsMasked) 480 NF--; 481 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 482 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 483 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 484 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF); 485 
SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL); 486 487 SmallVector<SDValue, 8> Operands; 488 Operands.push_back(StoreVal); 489 unsigned CurOp = 2 + NF; 490 491 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 492 Operands); 493 494 const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo( 495 NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL)); 496 MachineSDNode *Store = 497 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands); 498 499 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 500 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 501 502 ReplaceNode(Node, Store); 503 } 504 505 void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked, 506 bool IsOrdered) { 507 SDLoc DL(Node); 508 unsigned NF = Node->getNumOperands() - 5; 509 if (IsMasked) 510 --NF; 511 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 512 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 513 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 514 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF); 515 SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL); 516 517 SmallVector<SDValue, 8> Operands; 518 Operands.push_back(StoreVal); 519 unsigned CurOp = 2 + NF; 520 521 MVT IndexVT; 522 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 523 /*IsStridedOrIndexed*/ true, Operands, 524 /*IsLoad=*/false, &IndexVT); 525 526 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 527 "Element count mismatch"); 528 529 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 530 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 531 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 532 report_fatal_error("The V extension does not support EEW=64 for index " 533 "values when XLEN=32"); 534 } 535 const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo( 536 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), 537 
static_cast<unsigned>(IndexLMUL)); 538 MachineSDNode *Store = 539 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands); 540 541 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 542 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 543 544 ReplaceNode(Node, Store); 545 } 546 547 void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) { 548 if (!Subtarget->hasVInstructions()) 549 return; 550 551 assert((Node->getOpcode() == ISD::INTRINSIC_W_CHAIN || 552 Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN) && 553 "Unexpected opcode"); 554 555 SDLoc DL(Node); 556 MVT XLenVT = Subtarget->getXLenVT(); 557 558 bool HasChain = Node->getOpcode() == ISD::INTRINSIC_W_CHAIN; 559 unsigned IntNoOffset = HasChain ? 1 : 0; 560 unsigned IntNo = Node->getConstantOperandVal(IntNoOffset); 561 562 assert((IntNo == Intrinsic::riscv_vsetvli || 563 IntNo == Intrinsic::riscv_vsetvlimax || 564 IntNo == Intrinsic::riscv_vsetvli_opt || 565 IntNo == Intrinsic::riscv_vsetvlimax_opt) && 566 "Unexpected vsetvli intrinsic"); 567 568 bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax || 569 IntNo == Intrinsic::riscv_vsetvlimax_opt; 570 unsigned Offset = IntNoOffset + (VLMax ? 
1 : 2); 571 572 assert(Node->getNumOperands() == Offset + 2 && 573 "Unexpected number of operands"); 574 575 unsigned SEW = 576 RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7); 577 RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>( 578 Node->getConstantOperandVal(Offset + 1) & 0x7); 579 580 unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true, 581 /*MaskAgnostic*/ false); 582 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT); 583 584 SmallVector<EVT, 2> VTs = {XLenVT}; 585 if (HasChain) 586 VTs.push_back(MVT::Other); 587 588 SDValue VLOperand; 589 unsigned Opcode = RISCV::PseudoVSETVLI; 590 if (VLMax) { 591 VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT); 592 Opcode = RISCV::PseudoVSETVLIX0; 593 } else { 594 VLOperand = Node->getOperand(IntNoOffset + 1); 595 596 if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) { 597 uint64_t AVL = C->getZExtValue(); 598 if (isUInt<5>(AVL)) { 599 SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT); 600 SmallVector<SDValue, 3> Ops = {VLImm, VTypeIOp}; 601 if (HasChain) 602 Ops.push_back(Node->getOperand(0)); 603 ReplaceNode( 604 Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL, VTs, Ops)); 605 return; 606 } 607 } 608 } 609 610 SmallVector<SDValue, 3> Ops = {VLOperand, VTypeIOp}; 611 if (HasChain) 612 Ops.push_back(Node->getOperand(0)); 613 614 ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, VTs, Ops)); 615 } 616 617 void RISCVDAGToDAGISel::Select(SDNode *Node) { 618 // If we have a custom node, we have already selected. 619 if (Node->isMachineOpcode()) { 620 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n"); 621 Node->setNodeId(-1); 622 return; 623 } 624 625 // Instruction Selection not handled by the auto-generated tablegen selection 626 // should be handled here. 
627 unsigned Opcode = Node->getOpcode(); 628 MVT XLenVT = Subtarget->getXLenVT(); 629 SDLoc DL(Node); 630 MVT VT = Node->getSimpleValueType(0); 631 632 switch (Opcode) { 633 case ISD::Constant: { 634 auto *ConstNode = cast<ConstantSDNode>(Node); 635 if (VT == XLenVT && ConstNode->isZero()) { 636 SDValue New = 637 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, XLenVT); 638 ReplaceNode(Node, New.getNode()); 639 return; 640 } 641 int64_t Imm = ConstNode->getSExtValue(); 642 // If the upper XLen-16 bits are not used, try to convert this to a simm12 643 // by sign extending bit 15. 644 if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) && 645 hasAllHUsers(Node)) 646 Imm = SignExtend64<16>(Imm); 647 // If the upper 32-bits are not used try to convert this into a simm32 by 648 // sign extending bit 32. 649 if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node)) 650 Imm = SignExtend64<32>(Imm); 651 652 ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget)); 653 return; 654 } 655 case ISD::ADD: { 656 // Try to select ADD + immediate used as memory addresses to 657 // (ADDI (ADD X, Imm-Lo12), Lo12) if it will allow the ADDI to be removed by 658 // doPeepholeLoadStoreADDI. 659 660 // LHS should be an immediate. 661 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 662 if (!N1C) 663 break; 664 665 int64_t Offset = N1C->getSExtValue(); 666 int64_t Lo12 = SignExtend64<12>(Offset); 667 668 // Don't do this if the lower 12 bits are 0 or we could use ADDI directly. 669 if (Lo12 == 0 || isInt<12>(Offset)) 670 break; 671 672 // Don't do this if we can use a pair of ADDIs. 673 if (isInt<12>(Offset / 2) && isInt<12>(Offset - Offset / 2)) 674 break; 675 676 RISCVMatInt::InstSeq Seq = 677 RISCVMatInt::generateInstSeq(Offset, Subtarget->getFeatureBits()); 678 679 Offset -= Lo12; 680 // Restore sign bits for RV32. 
681 if (!Subtarget->is64Bit()) 682 Offset = SignExtend64<32>(Offset); 683 684 // We can fold if the last operation is an ADDI or its an ADDIW that could 685 // be treated as an ADDI. 686 if (Seq.back().Opc != RISCV::ADDI && 687 !(Seq.back().Opc == RISCV::ADDIW && isInt<32>(Offset))) 688 break; 689 assert(Seq.back().Imm == Lo12 && "Expected immediate to match Lo12"); 690 // Drop the last operation. 691 Seq.pop_back(); 692 assert(!Seq.empty() && "Expected more instructions in sequence"); 693 694 bool AllPointerUses = true; 695 for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) { 696 SDNode *User = *UI; 697 698 // Is this user a memory instruction that uses a register and immediate 699 // that has this ADD as its pointer. 700 unsigned BaseOpIdx, OffsetOpIdx; 701 if (!User->isMachineOpcode() || 702 !hasMemOffset(User, BaseOpIdx, OffsetOpIdx) || 703 UI.getOperandNo() != BaseOpIdx) { 704 AllPointerUses = false; 705 break; 706 } 707 708 // If the memory instruction already has an offset, don't allow folding. 
709 int64_t MemOffs = 710 cast<ConstantSDNode>(User->getOperand(OffsetOpIdx))->getSExtValue(); 711 if (MemOffs != 0) { 712 AllPointerUses = false; 713 break; 714 } 715 } 716 717 if (!AllPointerUses) 718 break; 719 720 // Emit (ADDI (ADD X, Hi), Lo) 721 SDNode *Imm = selectImmSeq(CurDAG, DL, VT, Seq); 722 SDNode *ADD = CurDAG->getMachineNode(RISCV::ADD, DL, VT, 723 Node->getOperand(0), SDValue(Imm, 0)); 724 SDNode *ADDI = 725 CurDAG->getMachineNode(RISCV::ADDI, DL, VT, SDValue(ADD, 0), 726 CurDAG->getTargetConstant(Lo12, DL, VT)); 727 ReplaceNode(Node, ADDI); 728 return; 729 } 730 case ISD::SHL: { 731 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 732 if (!N1C) 733 break; 734 SDValue N0 = Node->getOperand(0); 735 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() || 736 !isa<ConstantSDNode>(N0.getOperand(1))) 737 break; 738 unsigned ShAmt = N1C->getZExtValue(); 739 uint64_t Mask = N0.getConstantOperandVal(1); 740 741 // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C) where C2 has 742 // 32 leading zeros and C3 trailing zeros. 
743 if (ShAmt <= 32 && isShiftedMask_64(Mask)) { 744 unsigned XLen = Subtarget->getXLen(); 745 unsigned LeadingZeros = XLen - (64 - countLeadingZeros(Mask)); 746 unsigned TrailingZeros = countTrailingZeros(Mask); 747 if (TrailingZeros > 0 && LeadingZeros == 32) { 748 SDNode *SRLIW = CurDAG->getMachineNode( 749 RISCV::SRLIW, DL, VT, N0->getOperand(0), 750 CurDAG->getTargetConstant(TrailingZeros, DL, VT)); 751 SDNode *SLLI = CurDAG->getMachineNode( 752 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0), 753 CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT)); 754 ReplaceNode(Node, SLLI); 755 return; 756 } 757 } 758 break; 759 } 760 case ISD::SRL: { 761 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 762 if (!N1C) 763 break; 764 SDValue N0 = Node->getOperand(0); 765 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() || 766 !isa<ConstantSDNode>(N0.getOperand(1))) 767 break; 768 unsigned ShAmt = N1C->getZExtValue(); 769 uint64_t Mask = N0.getConstantOperandVal(1); 770 771 // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has 772 // 32 leading zeros and C3 trailing zeros. 773 if (isShiftedMask_64(Mask)) { 774 unsigned XLen = Subtarget->getXLen(); 775 unsigned LeadingZeros = XLen - (64 - countLeadingZeros(Mask)); 776 unsigned TrailingZeros = countTrailingZeros(Mask); 777 if (LeadingZeros == 32 && TrailingZeros > ShAmt) { 778 SDNode *SRLIW = CurDAG->getMachineNode( 779 RISCV::SRLIW, DL, VT, N0->getOperand(0), 780 CurDAG->getTargetConstant(TrailingZeros, DL, VT)); 781 SDNode *SLLI = CurDAG->getMachineNode( 782 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0), 783 CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT)); 784 ReplaceNode(Node, SLLI); 785 return; 786 } 787 } 788 789 // Optimize (srl (and X, C2), C) -> 790 // (srli (slli X, (XLen-C3), (XLen-C3) + C) 791 // Where C2 is a mask with C3 trailing ones. 792 // Taking into account that the C2 may have had lower bits unset by 793 // SimplifyDemandedBits. This avoids materializing the C2 immediate. 
794 // This pattern occurs when type legalizing right shifts for types with 795 // less than XLen bits. 796 Mask |= maskTrailingOnes<uint64_t>(ShAmt); 797 if (!isMask_64(Mask)) 798 break; 799 unsigned TrailingOnes = countTrailingOnes(Mask); 800 // 32 trailing ones should use srliw via tablegen pattern. 801 if (TrailingOnes == 32 || ShAmt >= TrailingOnes) 802 break; 803 unsigned LShAmt = Subtarget->getXLen() - TrailingOnes; 804 SDNode *SLLI = 805 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0), 806 CurDAG->getTargetConstant(LShAmt, DL, VT)); 807 SDNode *SRLI = CurDAG->getMachineNode( 808 RISCV::SRLI, DL, VT, SDValue(SLLI, 0), 809 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT)); 810 ReplaceNode(Node, SRLI); 811 return; 812 } 813 case ISD::SRA: { 814 // Optimize (sra (sext_inreg X, i16), C) -> 815 // (srai (slli X, (XLen-16), (XLen-16) + C) 816 // And (sra (sext_inreg X, i8), C) -> 817 // (srai (slli X, (XLen-8), (XLen-8) + C) 818 // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal. 819 // This transform matches the code we get without Zbb. The shifts are more 820 // compressible, and this can help expose CSE opportunities in the sdiv by 821 // constant optimization. 822 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 823 if (!N1C) 824 break; 825 SDValue N0 = Node->getOperand(0); 826 if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse()) 827 break; 828 unsigned ShAmt = N1C->getZExtValue(); 829 unsigned ExtSize = 830 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits(); 831 // ExtSize of 32 should use sraiw via tablegen pattern. 
832 if (ExtSize >= 32 || ShAmt >= ExtSize) 833 break; 834 unsigned LShAmt = Subtarget->getXLen() - ExtSize; 835 SDNode *SLLI = 836 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0), 837 CurDAG->getTargetConstant(LShAmt, DL, VT)); 838 SDNode *SRAI = CurDAG->getMachineNode( 839 RISCV::SRAI, DL, VT, SDValue(SLLI, 0), 840 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT)); 841 ReplaceNode(Node, SRAI); 842 return; 843 } 844 case ISD::AND: { 845 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 846 if (!N1C) 847 break; 848 849 SDValue N0 = Node->getOperand(0); 850 851 bool LeftShift = N0.getOpcode() == ISD::SHL; 852 if (!LeftShift && N0.getOpcode() != ISD::SRL) 853 break; 854 855 auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 856 if (!C) 857 break; 858 unsigned C2 = C->getZExtValue(); 859 unsigned XLen = Subtarget->getXLen(); 860 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!"); 861 862 uint64_t C1 = N1C->getZExtValue(); 863 864 // Keep track of whether this is a c.andi. If we can't use c.andi, the 865 // shift pair might offer more compression opportunities. 866 // TODO: We could check for C extension here, but we don't have many lit 867 // tests with the C extension enabled so not checking gets better coverage. 868 // TODO: What if ANDI faster than shift? 869 bool IsCANDI = isInt<6>(N1C->getSExtValue()); 870 871 // Clear irrelevant bits in the mask. 872 if (LeftShift) 873 C1 &= maskTrailingZeros<uint64_t>(C2); 874 else 875 C1 &= maskTrailingOnes<uint64_t>(XLen - C2); 876 877 // Some transforms should only be done if the shift has a single use or 878 // the AND would become (srli (slli X, 32), 32) 879 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF); 880 881 SDValue X = N0.getOperand(0); 882 883 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask 884 // with c3 leading zeros. 
885 if (!LeftShift && isMask_64(C1)) { 886 unsigned Leading = XLen - (64 - countLeadingZeros(C1)); 887 if (C2 < Leading) { 888 // If the number of leading zeros is C2+32 this can be SRLIW. 889 if (C2 + 32 == Leading) { 890 SDNode *SRLIW = CurDAG->getMachineNode( 891 RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT)); 892 ReplaceNode(Node, SRLIW); 893 return; 894 } 895 896 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32) if 897 // c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1. 898 // 899 // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type 900 // legalized and goes through DAG combine. 901 if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() && 902 X.getOpcode() == ISD::SIGN_EXTEND_INREG && 903 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) { 904 SDNode *SRAIW = 905 CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0), 906 CurDAG->getTargetConstant(31, DL, VT)); 907 SDNode *SRLIW = CurDAG->getMachineNode( 908 RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0), 909 CurDAG->getTargetConstant(Leading - 32, DL, VT)); 910 ReplaceNode(Node, SRLIW); 911 return; 912 } 913 914 // (srli (slli x, c3-c2), c3). 915 // Skip if we could use (zext.w (sraiw X, C2)). 916 bool Skip = Subtarget->hasStdExtZba() && Leading == 32 && 917 X.getOpcode() == ISD::SIGN_EXTEND_INREG && 918 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32; 919 // Also Skip if we can use bexti. 920 Skip |= Subtarget->hasStdExtZbs() && Leading == XLen - 1; 921 if (OneUseOrZExtW && !Skip) { 922 SDNode *SLLI = CurDAG->getMachineNode( 923 RISCV::SLLI, DL, VT, X, 924 CurDAG->getTargetConstant(Leading - C2, DL, VT)); 925 SDNode *SRLI = CurDAG->getMachineNode( 926 RISCV::SRLI, DL, VT, SDValue(SLLI, 0), 927 CurDAG->getTargetConstant(Leading, DL, VT)); 928 ReplaceNode(Node, SRLI); 929 return; 930 } 931 } 932 } 933 934 // Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask 935 // shifted by c2 bits with c3 leading zeros. 
936 if (LeftShift && isShiftedMask_64(C1)) { 937 unsigned Leading = XLen - (64 - countLeadingZeros(C1)); 938 939 if (C2 + Leading < XLen && 940 C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) { 941 // Use slli.uw when possible. 942 if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) { 943 SDNode *SLLI_UW = CurDAG->getMachineNode( 944 RISCV::SLLI_UW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT)); 945 ReplaceNode(Node, SLLI_UW); 946 return; 947 } 948 949 // (srli (slli c2+c3), c3) 950 if (OneUseOrZExtW && !IsCANDI) { 951 SDNode *SLLI = CurDAG->getMachineNode( 952 RISCV::SLLI, DL, VT, X, 953 CurDAG->getTargetConstant(C2 + Leading, DL, VT)); 954 SDNode *SRLI = CurDAG->getMachineNode( 955 RISCV::SRLI, DL, VT, SDValue(SLLI, 0), 956 CurDAG->getTargetConstant(Leading, DL, VT)); 957 ReplaceNode(Node, SRLI); 958 return; 959 } 960 } 961 } 962 963 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a 964 // shifted mask with c2 leading zeros and c3 trailing zeros. 965 if (!LeftShift && isShiftedMask_64(C1)) { 966 unsigned Leading = XLen - (64 - countLeadingZeros(C1)); 967 unsigned Trailing = countTrailingZeros(C1); 968 if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW && !IsCANDI) { 969 unsigned SrliOpc = RISCV::SRLI; 970 // If the input is zexti32 we should use SRLIW. 971 if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) && 972 X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) { 973 SrliOpc = RISCV::SRLIW; 974 X = X.getOperand(0); 975 } 976 SDNode *SRLI = CurDAG->getMachineNode( 977 SrliOpc, DL, VT, X, 978 CurDAG->getTargetConstant(C2 + Trailing, DL, VT)); 979 SDNode *SLLI = 980 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLI, 0), 981 CurDAG->getTargetConstant(Trailing, DL, VT)); 982 ReplaceNode(Node, SLLI); 983 return; 984 } 985 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI. 
986 if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 && 987 OneUseOrZExtW && !IsCANDI) { 988 SDNode *SRLIW = CurDAG->getMachineNode( 989 RISCV::SRLIW, DL, VT, X, 990 CurDAG->getTargetConstant(C2 + Trailing, DL, VT)); 991 SDNode *SLLI = 992 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLIW, 0), 993 CurDAG->getTargetConstant(Trailing, DL, VT)); 994 ReplaceNode(Node, SLLI); 995 return; 996 } 997 } 998 999 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a 1000 // shifted mask with no leading zeros and c3 trailing zeros. 1001 if (LeftShift && isShiftedMask_64(C1)) { 1002 unsigned Leading = XLen - (64 - countLeadingZeros(C1)); 1003 unsigned Trailing = countTrailingZeros(C1); 1004 if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) { 1005 SDNode *SRLI = CurDAG->getMachineNode( 1006 RISCV::SRLI, DL, VT, X, 1007 CurDAG->getTargetConstant(Trailing - C2, DL, VT)); 1008 SDNode *SLLI = 1009 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLI, 0), 1010 CurDAG->getTargetConstant(Trailing, DL, VT)); 1011 ReplaceNode(Node, SLLI); 1012 return; 1013 } 1014 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI. 1015 if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) { 1016 SDNode *SRLIW = CurDAG->getMachineNode( 1017 RISCV::SRLIW, DL, VT, X, 1018 CurDAG->getTargetConstant(Trailing - C2, DL, VT)); 1019 SDNode *SLLI = 1020 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLIW, 0), 1021 CurDAG->getTargetConstant(Trailing, DL, VT)); 1022 ReplaceNode(Node, SLLI); 1023 return; 1024 } 1025 } 1026 1027 break; 1028 } 1029 case ISD::MUL: { 1030 // Special case for calculating (mul (and X, C2), C1) where the full product 1031 // fits in XLen bits. We can shift X left by the number of leading zeros in 1032 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final 1033 // product has XLen trailing zeros, putting it in the output of MULHU. 
This 1034 // can avoid materializing a constant in a register for C2. 1035 1036 // RHS should be a constant. 1037 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 1038 if (!N1C || !N1C->hasOneUse()) 1039 break; 1040 1041 // LHS should be an AND with constant. 1042 SDValue N0 = Node->getOperand(0); 1043 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1))) 1044 break; 1045 1046 uint64_t C2 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); 1047 1048 // Constant should be a mask. 1049 if (!isMask_64(C2)) 1050 break; 1051 1052 // This should be the only use of the AND unless we will use 1053 // (SRLI (SLLI X, 32), 32). We don't use a shift pair for other AND 1054 // constants. 1055 if (!N0.hasOneUse() && C2 != UINT64_C(0xFFFFFFFF)) 1056 break; 1057 1058 // If this can be an ANDI, ZEXT.H or ZEXT.W we don't need to do this 1059 // optimization. 1060 if (isInt<12>(C2) || 1061 (C2 == UINT64_C(0xFFFF) && 1062 (Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp())) || 1063 (C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba())) 1064 break; 1065 1066 // We need to shift left the AND input and C1 by a total of XLen bits. 1067 1068 // How far left do we need to shift the AND input? 1069 unsigned XLen = Subtarget->getXLen(); 1070 unsigned LeadingZeros = XLen - (64 - countLeadingZeros(C2)); 1071 1072 // The constant gets shifted by the remaining amount unless that would 1073 // shift bits out. 1074 uint64_t C1 = N1C->getZExtValue(); 1075 unsigned ConstantShift = XLen - LeadingZeros; 1076 if (ConstantShift > (XLen - (64 - countLeadingZeros(C1)))) 1077 break; 1078 1079 uint64_t ShiftedC1 = C1 << ConstantShift; 1080 // If this RV32, we need to sign extend the constant. 1081 if (XLen == 32) 1082 ShiftedC1 = SignExtend64<32>(ShiftedC1); 1083 1084 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))). 
1085 SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget); 1086 SDNode *SLLI = 1087 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0), 1088 CurDAG->getTargetConstant(LeadingZeros, DL, VT)); 1089 SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT, 1090 SDValue(SLLI, 0), SDValue(Imm, 0)); 1091 ReplaceNode(Node, MULHU); 1092 return; 1093 } 1094 case ISD::INTRINSIC_WO_CHAIN: { 1095 unsigned IntNo = Node->getConstantOperandVal(0); 1096 switch (IntNo) { 1097 // By default we do not custom select any intrinsic. 1098 default: 1099 break; 1100 case Intrinsic::riscv_vmsgeu: 1101 case Intrinsic::riscv_vmsge: { 1102 SDValue Src1 = Node->getOperand(1); 1103 SDValue Src2 = Node->getOperand(2); 1104 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu; 1105 bool IsCmpUnsignedZero = false; 1106 // Only custom select scalar second operand. 1107 if (Src2.getValueType() != XLenVT) 1108 break; 1109 // Small constants are handled with patterns. 1110 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) { 1111 int64_t CVal = C->getSExtValue(); 1112 if (CVal >= -15 && CVal <= 16) { 1113 if (!IsUnsigned || CVal != 0) 1114 break; 1115 IsCmpUnsignedZero = true; 1116 } 1117 } 1118 MVT Src1VT = Src1.getSimpleValueType(); 1119 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode; 1120 switch (RISCVTargetLowering::getLMUL(Src1VT)) { 1121 default: 1122 llvm_unreachable("Unexpected LMUL!"); 1123 #define CASE_VMSLT_VMNAND_VMSET_OPCODES(lmulenum, suffix, suffix_b) \ 1124 case RISCVII::VLMUL::lmulenum: \ 1125 VMSLTOpcode = IsUnsigned ? 
RISCV::PseudoVMSLTU_VX_##suffix \ 1126 : RISCV::PseudoVMSLT_VX_##suffix; \ 1127 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \ 1128 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b; \ 1129 break; 1130 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F8, MF8, B1) 1131 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F4, MF4, B2) 1132 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F2, MF2, B4) 1133 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_1, M1, B8) 1134 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_2, M2, B16) 1135 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_4, M4, B32) 1136 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_8, M8, B64) 1137 #undef CASE_VMSLT_VMNAND_VMSET_OPCODES 1138 } 1139 SDValue SEW = CurDAG->getTargetConstant( 1140 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT); 1141 SDValue VL; 1142 selectVLOp(Node->getOperand(3), VL); 1143 1144 // If vmsgeu with 0 immediate, expand it to vmset. 1145 if (IsCmpUnsignedZero) { 1146 ReplaceNode(Node, CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW)); 1147 return; 1148 } 1149 1150 // Expand to 1151 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd 1152 SDValue Cmp = SDValue( 1153 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}), 1154 0); 1155 ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT, 1156 {Cmp, Cmp, VL, SEW})); 1157 return; 1158 } 1159 case Intrinsic::riscv_vmsgeu_mask: 1160 case Intrinsic::riscv_vmsge_mask: { 1161 SDValue Src1 = Node->getOperand(2); 1162 SDValue Src2 = Node->getOperand(3); 1163 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask; 1164 bool IsCmpUnsignedZero = false; 1165 // Only custom select scalar second operand. 1166 if (Src2.getValueType() != XLenVT) 1167 break; 1168 // Small constants are handled with patterns. 
1169 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) { 1170 int64_t CVal = C->getSExtValue(); 1171 if (CVal >= -15 && CVal <= 16) { 1172 if (!IsUnsigned || CVal != 0) 1173 break; 1174 IsCmpUnsignedZero = true; 1175 } 1176 } 1177 MVT Src1VT = Src1.getSimpleValueType(); 1178 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode, 1179 VMOROpcode; 1180 switch (RISCVTargetLowering::getLMUL(Src1VT)) { 1181 default: 1182 llvm_unreachable("Unexpected LMUL!"); 1183 #define CASE_VMSLT_OPCODES(lmulenum, suffix, suffix_b) \ 1184 case RISCVII::VLMUL::lmulenum: \ 1185 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \ 1186 : RISCV::PseudoVMSLT_VX_##suffix; \ 1187 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \ 1188 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \ 1189 break; 1190 CASE_VMSLT_OPCODES(LMUL_F8, MF8, B1) 1191 CASE_VMSLT_OPCODES(LMUL_F4, MF4, B2) 1192 CASE_VMSLT_OPCODES(LMUL_F2, MF2, B4) 1193 CASE_VMSLT_OPCODES(LMUL_1, M1, B8) 1194 CASE_VMSLT_OPCODES(LMUL_2, M2, B16) 1195 CASE_VMSLT_OPCODES(LMUL_4, M4, B32) 1196 CASE_VMSLT_OPCODES(LMUL_8, M8, B64) 1197 #undef CASE_VMSLT_OPCODES 1198 } 1199 // Mask operations use the LMUL from the mask type. 
1200 switch (RISCVTargetLowering::getLMUL(VT)) { 1201 default: 1202 llvm_unreachable("Unexpected LMUL!"); 1203 #define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \ 1204 case RISCVII::VLMUL::lmulenum: \ 1205 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \ 1206 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \ 1207 VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \ 1208 break; 1209 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, MF8) 1210 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, MF4) 1211 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, MF2) 1212 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, M1) 1213 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, M2) 1214 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, M4) 1215 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, M8) 1216 #undef CASE_VMXOR_VMANDN_VMOR_OPCODES 1217 } 1218 SDValue SEW = CurDAG->getTargetConstant( 1219 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT); 1220 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT); 1221 SDValue VL; 1222 selectVLOp(Node->getOperand(5), VL); 1223 SDValue MaskedOff = Node->getOperand(1); 1224 SDValue Mask = Node->getOperand(4); 1225 1226 // If vmsgeu_mask with 0 immediate, expand it to vmor mask, maskedoff. 1227 if (IsCmpUnsignedZero) { 1228 // We don't need vmor if the MaskedOff and the Mask are the same 1229 // value. 1230 if (Mask == MaskedOff) { 1231 ReplaceUses(Node, Mask.getNode()); 1232 return; 1233 } 1234 ReplaceNode(Node, 1235 CurDAG->getMachineNode(VMOROpcode, DL, VT, 1236 {Mask, MaskedOff, VL, MaskSEW})); 1237 return; 1238 } 1239 1240 // If the MaskedOff value and the Mask are the same value use 1241 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt 1242 // This avoids needing to copy v0 to vd before starting the next sequence. 
1243 if (Mask == MaskedOff) { 1244 SDValue Cmp = SDValue( 1245 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}), 1246 0); 1247 ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT, 1248 {Mask, Cmp, VL, MaskSEW})); 1249 return; 1250 } 1251 1252 // Mask needs to be copied to V0. 1253 SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, 1254 RISCV::V0, Mask, SDValue()); 1255 SDValue Glue = Chain.getValue(1); 1256 SDValue V0 = CurDAG->getRegister(RISCV::V0, VT); 1257 1258 // Otherwise use 1259 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0 1260 // The result is mask undisturbed. 1261 // We use the same instructions to emulate mask agnostic behavior, because 1262 // the agnostic result can be either undisturbed or all 1. 1263 SDValue Cmp = SDValue( 1264 CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT, 1265 {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}), 1266 0); 1267 // vmxor.mm vd, vd, v0 is used to update active value. 1268 ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT, 1269 {Cmp, Mask, VL, MaskSEW})); 1270 return; 1271 } 1272 case Intrinsic::riscv_vsetvli_opt: 1273 case Intrinsic::riscv_vsetvlimax_opt: 1274 return selectVSETVLI(Node); 1275 } 1276 break; 1277 } 1278 case ISD::INTRINSIC_W_CHAIN: { 1279 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 1280 switch (IntNo) { 1281 // By default we do not custom select any intrinsic. 
1282 default: 1283 break; 1284 case Intrinsic::riscv_vsetvli: 1285 case Intrinsic::riscv_vsetvlimax: 1286 return selectVSETVLI(Node); 1287 case Intrinsic::riscv_vlseg2: 1288 case Intrinsic::riscv_vlseg3: 1289 case Intrinsic::riscv_vlseg4: 1290 case Intrinsic::riscv_vlseg5: 1291 case Intrinsic::riscv_vlseg6: 1292 case Intrinsic::riscv_vlseg7: 1293 case Intrinsic::riscv_vlseg8: { 1294 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false); 1295 return; 1296 } 1297 case Intrinsic::riscv_vlseg2_mask: 1298 case Intrinsic::riscv_vlseg3_mask: 1299 case Intrinsic::riscv_vlseg4_mask: 1300 case Intrinsic::riscv_vlseg5_mask: 1301 case Intrinsic::riscv_vlseg6_mask: 1302 case Intrinsic::riscv_vlseg7_mask: 1303 case Intrinsic::riscv_vlseg8_mask: { 1304 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false); 1305 return; 1306 } 1307 case Intrinsic::riscv_vlsseg2: 1308 case Intrinsic::riscv_vlsseg3: 1309 case Intrinsic::riscv_vlsseg4: 1310 case Intrinsic::riscv_vlsseg5: 1311 case Intrinsic::riscv_vlsseg6: 1312 case Intrinsic::riscv_vlsseg7: 1313 case Intrinsic::riscv_vlsseg8: { 1314 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true); 1315 return; 1316 } 1317 case Intrinsic::riscv_vlsseg2_mask: 1318 case Intrinsic::riscv_vlsseg3_mask: 1319 case Intrinsic::riscv_vlsseg4_mask: 1320 case Intrinsic::riscv_vlsseg5_mask: 1321 case Intrinsic::riscv_vlsseg6_mask: 1322 case Intrinsic::riscv_vlsseg7_mask: 1323 case Intrinsic::riscv_vlsseg8_mask: { 1324 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true); 1325 return; 1326 } 1327 case Intrinsic::riscv_vloxseg2: 1328 case Intrinsic::riscv_vloxseg3: 1329 case Intrinsic::riscv_vloxseg4: 1330 case Intrinsic::riscv_vloxseg5: 1331 case Intrinsic::riscv_vloxseg6: 1332 case Intrinsic::riscv_vloxseg7: 1333 case Intrinsic::riscv_vloxseg8: 1334 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true); 1335 return; 1336 case Intrinsic::riscv_vluxseg2: 1337 case Intrinsic::riscv_vluxseg3: 1338 case Intrinsic::riscv_vluxseg4: 1339 case 
Intrinsic::riscv_vluxseg5: 1340 case Intrinsic::riscv_vluxseg6: 1341 case Intrinsic::riscv_vluxseg7: 1342 case Intrinsic::riscv_vluxseg8: 1343 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false); 1344 return; 1345 case Intrinsic::riscv_vloxseg2_mask: 1346 case Intrinsic::riscv_vloxseg3_mask: 1347 case Intrinsic::riscv_vloxseg4_mask: 1348 case Intrinsic::riscv_vloxseg5_mask: 1349 case Intrinsic::riscv_vloxseg6_mask: 1350 case Intrinsic::riscv_vloxseg7_mask: 1351 case Intrinsic::riscv_vloxseg8_mask: 1352 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true); 1353 return; 1354 case Intrinsic::riscv_vluxseg2_mask: 1355 case Intrinsic::riscv_vluxseg3_mask: 1356 case Intrinsic::riscv_vluxseg4_mask: 1357 case Intrinsic::riscv_vluxseg5_mask: 1358 case Intrinsic::riscv_vluxseg6_mask: 1359 case Intrinsic::riscv_vluxseg7_mask: 1360 case Intrinsic::riscv_vluxseg8_mask: 1361 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false); 1362 return; 1363 case Intrinsic::riscv_vlseg8ff: 1364 case Intrinsic::riscv_vlseg7ff: 1365 case Intrinsic::riscv_vlseg6ff: 1366 case Intrinsic::riscv_vlseg5ff: 1367 case Intrinsic::riscv_vlseg4ff: 1368 case Intrinsic::riscv_vlseg3ff: 1369 case Intrinsic::riscv_vlseg2ff: { 1370 selectVLSEGFF(Node, /*IsMasked*/ false); 1371 return; 1372 } 1373 case Intrinsic::riscv_vlseg8ff_mask: 1374 case Intrinsic::riscv_vlseg7ff_mask: 1375 case Intrinsic::riscv_vlseg6ff_mask: 1376 case Intrinsic::riscv_vlseg5ff_mask: 1377 case Intrinsic::riscv_vlseg4ff_mask: 1378 case Intrinsic::riscv_vlseg3ff_mask: 1379 case Intrinsic::riscv_vlseg2ff_mask: { 1380 selectVLSEGFF(Node, /*IsMasked*/ true); 1381 return; 1382 } 1383 case Intrinsic::riscv_vloxei: 1384 case Intrinsic::riscv_vloxei_mask: 1385 case Intrinsic::riscv_vluxei: 1386 case Intrinsic::riscv_vluxei_mask: { 1387 bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask || 1388 IntNo == Intrinsic::riscv_vluxei_mask; 1389 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei || 1390 IntNo == 
Intrinsic::riscv_vloxei_mask; 1391 1392 MVT VT = Node->getSimpleValueType(0); 1393 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1394 1395 unsigned CurOp = 2; 1396 // Masked intrinsic only have TU version pseduo instructions. 1397 bool IsTU = IsMasked || !Node->getOperand(CurOp).isUndef(); 1398 SmallVector<SDValue, 8> Operands; 1399 if (IsTU) 1400 Operands.push_back(Node->getOperand(CurOp++)); 1401 else 1402 // Skip the undef passthru operand for nomask TA version pseudo 1403 CurOp++; 1404 1405 MVT IndexVT; 1406 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 1407 /*IsStridedOrIndexed*/ true, Operands, 1408 /*IsLoad=*/true, &IndexVT); 1409 1410 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 1411 "Element count mismatch"); 1412 1413 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1414 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 1415 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 1416 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 1417 report_fatal_error("The V extension does not support EEW=64 for index " 1418 "values when XLEN=32"); 1419 } 1420 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo( 1421 IsMasked, IsTU, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), 1422 static_cast<unsigned>(IndexLMUL)); 1423 MachineSDNode *Load = 1424 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1425 1426 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1427 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 1428 1429 ReplaceNode(Node, Load); 1430 return; 1431 } 1432 case Intrinsic::riscv_vlm: 1433 case Intrinsic::riscv_vle: 1434 case Intrinsic::riscv_vle_mask: 1435 case Intrinsic::riscv_vlse: 1436 case Intrinsic::riscv_vlse_mask: { 1437 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask || 1438 IntNo == Intrinsic::riscv_vlse_mask; 1439 bool IsStrided = 1440 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask; 1441 1442 MVT VT = 
Node->getSimpleValueType(0); 1443 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1444 1445 unsigned CurOp = 2; 1446 // The riscv_vlm intrinsic are always tail agnostic and no passthru operand. 1447 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm; 1448 // Masked intrinsic only have TU version pseduo instructions. 1449 bool IsTU = HasPassthruOperand && 1450 (IsMasked || !Node->getOperand(CurOp).isUndef()); 1451 SmallVector<SDValue, 8> Operands; 1452 if (IsTU) 1453 Operands.push_back(Node->getOperand(CurOp++)); 1454 else if (HasPassthruOperand) 1455 // Skip the undef passthru operand for nomask TA version pseudo 1456 CurOp++; 1457 1458 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 1459 Operands, /*IsLoad=*/true); 1460 1461 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1462 const RISCV::VLEPseudo *P = 1463 RISCV::getVLEPseudo(IsMasked, IsTU, IsStrided, /*FF*/ false, Log2SEW, 1464 static_cast<unsigned>(LMUL)); 1465 MachineSDNode *Load = 1466 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1467 1468 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1469 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 1470 1471 ReplaceNode(Node, Load); 1472 return; 1473 } 1474 case Intrinsic::riscv_vleff: 1475 case Intrinsic::riscv_vleff_mask: { 1476 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask; 1477 1478 MVT VT = Node->getSimpleValueType(0); 1479 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1480 1481 unsigned CurOp = 2; 1482 // Masked intrinsic only have TU version pseduo instructions. 
1483 bool IsTU = IsMasked || !Node->getOperand(CurOp).isUndef(); 1484 SmallVector<SDValue, 7> Operands; 1485 if (IsTU) 1486 Operands.push_back(Node->getOperand(CurOp++)); 1487 else 1488 // Skip the undef passthru operand for nomask TA version pseudo 1489 CurOp++; 1490 1491 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 1492 /*IsStridedOrIndexed*/ false, Operands, 1493 /*IsLoad=*/true); 1494 1495 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1496 const RISCV::VLEPseudo *P = 1497 RISCV::getVLEPseudo(IsMasked, IsTU, /*Strided*/ false, /*FF*/ true, 1498 Log2SEW, static_cast<unsigned>(LMUL)); 1499 MachineSDNode *Load = CurDAG->getMachineNode( 1500 P->Pseudo, DL, Node->getVTList(), Operands); 1501 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1502 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 1503 1504 ReplaceNode(Node, Load); 1505 return; 1506 } 1507 } 1508 break; 1509 } 1510 case ISD::INTRINSIC_VOID: { 1511 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 1512 switch (IntNo) { 1513 case Intrinsic::riscv_vsseg2: 1514 case Intrinsic::riscv_vsseg3: 1515 case Intrinsic::riscv_vsseg4: 1516 case Intrinsic::riscv_vsseg5: 1517 case Intrinsic::riscv_vsseg6: 1518 case Intrinsic::riscv_vsseg7: 1519 case Intrinsic::riscv_vsseg8: { 1520 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false); 1521 return; 1522 } 1523 case Intrinsic::riscv_vsseg2_mask: 1524 case Intrinsic::riscv_vsseg3_mask: 1525 case Intrinsic::riscv_vsseg4_mask: 1526 case Intrinsic::riscv_vsseg5_mask: 1527 case Intrinsic::riscv_vsseg6_mask: 1528 case Intrinsic::riscv_vsseg7_mask: 1529 case Intrinsic::riscv_vsseg8_mask: { 1530 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false); 1531 return; 1532 } 1533 case Intrinsic::riscv_vssseg2: 1534 case Intrinsic::riscv_vssseg3: 1535 case Intrinsic::riscv_vssseg4: 1536 case Intrinsic::riscv_vssseg5: 1537 case Intrinsic::riscv_vssseg6: 1538 case Intrinsic::riscv_vssseg7: 1539 case Intrinsic::riscv_vssseg8: 
{ 1540 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true); 1541 return; 1542 } 1543 case Intrinsic::riscv_vssseg2_mask: 1544 case Intrinsic::riscv_vssseg3_mask: 1545 case Intrinsic::riscv_vssseg4_mask: 1546 case Intrinsic::riscv_vssseg5_mask: 1547 case Intrinsic::riscv_vssseg6_mask: 1548 case Intrinsic::riscv_vssseg7_mask: 1549 case Intrinsic::riscv_vssseg8_mask: { 1550 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true); 1551 return; 1552 } 1553 case Intrinsic::riscv_vsoxseg2: 1554 case Intrinsic::riscv_vsoxseg3: 1555 case Intrinsic::riscv_vsoxseg4: 1556 case Intrinsic::riscv_vsoxseg5: 1557 case Intrinsic::riscv_vsoxseg6: 1558 case Intrinsic::riscv_vsoxseg7: 1559 case Intrinsic::riscv_vsoxseg8: 1560 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true); 1561 return; 1562 case Intrinsic::riscv_vsuxseg2: 1563 case Intrinsic::riscv_vsuxseg3: 1564 case Intrinsic::riscv_vsuxseg4: 1565 case Intrinsic::riscv_vsuxseg5: 1566 case Intrinsic::riscv_vsuxseg6: 1567 case Intrinsic::riscv_vsuxseg7: 1568 case Intrinsic::riscv_vsuxseg8: 1569 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false); 1570 return; 1571 case Intrinsic::riscv_vsoxseg2_mask: 1572 case Intrinsic::riscv_vsoxseg3_mask: 1573 case Intrinsic::riscv_vsoxseg4_mask: 1574 case Intrinsic::riscv_vsoxseg5_mask: 1575 case Intrinsic::riscv_vsoxseg6_mask: 1576 case Intrinsic::riscv_vsoxseg7_mask: 1577 case Intrinsic::riscv_vsoxseg8_mask: 1578 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true); 1579 return; 1580 case Intrinsic::riscv_vsuxseg2_mask: 1581 case Intrinsic::riscv_vsuxseg3_mask: 1582 case Intrinsic::riscv_vsuxseg4_mask: 1583 case Intrinsic::riscv_vsuxseg5_mask: 1584 case Intrinsic::riscv_vsuxseg6_mask: 1585 case Intrinsic::riscv_vsuxseg7_mask: 1586 case Intrinsic::riscv_vsuxseg8_mask: 1587 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false); 1588 return; 1589 case Intrinsic::riscv_vsoxei: 1590 case Intrinsic::riscv_vsoxei_mask: 1591 case Intrinsic::riscv_vsuxei: 1592 case 
Intrinsic::riscv_vsuxei_mask: { 1593 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask || 1594 IntNo == Intrinsic::riscv_vsuxei_mask; 1595 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei || 1596 IntNo == Intrinsic::riscv_vsoxei_mask; 1597 1598 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 1599 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1600 1601 unsigned CurOp = 2; 1602 SmallVector<SDValue, 8> Operands; 1603 Operands.push_back(Node->getOperand(CurOp++)); // Store value. 1604 1605 MVT IndexVT; 1606 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 1607 /*IsStridedOrIndexed*/ true, Operands, 1608 /*IsLoad=*/false, &IndexVT); 1609 1610 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 1611 "Element count mismatch"); 1612 1613 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1614 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 1615 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 1616 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 1617 report_fatal_error("The V extension does not support EEW=64 for index " 1618 "values when XLEN=32"); 1619 } 1620 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo( 1621 IsMasked, /*TU*/ false, IsOrdered, IndexLog2EEW, 1622 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL)); 1623 MachineSDNode *Store = 1624 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1625 1626 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1627 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 1628 1629 ReplaceNode(Node, Store); 1630 return; 1631 } 1632 case Intrinsic::riscv_vsm: 1633 case Intrinsic::riscv_vse: 1634 case Intrinsic::riscv_vse_mask: 1635 case Intrinsic::riscv_vsse: 1636 case Intrinsic::riscv_vsse_mask: { 1637 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask || 1638 IntNo == Intrinsic::riscv_vsse_mask; 1639 bool IsStrided = 1640 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask; 1641 1642 MVT 
VT = Node->getOperand(2)->getSimpleValueType(0); 1643 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1644 1645 unsigned CurOp = 2; 1646 SmallVector<SDValue, 8> Operands; 1647 Operands.push_back(Node->getOperand(CurOp++)); // Store value. 1648 1649 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 1650 Operands); 1651 1652 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1653 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo( 1654 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL)); 1655 MachineSDNode *Store = 1656 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1657 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1658 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 1659 1660 ReplaceNode(Node, Store); 1661 return; 1662 } 1663 } 1664 break; 1665 } 1666 case ISD::BITCAST: { 1667 MVT SrcVT = Node->getOperand(0).getSimpleValueType(); 1668 // Just drop bitcasts between vectors if both are fixed or both are 1669 // scalable. 1670 if ((VT.isScalableVector() && SrcVT.isScalableVector()) || 1671 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) { 1672 ReplaceUses(SDValue(Node, 0), Node->getOperand(0)); 1673 CurDAG->RemoveDeadNode(Node); 1674 return; 1675 } 1676 break; 1677 } 1678 case ISD::INSERT_SUBVECTOR: { 1679 SDValue V = Node->getOperand(0); 1680 SDValue SubV = Node->getOperand(1); 1681 SDLoc DL(SubV); 1682 auto Idx = Node->getConstantOperandVal(2); 1683 MVT SubVecVT = SubV.getSimpleValueType(); 1684 1685 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering(); 1686 MVT SubVecContainerVT = SubVecVT; 1687 // Establish the correct scalable-vector types for any fixed-length type. 
1688 if (SubVecVT.isFixedLengthVector()) 1689 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT); 1690 if (VT.isFixedLengthVector()) 1691 VT = TLI.getContainerForFixedLengthVector(VT); 1692 1693 const auto *TRI = Subtarget->getRegisterInfo(); 1694 unsigned SubRegIdx; 1695 std::tie(SubRegIdx, Idx) = 1696 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 1697 VT, SubVecContainerVT, Idx, TRI); 1698 1699 // If the Idx hasn't been completely eliminated then this is a subvector 1700 // insert which doesn't naturally align to a vector register. These must 1701 // be handled using instructions to manipulate the vector registers. 1702 if (Idx != 0) 1703 break; 1704 1705 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT); 1706 bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 || 1707 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 || 1708 SubVecLMUL == RISCVII::VLMUL::LMUL_F8; 1709 (void)IsSubVecPartReg; // Silence unused variable warning without asserts. 1710 assert((!IsSubVecPartReg || V.isUndef()) && 1711 "Expecting lowering to have created legal INSERT_SUBVECTORs when " 1712 "the subvector is smaller than a full-sized register"); 1713 1714 // If we haven't set a SubRegIdx, then we must be going between 1715 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy. 
1716 if (SubRegIdx == RISCV::NoSubRegister) { 1717 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(VT); 1718 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) == 1719 InRegClassID && 1720 "Unexpected subvector extraction"); 1721 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT); 1722 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, 1723 DL, VT, SubV, RC); 1724 ReplaceNode(Node, NewNode); 1725 return; 1726 } 1727 1728 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV); 1729 ReplaceNode(Node, Insert.getNode()); 1730 return; 1731 } 1732 case ISD::EXTRACT_SUBVECTOR: { 1733 SDValue V = Node->getOperand(0); 1734 auto Idx = Node->getConstantOperandVal(1); 1735 MVT InVT = V.getSimpleValueType(); 1736 SDLoc DL(V); 1737 1738 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering(); 1739 MVT SubVecContainerVT = VT; 1740 // Establish the correct scalable-vector types for any fixed-length type. 1741 if (VT.isFixedLengthVector()) 1742 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT); 1743 if (InVT.isFixedLengthVector()) 1744 InVT = TLI.getContainerForFixedLengthVector(InVT); 1745 1746 const auto *TRI = Subtarget->getRegisterInfo(); 1747 unsigned SubRegIdx; 1748 std::tie(SubRegIdx, Idx) = 1749 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 1750 InVT, SubVecContainerVT, Idx, TRI); 1751 1752 // If the Idx hasn't been completely eliminated then this is a subvector 1753 // extract which doesn't naturally align to a vector register. These must 1754 // be handled using instructions to manipulate the vector registers. 1755 if (Idx != 0) 1756 break; 1757 1758 // If we haven't set a SubRegIdx, then we must be going between 1759 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy. 
1760 if (SubRegIdx == RISCV::NoSubRegister) { 1761 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT); 1762 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) == 1763 InRegClassID && 1764 "Unexpected subvector extraction"); 1765 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT); 1766 SDNode *NewNode = 1767 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC); 1768 ReplaceNode(Node, NewNode); 1769 return; 1770 } 1771 1772 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V); 1773 ReplaceNode(Node, Extract.getNode()); 1774 return; 1775 } 1776 case ISD::SPLAT_VECTOR: 1777 case RISCVISD::VMV_S_X_VL: 1778 case RISCVISD::VFMV_S_F_VL: 1779 case RISCVISD::VMV_V_X_VL: 1780 case RISCVISD::VFMV_V_F_VL: { 1781 // Try to match splat of a scalar load to a strided load with stride of x0. 1782 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL || 1783 Node->getOpcode() == RISCVISD::VFMV_S_F_VL; 1784 bool HasPassthruOperand = Node->getOpcode() != ISD::SPLAT_VECTOR; 1785 if (HasPassthruOperand && !Node->getOperand(0).isUndef()) 1786 break; 1787 SDValue Src = HasPassthruOperand ? Node->getOperand(1) : Node->getOperand(0); 1788 auto *Ld = dyn_cast<LoadSDNode>(Src); 1789 if (!Ld) 1790 break; 1791 EVT MemVT = Ld->getMemoryVT(); 1792 // The memory VT should be the same size as the element type. 1793 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize()) 1794 break; 1795 if (!IsProfitableToFold(Src, Node, Node) || 1796 !IsLegalToFold(Src, Node, Node, TM.getOptLevel())) 1797 break; 1798 1799 SDValue VL; 1800 if (Node->getOpcode() == ISD::SPLAT_VECTOR) 1801 VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT); 1802 else if (IsScalarMove) { 1803 // We could deal with more VL if we update the VSETVLI insert pass to 1804 // avoid introducing more VSETVLI. 
// NOTE(review): the statements down to the first closing brace at column 0
// are the tail of a definition that begins above this chunk; they are
// reproduced unchanged.
      if (!isOneConstant(Node->getOperand(2)))
        break;
      selectVLOp(Node->getOperand(2), VL);
    } else
      selectVLOp(Node->getOperand(2), VL);

    unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
    SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);

    // Operand order: base pointer, X0 as the stride register, VL, log2(SEW),
    // and the chain of the load being replaced.
    SDValue Operands[] = {Ld->getBasePtr(),
                          CurDAG->getRegister(RISCV::X0, XLenVT), VL, SEW,
                          Ld->getChain()};

    RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
    const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
        /*IsMasked*/ false, /*IsTU*/ false, /*IsStrided*/ true, /*FF*/ false,
        Log2SEW, static_cast<unsigned>(LMUL));
    MachineSDNode *Load =
        CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);

    // Carry the memory operand of the original load over to the new node.
    CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});

    ReplaceNode(Node, Load);
    return;
  }
  }

  // Select the default instruction.
  SelectCode(Node);
}

// Handle the memory operand of an inline-asm statement. For the 'm' and 'A'
// constraints the operand is appended to OutOps unchanged and false is
// returned; any other constraint returns true without touching OutOps.
// NOTE(review): presumably true means "not handled", following the usual
// SelectionDAGISel hook convention -- confirm against the base class.
bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
    const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
  switch (ConstraintID) {
  case InlineAsm::Constraint_m:
    // We just support simple memory operands that have a single address
    // operand and need no special handling.
    OutOps.push_back(Op);
    return false;
  case InlineAsm::Constraint_A:
    OutOps.push_back(Op);
    return false;
  default:
    break;
  }

  return true;
}

// Match an address that is itself a frame index. On success Base receives
// the corresponding target frame index (typed as XLenVT), Offset receives a
// zero target constant, and true is returned. Base and Offset are left
// untouched when Addr is not a frame index.
bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base,
                                             SDValue &Offset) {
  if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
    Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());
    return true;
  }

  return false;
}

// Select a frame index and an optional immediate offset from an ADD or OR.
1866 bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base, 1867 SDValue &Offset) { 1868 if (SelectAddrFrameIndex(Addr, Base, Offset)) 1869 return true; 1870 1871 if (!CurDAG->isBaseWithConstantOffset(Addr)) 1872 return false; 1873 1874 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) { 1875 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue(); 1876 if (isInt<12>(CVal)) { 1877 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), 1878 Subtarget->getXLenVT()); 1879 Offset = CurDAG->getTargetConstant(CVal, SDLoc(Addr), 1880 Subtarget->getXLenVT()); 1881 return true; 1882 } 1883 } 1884 1885 return false; 1886 } 1887 1888 // Fold constant addresses. 1889 static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL, 1890 const MVT VT, const RISCVSubtarget *Subtarget, 1891 SDValue Addr, SDValue &Base, SDValue &Offset) { 1892 if (!isa<ConstantSDNode>(Addr)) 1893 return false; 1894 1895 int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue(); 1896 1897 // If the constant is a simm12, we can fold the whole constant and use X0 as 1898 // the base. If the constant can be materialized with LUI+simm12, use LUI as 1899 // the base. We can't use generateInstSeq because it favors LUI+ADDIW. 1900 int64_t Lo12 = SignExtend64<12>(CVal); 1901 int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12; 1902 if (!Subtarget->is64Bit() || isInt<32>(Hi)) { 1903 if (Hi) { 1904 int64_t Hi20 = (Hi >> 12) & 0xfffff; 1905 Base = SDValue( 1906 CurDAG->getMachineNode(RISCV::LUI, DL, VT, 1907 CurDAG->getTargetConstant(Hi20, DL, VT)), 1908 0); 1909 } else { 1910 Base = CurDAG->getRegister(RISCV::X0, VT); 1911 } 1912 Offset = CurDAG->getTargetConstant(Lo12, DL, VT); 1913 return true; 1914 } 1915 1916 // Ask how constant materialization would handle this constant. 
1917 RISCVMatInt::InstSeq Seq = 1918 RISCVMatInt::generateInstSeq(CVal, Subtarget->getFeatureBits()); 1919 1920 // If the last instruction would be an ADDI, we can fold its immediate and 1921 // emit the rest of the sequence as the base. 1922 if (Seq.back().Opc != RISCV::ADDI) 1923 return false; 1924 Lo12 = Seq.back().Imm; 1925 1926 // Drop the last instruction. 1927 Seq.pop_back(); 1928 assert(!Seq.empty() && "Expected more instructions in sequence"); 1929 1930 Base = SDValue(selectImmSeq(CurDAG, DL, VT, Seq), 0); 1931 Offset = CurDAG->getTargetConstant(Lo12, DL, VT); 1932 return true; 1933 } 1934 1935 bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base, 1936 SDValue &Offset) { 1937 if (SelectAddrFrameIndex(Addr, Base, Offset)) 1938 return true; 1939 1940 SDLoc DL(Addr); 1941 MVT VT = Addr.getSimpleValueType(); 1942 1943 if (Addr.getOpcode() == RISCVISD::ADD_LO) { 1944 Base = Addr.getOperand(0); 1945 Offset = Addr.getOperand(1); 1946 return true; 1947 } 1948 1949 if (CurDAG->isBaseWithConstantOffset(Addr)) { 1950 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue(); 1951 if (isInt<12>(CVal)) { 1952 Base = Addr.getOperand(0); 1953 if (Base.getOpcode() == RISCVISD::ADD_LO) { 1954 SDValue LoOperand = Base.getOperand(1); 1955 if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) { 1956 // If the Lo in (ADD_LO hi, lo) is a global variable's address 1957 // (its low part, really), then we can rely on the alignment of that 1958 // variable to provide a margin of safety before low part can overflow 1959 // the 12 bits of the load/store offset. Check if CVal falls within 1960 // that margin; if so (low part + CVal) can't overflow. 
1961 const DataLayout &DL = CurDAG->getDataLayout(); 1962 Align Alignment = commonAlignment( 1963 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset()); 1964 if (CVal == 0 || Alignment > CVal) { 1965 int64_t CombinedOffset = CVal + GA->getOffset(); 1966 Base = Base.getOperand(0); 1967 Offset = CurDAG->getTargetGlobalAddress( 1968 GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(), 1969 CombinedOffset, GA->getTargetFlags()); 1970 return true; 1971 } 1972 } 1973 } 1974 1975 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base)) 1976 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT); 1977 Offset = CurDAG->getTargetConstant(CVal, DL, VT); 1978 return true; 1979 } 1980 } 1981 1982 // Handle ADD with large immediates. 1983 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) { 1984 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue(); 1985 assert(!isInt<12>(CVal) && "simm12 not already handled?"); 1986 1987 if (isInt<12>(CVal / 2) && isInt<12>(CVal - CVal / 2)) { 1988 // We can use an ADDI for part of the offset and fold the rest into the 1989 // load/store. This mirrors the AddiPair PatFrag in RISCVInstrInfo.td. 1990 int64_t Adj = CVal < 0 ? -2048 : 2047; 1991 Base = SDValue( 1992 CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0), 1993 CurDAG->getTargetConstant(Adj, DL, VT)), 1994 0); 1995 Offset = CurDAG->getTargetConstant(CVal - Adj, DL, VT); 1996 return true; 1997 } 1998 } 1999 2000 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset)) 2001 return true; 2002 2003 Base = Addr; 2004 Offset = CurDAG->getTargetConstant(0, DL, VT); 2005 return true; 2006 } 2007 2008 bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth, 2009 SDValue &ShAmt) { 2010 // Shift instructions on RISCV only read the lower 5 or 6 bits of the shift 2011 // amount. If there is an AND on the shift amount, we can bypass it if it 2012 // doesn't affect any of those bits. 
2013 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) { 2014 const APInt &AndMask = N->getConstantOperandAPInt(1); 2015 2016 // Since the max shift amount is a power of 2 we can subtract 1 to make a 2017 // mask that covers the bits needed to represent all shift amounts. 2018 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!"); 2019 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1); 2020 2021 if (ShMask.isSubsetOf(AndMask)) { 2022 ShAmt = N.getOperand(0); 2023 return true; 2024 } 2025 2026 // SimplifyDemandedBits may have optimized the mask so try restoring any 2027 // bits that are known zero. 2028 KnownBits Known = CurDAG->computeKnownBits(N->getOperand(0)); 2029 if (ShMask.isSubsetOf(AndMask | Known.Zero)) { 2030 ShAmt = N.getOperand(0); 2031 return true; 2032 } 2033 } else if (N.getOpcode() == ISD::SUB && 2034 isa<ConstantSDNode>(N.getOperand(0))) { 2035 uint64_t Imm = N.getConstantOperandVal(0); 2036 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to 2037 // generate a NEG instead of a SUB of a constant. 2038 if (Imm != 0 && Imm % ShiftWidth == 0) { 2039 SDLoc DL(N); 2040 EVT VT = N.getValueType(); 2041 SDValue Zero = CurDAG->getRegister(RISCV::X0, VT); 2042 unsigned NegOpc = VT == MVT::i64 ? 
RISCV::SUBW : RISCV::SUB; 2043 MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero, 2044 N.getOperand(1)); 2045 ShAmt = SDValue(Neg, 0); 2046 return true; 2047 } 2048 } 2049 2050 ShAmt = N; 2051 return true; 2052 } 2053 2054 bool RISCVDAGToDAGISel::selectSExti32(SDValue N, SDValue &Val) { 2055 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG && 2056 cast<VTSDNode>(N.getOperand(1))->getVT() == MVT::i32) { 2057 Val = N.getOperand(0); 2058 return true; 2059 } 2060 MVT VT = N.getSimpleValueType(); 2061 if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - 32)) { 2062 Val = N; 2063 return true; 2064 } 2065 2066 return false; 2067 } 2068 2069 bool RISCVDAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) { 2070 if (N.getOpcode() == ISD::AND) { 2071 auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); 2072 if (C && C->getZExtValue() == UINT64_C(0xFFFFFFFF)) { 2073 Val = N.getOperand(0); 2074 return true; 2075 } 2076 } 2077 MVT VT = N.getSimpleValueType(); 2078 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), 32); 2079 if (CurDAG->MaskedValueIsZero(N, Mask)) { 2080 Val = N; 2081 return true; 2082 } 2083 2084 return false; 2085 } 2086 2087 /// Look for various patterns that can be done with a SHL that can be folded 2088 /// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which 2089 /// SHXADD we are trying to match. 
2090 bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt, 2091 SDValue &Val) { 2092 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) { 2093 SDValue N0 = N.getOperand(0); 2094 2095 bool LeftShift = N0.getOpcode() == ISD::SHL; 2096 if ((LeftShift || N0.getOpcode() == ISD::SRL) && 2097 isa<ConstantSDNode>(N0.getOperand(1))) { 2098 uint64_t Mask = N.getConstantOperandVal(1); 2099 unsigned C2 = N0.getConstantOperandVal(1); 2100 2101 unsigned XLen = Subtarget->getXLen(); 2102 if (LeftShift) 2103 Mask &= maskTrailingZeros<uint64_t>(C2); 2104 else 2105 Mask &= maskTrailingOnes<uint64_t>(XLen - C2); 2106 2107 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no 2108 // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3 2109 // followed by a SHXADD with c3 for the X amount. 2110 if (isShiftedMask_64(Mask)) { 2111 unsigned Leading = XLen - (64 - countLeadingZeros(Mask)); 2112 unsigned Trailing = countTrailingZeros(Mask); 2113 if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) { 2114 SDLoc DL(N); 2115 EVT VT = N.getValueType(); 2116 Val = SDValue(CurDAG->getMachineNode( 2117 RISCV::SRLI, DL, VT, N0.getOperand(0), 2118 CurDAG->getTargetConstant(Trailing - C2, DL, VT)), 2119 0); 2120 return true; 2121 } 2122 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2 2123 // leading zeros and c3 trailing zeros. We can use an SRLI by C3 2124 // followed by a SHXADD using c3 for the X amount. 
2125 if (!LeftShift && Leading == C2 && Trailing == ShAmt) { 2126 SDLoc DL(N); 2127 EVT VT = N.getValueType(); 2128 Val = SDValue( 2129 CurDAG->getMachineNode( 2130 RISCV::SRLI, DL, VT, N0.getOperand(0), 2131 CurDAG->getTargetConstant(Leading + Trailing, DL, VT)), 2132 0); 2133 return true; 2134 } 2135 } 2136 } 2137 } 2138 2139 bool LeftShift = N.getOpcode() == ISD::SHL; 2140 if ((LeftShift || N.getOpcode() == ISD::SRL) && 2141 isa<ConstantSDNode>(N.getOperand(1))) { 2142 SDValue N0 = N.getOperand(0); 2143 if (N0.getOpcode() == ISD::AND && N0.hasOneUse() && 2144 isa<ConstantSDNode>(N0.getOperand(1))) { 2145 uint64_t Mask = N0.getConstantOperandVal(1); 2146 if (isShiftedMask_64(Mask)) { 2147 unsigned C1 = N.getConstantOperandVal(1); 2148 unsigned XLen = Subtarget->getXLen(); 2149 unsigned Leading = XLen - (64 - countLeadingZeros(Mask)); 2150 unsigned Trailing = countTrailingZeros(Mask); 2151 // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and 2152 // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD. 2153 if (LeftShift && Leading == 32 && Trailing > 0 && 2154 (Trailing + C1) == ShAmt) { 2155 SDLoc DL(N); 2156 EVT VT = N.getValueType(); 2157 Val = SDValue(CurDAG->getMachineNode( 2158 RISCV::SRLIW, DL, VT, N0.getOperand(0), 2159 CurDAG->getTargetConstant(Trailing, DL, VT)), 2160 0); 2161 return true; 2162 } 2163 // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and 2164 // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD. 2165 if (!LeftShift && Leading == 32 && Trailing > C1 && 2166 (Trailing - C1) == ShAmt) { 2167 SDLoc DL(N); 2168 EVT VT = N.getValueType(); 2169 Val = SDValue(CurDAG->getMachineNode( 2170 RISCV::SRLIW, DL, VT, N0.getOperand(0), 2171 CurDAG->getTargetConstant(Trailing, DL, VT)), 2172 0); 2173 return true; 2174 } 2175 } 2176 } 2177 } 2178 2179 return false; 2180 } 2181 2182 // Return true if all users of this SDNode* only consume the lower \p Bits. 
// This can be used to form W instructions for add/sub/mul/shl even when the
// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
// SimplifyDemandedBits has made it so some users see a sext_inreg and some
// don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
// the add/sub/mul/shl to become non-W instructions. By checking the users we
// may be able to use a W instruction and CSE with the other instruction if
// this has happened. We could try to detect that the CSE opportunity exists
// before doing this, but that would be more complicated.
// TODO: Does this need to look through AND/OR/XOR to their users to find more
// opportunities.
bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const {
  // Only the opcodes listed here (or a constant) are expected as the root.
  assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
          Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
          Node->getOpcode() == ISD::SRL ||
          Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
          Node->getOpcode() == RISCVISD::GREV ||
          Node->getOpcode() == RISCVISD::GORC ||
          isa<ConstantSDNode>(Node)) &&
         "Unexpected opcode");

  // Every use must be an already-selected machine instruction from the list
  // below and must pass that instruction's width check; the first user that
  // fails either test rejects the whole node.
  for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
    SDNode *User = *UI;
    // Users of this node should have already been instruction selected
    if (!User->isMachineOpcode())
      return false;

    // TODO: Add more opcodes?
    switch (User->getMachineOpcode()) {
    default:
      return false;
    // The users in this group are accepted only when Bits >= 32.
    case RISCV::ADDW:
    case RISCV::ADDIW:
    case RISCV::SUBW:
    case RISCV::MULW:
    case RISCV::SLLW:
    case RISCV::SLLIW:
    case RISCV::SRAW:
    case RISCV::SRAIW:
    case RISCV::SRLW:
    case RISCV::SRLIW:
    case RISCV::DIVW:
    case RISCV::DIVUW:
    case RISCV::REMW:
    case RISCV::REMUW:
    case RISCV::ROLW:
    case RISCV::RORW:
    case RISCV::RORIW:
    case RISCV::CLZW:
    case RISCV::CTZW:
    case RISCV::CPOPW:
    case RISCV::SLLI_UW:
    case RISCV::FMV_W_X:
    case RISCV::FCVT_H_W:
    case RISCV::FCVT_H_WU:
    case RISCV::FCVT_S_W:
    case RISCV::FCVT_S_WU:
    case RISCV::FCVT_D_W:
    case RISCV::FCVT_D_WU:
      if (Bits < 32)
        return false;
      break;
    case RISCV::SLLI:
      // SLLI only uses the lower (XLen - ShAmt) bits.
      if (Bits < Subtarget->getXLen() - User->getConstantOperandVal(1))
        return false;
      break;
    case RISCV::ANDI:
      // ANDI is accepted when Bits reaches the highest set bit of its
      // immediate operand.
      if (Bits < (64 - countLeadingZeros(User->getConstantOperandVal(1))))
        return false;
      break;
    case RISCV::SEXT_B:
      if (Bits < 8)
        return false;
      break;
    case RISCV::SEXT_H:
    case RISCV::FMV_H_X:
    case RISCV::ZEXT_H_RV32:
    case RISCV::ZEXT_H_RV64:
      if (Bits < 16)
        return false;
      break;
    case RISCV::ADD_UW:
    case RISCV::SH1ADD_UW:
    case RISCV::SH2ADD_UW:
    case RISCV::SH3ADD_UW:
      // The first operand to add.uw/shXadd.uw is implicitly zero extended from
      // 32 bits.
      if (UI.getOperandNo() != 0 || Bits < 32)
        return false;
      break;
    // Stores are accepted only when Node is operand 0 of the store.
    // NOTE(review): presumably operand 0 is the stored value -- confirm
    // against the instruction definitions.
    case RISCV::SB:
      if (UI.getOperandNo() != 0 || Bits < 8)
        return false;
      break;
    case RISCV::SH:
      if (UI.getOperandNo() != 0 || Bits < 16)
        return false;
      break;
    case RISCV::SW:
      if (UI.getOperandNo() != 0 || Bits < 32)
        return false;
      break;
    }
  }

  return true;
}

// Select VL as a 5 bit immediate or a value that will become a register. This
// allows us to choose between VSETIVLI or VSETVLI later.
// Always returns true; VL receives the chosen operand.
bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
  auto *C = dyn_cast<ConstantSDNode>(N);
  if (C && isUInt<5>(C->getZExtValue())) {
    // A constant that fits in 5 unsigned bits becomes a target constant.
    VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
                                   N->getValueType(0));
  } else if (C && C->isAllOnesValue()) {
    // Treat all ones as VLMax.
    VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
                                   N->getValueType(0));
  } else if (isa<RegisterSDNode>(N) &&
             cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
    // All our VL operands use an operand that allows GPRNoX0 or an immediate
    // as the register class. Convert X0 to a special immediate to pass the
    // MachineVerifier. This is recognized specially by the vsetvli insertion
    // pass.
    VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
                                   N->getValueType(0));
  } else {
    // Anything else is passed through unchanged.
    VL = N;
  }

  return true;
}

// Match a VMV_V_X_VL whose operand 0 is undef and return its operand 1 (the
// splatted scalar) in SplatVal.
bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
  if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef())
    return false;
  SplatVal = N.getOperand(1);
  return true;
}

// Predicate used by selectVSplatSimmHelper to accept or reject a candidate
// splat immediate.
using ValidateFn = bool (*)(int64_t);

// Shared matcher for constant splats. Matches a VMV_V_X_VL with an undef
// operand 0 and a constant operand 1, sign-extends the constant from the
// vector element width when that is narrower than XLenVT, and succeeds only
// if ValidateImm accepts the result. On success SplatVal is an XLenVT target
// constant holding the adjusted immediate.
static bool selectVSplatSimmHelper(SDValue N, SDValue &SplatVal,
                                   SelectionDAG &DAG,
                                   const RISCVSubtarget &Subtarget,
                                   ValidateFn ValidateImm) {
  if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() ||
      !isa<ConstantSDNode>(N.getOperand(1)))
    return false;

  int64_t SplatImm =
      cast<ConstantSDNode>(N.getOperand(1))->getSExtValue();

  // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
  // type is wider than the resulting vector element type: an implicit
  // truncation first takes place. Therefore, perform a manual
  // truncation/sign-extension in order to ignore any truncated bits and catch
  // any zero-extended immediate.
  // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
  // sign-extending to (XLenVT -1).
  MVT XLenVT = Subtarget.getXLenVT();
  assert(XLenVT == N.getOperand(1).getSimpleValueType() &&
         "Unexpected splat operand type");
  MVT EltVT = N.getSimpleValueType().getVectorElementType();
  if (EltVT.bitsLT(XLenVT))
    SplatImm = SignExtend64(SplatImm, EltVT.getSizeInBits());

  if (!ValidateImm(SplatImm))
    return false;

  SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), XLenVT);
  return true;
}

// Match a constant splat whose adjusted immediate satisfies isInt<5>.
bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
  return selectVSplatSimmHelper(N, SplatVal, *CurDAG, *Subtarget,
                                [](int64_t Imm) { return isInt<5>(Imm); });
}

// Match a constant splat whose adjusted immediate is in [-15, 16]: the
// isInt<5> range with -16 excluded and 16 added.
bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
  return selectVSplatSimmHelper(
      N, SplatVal, *CurDAG, *Subtarget,
      [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; });
}

// Same range as selectVSplatSimm5Plus1, additionally rejecting zero.
bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
                                                      SDValue &SplatVal) {
  return selectVSplatSimmHelper(
      N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) {
        return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);
      });
}

// Match a constant splat (VMV_V_X_VL with an undef operand 0) whose constant
// satisfies isUInt<5>. Unlike the simm helpers above, the constant is not
// sign-extended from the element width before the check.
bool RISCVDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &SplatVal) {
  if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() ||
      !isa<ConstantSDNode>(N.getOperand(1)))
    return false;

  int64_t SplatImm =
      cast<ConstantSDNode>(N.getOperand(1))->getSExtValue();

  if (!isUInt<5>(SplatImm))
    return false;

  SplatVal =
      CurDAG->getTargetConstant(SplatImm, SDLoc(N), Subtarget->getXLenVT());

  return true;
}

// Match a constant that, once sign-extended from its low Width bits,
// satisfies isInt<5>. On success Imm is an XLenVT target constant holding
// the sign-extended value.
bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
                                       SDValue &Imm) {
  if (auto *C = dyn_cast<ConstantSDNode>(N)) {
    int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);

    if (!isInt<5>(ImmVal))
      return false;

    Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT());
    return true;
  }

  return false;
}

// Merge an ADDI into the offset of a load/store instruction where possible.
// (load (addi base, off1), off2) -> (load base, off1+off2)
// (store val, (addi base, off1), off2) -> (store val, base, off1+off2)
// (load (add base, (addi src, off1)), off2)
//    -> (load (add base, src), off1+off2)
// (store val, (add base, (addi src, off1)), off2)
//    -> (store val, (add base, src), off1+off2)
// This is possible when off1+off2 fits a 12-bit immediate.
// NOTE(review): the body below only accepts a base that is itself an ADDI
// (or an ADDIW fed by a LUI); the two (add base, (addi ...)) forms listed
// above do not appear to be handled here -- confirm.
// Returns true if N was updated in place.
bool RISCVDAGToDAGISel::doPeepholeLoadStoreADDI(SDNode *N) {
  unsigned OffsetOpIdx, BaseOpIdx;
  if (!hasMemOffset(N, BaseOpIdx, OffsetOpIdx))
    return false;

  // The existing offset must be a constant for the merge to be computed.
  if (!isa<ConstantSDNode>(N->getOperand(OffsetOpIdx)))
    return false;

  SDValue Base = N->getOperand(BaseOpIdx);

  if (!Base.isMachineOpcode())
    return false;

  if (Base.getMachineOpcode() == RISCV::ADDI) {
    // If the base is an ADDI, we can merge it in to the load/store.
  } else if (Base.getMachineOpcode() == RISCV::ADDIW &&
             isa<ConstantSDNode>(Base.getOperand(1)) &&
             Base.getOperand(0).isMachineOpcode() &&
             Base.getOperand(0).getMachineOpcode() == RISCV::LUI &&
             isa<ConstantSDNode>(Base.getOperand(0).getOperand(0))) {
    // ADDIW can be merged if it's part of LUI+ADDIW constant materialization
    // and LUI+ADDI would have produced the same result. This is true for all
    // simm32 values except 0x7ffff800-0x7fffffff.
    int64_t Offset =
      SignExtend64<32>(Base.getOperand(0).getConstantOperandVal(0) << 12);
    Offset += cast<ConstantSDNode>(Base.getOperand(1))->getSExtValue();
    if (!isInt<32>(Offset))
      return false;
  } else
    return false;

  // ImmOperand is off1 (the immediate of the ADDI/ADDIW); Offset2 is off2
  // (the offset already present on the load/store).
  SDValue ImmOperand = Base.getOperand(1);
  uint64_t Offset2 = N->getConstantOperandVal(OffsetOpIdx);

  if (auto *Const = dyn_cast<ConstantSDNode>(ImmOperand)) {
    // Plain constant: the sum must still fit in a signed 12-bit immediate.
    int64_t Offset1 = Const->getSExtValue();
    int64_t CombinedOffset = Offset1 + Offset2;
    if (!isInt<12>(CombinedOffset))
      return false;
    ImmOperand = CurDAG->getTargetConstant(CombinedOffset, SDLoc(ImmOperand),
                                           ImmOperand.getValueType());
  } else if (auto *GA = dyn_cast<GlobalAddressSDNode>(ImmOperand)) {
    // If the off1 in (addi base, off1) is a global variable's address (its
    // low part, really), then we can rely on the alignment of that variable
    // to provide a margin of safety before off1 can overflow the 12 bits.
    // Check if off2 falls within that margin; if so off1+off2 can't overflow.
    const DataLayout &DL = CurDAG->getDataLayout();
    Align Alignment = commonAlignment(GA->getGlobal()->getPointerAlignment(DL),
                                      GA->getOffset());
    if (Offset2 != 0 && Alignment <= Offset2)
      return false;
    int64_t Offset1 = GA->getOffset();
    int64_t CombinedOffset = Offset1 + Offset2;
    ImmOperand = CurDAG->getTargetGlobalAddress(
        GA->getGlobal(), SDLoc(ImmOperand), ImmOperand.getValueType(),
        CombinedOffset, GA->getTargetFlags());
  } else if (auto *CP = dyn_cast<ConstantPoolSDNode>(ImmOperand)) {
    // Ditto.
    Align Alignment = commonAlignment(CP->getAlign(), CP->getOffset());
    if (Offset2 != 0 && Alignment <= Offset2)
      return false;
    int64_t Offset1 = CP->getOffset();
    int64_t CombinedOffset = Offset1 + Offset2;
    ImmOperand = CurDAG->getTargetConstantPool(
        CP->getConstVal(), ImmOperand.getValueType(), CP->getAlign(),
        CombinedOffset, CP->getTargetFlags());
  } else {
    return false;
  }

  LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
  LLVM_DEBUG(Base->dump(CurDAG));
  LLVM_DEBUG(dbgs() << "\nN: ");
  LLVM_DEBUG(N->dump(CurDAG));
  LLVM_DEBUG(dbgs() << "\n");

  // Modify the offset operand of the load/store.
  // The new base is the ADDI's own first operand; operand 2 (load) or
  // operand 3 (store) is carried over unchanged.
  if (BaseOpIdx == 0) { // Load
    N = CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand,
                                   N->getOperand(2));
  } else { // Store
    N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0),
                                   ImmOperand, N->getOperand(3));
  }

  return true;
}

// Try to remove sext.w if the input is a W instruction or can be made into
// a W instruction cheaply.
// Returns true if the uses of N were redirected to another node.
bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
  // Look for the sext.w pattern, addiw rd, rs1, 0.
  if (N->getMachineOpcode() != RISCV::ADDIW ||
      !isNullConstant(N->getOperand(1)))
    return false;

  SDValue N0 = N->getOperand(0);
  if (!N0.isMachineOpcode())
    return false;

  switch (N0.getMachineOpcode()) {
  default:
    break;
  case RISCV::ADD:
  case RISCV::ADDI:
  case RISCV::SUB:
  case RISCV::MUL:
  case RISCV::SLLI: {
    // Convert sext.w+add/sub/mul to their W instructions. This will create
    // a new independent instruction. This improves latency.
    unsigned Opc;
    switch (N0.getMachineOpcode()) {
    default:
      llvm_unreachable("Unexpected opcode!");
    case RISCV::ADD:  Opc = RISCV::ADDW;  break;
    case RISCV::ADDI: Opc = RISCV::ADDIW; break;
    case RISCV::SUB:  Opc = RISCV::SUBW;  break;
    case RISCV::MUL:  Opc = RISCV::MULW;  break;
    case RISCV::SLLI: Opc = RISCV::SLLIW; break;
    }

    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);

    // Shift amount needs to be uimm5.
    if (N0.getMachineOpcode() == RISCV::SLLI &&
        !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
      break;

    // The W-form node takes N0's operands and replaces the uses of N; N0
    // itself is not modified.
    SDNode *Result =
        CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
                               N00, N01);
    ReplaceUses(N, Result);
    return true;
  }
  case RISCV::ADDW:
  case RISCV::ADDIW:
  case RISCV::SUBW:
  case RISCV::MULW:
  case RISCV::SLLIW:
  case RISCV::GREVIW:
  case RISCV::GORCIW:
    // Result is already sign extended just remove the sext.w.
    // NOTE: We only handle the nodes that are selected with hasAllWUsers.
    ReplaceUses(N, N0.getNode());
    return true;
  }

  return false;
}

// Optimize masked RVV pseudo instructions with a known all-ones mask to their
// corresponding "unmasked" pseudo versions. The mask we're interested in will
// take the form of a V0 physical register operand, with a glued
// register-setting instruction.
// Returns true if the uses of N were redirected to a new unmasked node.
bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) {
  // Only opcodes with an entry in the masked-pseudo table are candidates.
  const RISCV::RISCVMaskedPseudoInfo *I =
      RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
  if (!I)
    return false;

  unsigned MaskOpIdx = I->MaskOpIdx;

  // Check that we're using V0 as a mask register.
  if (!isa<RegisterSDNode>(N->getOperand(MaskOpIdx)) ||
      cast<RegisterSDNode>(N->getOperand(MaskOpIdx))->getReg() != RISCV::V0)
    return false;

  // The glued user defines V0.
  const auto *Glued = N->getGluedNode();

  if (!Glued || Glued->getOpcode() != ISD::CopyToReg)
    return false;

  // Check that we're defining V0 as a mask register.
  if (!isa<RegisterSDNode>(Glued->getOperand(1)) ||
      cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0)
    return false;

  // Check the instruction defining V0; it needs to be a VMSET pseudo.
  SDValue MaskSetter = Glued->getOperand(2);

  const auto IsVMSet = [](unsigned Opc) {
    return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
           Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
           Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
           Opc == RISCV::PseudoVMSET_M_B8;
  };

  // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
  // undefined behaviour if it's the wrong bitwidth, so we could choose to
  // assume that it's all-ones? Same applies to its VL.
  if (!MaskSetter->isMachineOpcode() || !IsVMSet(MaskSetter.getMachineOpcode()))
    return false;

  // Retrieve the tail policy operand index, if any.
  Optional<unsigned> TailPolicyOpIdx;
  const RISCVInstrInfo &TII = *Subtarget->getInstrInfo();
  const MCInstrDesc &MaskedMCID = TII.get(N->getMachineOpcode());

  // IsTA stays true unless the policy operand lacks TAIL_AGNOSTIC and the
  // operand at index 0 is not undef.
  bool IsTA = true;
  if (RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags)) {
    // The last operand of the pseudo is the policy op, but we might have a
    // Glue operand last. We might also have a chain.
    TailPolicyOpIdx = N->getNumOperands() - 1;
    if (N->getOperand(*TailPolicyOpIdx).getValueType() == MVT::Glue)
      (*TailPolicyOpIdx)--;
    if (N->getOperand(*TailPolicyOpIdx).getValueType() == MVT::Other)
      (*TailPolicyOpIdx)--;

    if (!(N->getConstantOperandVal(*TailPolicyOpIdx) &
          RISCVII::TAIL_AGNOSTIC)) {
      // Keep the true-masked instruction when there is no unmasked TU
      // instruction
      if (I->UnmaskedTUPseudo == I->MaskedPseudo && !N->getOperand(0).isUndef())
        return false;
      // We can't use TA if the tie-operand is not IMPLICIT_DEF
      if (!N->getOperand(0).isUndef())
        IsTA = false;
    }
  }

  unsigned Opc = IsTA ? I->UnmaskedPseudo : I->UnmaskedTUPseudo;

  // Check that we're dropping the mask operand and any policy operand
  // when we transform to this unmasked pseudo. Additionally, if this
  // instruction is tail agnostic, the unmasked instruction should not have a
  // merge op.
  uint64_t TSFlags = TII.get(Opc).TSFlags;
  assert((IsTA != RISCVII::hasMergeOp(TSFlags)) &&
         RISCVII::hasDummyMaskOp(TSFlags) &&
         !RISCVII::hasVecPolicyOp(TSFlags) &&
         "Unexpected pseudo to transform to");
  (void)TSFlags;

  SmallVector<SDValue, 8> Ops;
  // Skip the merge operand at index 0 if IsTA
  // (the loop starts at index 1 when IsTA is true and at 0 otherwise; note
  // that the loop counter I shadows the pseudo-info pointer I declared
  // above).
  for (unsigned I = IsTA, E = N->getNumOperands(); I != E; I++) {
    // Skip the mask, the policy, and the Glue.
    SDValue Op = N->getOperand(I);
    if (I == MaskOpIdx || I == TailPolicyOpIdx ||
        Op.getValueType() == MVT::Glue)
      continue;
    Ops.push_back(Op);
  }

  // Transitively apply any node glued to our new node.
  if (auto *TGlued = Glued->getGluedNode())
    Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));

  SDNode *Result = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
  ReplaceUses(N, Result);

  return true;
}

// This pass converts a legalized DAG into a RISCV-specific DAG, ready
// for instruction scheduling.
// Returns a newly allocated RISCVDAGToDAGISel built from TM and OptLevel.
FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
                                       CodeGenOpt::Level OptLevel) {
  return new RISCVDAGToDAGISel(TM, OptLevel);
}