//===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISCV uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "RISCVISelLowering.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVRegisterInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-lower"

STATISTIC(NumTailCalls, "Number of tail calls");

RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                         const RISCVSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  if (Subtarget.isRV32E())
    report_fatal_error("Codegen not yet implemented for RV32E");

  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");

  if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
      !Subtarget.hasStdExtF()) {
    errs() << "Hard-float 'f' ABI can't be used for a target that "
              "doesn't support the F instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
             !Subtarget.hasStdExtD()) {
    errs() << "Hard-float 'd' ABI can't be used for a target that "
              "doesn't support the D instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  }

  switch (ABI) {
  default:
    report_fatal_error("Don't know how to lower this ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64:
  case RISCVABI::ABI_LP64F:
  case RISCVABI::ABI_LP64D:
    break;
  }

  MVT XLenVT = Subtarget.getXLenVT();

  // Set up the register classes.
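  // XLenVT is the native scalar integer type: i32 for RV32 and i64 for RV64.
  // All XLen-wide scalar integer values live in the general-purpose registers.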
  addRegisterClass(XLenVT, &RISCV::GPRRegClass);

  if (Subtarget.hasStdExtZfh())
    addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
  if (Subtarget.hasStdExtF())
    addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
  if (Subtarget.hasStdExtD())
    addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);

  static const MVT::SimpleValueType BoolVecVTs[] = {
      MVT::nxv1i1,  MVT::nxv2i1,  MVT::nxv4i1, MVT::nxv8i1,
      MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
  static const MVT::SimpleValueType IntVecVTs[] = {
      MVT::nxv1i8,  MVT::nxv2i8,   MVT::nxv4i8,   MVT::nxv8i8,  MVT::nxv16i8,
      MVT::nxv32i8, MVT::nxv64i8,  MVT::nxv1i16,  MVT::nxv2i16, MVT::nxv4i16,
      MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
      MVT::nxv4i32, MVT::nxv8i32,  MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
      MVT::nxv4i64, MVT::nxv8i64};
  static const MVT::SimpleValueType F16VecVTs[] = {
      MVT::nxv1f16, MVT::nxv2f16,  MVT::nxv4f16,
      MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
  static const MVT::SimpleValueType F32VecVTs[] = {
      MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
  static const MVT::SimpleValueType F64VecVTs[] = {
      MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};

  if (Subtarget.hasStdExtV()) {
    auto addRegClassForRVV = [this](MVT VT) {
      unsigned Size = VT.getSizeInBits().getKnownMinValue();
      assert(Size <= 512 && isPowerOf2_32(Size));
      const TargetRegisterClass *RC;
      if (Size <= 64)
        RC = &RISCV::VRRegClass;
      else if (Size == 128)
        RC = &RISCV::VRM2RegClass;
      else if (Size == 256)
        RC = &RISCV::VRM4RegClass;
      else
        RC = &RISCV::VRM8RegClass;

      addRegisterClass(VT, RC);
    };

    for (MVT VT : BoolVecVTs)
      addRegClassForRVV(VT);
    for (MVT VT : IntVecVTs)
      addRegClassForRVV(VT);

    if (Subtarget.hasStdExtZfh())
      for (MVT VT : F16VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasStdExtF())
      for (MVT VT : F32VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasStdExtD())
      for (MVT VT : F64VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.useRVVForFixedLengthVectors()) {
      auto addRegClassForFixedVectors = [this](MVT VT) {
        unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
        const TargetRegisterClass *RC;
        if (LMul == 1 || VT.getVectorElementType() == MVT::i1)
          RC = &RISCV::VRRegClass;
        else if (LMul == 2)
          RC = &RISCV::VRM2RegClass;
        else if (LMul == 4)
          RC = &RISCV::VRM4RegClass;
        else if (LMul == 8)
          RC = &RISCV::VRM8RegClass;
        else
          llvm_unreachable("Unexpected LMul!");

        addRegisterClass(VT, RC);
      };
      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);

      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);
    }
  }

  // Compute derived properties from the register classes.
  computeRegisterProperties(STI.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(RISCV::X2);

  for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
    setLoadExtAction(N, XLenVT, MVT::i1, Promote);

  // TODO: add all necessary setOperationAction calls.
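  // In the actions below, Legal means the node is matched directly by isel
  // patterns, Expand asks the legalizer to break it into simpler operations,
  // and Custom routes it through this target's lowering hooks.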
  setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, XLenVT, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::SELECT_CC, XLenVT, Expand);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  if (!Subtarget.hasStdExtZbb()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  }

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::ADD, MVT::i32, Custom);
    setOperationAction(ISD::SUB, MVT::i32, Custom);
    setOperationAction(ISD::SHL, MVT::i32, Custom);
    setOperationAction(ISD::SRA, MVT::i32, Custom);
    setOperationAction(ISD::SRL, MVT::i32, Custom);

    setOperationAction(ISD::UADDO, MVT::i32, Custom);
    setOperationAction(ISD::USUBO, MVT::i32, Custom);
    setOperationAction(ISD::UADDSAT, MVT::i32, Custom);
    setOperationAction(ISD::USUBSAT, MVT::i32, Custom);
  }

  if (!Subtarget.hasStdExtM()) {
    setOperationAction(ISD::MUL, XLenVT, Expand);
    setOperationAction(ISD::MULHS, XLenVT, Expand);
    setOperationAction(ISD::MULHU, XLenVT, Expand);
    setOperationAction(ISD::SDIV, XLenVT, Expand);
    setOperationAction(ISD::UDIV, XLenVT, Expand);
    setOperationAction(ISD::SREM, XLenVT, Expand);
    setOperationAction(ISD::UREM, XLenVT, Expand);
  }

  if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) {
    setOperationAction(ISD::MUL, MVT::i32, Custom);

    setOperationAction(ISD::SDIV, MVT::i8, Custom);
    setOperationAction(ISD::UDIV, MVT::i8, Custom);
    setOperationAction(ISD::UREM, MVT::i8, Custom);
    setOperationAction(ISD::SDIV, MVT::i16, Custom);
    setOperationAction(ISD::UDIV, MVT::i16, Custom);
    setOperationAction(ISD::UREM, MVT::i16, Custom);
    setOperationAction(ISD::SDIV, MVT::i32, Custom);
    setOperationAction(ISD::UDIV, MVT::i32, Custom);
    setOperationAction(ISD::UREM, MVT::i32, Custom);
  }

  setOperationAction(ISD::SDIVREM, XLenVT, Expand);
  setOperationAction(ISD::UDIVREM, XLenVT, Expand);
  setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
  setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);

  setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
  setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
  setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);

  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::ROTL, MVT::i32, Custom);
      setOperationAction(ISD::ROTR, MVT::i32, Custom);
    }
  } else {
    setOperationAction(ISD::ROTL, XLenVT, Expand);
    setOperationAction(ISD::ROTR, XLenVT, Expand);
  }

  if (Subtarget.hasStdExtZbp()) {
    // Custom lower bswap/bitreverse so we can convert them to GREVI to enable
    // more combining.
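    // (BITREVERSE becomes GREVI with an immediate of XLen-1; for BSWAP the
    // low 3 bits of that immediate are cleared so only whole bytes move. See
    // the BSWAP/BITREVERSE handling in LowerOperation below.)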
    setOperationAction(ISD::BITREVERSE, XLenVT, Custom);
    setOperationAction(ISD::BSWAP, XLenVT, Custom);

    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
      setOperationAction(ISD::BSWAP, MVT::i32, Custom);
    }
  } else {
    // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
    // pattern match it directly in isel.
    setOperationAction(ISD::BSWAP, XLenVT,
                       Subtarget.hasStdExtZbb() ? Legal : Expand);
  }

  if (Subtarget.hasStdExtZbb()) {
    setOperationAction(ISD::SMIN, XLenVT, Legal);
    setOperationAction(ISD::SMAX, XLenVT, Legal);
    setOperationAction(ISD::UMIN, XLenVT, Legal);
    setOperationAction(ISD::UMAX, XLenVT, Legal);
  } else {
    setOperationAction(ISD::CTTZ, XLenVT, Expand);
    setOperationAction(ISD::CTLZ, XLenVT, Expand);
    setOperationAction(ISD::CTPOP, XLenVT, Expand);
  }

  if (Subtarget.hasStdExtZbt()) {
    setOperationAction(ISD::FSHL, XLenVT, Custom);
    setOperationAction(ISD::FSHR, XLenVT, Custom);
    setOperationAction(ISD::SELECT, XLenVT, Legal);

    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::FSHL, MVT::i32, Custom);
      setOperationAction(ISD::FSHR, MVT::i32, Custom);
    }
  } else {
    setOperationAction(ISD::SELECT, XLenVT, Custom);
  }

  ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
      ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
      ISD::SETGE,  ISD::SETNE,  ISD::SETO,   ISD::SETUO};

  ISD::NodeType FPOpToExpand[] = {
      ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM,
      ISD::FP16_TO_FP, ISD::FP_TO_FP16};

  if (Subtarget.hasStdExtZfh())
    setOperationAction(ISD::BITCAST, MVT::i16, Custom);

  if (Subtarget.hasStdExtZfh()) {
    setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f16, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
    setOperationAction(ISD::SELECT, MVT::f16, Custom);
    setOperationAction(ISD::BR_CC, MVT::f16, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f16, Expand);
  }

  if (Subtarget.hasStdExtF()) {
    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Custom);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  }

  if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);

  if (Subtarget.hasStdExtD()) {
    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Custom);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
  }

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
  }

  setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
  setOperationAction(ISD::BlockAddress, XLenVT, Custom);
  setOperationAction(ISD::ConstantPool, XLenVT, Custom);
  setOperationAction(ISD::JumpTable, XLenVT, Custom);

  setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);

  // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
  // Unfortunately this can't be determined just from the ISA naming string.
  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
                     Subtarget.is64Bit() ? Legal : Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);
  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  if (Subtarget.hasStdExtA()) {
    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
    setMinCmpXchgSizeInBits(32);
  } else {
    setMaxAtomicSizeInBitsSupported(0);
  }

  setBooleanContents(ZeroOrOneBooleanContent);

  if (Subtarget.hasStdExtV()) {
    setBooleanVectorContents(ZeroOrOneBooleanContent);

    setOperationAction(ISD::VSCALE, XLenVT, Custom);

    // RVV intrinsics may have illegal operands.
    // We also need to custom legalize vmv.x.s.
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);

    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);

    if (!Subtarget.is64Bit()) {
      // We must custom-lower certain vXi64 operations on RV32 due to the
      // vector element type being illegal.
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i64, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::i64, Custom);

      setOperationAction(ISD::VECREDUCE_ADD, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_AND, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_OR, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_XOR, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_SMAX, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_SMIN, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_UMAX, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_UMIN, MVT::i64, Custom);
    }

    for (MVT VT : BoolVecVTs) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);

      // Mask VTs are custom-expanded into a series of standard nodes
      setOperationAction(ISD::TRUNCATE, VT, Custom);
      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    }

    for (MVT VT : IntVecVTs) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
      setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);

      setOperationAction(ISD::SMIN, VT, Legal);
      setOperationAction(ISD::SMAX, VT, Legal);
      setOperationAction(ISD::UMIN, VT, Legal);
      setOperationAction(ISD::UMAX, VT, Legal);

      setOperationAction(ISD::ROTL, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      // Custom-lower extensions and truncations from/to mask types.
      setOperationAction(ISD::ANY_EXTEND, VT, Custom);
      setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
      setOperationAction(ISD::ZERO_EXTEND, VT, Custom);

      // RVV has native int->float & float->int conversions where the
      // element type sizes are within one power-of-two of each other. Any
      // wider distances between type sizes have to be lowered as sequences
      // which progressively narrow the gap in stages.
      setOperationAction(ISD::SINT_TO_FP, VT, Custom);
      setOperationAction(ISD::UINT_TO_FP, VT, Custom);
      setOperationAction(ISD::FP_TO_SINT, VT, Custom);
      setOperationAction(ISD::FP_TO_UINT, VT, Custom);

      // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
      // nodes which truncate by one power of two at a time.
      setOperationAction(ISD::TRUNCATE, VT, Custom);

      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);

      // Custom-lower reduction operations to set up the corresponding custom
      // nodes' operands.
      setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
      setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
      setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
      setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
      setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
      setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
      setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
      setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);

      setOperationAction(ISD::MLOAD, VT, Custom);
      setOperationAction(ISD::MSTORE, VT, Custom);
      setOperationAction(ISD::MGATHER, VT, Custom);
      setOperationAction(ISD::MSCATTER, VT, Custom);

      setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

      setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
    }

    // Expand various CCs to best match the RVV ISA, which natively supports
    // UNE but no other unordered comparisons, and supports all ordered
    // comparisons except ONE. Additionally, we expand GT,OGT,GE,OGE for
    // optimization purposes; they are expanded to their swapped-operand CCs
    // (LT,OLT,LE,OLE), and we pattern-match those back to the "original",
    // swapping operands once more. This way we catch both operations and both
    // "vf" and "fv" forms with fewer patterns.
    ISD::CondCode VFPCCToExpand[] = {
        ISD::SETO,   ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
        ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
        ISD::SETGT,  ISD::SETOGT, ISD::SETGE,  ISD::SETOGE,
    };

    // Sets common operation actions on RVV floating-point vector types.
    const auto SetCommonVFPActions = [&](MVT VT) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
      // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
      // sizes are within one power-of-two of each other. Therefore conversions
      // between vXf16 and vXf64 must be lowered as sequences which convert via
      // vXf32.
      setOperationAction(ISD::FP_ROUND, VT, Custom);
      setOperationAction(ISD::FP_EXTEND, VT, Custom);
      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
      // Expand various condition codes (explained above).
      for (auto CC : VFPCCToExpand)
        setCondCodeAction(CC, VT, Expand);

      setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
      setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
      setOperationAction(ISD::FCOPYSIGN, VT, Legal);

      setOperationAction(ISD::MLOAD, VT, Custom);
      setOperationAction(ISD::MSTORE, VT, Custom);
      setOperationAction(ISD::MGATHER, VT, Custom);
      setOperationAction(ISD::MSCATTER, VT, Custom);

      setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

      setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
    };

    if (Subtarget.hasStdExtZfh())
      for (MVT VT : F16VecVTs)
        SetCommonVFPActions(VT);

    if (Subtarget.hasStdExtF())
      for (MVT VT : F32VecVTs)
        SetCommonVFPActions(VT);

    if (Subtarget.hasStdExtD())
      for (MVT VT : F64VecVTs)
        SetCommonVFPActions(VT);

    if (Subtarget.useRVVForFixedLengthVectors()) {
      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
        if (!useRVVForFixedLengthVectorVT(VT))
          continue;

        // By default everything must be expanded.
        for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
          setOperationAction(Op, VT, Expand);
        for (MVT OtherVT : MVT::fixedlen_vector_valuetypes())
          setTruncStoreAction(VT, OtherVT, Expand);

        // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
        setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
        setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

        setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
        setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);

        setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);

        setOperationAction(ISD::LOAD, VT, Custom);
        setOperationAction(ISD::STORE, VT, Custom);

        setOperationAction(ISD::SETCC, VT, Custom);

        setOperationAction(ISD::TRUNCATE, VT, Custom);

        setOperationAction(ISD::BITCAST, VT, Custom);

        // Operations below differ between mask vectors and other vectors.
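        // i1 mask vectors only have their logical operations custom-lowered;
        // all other element types fall through to the larger set below.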
        if (VT.getVectorElementType() == MVT::i1) {
          setOperationAction(ISD::AND, VT, Custom);
          setOperationAction(ISD::OR, VT, Custom);
          setOperationAction(ISD::XOR, VT, Custom);
          continue;
        }

        setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
        setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);

        setOperationAction(ISD::MLOAD, VT, Custom);
        setOperationAction(ISD::MSTORE, VT, Custom);
        setOperationAction(ISD::MGATHER, VT, Custom);
        setOperationAction(ISD::MSCATTER, VT, Custom);
        setOperationAction(ISD::ADD, VT, Custom);
        setOperationAction(ISD::MUL, VT, Custom);
        setOperationAction(ISD::SUB, VT, Custom);
        setOperationAction(ISD::AND, VT, Custom);
        setOperationAction(ISD::OR, VT, Custom);
        setOperationAction(ISD::XOR, VT, Custom);
        setOperationAction(ISD::SDIV, VT, Custom);
        setOperationAction(ISD::SREM, VT, Custom);
        setOperationAction(ISD::UDIV, VT, Custom);
        setOperationAction(ISD::UREM, VT, Custom);
        setOperationAction(ISD::SHL, VT, Custom);
        setOperationAction(ISD::SRA, VT, Custom);
        setOperationAction(ISD::SRL, VT, Custom);

        setOperationAction(ISD::SMIN, VT, Custom);
        setOperationAction(ISD::SMAX, VT, Custom);
        setOperationAction(ISD::UMIN, VT, Custom);
        setOperationAction(ISD::UMAX, VT, Custom);
        setOperationAction(ISD::ABS, VT, Custom);

        setOperationAction(ISD::MULHS, VT, Custom);
        setOperationAction(ISD::MULHU, VT, Custom);

        setOperationAction(ISD::SINT_TO_FP, VT, Custom);
        setOperationAction(ISD::UINT_TO_FP, VT, Custom);
        setOperationAction(ISD::FP_TO_SINT, VT, Custom);
        setOperationAction(ISD::FP_TO_UINT, VT, Custom);

        setOperationAction(ISD::VSELECT, VT, Custom);

        setOperationAction(ISD::ANY_EXTEND, VT, Custom);
        setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
        setOperationAction(ISD::ZERO_EXTEND, VT, Custom);

        // Custom-lower reduction operations to set up the corresponding custom
        // nodes' operands.
        setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
        setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
        setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
        setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
        setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
        setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
        setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
        setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
      }

      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
        if (!useRVVForFixedLengthVectorVT(VT))
          continue;

        // By default everything must be expanded.
        for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
          setOperationAction(Op, VT, Expand);
        for (MVT OtherVT : MVT::fp_fixedlen_vector_valuetypes()) {
          setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
          setTruncStoreAction(VT, OtherVT, Expand);
        }

        // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
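        // (INSERT_SUBVECTOR is the matching "cast" in the other direction,
        // from fixed back to scalable; see convertToScalableVector.)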
        setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
        setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

        setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
        setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
        setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);

        setOperationAction(ISD::LOAD, VT, Custom);
        setOperationAction(ISD::STORE, VT, Custom);
        setOperationAction(ISD::MLOAD, VT, Custom);
        setOperationAction(ISD::MSTORE, VT, Custom);
        setOperationAction(ISD::MGATHER, VT, Custom);
        setOperationAction(ISD::MSCATTER, VT, Custom);
        setOperationAction(ISD::FADD, VT, Custom);
        setOperationAction(ISD::FSUB, VT, Custom);
        setOperationAction(ISD::FMUL, VT, Custom);
        setOperationAction(ISD::FDIV, VT, Custom);
        setOperationAction(ISD::FNEG, VT, Custom);
        setOperationAction(ISD::FABS, VT, Custom);
        setOperationAction(ISD::FCOPYSIGN, VT, Custom);
        setOperationAction(ISD::FSQRT, VT, Custom);
        setOperationAction(ISD::FMA, VT, Custom);

        setOperationAction(ISD::FP_ROUND, VT, Custom);
        setOperationAction(ISD::FP_EXTEND, VT, Custom);

        for (auto CC : VFPCCToExpand)
          setCondCodeAction(CC, VT, Expand);

        setOperationAction(ISD::VSELECT, VT, Custom);

        setOperationAction(ISD::BITCAST, VT, Custom);

        setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
        setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
      }
    }
  }

  // Function alignments.
  const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
  setMinFunctionAlignment(FunctionAlignment);
  setPrefFunctionAlignment(FunctionAlignment);

  setMinimumJumpTableEntries(5);

  // Jumps are expensive, compared to logic
  setJumpIsExpensive();

  // We can use any register for comparisons
  setHasMultipleConditionRegisters();

  if (Subtarget.hasStdExtZbp()) {
    setTargetDAGCombine(ISD::OR);
  }
  if (Subtarget.hasStdExtV()) {
    setTargetDAGCombine(ISD::FCOPYSIGN);
    setTargetDAGCombine(ISD::MGATHER);
    setTargetDAGCombine(ISD::MSCATTER);
  }
}

EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
                                            LLVMContext &Context,
                                            EVT VT) const {
  if (!VT.isVector())
    return getPointerTy(DL);
  if (Subtarget.hasStdExtV() &&
      (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
    return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
  return VT.changeVectorElementTypeToInteger();
}

bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                             const CallInst &I,
                                             MachineFunction &MF,
                                             unsigned Intrinsic) const {
  switch (Intrinsic) {
  default:
    return false;
  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
  case Intrinsic::riscv_masked_atomicrmw_add_i32:
  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
  case Intrinsic::riscv_masked_atomicrmw_max_i32:
  case Intrinsic::riscv_masked_atomicrmw_min_i32:
  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
  case Intrinsic::riscv_masked_cmpxchg_i32:
    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::getVT(PtrTy->getElementType());
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    return true;
  }
}

bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                const AddrMode &AM, Type *Ty,
                                                unsigned AS,
                                                Instruction *I) const {
  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // Require a 12-bit signed offset.
  if (!isInt<12>(AM.BaseOffs))
    return false;

  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (!AM.HasBaseReg) // allow "r+i".
      break;
    return false; // disallow "r+r" or "r+r+i".
  default:
    return false;
  }

  return true;
}

bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

// On RV32, 64-bit integers are split into their high and low parts and held
// in two different registers, so the trunc is free since the low register can
// just be used.
bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
  if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
    return false;
  unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
  unsigned DestBits = DstTy->getPrimitiveSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
  if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
      !SrcVT.isInteger() || !DstVT.isInteger())
    return false;
  unsigned SrcBits = SrcVT.getSizeInBits();
  unsigned DestBits = DstVT.getSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Zexts are free if they can be combined with a load.
  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
         (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  return TargetLowering::isZExtFree(Val, VT2);
}

bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}

bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
  return Subtarget.hasStdExtZbb();
}

bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
  return Subtarget.hasStdExtZbb();
}

bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                       bool ForCodeSize) const {
  if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
    return false;
  if (VT == MVT::f32 && !Subtarget.hasStdExtF())
    return false;
  if (VT == MVT::f64 && !Subtarget.hasStdExtD())
    return false;
  if (Imm.isNegZero())
    return false;
  return Imm.isZero();
}

bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
  return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) ||
         (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
         (VT == MVT::f64 && Subtarget.hasStdExtD());
}

// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly by branches
// in the RISC-V ISA.
// May adjust compares to favor compare with 0 over compare with 1/-1.
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
                                    ISD::CondCode &CC, SelectionDAG &DAG) {
  // Convert X > -1 to X >= 0.
  if (CC == ISD::SETGT && isAllOnesConstant(RHS)) {
    RHS = DAG.getConstant(0, DL, RHS.getValueType());
    CC = ISD::SETGE;
    return;
  }
  // Convert X < 1 to 0 >= X.
  if (CC == ISD::SETLT && isOneConstant(RHS)) {
    RHS = LHS;
    LHS = DAG.getConstant(0, DL, RHS.getValueType());
    CC = ISD::SETGE;
    return;
  }

  switch (CC) {
  default:
    break;
  case ISD::SETGT:
  case ISD::SETLE:
  case ISD::SETUGT:
  case ISD::SETULE:
    CC = ISD::getSetCCSwappedOperands(CC);
    std::swap(LHS, RHS);
    break;
  }
}

// Return the RISC-V branch opcode that matches the given DAG integer
// condition code. The CondCode must be one of those supported by the RISC-V
// ISA (see translateSetCCForBranch).
static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported CondCode");
  case ISD::SETEQ:
    return RISCV::BEQ;
  case ISD::SETNE:
    return RISCV::BNE;
  case ISD::SETLT:
    return RISCV::BLT;
  case ISD::SETGE:
    return RISCV::BGE;
  case ISD::SETULT:
    return RISCV::BLTU;
  case ISD::SETUGE:
    return RISCV::BGEU;
  }
}

RISCVVLMUL RISCVTargetLowering::getLMUL(MVT VT) {
  assert(VT.isScalableVector() && "Expecting a scalable vector type");
  unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
  if (VT.getVectorElementType() == MVT::i1)
    KnownSize *= 8;

  switch (KnownSize) {
  default:
    llvm_unreachable("Invalid LMUL.");
  case 8:
    return RISCVVLMUL::LMUL_F8;
  case 16:
    return RISCVVLMUL::LMUL_F4;
  case 32:
    return RISCVVLMUL::LMUL_F2;
  case 64:
    return RISCVVLMUL::LMUL_1;
  case 128:
    return RISCVVLMUL::LMUL_2;
  case 256:
    return RISCVVLMUL::LMUL_4;
  case 512:
    return RISCVVLMUL::LMUL_8;
  }
}

unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVVLMUL LMul) {
  switch (LMul) {
  default:
    llvm_unreachable("Invalid LMUL.");
  case RISCVVLMUL::LMUL_F8:
  case RISCVVLMUL::LMUL_F4:
  case RISCVVLMUL::LMUL_F2:
  case RISCVVLMUL::LMUL_1:
    return RISCV::VRRegClassID;
  case RISCVVLMUL::LMUL_2:
    return RISCV::VRM2RegClassID;
  case RISCVVLMUL::LMUL_4:
    return RISCV::VRM4RegClassID;
  case RISCVVLMUL::LMUL_8:
    return RISCV::VRM8RegClassID;
  }
}

unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
  RISCVVLMUL LMUL = getLMUL(VT);
  if (LMUL == RISCVVLMUL::LMUL_F8 || LMUL == RISCVVLMUL::LMUL_F4 ||
      LMUL == RISCVVLMUL::LMUL_F2 || LMUL == RISCVVLMUL::LMUL_1) {
    static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm1_0 + Index;
  }
  if (LMUL == RISCVVLMUL::LMUL_2) {
    static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm2_0 + Index;
  }
  if (LMUL == RISCVVLMUL::LMUL_4) {
    static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm4_0 + Index;
  }
  llvm_unreachable("Invalid vector type.");
}

unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
  if (VT.getVectorElementType() == MVT::i1)
    return RISCV::VRRegClassID;
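  // All other element types take the register class matching their LMUL.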
  return getRegClassIDForLMUL(getLMUL(VT));
}

// Attempt to decompose a subvector insert/extract between VecVT and
// SubVecVT via subregister indices. Returns the subregister index that
// can perform the subvector insert/extract with the given element index, as
// well as the index corresponding to any leftover subvectors that must be
// further inserted/extracted within the register class for SubVecVT.
std::pair<unsigned, unsigned>
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
    MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
    const RISCVRegisterInfo *TRI) {
  static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
                 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
                 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
                "Register classes not ordered");
  unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
  unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
  // Try to compose a subregister index that takes us from the incoming
  // LMUL>1 register class down to the outgoing one. At each step we halve
  // the LMUL:
  //   nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
  // Note that this is not guaranteed to find a subregister index, such as
  // when we are extracting from one VR type to another.
  unsigned SubRegIdx = RISCV::NoSubRegister;
  for (const unsigned RCID :
       {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
    if (VecRegClassID > RCID && SubRegClassID <= RCID) {
      VecVT = VecVT.getHalfNumVectorElementsVT();
      bool IsHi =
          InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
      SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
                                            getSubregIndexByMVT(VecVT, IsHi));
      if (IsHi)
        InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
    }
  return {SubRegIdx, InsertExtractIdx};
}

// Return the largest legal scalable vector type that matches VT's element
// type.
MVT RISCVTargetLowering::getContainerForFixedLengthVector(
    const TargetLowering &TLI, MVT VT, const RISCVSubtarget &Subtarget) {
  assert(VT.isFixedLengthVector() && TLI.isTypeLegal(VT) &&
         "Expected legal fixed length vector!");

  unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
  assert(LMul <= 8 && isPowerOf2_32(LMul) && "Unexpected LMUL!");

  MVT EltVT = VT.getVectorElementType();
  switch (EltVT.SimpleTy) {
  default:
    llvm_unreachable("unexpected element type for RVV container");
  case MVT::i1: {
    // Masks are calculated assuming 8-bit elements since that's when we need
    // the most elements.
    unsigned EltsPerBlock = RISCV::RVVBitsPerBlock / 8;
    return MVT::getScalableVectorVT(MVT::i1, LMul * EltsPerBlock);
  }
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
  case MVT::f16:
  case MVT::f32:
  case MVT::f64: {
    unsigned EltsPerBlock = RISCV::RVVBitsPerBlock / EltVT.getSizeInBits();
    return MVT::getScalableVectorVT(EltVT, LMul * EltsPerBlock);
  }
  }
}

MVT RISCVTargetLowering::getContainerForFixedLengthVector(
    SelectionDAG &DAG, MVT VT, const RISCVSubtarget &Subtarget) {
  return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
                                          Subtarget);
}

MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
  return getContainerForFixedLengthVector(*this, VT, getSubtarget());
}

// Grow V to consume an entire RVV register.
static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
                                       const RISCVSubtarget &Subtarget) {
  assert(VT.isScalableVector() &&
         "Expected to convert into a scalable vector!");
  assert(V.getValueType().isFixedLengthVector() &&
         "Expected a fixed length vector operand!");
  SDLoc DL(V);
  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
}

// Shrink V so it's just big enough to maintain a VT's worth of data.
static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
                                         const RISCVSubtarget &Subtarget) {
  assert(VT.isFixedLengthVector() &&
         "Expected to convert into a fixed length vector!");
  assert(V.getValueType().isScalableVector() &&
         "Expected a scalable vector operand!");
  SDLoc DL(V);
  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
}

// Gets the two common "VL" operands: an all-ones mask and the vector length.
// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
// the vector type that it is contained in.
static std::pair<SDValue, SDValue>
getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
                const RISCVSubtarget &Subtarget) {
  assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
  MVT XLenVT = Subtarget.getXLenVT();
  SDValue VL = VecVT.isFixedLengthVector()
                   ? DAG.getConstant(VecVT.getVectorNumElements(), DL, XLenVT)
                   : DAG.getRegister(RISCV::X0, XLenVT);
  MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
  SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
  return {Mask, VL};
}

// As above but assuming the given type is a scalable vector type.
static std::pair<SDValue, SDValue>
getDefaultScalableVLOps(MVT VecVT, SDLoc DL, SelectionDAG &DAG,
                        const RISCVSubtarget &Subtarget) {
  assert(VecVT.isScalableVector() && "Expecting a scalable vector");
  return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
}

// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
// of either is (currently) supported. This can get us into an infinite loop
// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
// as a ..., etc.
// Until either (or both) of these can reliably lower any node, reporting that
// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
// which is not desirable.
bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
    EVT VT, unsigned DefinedValues) const {
  return false;
}

bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
  // Only splats are currently supported.
  if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
    return true;

  return false;
}

static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  MVT VT = Op.getSimpleValueType();
  assert(VT.isFixedLengthVector() && "Unexpected vector!");

  MVT ContainerVT =
      RISCVTargetLowering::getContainerForFixedLengthVector(DAG, VT, Subtarget);

  SDLoc DL(Op);
  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  MVT XLenVT = Subtarget.getXLenVT();
  unsigned NumElts = Op.getNumOperands();

  if (VT.getVectorElementType() == MVT::i1) {
    if (ISD::isBuildVectorAllZeros(Op.getNode())) {
      SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
      return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
    }

    if (ISD::isBuildVectorAllOnes(Op.getNode())) {
      SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
      return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
    }

    // Lower constant mask BUILD_VECTORs via an integer vector type, in
    // scalar integer chunks whose bit-width depends on the number of mask
    // bits and XLEN.
    // First, determine the most appropriate scalar integer type to use. This
    // is at most XLenVT, but may be shrunk to a smaller vector element type
    // according to the size of the final vector - use i8 chunks rather than
    // XLenVT if we're producing a v8i1. This results in more consistent
    // codegen across RV32 and RV64.
    // If we have to use more than one INSERT_VECTOR_ELT then this optimization
    // is likely to increase code size; avoid performing it in such a case.
    unsigned NumViaIntegerBits =
        std::min(std::max(NumElts, 8u), Subtarget.getXLen());
    if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
        (!DAG.shouldOptForSize() || NumElts <= NumViaIntegerBits)) {
      // Now we can create our integer vector type. Note that it may be larger
      // than the resulting mask type: v4i1 would use v1i8 as its integer type.
      MVT IntegerViaVecVT =
          MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
                           divideCeil(NumElts, NumViaIntegerBits));

      uint64_t Bits = 0;
      unsigned BitPos = 0, IntegerEltIdx = 0;
      SDValue Vec = DAG.getUNDEF(IntegerViaVecVT);

      for (unsigned I = 0; I < NumElts; I++, BitPos++) {
        // Once we accumulate enough bits to fill our scalar type, insert into
        // our vector and clear our accumulated data.
        if (I != 0 && I % NumViaIntegerBits == 0) {
          if (NumViaIntegerBits <= 32)
            Bits = SignExtend64(Bits, 32);
          SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
          Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec,
                            Elt, DAG.getConstant(IntegerEltIdx, DL, XLenVT));
          Bits = 0;
          BitPos = 0;
          IntegerEltIdx++;
        }
        SDValue V = Op.getOperand(I);
        bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
        Bits |= ((uint64_t)BitValue << BitPos);
      }

      // Insert the (remaining) scalar value into position in our integer
      // vector type.
      if (NumViaIntegerBits <= 32)
        Bits = SignExtend64(Bits, 32);
      SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
      Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec, Elt,
                        DAG.getConstant(IntegerEltIdx, DL, XLenVT));

      if (NumElts < NumViaIntegerBits) {
        // If we're producing a smaller vector than our minimum legal integer
        // type, bitcast to the equivalent (known-legal) mask type, and extract
        // our final mask.
        assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
        Vec = DAG.getBitcast(MVT::v8i1, Vec);
        Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
                          DAG.getConstant(0, DL, XLenVT));
      } else {
        // Else we must have produced an integer type with the same size as the
        // mask type; bitcast for the final result.
        assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
        Vec = DAG.getBitcast(VT, Vec);
      }

      return Vec;
    }

    return SDValue();
  }

  if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
    unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
                                        : RISCVISD::VMV_V_X_VL;
    Splat = DAG.getNode(Opc, DL, ContainerVT, Splat, VL);
    return convertFromScalableVector(VT, Splat, DAG, Subtarget);
  }

  // Try and match an index sequence, which we can lower directly to the vid
  // instruction. An all-undef vector is matched by getSplatValue, above.
  if (VT.isInteger()) {
    bool IsVID = true;
    for (unsigned I = 0; I < NumElts && IsVID; I++)
      IsVID &= Op.getOperand(I).isUndef() ||
               (isa<ConstantSDNode>(Op.getOperand(I)) &&
                Op.getConstantOperandVal(I) == I);

    if (IsVID) {
      SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
      return convertFromScalableVector(VT, VID, DAG, Subtarget);
    }
  }

  // Attempt to detect "hidden" splats, which only reveal themselves as splats
  // when re-interpreted as a vector with a larger element type. For example,
  //   v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
  // could be instead splat as
  //   v2i32 = build_vector i32 0x00010000, i32 0x00010000
  // TODO: This optimization could also work on non-constant splats, but it
  // would require bit-manipulation instructions to construct the splat value.
  SmallVector<SDValue> Sequence;
  unsigned EltBitSize = VT.getScalarSizeInBits();
  const auto *BV = cast<BuildVectorSDNode>(Op);
  if (VT.isInteger() && EltBitSize < 64 &&
      ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
      BV->getRepeatedSequence(Sequence) &&
      (Sequence.size() * EltBitSize) <= 64) {
    unsigned SeqLen = Sequence.size();
    MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
    MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, NumElts / SeqLen);
    assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
            ViaIntVT == MVT::i64) &&
           "Unexpected sequence type");

    unsigned EltIdx = 0;
    uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
    uint64_t SplatValue = 0;
    // Construct the amalgamated value which can be splatted as this larger
    // vector type.
    for (const auto &SeqV : Sequence) {
      if (!SeqV.isUndef())
        SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
                       << (EltIdx * EltBitSize));
      EltIdx++;
    }

    // On RV64, sign-extend from 32 to 64 bits where possible in order to
    // achieve better constant materialization.
    if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
      SplatValue = SignExtend64(SplatValue, 32);

    // Since we can't introduce illegal i64 types at this stage, we can only
    // perform an i64 splat on RV32 if it is its own sign-extended value. That
    // way we can use RVV instructions to splat.
    assert((ViaIntVT.bitsLE(XLenVT) ||
            (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
           "Unexpected bitcast sequence");
    if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
      SDValue ViaVL =
          DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
      MVT ViaContainerVT =
          RISCVTargetLowering::getContainerForFixedLengthVector(DAG, ViaVecVT,
                                                                Subtarget);
      SDValue Splat =
          DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
                      DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
      Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
      return DAG.getBitcast(VT, Splat);
    }
  }

  // Try and optimize BUILD_VECTORs with "dominant values" - these are values
  // which constitute a large proportion of the elements. In such cases we can
  // splat a vector with the dominant element and make up the shortfall with
  // INSERT_VECTOR_ELTs.
  // Note that this includes vectors of 2 elements by association. The
  // upper-most element is the "dominant" one, allowing us to use a splat to
  // "insert" the upper element, and an insert of the lower element at position
  // 0, which improves codegen.
  SDValue DominantValue;
  unsigned MostCommonCount = 0;
  DenseMap<SDValue, unsigned> ValueCounts;
  unsigned NumUndefElts =
      count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });

  for (SDValue V : Op->op_values()) {
    if (V.isUndef())
      continue;

    ValueCounts.insert(std::make_pair(V, 0));
    unsigned &Count = ValueCounts[V];

    // Is this value dominant? In case of a tie, prefer the highest element as
    // it's cheaper to insert near the beginning of a vector than it is at the
    // end.
    if (++Count >= MostCommonCount) {
      DominantValue = V;
      MostCommonCount = Count;
    }
  }

  assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
  unsigned NumDefElts = NumElts - NumUndefElts;
  unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;

  // Don't perform this optimization when optimizing for size, since
  // materializing elements and inserting them tends to cause code bloat.
  if (!DAG.shouldOptForSize() &&
      ((MostCommonCount > DominantValueCountThreshold) ||
       (ValueCounts.size() <= Log2_32(NumDefElts)))) {
    // Start by splatting the most common element.
    SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);

    DenseSet<SDValue> Processed{DominantValue};
    MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
    for (const auto &OpIdx : enumerate(Op->ops())) {
      const SDValue &V = OpIdx.value();
      if (V.isUndef() || !Processed.insert(V).second)
        continue;
      if (ValueCounts[V] == 1) {
        Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
                          DAG.getConstant(OpIdx.index(), DL, XLenVT));
      } else {
        // Blend in all instances of this value using a VSELECT, using a
        // mask where each bit signals whether that element is the one
        // we're after.
        SmallVector<SDValue> Ops;
        transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
          return DAG.getConstant(V == V1, DL, XLenVT);
        });
        Vec = DAG.getNode(ISD::VSELECT, DL, VT,
                          DAG.getBuildVector(SelMaskTy, DL, Ops),
                          DAG.getSplatBuildVector(VT, DL, V), Vec);
      }
    }

    return Vec;
  }

  return SDValue();
}

static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
                                   const RISCVSubtarget &Subtarget) {
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();
  MVT VT = Op.getSimpleValueType();
  unsigned NumElts = VT.getVectorNumElements();
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());

  if (SVN->isSplat()) {
    int Lane = SVN->getSplatIndex();
    if (Lane >= 0) {
      MVT ContainerVT = RISCVTargetLowering::getContainerForFixedLengthVector(
          DAG, VT, Subtarget);

      V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
      assert(Lane < (int)NumElts && "Unexpected lane!");

      SDValue Mask, VL;
      std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
      SDValue Gather =
          DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, V1,
                      DAG.getConstant(Lane, DL, XLenVT), Mask, VL);
      return convertFromScalableVector(VT, Gather, DAG, Subtarget);
    }
  }

  // Detect shuffles which can be re-expressed as vector selects.
  SmallVector<SDValue> MaskVals;
  // By default we preserve the original operand order, and select LHS as true
  // and RHS as false. However, since RVV vector selects may feature splats but
  // only on the LHS, we may choose to invert our mask and instead select
  // between RHS and LHS.
  bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);

  bool IsSelect = all_of(enumerate(SVN->getMask()), [&](const auto &MaskIdx) {
    int MaskIndex = MaskIdx.value();
    bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ SwapOps;
    MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
    return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
  });

  if (IsSelect) {
    assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
    MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
    SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
    return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SwapOps ? V2 : V1,
                       SwapOps ? V1 : V2);
  }

  return SDValue();
}

static SDValue getRVVFPExtendOrRound(SDValue Op, MVT VT, MVT ContainerVT,
                                     SDLoc DL, SelectionDAG &DAG,
                                     const RISCVSubtarget &Subtarget) {
  if (VT.isScalableVector())
    return DAG.getFPExtendOrRound(Op, DL, VT);
  assert(VT.isFixedLengthVector() &&
         "Unexpected value type for RVV FP extend/round lowering");
  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
  unsigned RVVOpc = ContainerVT.bitsGT(Op.getSimpleValueType())
                        ? RISCVISD::FP_EXTEND_VL
                        : RISCVISD::FP_ROUND_VL;
  return DAG.getNode(RVVOpc, DL, ContainerVT, Op, Mask, VL);
}

SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
                                            SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default:
    report_fatal_error("unimplemented operand");
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::JumpTable:
    return lowerJumpTable(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::SELECT:
    return lowerSELECT(Op, DAG);
  case ISD::BRCOND:
    return lowerBRCOND(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, true);
  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, false);
  case ISD::BITCAST: {
    SDValue Op0 = Op.getOperand(0);
    // We can handle fixed length vector bitcasts with a simple replacement
    // in isel.
    if (Op.getValueType().isFixedLengthVector()) {
      if (Op0.getValueType().isFixedLengthVector())
        return Op;
      return SDValue();
    }
    assert(((Subtarget.is64Bit() && Subtarget.hasStdExtF()) ||
            Subtarget.hasStdExtZfh()) &&
           "Unexpected custom legalisation");
    SDLoc DL(Op);
    if (Op.getValueType() == MVT::f16 && Subtarget.hasStdExtZfh()) {
      if (Op0.getValueType() != MVT::i16)
        return SDValue();
      SDValue NewOp0 =
          DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Op0);
      SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
      return FPConv;
    } else if (Op.getValueType() == MVT::f32 && Subtarget.is64Bit() &&
               Subtarget.hasStdExtF()) {
      if (Op0.getValueType() != MVT::i32)
        return SDValue();
      SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
      SDValue FPConv =
          DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
      return FPConv;
    }
    return SDValue();
  }
  case ISD::INTRINSIC_WO_CHAIN:
    return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return LowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::BSWAP:
  case ISD::BITREVERSE: {
    // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
    assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
    MVT VT = Op.getSimpleValueType();
    SDLoc DL(Op);
    // Start with the maximum immediate value which is the bitwidth - 1.
    unsigned Imm = VT.getSizeInBits() - 1;
    // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
    if (Op.getOpcode() == ISD::BSWAP)
      Imm &= ~0x7U;
    return DAG.getNode(RISCVISD::GREVI, DL, VT, Op.getOperand(0),
                       DAG.getTargetConstant(Imm, DL, Subtarget.getXLenVT()));
  }
  case ISD::FSHL:
  case ISD::FSHR: {
    MVT VT = Op.getSimpleValueType();
    assert(VT == Subtarget.getXLenVT() && "Unexpected custom legalization");
    SDLoc DL(Op);
    // FSL/FSR take a log2(XLen)+1 bit shift amount but XLenVT FSHL/FSHR only
    // use log2(XLen) bits. Mask the shift amount accordingly.
    unsigned ShAmtWidth = Subtarget.getXLen() - 1;
    SDValue ShAmt = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(2),
                                DAG.getConstant(ShAmtWidth, DL, VT));
    unsigned Opc = Op.getOpcode() == ISD::FSHL ? RISCVISD::FSL : RISCVISD::FSR;
    return DAG.getNode(Opc, DL, VT, Op.getOperand(0), Op.getOperand(1), ShAmt);
  }
  case ISD::TRUNCATE: {
    SDLoc DL(Op);
    MVT VT = Op.getSimpleValueType();
    // Only custom-lower vector truncates.
    if (!VT.isVector())
      return Op;

    // Truncates to mask types are handled differently.
    if (VT.getVectorElementType() == MVT::i1)
      return lowerVectorMaskTrunc(Op, DAG);

    // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
    // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
    // truncate by one power of two at a time.
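    // For example (illustrative only), truncating nxv2i64 to nxv2i8 emits
    // three TRUNCATE_VECTOR_VL nodes: i64->i32, i32->i16 and i16->i8; the
    // loop below halves the source element type until it matches DstEltVT.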
1547 MVT DstEltVT = VT.getVectorElementType(); 1548 1549 SDValue Src = Op.getOperand(0); 1550 MVT SrcVT = Src.getSimpleValueType(); 1551 MVT SrcEltVT = SrcVT.getVectorElementType(); 1552 1553 assert(DstEltVT.bitsLT(SrcEltVT) && 1554 isPowerOf2_64(DstEltVT.getSizeInBits()) && 1555 isPowerOf2_64(SrcEltVT.getSizeInBits()) && 1556 "Unexpected vector truncate lowering"); 1557 1558 MVT ContainerVT = SrcVT; 1559 if (SrcVT.isFixedLengthVector()) { 1560 ContainerVT = getContainerForFixedLengthVector(SrcVT); 1561 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); 1562 } 1563 1564 SDValue Result = Src; 1565 SDValue Mask, VL; 1566 std::tie(Mask, VL) = 1567 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget); 1568 LLVMContext &Context = *DAG.getContext(); 1569 const ElementCount Count = ContainerVT.getVectorElementCount(); 1570 do { 1571 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2); 1572 EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count); 1573 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result, 1574 Mask, VL); 1575 } while (SrcEltVT != DstEltVT); 1576 1577 if (SrcVT.isFixedLengthVector()) 1578 Result = convertFromScalableVector(VT, Result, DAG, Subtarget); 1579 1580 return Result; 1581 } 1582 case ISD::ANY_EXTEND: 1583 case ISD::ZERO_EXTEND: 1584 if (Op.getOperand(0).getValueType().isVector() && 1585 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1) 1586 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1); 1587 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL); 1588 case ISD::SIGN_EXTEND: 1589 if (Op.getOperand(0).getValueType().isVector() && 1590 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1) 1591 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1); 1592 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL); 1593 case ISD::SPLAT_VECTOR_PARTS: 1594 return lowerSPLAT_VECTOR_PARTS(Op, DAG); 1595 case ISD::INSERT_VECTOR_ELT: 1596 return lowerINSERT_VECTOR_ELT(Op, DAG); 1597 case ISD::EXTRACT_VECTOR_ELT: 1598 return lowerEXTRACT_VECTOR_ELT(Op, DAG); 1599 case ISD::VSCALE: { 1600 MVT VT = Op.getSimpleValueType(); 1601 SDLoc DL(Op); 1602 SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT); 1603 // We define our scalable vector types for lmul=1 to use a 64 bit known 1604 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate 1605 // vscale as VLENB / 8. 1606 SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB, 1607 DAG.getConstant(3, DL, VT)); 1608 return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0)); 1609 } 1610 case ISD::FP_EXTEND: { 1611 // RVV can only do fp_extend to types double the size as the source. We 1612 // custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going 1613 // via f32. 1614 SDLoc DL(Op); 1615 MVT VT = Op.getSimpleValueType(); 1616 SDValue Src = Op.getOperand(0); 1617 MVT SrcVT = Src.getSimpleValueType(); 1618 1619 // Prepare any fixed-length vector operands. 1620 MVT ContainerVT = VT; 1621 if (SrcVT.isFixedLengthVector()) { 1622 ContainerVT = getContainerForFixedLengthVector(VT); 1623 MVT SrcContainerVT = 1624 ContainerVT.changeVectorElementType(SrcVT.getVectorElementType()); 1625 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); 1626 } 1627 1628 if (!VT.isVector() || VT.getVectorElementType() != MVT::f64 || 1629 SrcVT.getVectorElementType() != MVT::f16) { 1630 // For scalable vectors, we only need to close the gap between 1631 // vXf16->vXf64. 
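      // For example (illustrative only), a scalable nxv2f32 -> nxv2f64 extend
      // returns unchanged here and is matched by patterns, while a
      // fixed-length v4f32 -> v4f64 extend is rewritten below as a single
      // FP_EXTEND_VL on the container type.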
1632 if (!VT.isFixedLengthVector()) 1633 return Op; 1634 // For fixed-length vectors, lower the FP_EXTEND to a custom "VL" version. 1635 Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget); 1636 return convertFromScalableVector(VT, Src, DAG, Subtarget); 1637 } 1638 1639 MVT InterVT = VT.changeVectorElementType(MVT::f32); 1640 MVT InterContainerVT = ContainerVT.changeVectorElementType(MVT::f32); 1641 SDValue IntermediateExtend = getRVVFPExtendOrRound( 1642 Src, InterVT, InterContainerVT, DL, DAG, Subtarget); 1643 1644 SDValue Extend = getRVVFPExtendOrRound(IntermediateExtend, VT, ContainerVT, 1645 DL, DAG, Subtarget); 1646 if (VT.isFixedLengthVector()) 1647 return convertFromScalableVector(VT, Extend, DAG, Subtarget); 1648 return Extend; 1649 } 1650 case ISD::FP_ROUND: { 1651 // RVV can only do fp_round to types half the size as the source. We 1652 // custom-lower f64->f16 rounds via RVV's round-to-odd float 1653 // conversion instruction. 1654 SDLoc DL(Op); 1655 MVT VT = Op.getSimpleValueType(); 1656 SDValue Src = Op.getOperand(0); 1657 MVT SrcVT = Src.getSimpleValueType(); 1658 1659 // Prepare any fixed-length vector operands. 1660 MVT ContainerVT = VT; 1661 if (VT.isFixedLengthVector()) { 1662 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT); 1663 ContainerVT = 1664 SrcContainerVT.changeVectorElementType(VT.getVectorElementType()); 1665 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); 1666 } 1667 1668 if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 || 1669 SrcVT.getVectorElementType() != MVT::f64) { 1670 // For scalable vectors, we only need to close the gap between 1671 // vXf64<->vXf16. 1672 if (!VT.isFixedLengthVector()) 1673 return Op; 1674 // For fixed-length vectors, lower the FP_ROUND to a custom "VL" version. 1675 Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget); 1676 return convertFromScalableVector(VT, Src, DAG, Subtarget); 1677 } 1678 1679 SDValue Mask, VL; 1680 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 1681 1682 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32); 1683 SDValue IntermediateRound = 1684 DAG.getNode(RISCVISD::VFNCVT_ROD_VL, DL, InterVT, Src, Mask, VL); 1685 SDValue Round = getRVVFPExtendOrRound(IntermediateRound, VT, ContainerVT, 1686 DL, DAG, Subtarget); 1687 1688 if (VT.isFixedLengthVector()) 1689 return convertFromScalableVector(VT, Round, DAG, Subtarget); 1690 return Round; 1691 } 1692 case ISD::FP_TO_SINT: 1693 case ISD::FP_TO_UINT: 1694 case ISD::SINT_TO_FP: 1695 case ISD::UINT_TO_FP: { 1696 // RVV can only do fp<->int conversions to types half/double the size as 1697 // the source. We custom-lower any conversions that do two hops into 1698 // sequences. 1699 MVT VT = Op.getSimpleValueType(); 1700 if (!VT.isVector()) 1701 return Op; 1702 SDLoc DL(Op); 1703 SDValue Src = Op.getOperand(0); 1704 MVT EltVT = VT.getVectorElementType(); 1705 MVT SrcVT = Src.getSimpleValueType(); 1706 MVT SrcEltVT = SrcVT.getVectorElementType(); 1707 unsigned EltSize = EltVT.getSizeInBits(); 1708 unsigned SrcEltSize = SrcEltVT.getSizeInBits(); 1709 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) && 1710 "Unexpected vector element types"); 1711 1712 bool IsInt2FP = SrcEltVT.isInteger(); 1713 // Widening conversions 1714 if (EltSize > SrcEltSize && (EltSize / SrcEltSize >= 4)) { 1715 if (IsInt2FP) { 1716 // Do a regular integer sign/zero extension then convert to float. 
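        // For example (illustrative only), v4f32 = uint_to_fp v4i8 has a 4x
        // element-size ratio, so it is first widened with
        // (v4i32 = zero_extend v4i8) and then converted with an ordinary
        // v4i32 -> v4f32 uint_to_fp.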
1717 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltVT.getSizeInBits()), 1718 VT.getVectorElementCount()); 1719 unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP 1720 ? ISD::ZERO_EXTEND 1721 : ISD::SIGN_EXTEND; 1722 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src); 1723 return DAG.getNode(Op.getOpcode(), DL, VT, Ext); 1724 } 1725 // FP2Int 1726 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering"); 1727 // Do one doubling fp_extend then complete the operation by converting 1728 // to int. 1729 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 1730 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT); 1731 return DAG.getNode(Op.getOpcode(), DL, VT, FExt); 1732 } 1733 1734 // Narrowing conversions 1735 if (SrcEltSize > EltSize && (SrcEltSize / EltSize >= 4)) { 1736 if (IsInt2FP) { 1737 // One narrowing int_to_fp, then an fp_round. 1738 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering"); 1739 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 1740 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src); 1741 return DAG.getFPExtendOrRound(Int2FP, DL, VT); 1742 } 1743 // FP2Int 1744 // One narrowing fp_to_int, then truncate the integer. If the float isn't 1745 // representable by the integer, the result is poison. 1746 MVT IVecVT = 1747 MVT::getVectorVT(MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2), 1748 VT.getVectorElementCount()); 1749 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src); 1750 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int); 1751 } 1752 1753 // Scalable vectors can exit here. Patterns will handle equally-sized 1754 // conversions halving/doubling ones. 1755 if (!VT.isFixedLengthVector()) 1756 return Op; 1757 1758 // For fixed-length vectors we lower to a custom "VL" node. 1759 unsigned RVVOpc = 0; 1760 switch (Op.getOpcode()) { 1761 default: 1762 llvm_unreachable("Impossible opcode"); 1763 case ISD::FP_TO_SINT: 1764 RVVOpc = RISCVISD::FP_TO_SINT_VL; 1765 break; 1766 case ISD::FP_TO_UINT: 1767 RVVOpc = RISCVISD::FP_TO_UINT_VL; 1768 break; 1769 case ISD::SINT_TO_FP: 1770 RVVOpc = RISCVISD::SINT_TO_FP_VL; 1771 break; 1772 case ISD::UINT_TO_FP: 1773 RVVOpc = RISCVISD::UINT_TO_FP_VL; 1774 break; 1775 } 1776 1777 MVT ContainerVT, SrcContainerVT; 1778 // Derive the reference container type from the larger vector type. 
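    // For example (illustrative only), for v4i16 = fp_to_sint v4f32 the f32
    // side is wider, so SrcContainerVT is taken from v4f32 and ContainerVT is
    // formed from it with i16 elements, keeping the element counts identical.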
1779 if (SrcEltSize > EltSize) { 1780 SrcContainerVT = getContainerForFixedLengthVector(SrcVT); 1781 ContainerVT = 1782 SrcContainerVT.changeVectorElementType(VT.getVectorElementType()); 1783 } else { 1784 ContainerVT = getContainerForFixedLengthVector(VT); 1785 SrcContainerVT = ContainerVT.changeVectorElementType(SrcEltVT); 1786 } 1787 1788 SDValue Mask, VL; 1789 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 1790 1791 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); 1792 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL); 1793 return convertFromScalableVector(VT, Src, DAG, Subtarget); 1794 } 1795 case ISD::VECREDUCE_ADD: 1796 case ISD::VECREDUCE_UMAX: 1797 case ISD::VECREDUCE_SMAX: 1798 case ISD::VECREDUCE_UMIN: 1799 case ISD::VECREDUCE_SMIN: 1800 case ISD::VECREDUCE_AND: 1801 case ISD::VECREDUCE_OR: 1802 case ISD::VECREDUCE_XOR: 1803 return lowerVECREDUCE(Op, DAG); 1804 case ISD::VECREDUCE_FADD: 1805 case ISD::VECREDUCE_SEQ_FADD: 1806 return lowerFPVECREDUCE(Op, DAG); 1807 case ISD::INSERT_SUBVECTOR: 1808 return lowerINSERT_SUBVECTOR(Op, DAG); 1809 case ISD::EXTRACT_SUBVECTOR: 1810 return lowerEXTRACT_SUBVECTOR(Op, DAG); 1811 case ISD::VECTOR_REVERSE: 1812 return lowerVECTOR_REVERSE(Op, DAG); 1813 case ISD::BUILD_VECTOR: 1814 return lowerBUILD_VECTOR(Op, DAG, Subtarget); 1815 case ISD::VECTOR_SHUFFLE: 1816 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget); 1817 case ISD::CONCAT_VECTORS: { 1818 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is 1819 // better than going through the stack, as the default expansion does. 1820 SDLoc DL(Op); 1821 MVT VT = Op.getSimpleValueType(); 1822 unsigned NumOpElts = 1823 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements(); 1824 SDValue Vec = DAG.getUNDEF(VT); 1825 for (const auto &OpIdx : enumerate(Op->ops())) 1826 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, OpIdx.value(), 1827 DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL)); 1828 return Vec; 1829 } 1830 case ISD::LOAD: 1831 return lowerFixedLengthVectorLoadToRVV(Op, DAG); 1832 case ISD::STORE: 1833 return lowerFixedLengthVectorStoreToRVV(Op, DAG); 1834 case ISD::MLOAD: 1835 return lowerMLOAD(Op, DAG); 1836 case ISD::MSTORE: 1837 return lowerMSTORE(Op, DAG); 1838 case ISD::SETCC: 1839 return lowerFixedLengthVectorSetccToRVV(Op, DAG); 1840 case ISD::ADD: 1841 return lowerToScalableOp(Op, DAG, RISCVISD::ADD_VL); 1842 case ISD::SUB: 1843 return lowerToScalableOp(Op, DAG, RISCVISD::SUB_VL); 1844 case ISD::MUL: 1845 return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL); 1846 case ISD::MULHS: 1847 return lowerToScalableOp(Op, DAG, RISCVISD::MULHS_VL); 1848 case ISD::MULHU: 1849 return lowerToScalableOp(Op, DAG, RISCVISD::MULHU_VL); 1850 case ISD::AND: 1851 return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMAND_VL, 1852 RISCVISD::AND_VL); 1853 case ISD::OR: 1854 return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMOR_VL, 1855 RISCVISD::OR_VL); 1856 case ISD::XOR: 1857 return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMXOR_VL, 1858 RISCVISD::XOR_VL); 1859 case ISD::SDIV: 1860 return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL); 1861 case ISD::SREM: 1862 return lowerToScalableOp(Op, DAG, RISCVISD::SREM_VL); 1863 case ISD::UDIV: 1864 return lowerToScalableOp(Op, DAG, RISCVISD::UDIV_VL); 1865 case ISD::UREM: 1866 return lowerToScalableOp(Op, DAG, RISCVISD::UREM_VL); 1867 case ISD::SHL: 1868 return lowerToScalableOp(Op, DAG, RISCVISD::SHL_VL); 1869 case ISD::SRA: 1870 return 
lowerToScalableOp(Op, DAG, RISCVISD::SRA_VL); 1871 case ISD::SRL: 1872 return lowerToScalableOp(Op, DAG, RISCVISD::SRL_VL); 1873 case ISD::FADD: 1874 return lowerToScalableOp(Op, DAG, RISCVISD::FADD_VL); 1875 case ISD::FSUB: 1876 return lowerToScalableOp(Op, DAG, RISCVISD::FSUB_VL); 1877 case ISD::FMUL: 1878 return lowerToScalableOp(Op, DAG, RISCVISD::FMUL_VL); 1879 case ISD::FDIV: 1880 return lowerToScalableOp(Op, DAG, RISCVISD::FDIV_VL); 1881 case ISD::FNEG: 1882 return lowerToScalableOp(Op, DAG, RISCVISD::FNEG_VL); 1883 case ISD::FABS: 1884 return lowerToScalableOp(Op, DAG, RISCVISD::FABS_VL); 1885 case ISD::FSQRT: 1886 return lowerToScalableOp(Op, DAG, RISCVISD::FSQRT_VL); 1887 case ISD::FMA: 1888 return lowerToScalableOp(Op, DAG, RISCVISD::FMA_VL); 1889 case ISD::SMIN: 1890 return lowerToScalableOp(Op, DAG, RISCVISD::SMIN_VL); 1891 case ISD::SMAX: 1892 return lowerToScalableOp(Op, DAG, RISCVISD::SMAX_VL); 1893 case ISD::UMIN: 1894 return lowerToScalableOp(Op, DAG, RISCVISD::UMIN_VL); 1895 case ISD::UMAX: 1896 return lowerToScalableOp(Op, DAG, RISCVISD::UMAX_VL); 1897 case ISD::ABS: 1898 return lowerABS(Op, DAG); 1899 case ISD::VSELECT: 1900 return lowerFixedLengthVectorSelectToRVV(Op, DAG); 1901 case ISD::FCOPYSIGN: 1902 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG); 1903 case ISD::MGATHER: 1904 return lowerMGATHER(Op, DAG); 1905 case ISD::MSCATTER: 1906 return lowerMSCATTER(Op, DAG); 1907 } 1908 } 1909 1910 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty, 1911 SelectionDAG &DAG, unsigned Flags) { 1912 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags); 1913 } 1914 1915 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty, 1916 SelectionDAG &DAG, unsigned Flags) { 1917 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(), 1918 Flags); 1919 } 1920 1921 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty, 1922 SelectionDAG &DAG, unsigned Flags) { 1923 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(), 1924 N->getOffset(), Flags); 1925 } 1926 1927 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty, 1928 SelectionDAG &DAG, unsigned Flags) { 1929 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags); 1930 } 1931 1932 template <class NodeTy> 1933 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, 1934 bool IsLocal) const { 1935 SDLoc DL(N); 1936 EVT Ty = getPointerTy(DAG.getDataLayout()); 1937 1938 if (isPositionIndependent()) { 1939 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); 1940 if (IsLocal) 1941 // Use PC-relative addressing to access the symbol. This generates the 1942 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym)) 1943 // %pcrel_lo(auipc)). 1944 return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0); 1945 1946 // Use PC-relative addressing to access the GOT for this symbol, then load 1947 // the address from the GOT. This generates the pattern (PseudoLA sym), 1948 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))). 1949 return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0); 1950 } 1951 1952 switch (getTargetMachine().getCodeModel()) { 1953 default: 1954 report_fatal_error("Unsupported code model for lowering"); 1955 case CodeModel::Small: { 1956 // Generate a sequence for accessing addresses within the first 2 GiB of 1957 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)). 
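    // In assembly this is the familiar two-instruction absolute sequence,
    // e.g. for a symbol `foo` (the register choice here is arbitrary):
    //   lui  a0, %hi(foo)
    //   addi a0, a0, %lo(foo)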
1958 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI); 1959 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO); 1960 SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0); 1961 return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0); 1962 } 1963 case CodeModel::Medium: { 1964 // Generate a sequence for accessing addresses within any 2GiB range within 1965 // the address space. This generates the pattern (PseudoLLA sym), which 1966 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)). 1967 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); 1968 return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0); 1969 } 1970 } 1971 } 1972 1973 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op, 1974 SelectionDAG &DAG) const { 1975 SDLoc DL(Op); 1976 EVT Ty = Op.getValueType(); 1977 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 1978 int64_t Offset = N->getOffset(); 1979 MVT XLenVT = Subtarget.getXLenVT(); 1980 1981 const GlobalValue *GV = N->getGlobal(); 1982 bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV); 1983 SDValue Addr = getAddr(N, DAG, IsLocal); 1984 1985 // In order to maximise the opportunity for common subexpression elimination, 1986 // emit a separate ADD node for the global address offset instead of folding 1987 // it in the global address node. Later peephole optimisations may choose to 1988 // fold it back in when profitable. 1989 if (Offset != 0) 1990 return DAG.getNode(ISD::ADD, DL, Ty, Addr, 1991 DAG.getConstant(Offset, DL, XLenVT)); 1992 return Addr; 1993 } 1994 1995 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op, 1996 SelectionDAG &DAG) const { 1997 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op); 1998 1999 return getAddr(N, DAG); 2000 } 2001 2002 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op, 2003 SelectionDAG &DAG) const { 2004 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); 2005 2006 return getAddr(N, DAG); 2007 } 2008 2009 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op, 2010 SelectionDAG &DAG) const { 2011 JumpTableSDNode *N = cast<JumpTableSDNode>(Op); 2012 2013 return getAddr(N, DAG); 2014 } 2015 2016 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, 2017 SelectionDAG &DAG, 2018 bool UseGOT) const { 2019 SDLoc DL(N); 2020 EVT Ty = getPointerTy(DAG.getDataLayout()); 2021 const GlobalValue *GV = N->getGlobal(); 2022 MVT XLenVT = Subtarget.getXLenVT(); 2023 2024 if (UseGOT) { 2025 // Use PC-relative addressing to access the GOT for this TLS symbol, then 2026 // load the address from the GOT and add the thread pointer. This generates 2027 // the pattern (PseudoLA_TLS_IE sym), which expands to 2028 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)). 2029 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); 2030 SDValue Load = 2031 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0); 2032 2033 // Add the thread pointer. 2034 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); 2035 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg); 2036 } 2037 2038 // Generate a sequence for accessing the address relative to the thread 2039 // pointer, with the appropriate adjustment for the thread pointer offset. 
2040 // This generates the pattern 2041 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym)) 2042 SDValue AddrHi = 2043 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI); 2044 SDValue AddrAdd = 2045 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD); 2046 SDValue AddrLo = 2047 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO); 2048 2049 SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0); 2050 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); 2051 SDValue MNAdd = SDValue( 2052 DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd), 2053 0); 2054 return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0); 2055 } 2056 2057 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, 2058 SelectionDAG &DAG) const { 2059 SDLoc DL(N); 2060 EVT Ty = getPointerTy(DAG.getDataLayout()); 2061 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits()); 2062 const GlobalValue *GV = N->getGlobal(); 2063 2064 // Use a PC-relative addressing mode to access the global dynamic GOT address. 2065 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to 2066 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)). 2067 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); 2068 SDValue Load = 2069 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0); 2070 2071 // Prepare argument list to generate call. 2072 ArgListTy Args; 2073 ArgListEntry Entry; 2074 Entry.Node = Load; 2075 Entry.Ty = CallTy; 2076 Args.push_back(Entry); 2077 2078 // Setup call to __tls_get_addr. 2079 TargetLowering::CallLoweringInfo CLI(DAG); 2080 CLI.setDebugLoc(DL) 2081 .setChain(DAG.getEntryNode()) 2082 .setLibCallee(CallingConv::C, CallTy, 2083 DAG.getExternalSymbol("__tls_get_addr", Ty), 2084 std::move(Args)); 2085 2086 return LowerCallTo(CLI).first; 2087 } 2088 2089 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op, 2090 SelectionDAG &DAG) const { 2091 SDLoc DL(Op); 2092 EVT Ty = Op.getValueType(); 2093 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 2094 int64_t Offset = N->getOffset(); 2095 MVT XLenVT = Subtarget.getXLenVT(); 2096 2097 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal()); 2098 2099 if (DAG.getMachineFunction().getFunction().getCallingConv() == 2100 CallingConv::GHC) 2101 report_fatal_error("In GHC calling convention TLS is not supported"); 2102 2103 SDValue Addr; 2104 switch (Model) { 2105 case TLSModel::LocalExec: 2106 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false); 2107 break; 2108 case TLSModel::InitialExec: 2109 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true); 2110 break; 2111 case TLSModel::LocalDynamic: 2112 case TLSModel::GeneralDynamic: 2113 Addr = getDynamicTLSAddr(N, DAG); 2114 break; 2115 } 2116 2117 // In order to maximise the opportunity for common subexpression elimination, 2118 // emit a separate ADD node for the global address offset instead of folding 2119 // it in the global address node. Later peephole optimisations may choose to 2120 // fold it back in when profitable. 
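  // For example, accesses to `tls_var` and to `tls_var + 8` can then share
  // the TLS address computation and differ only in the trailing ADD.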
2121 if (Offset != 0) 2122 return DAG.getNode(ISD::ADD, DL, Ty, Addr, 2123 DAG.getConstant(Offset, DL, XLenVT)); 2124 return Addr; 2125 } 2126 2127 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { 2128 SDValue CondV = Op.getOperand(0); 2129 SDValue TrueV = Op.getOperand(1); 2130 SDValue FalseV = Op.getOperand(2); 2131 SDLoc DL(Op); 2132 MVT XLenVT = Subtarget.getXLenVT(); 2133 2134 // If the result type is XLenVT and CondV is the output of a SETCC node 2135 // which also operated on XLenVT inputs, then merge the SETCC node into the 2136 // lowered RISCVISD::SELECT_CC to take advantage of the integer 2137 // compare+branch instructions. i.e.: 2138 // (select (setcc lhs, rhs, cc), truev, falsev) 2139 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev) 2140 if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC && 2141 CondV.getOperand(0).getSimpleValueType() == XLenVT) { 2142 SDValue LHS = CondV.getOperand(0); 2143 SDValue RHS = CondV.getOperand(1); 2144 auto CC = cast<CondCodeSDNode>(CondV.getOperand(2)); 2145 ISD::CondCode CCVal = CC->get(); 2146 2147 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG); 2148 2149 SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT); 2150 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV}; 2151 return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops); 2152 } 2153 2154 // Otherwise: 2155 // (select condv, truev, falsev) 2156 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev) 2157 SDValue Zero = DAG.getConstant(0, DL, XLenVT); 2158 SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT); 2159 2160 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV}; 2161 2162 return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops); 2163 } 2164 2165 SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const { 2166 SDValue CondV = Op.getOperand(1); 2167 SDLoc DL(Op); 2168 MVT XLenVT = Subtarget.getXLenVT(); 2169 2170 if (CondV.getOpcode() == ISD::SETCC && 2171 CondV.getOperand(0).getValueType() == XLenVT) { 2172 SDValue LHS = CondV.getOperand(0); 2173 SDValue RHS = CondV.getOperand(1); 2174 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get(); 2175 2176 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG); 2177 2178 SDValue TargetCC = DAG.getCondCode(CCVal); 2179 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0), 2180 LHS, RHS, TargetCC, Op.getOperand(2)); 2181 } 2182 2183 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0), 2184 CondV, DAG.getConstant(0, DL, XLenVT), 2185 DAG.getCondCode(ISD::SETNE), Op.getOperand(2)); 2186 } 2187 2188 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { 2189 MachineFunction &MF = DAG.getMachineFunction(); 2190 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>(); 2191 2192 SDLoc DL(Op); 2193 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), 2194 getPointerTy(MF.getDataLayout())); 2195 2196 // vastart just stores the address of the VarArgsFrameIndex slot into the 2197 // memory location argument. 
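  // That is, `va_start(ap, ...)` becomes a single pointer-sized store of the
  // varargs frame address into the memory pointed to by `ap`.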
2198 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 2199 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1), 2200 MachinePointerInfo(SV)); 2201 } 2202 2203 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op, 2204 SelectionDAG &DAG) const { 2205 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 2206 MachineFunction &MF = DAG.getMachineFunction(); 2207 MachineFrameInfo &MFI = MF.getFrameInfo(); 2208 MFI.setFrameAddressIsTaken(true); 2209 Register FrameReg = RI.getFrameRegister(MF); 2210 int XLenInBytes = Subtarget.getXLen() / 8; 2211 2212 EVT VT = Op.getValueType(); 2213 SDLoc DL(Op); 2214 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT); 2215 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 2216 while (Depth--) { 2217 int Offset = -(XLenInBytes * 2); 2218 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr, 2219 DAG.getIntPtrConstant(Offset, DL)); 2220 FrameAddr = 2221 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo()); 2222 } 2223 return FrameAddr; 2224 } 2225 2226 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op, 2227 SelectionDAG &DAG) const { 2228 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 2229 MachineFunction &MF = DAG.getMachineFunction(); 2230 MachineFrameInfo &MFI = MF.getFrameInfo(); 2231 MFI.setReturnAddressIsTaken(true); 2232 MVT XLenVT = Subtarget.getXLenVT(); 2233 int XLenInBytes = Subtarget.getXLen() / 8; 2234 2235 if (verifyReturnAddressArgumentIsConstant(Op, DAG)) 2236 return SDValue(); 2237 2238 EVT VT = Op.getValueType(); 2239 SDLoc DL(Op); 2240 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 2241 if (Depth) { 2242 int Off = -XLenInBytes; 2243 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG); 2244 SDValue Offset = DAG.getConstant(Off, DL, VT); 2245 return DAG.getLoad(VT, DL, DAG.getEntryNode(), 2246 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), 2247 MachinePointerInfo()); 2248 } 2249 2250 // Return the value of the return address register, marking it an implicit 2251 // live-in. 
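  // With a depth of zero this simply returns the incoming return address in
  // `ra` (x1), the register reported by getRARegister().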
2252 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT)); 2253 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT); 2254 } 2255 2256 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op, 2257 SelectionDAG &DAG) const { 2258 SDLoc DL(Op); 2259 SDValue Lo = Op.getOperand(0); 2260 SDValue Hi = Op.getOperand(1); 2261 SDValue Shamt = Op.getOperand(2); 2262 EVT VT = Lo.getValueType(); 2263 2264 // if Shamt-XLEN < 0: // Shamt < XLEN 2265 // Lo = Lo << Shamt 2266 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt)) 2267 // else: 2268 // Lo = 0 2269 // Hi = Lo << (Shamt-XLEN) 2270 2271 SDValue Zero = DAG.getConstant(0, DL, VT); 2272 SDValue One = DAG.getConstant(1, DL, VT); 2273 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); 2274 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); 2275 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); 2276 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); 2277 2278 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); 2279 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One); 2280 SDValue ShiftRightLo = 2281 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt); 2282 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); 2283 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); 2284 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen); 2285 2286 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); 2287 2288 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero); 2289 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 2290 2291 SDValue Parts[2] = {Lo, Hi}; 2292 return DAG.getMergeValues(Parts, DL); 2293 } 2294 2295 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, 2296 bool IsSRA) const { 2297 SDLoc DL(Op); 2298 SDValue Lo = Op.getOperand(0); 2299 SDValue Hi = Op.getOperand(1); 2300 SDValue Shamt = Op.getOperand(2); 2301 EVT VT = Lo.getValueType(); 2302 2303 // SRA expansion: 2304 // if Shamt-XLEN < 0: // Shamt < XLEN 2305 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt)) 2306 // Hi = Hi >>s Shamt 2307 // else: 2308 // Lo = Hi >>s (Shamt-XLEN); 2309 // Hi = Hi >>s (XLEN-1) 2310 // 2311 // SRL expansion: 2312 // if Shamt-XLEN < 0: // Shamt < XLEN 2313 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt)) 2314 // Hi = Hi >>u Shamt 2315 // else: 2316 // Lo = Hi >>u (Shamt-XLEN); 2317 // Hi = 0; 2318 2319 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL; 2320 2321 SDValue Zero = DAG.getConstant(0, DL, VT); 2322 SDValue One = DAG.getConstant(1, DL, VT); 2323 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); 2324 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); 2325 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); 2326 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); 2327 2328 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt); 2329 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One); 2330 SDValue ShiftLeftHi = 2331 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt); 2332 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi); 2333 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt); 2334 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen); 2335 SDValue HiFalse = 2336 IsSRA ? 
DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero; 2337 2338 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); 2339 2340 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse); 2341 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 2342 2343 SDValue Parts[2] = {Lo, Hi}; 2344 return DAG.getMergeValues(Parts, DL); 2345 } 2346 2347 // Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is 2348 // illegal (currently only vXi64 RV32). 2349 // FIXME: We could also catch non-constant sign-extended i32 values and lower 2350 // them to SPLAT_VECTOR_I64 2351 SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op, 2352 SelectionDAG &DAG) const { 2353 SDLoc DL(Op); 2354 EVT VecVT = Op.getValueType(); 2355 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 && 2356 "Unexpected SPLAT_VECTOR_PARTS lowering"); 2357 2358 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!"); 2359 SDValue Lo = Op.getOperand(0); 2360 SDValue Hi = Op.getOperand(1); 2361 2362 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) { 2363 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue(); 2364 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue(); 2365 // If Hi constant is all the same sign bit as Lo, lower this as a custom 2366 // node in order to try and match RVV vector/scalar instructions. 2367 if ((LoC >> 31) == HiC) 2368 return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo); 2369 } 2370 2371 // Else, on RV32 we lower an i64-element SPLAT_VECTOR thus, being careful not 2372 // to accidentally sign-extend the 32-bit halves to the e64 SEW: 2373 // vmv.v.x vX, hi 2374 // vsll.vx vX, vX, /*32*/ 2375 // vmv.v.x vY, lo 2376 // vsll.vx vY, vY, /*32*/ 2377 // vsrl.vx vY, vY, /*32*/ 2378 // vor.vv vX, vX, vY 2379 SDValue ThirtyTwoV = DAG.getConstant(32, DL, VecVT); 2380 2381 Lo = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo); 2382 Lo = DAG.getNode(ISD::SHL, DL, VecVT, Lo, ThirtyTwoV); 2383 Lo = DAG.getNode(ISD::SRL, DL, VecVT, Lo, ThirtyTwoV); 2384 2385 if (isNullConstant(Hi)) 2386 return Lo; 2387 2388 Hi = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Hi); 2389 Hi = DAG.getNode(ISD::SHL, DL, VecVT, Hi, ThirtyTwoV); 2390 2391 return DAG.getNode(ISD::OR, DL, VecVT, Lo, Hi); 2392 } 2393 2394 // Custom-lower extensions from mask vectors by using a vselect either with 1 2395 // for zero/any-extension or -1 for sign-extension: 2396 // (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0) 2397 // Note that any-extension is lowered identically to zero-extension. 2398 SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG, 2399 int64_t ExtTrueVal) const { 2400 SDLoc DL(Op); 2401 MVT VecVT = Op.getSimpleValueType(); 2402 SDValue Src = Op.getOperand(0); 2403 // Only custom-lower extensions from mask types 2404 assert(Src.getValueType().isVector() && 2405 Src.getValueType().getVectorElementType() == MVT::i1); 2406 2407 MVT XLenVT = Subtarget.getXLenVT(); 2408 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT); 2409 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT); 2410 2411 if (VecVT.isScalableVector()) { 2412 // Be careful not to introduce illegal scalar types at this stage, and be 2413 // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is 2414 // illegal and must be expanded. Since we know that the constants are 2415 // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly. 
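    // For example (illustrative only), zero-extending nxv2i1 to nxv2i64 on
    // RV32 becomes (vselect vmask, (SPLAT_VECTOR_I64 1), (SPLAT_VECTOR_I64 0));
    // both 0 and the extend value fit in a sign-extended 32-bit immediate, so
    // SPLAT_VECTOR_I64 is safe to use here.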
2416 bool IsRV32E64 = 2417 !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64; 2418 2419 if (!IsRV32E64) { 2420 SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero); 2421 SplatTrueVal = DAG.getSplatVector(VecVT, DL, SplatTrueVal); 2422 } else { 2423 SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero); 2424 SplatTrueVal = 2425 DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatTrueVal); 2426 } 2427 2428 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero); 2429 } 2430 2431 MVT ContainerVT = getContainerForFixedLengthVector(VecVT); 2432 MVT I1ContainerVT = 2433 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 2434 2435 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget); 2436 2437 SDValue Mask, VL; 2438 std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 2439 2440 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero, VL); 2441 SplatTrueVal = 2442 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatTrueVal, VL); 2443 SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, 2444 SplatTrueVal, SplatZero, VL); 2445 2446 return convertFromScalableVector(VecVT, Select, DAG, Subtarget); 2447 } 2448 2449 SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV( 2450 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const { 2451 MVT ExtVT = Op.getSimpleValueType(); 2452 // Only custom-lower extensions from fixed-length vector types. 2453 if (!ExtVT.isFixedLengthVector()) 2454 return Op; 2455 MVT VT = Op.getOperand(0).getSimpleValueType(); 2456 // Grab the canonical container type for the extended type. Infer the smaller 2457 // type from that to ensure the same number of vector elements, as we know 2458 // the LMUL will be sufficient to hold the smaller type. 2459 MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT); 2460 // Get the extended container type manually to ensure the same number of 2461 // vector elements between source and dest. 2462 MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(), 2463 ContainerExtVT.getVectorElementCount()); 2464 2465 SDValue Op1 = 2466 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget); 2467 2468 SDLoc DL(Op); 2469 SDValue Mask, VL; 2470 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 2471 2472 SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL); 2473 2474 return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget); 2475 } 2476 2477 // Custom-lower truncations from vectors to mask vectors by using a mask and a 2478 // setcc operation: 2479 // (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne) 2480 SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op, 2481 SelectionDAG &DAG) const { 2482 SDLoc DL(Op); 2483 EVT MaskVT = Op.getValueType(); 2484 // Only expect to custom-lower truncations to mask types 2485 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 && 2486 "Unexpected type for vector mask lowering"); 2487 SDValue Src = Op.getOperand(0); 2488 MVT VecVT = Src.getSimpleValueType(); 2489 2490 // If this is a fixed vector, we need to convert it to a scalable vector. 
2491 MVT ContainerVT = VecVT; 2492 if (VecVT.isFixedLengthVector()) { 2493 ContainerVT = getContainerForFixedLengthVector(VecVT); 2494 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); 2495 } 2496 2497 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT()); 2498 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); 2499 2500 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatOne); 2501 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero); 2502 2503 if (VecVT.isScalableVector()) { 2504 SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne); 2505 return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE); 2506 } 2507 2508 SDValue Mask, VL; 2509 std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 2510 2511 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1); 2512 SDValue Trunc = 2513 DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne, Mask, VL); 2514 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, Trunc, SplatZero, 2515 DAG.getCondCode(ISD::SETNE), Mask, VL); 2516 return convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget); 2517 } 2518 2519 // Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the 2520 // first position of a vector, and that vector is slid up to the insert index. 2521 // By limiting the active vector length to index+1 and merging with the 2522 // original vector (with an undisturbed tail policy for elements >= VL), we 2523 // achieve the desired result of leaving all elements untouched except the one 2524 // at VL-1, which is replaced with the desired value. 2525 SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, 2526 SelectionDAG &DAG) const { 2527 SDLoc DL(Op); 2528 MVT VecVT = Op.getSimpleValueType(); 2529 SDValue Vec = Op.getOperand(0); 2530 SDValue Val = Op.getOperand(1); 2531 SDValue Idx = Op.getOperand(2); 2532 2533 MVT ContainerVT = VecVT; 2534 // If the operand is a fixed-length vector, convert to a scalable one. 2535 if (VecVT.isFixedLengthVector()) { 2536 ContainerVT = getContainerForFixedLengthVector(VecVT); 2537 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 2538 } 2539 2540 MVT XLenVT = Subtarget.getXLenVT(); 2541 2542 SDValue Zero = DAG.getConstant(0, DL, XLenVT); 2543 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64; 2544 // Even i64-element vectors on RV32 can be lowered without scalar 2545 // legalization if the most-significant 32 bits of the value are not affected 2546 // by the sign-extension of the lower 32 bits. 2547 // TODO: We could also catch sign extensions of a 32-bit value. 
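  // For example (illustrative only), inserting the i64 constant -1 on RV32 is
  // treated as legal below since it is a sign-extended 32-bit value, whereas
  // inserting 0x100000000 must take the two-vslide1up path further down.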
2548 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) { 2549 const auto *CVal = cast<ConstantSDNode>(Val); 2550 if (isInt<32>(CVal->getSExtValue())) { 2551 IsLegalInsert = true; 2552 Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32); 2553 } 2554 } 2555 2556 SDValue Mask, VL; 2557 std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 2558 2559 SDValue ValInVec; 2560 2561 if (IsLegalInsert) { 2562 if (isNullConstant(Idx)) { 2563 Vec = DAG.getNode(RISCVISD::VMV_S_XF_VL, DL, ContainerVT, Vec, Val, VL); 2564 if (!VecVT.isFixedLengthVector()) 2565 return Vec; 2566 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget); 2567 } 2568 ValInVec = DAG.getNode(RISCVISD::VMV_S_XF_VL, DL, ContainerVT, 2569 DAG.getUNDEF(ContainerVT), Val, VL); 2570 } else { 2571 // On RV32, i64-element vectors must be specially handled to place the 2572 // value at element 0, by using two vslide1up instructions in sequence on 2573 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for 2574 // this. 2575 SDValue One = DAG.getConstant(1, DL, XLenVT); 2576 SDValue ValLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, Zero); 2577 SDValue ValHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, One); 2578 MVT I32ContainerVT = 2579 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2); 2580 SDValue I32Mask = 2581 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first; 2582 // Limit the active VL to two. 2583 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT); 2584 // Note: We can't pass a UNDEF to the first VSLIDE1UP_VL since an untied 2585 // undef doesn't obey the earlyclobber constraint. Just splat a zero value. 2586 ValInVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, I32ContainerVT, Zero, 2587 InsertI64VL); 2588 // First slide in the hi value, then the lo in underneath it. 2589 ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec, 2590 ValHi, I32Mask, InsertI64VL); 2591 ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec, 2592 ValLo, I32Mask, InsertI64VL); 2593 // Bitcast back to the right container type. 2594 ValInVec = DAG.getBitcast(ContainerVT, ValInVec); 2595 } 2596 2597 // Now that the value is in a vector, slide it into position. 2598 SDValue InsertVL = 2599 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT)); 2600 SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec, 2601 ValInVec, Idx, Mask, InsertVL); 2602 if (!VecVT.isFixedLengthVector()) 2603 return Slideup; 2604 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget); 2605 } 2606 2607 // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then 2608 // extract the first element: (extractelt (slidedown vec, idx), 0). For integer 2609 // types this is done using VMV_X_S to allow us to glean information about the 2610 // sign bits of the result. 2611 SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, 2612 SelectionDAG &DAG) const { 2613 SDLoc DL(Op); 2614 SDValue Idx = Op.getOperand(1); 2615 SDValue Vec = Op.getOperand(0); 2616 EVT EltVT = Op.getValueType(); 2617 MVT VecVT = Vec.getSimpleValueType(); 2618 MVT XLenVT = Subtarget.getXLenVT(); 2619 2620 if (VecVT.getVectorElementType() == MVT::i1) { 2621 // FIXME: For now we just promote to an i8 vector and extract from that, 2622 // but this is probably not optimal. 
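    // For example (illustrative only), extracting element 2 of a v8i1 mask
    // zero-extends the mask to v8i8 first and then extracts the i8 lane,
    // yielding 0 or 1 as expected.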
2623 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount()); 2624 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec); 2625 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx); 2626 } 2627 2628 // If this is a fixed vector, we need to convert it to a scalable vector. 2629 MVT ContainerVT = VecVT; 2630 if (VecVT.isFixedLengthVector()) { 2631 ContainerVT = getContainerForFixedLengthVector(VecVT); 2632 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 2633 } 2634 2635 // If the index is 0, the vector is already in the right position. 2636 if (!isNullConstant(Idx)) { 2637 // Use a VL of 1 to avoid processing more elements than we need. 2638 SDValue VL = DAG.getConstant(1, DL, XLenVT); 2639 MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 2640 SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); 2641 Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT, 2642 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL); 2643 } 2644 2645 if (!EltVT.isInteger()) { 2646 // Floating-point extracts are handled in TableGen. 2647 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, 2648 DAG.getConstant(0, DL, XLenVT)); 2649 } 2650 2651 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec); 2652 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0); 2653 } 2654 2655 // Called by type legalization to handle splat of i64 on RV32. 2656 // FIXME: We can optimize this when the type has sign or zero bits in one 2657 // of the halves. 2658 static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Scalar, 2659 SDValue VL, SelectionDAG &DAG) { 2660 SDValue ThirtyTwoV = DAG.getConstant(32, DL, VT); 2661 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar, 2662 DAG.getConstant(0, DL, MVT::i32)); 2663 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar, 2664 DAG.getConstant(1, DL, MVT::i32)); 2665 2666 // vmv.v.x vX, hi 2667 // vsll.vx vX, vX, /*32*/ 2668 // vmv.v.x vY, lo 2669 // vsll.vx vY, vY, /*32*/ 2670 // vsrl.vx vY, vY, /*32*/ 2671 // vor.vv vX, vX, vY 2672 MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount()); 2673 SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); 2674 Lo = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Lo, VL); 2675 Lo = DAG.getNode(RISCVISD::SHL_VL, DL, VT, Lo, ThirtyTwoV, Mask, VL); 2676 Lo = DAG.getNode(RISCVISD::SRL_VL, DL, VT, Lo, ThirtyTwoV, Mask, VL); 2677 2678 Hi = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Hi, VL); 2679 Hi = DAG.getNode(RISCVISD::SHL_VL, DL, VT, Hi, ThirtyTwoV, Mask, VL); 2680 2681 return DAG.getNode(RISCVISD::OR_VL, DL, VT, Lo, Hi, Mask, VL); 2682 } 2683 2684 // Some RVV intrinsics may claim that they want an integer operand to be 2685 // promoted or expanded. 2686 static SDValue lowerVectorIntrinsicSplats(SDValue Op, SelectionDAG &DAG, 2687 const RISCVSubtarget &Subtarget) { 2688 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || 2689 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) && 2690 "Unexpected opcode"); 2691 2692 if (!Subtarget.hasStdExtV()) 2693 return SDValue(); 2694 2695 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN; 2696 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 
                                                       1 : 0);
  SDLoc DL(Op);

  const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
      RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
  if (!II || !II->SplatOperand)
    return SDValue();

  unsigned SplatOp = II->SplatOperand + HasChain;
  assert(SplatOp < Op.getNumOperands());

  SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
  SDValue &ScalarOp = Operands[SplatOp];
  MVT OpVT = ScalarOp.getSimpleValueType();
  MVT VT = Op.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  // If this isn't a scalar, or its type is XLenVT we're done.
  if (!OpVT.isScalarInteger() || OpVT == XLenVT)
    return SDValue();

  // Simplest case is that the operand needs to be promoted to XLenVT.
  if (OpVT.bitsLT(XLenVT)) {
    // If the operand is a constant, sign extend to increase our chances
    // of being able to use a .vi instruction. ANY_EXTEND would become a
    // zero extend and the simm5 check in isel would fail.
    // FIXME: Should we ignore the upper bits in isel instead?
    unsigned ExtOpc =
        isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
    ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
    return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
  }

  // The more complex case is when the scalar is larger than XLenVT.
  assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
         VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");

  // If this is a sign-extended 32-bit constant, we can truncate it and rely
  // on the instruction to sign-extend since SEW>XLEN.
  if (auto *CVal = dyn_cast<ConstantSDNode>(ScalarOp)) {
    if (isInt<32>(CVal->getSExtValue())) {
      ScalarOp = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
      return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
    }
  }

  // We need to convert the scalar to a splat vector.
  // FIXME: Can we implicitly truncate the scalar if it is known to
  // be sign extended?
  // VL should be the last operand.
  SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
  assert(VL.getValueType() == XLenVT);
  ScalarOp = splatSplitI64WithVL(DL, VT, ScalarOp, VL, DAG);
  return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
}

SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                     SelectionDAG &DAG) const {
  unsigned IntNo = Op.getConstantOperandVal(0);
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();

  switch (IntNo) {
  default:
    break; // Don't custom lower most intrinsics.
  case Intrinsic::thread_pointer: {
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    return DAG.getRegister(RISCV::X4, PtrVT);
  }
  case Intrinsic::riscv_vmv_x_s:
    assert(Op.getValueType() == XLenVT && "Unexpected VT!");
    return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
                       Op.getOperand(1));
  case Intrinsic::riscv_vmv_v_x: {
    SDValue Scalar = Op.getOperand(1);
    if (Scalar.getValueType().bitsLE(XLenVT)) {
      unsigned ExtOpc =
          isa<ConstantSDNode>(Scalar) ?
              ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
      Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
      return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, Op.getValueType(), Scalar,
                         Op.getOperand(2));
    }

    assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");

    // If this is a sign-extended 32-bit constant, we can truncate it and rely
    // on the instruction to sign-extend since SEW>XLEN.
    if (auto *CVal = dyn_cast<ConstantSDNode>(Scalar)) {
      if (isInt<32>(CVal->getSExtValue()))
        return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, Op.getValueType(),
                           DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32),
                           Op.getOperand(2));
    }

    // Otherwise use the more complicated splatting algorithm.
    return splatSplitI64WithVL(DL, Op.getSimpleValueType(), Scalar,
                               Op.getOperand(2), DAG);
  }
  case Intrinsic::riscv_vfmv_v_f:
    return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
  case Intrinsic::riscv_vmv_s_x: {
    SDValue Scalar = Op.getOperand(2);

    if (Scalar.getValueType().bitsLE(XLenVT)) {
      Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
      return DAG.getNode(RISCVISD::VMV_S_XF_VL, DL, Op.getValueType(),
                         Op.getOperand(1), Scalar, Op.getOperand(3));
    }

    assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");

    // This is an i64 value that lives in two scalar registers. We have to
    // insert this in a convoluted way. First we build a vXi64 splat containing
    // the two values that we assemble using some bit math. Next we'll use
    // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
    // to merge element 0 from our splat into the source vector.
    // FIXME: This is probably not the best way to do this, but it is
    // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
    // point.
2816 // vmv.v.x vX, hi 2817 // vsll.vx vX, vX, /*32*/ 2818 // vmv.v.x vY, lo 2819 // vsll.vx vY, vY, /*32*/ 2820 // vsrl.vx vY, vY, /*32*/ 2821 // vor.vv vX, vX, vY 2822 // 2823 // vid.v vVid 2824 // vmseq.vx mMask, vVid, 0 2825 // vmerge.vvm vDest, vSrc, vVal, mMask 2826 MVT VT = Op.getSimpleValueType(); 2827 SDValue Vec = Op.getOperand(1); 2828 SDValue VL = Op.getOperand(3); 2829 2830 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, Scalar, VL, DAG); 2831 SDValue SplattedIdx = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, 2832 DAG.getConstant(0, DL, MVT::i32), VL); 2833 2834 MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount()); 2835 SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); 2836 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL); 2837 SDValue SelectCond = 2838 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, VID, SplattedIdx, 2839 DAG.getCondCode(ISD::SETEQ), Mask, VL); 2840 return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal, 2841 Vec, VL); 2842 } 2843 } 2844 2845 return lowerVectorIntrinsicSplats(Op, DAG, Subtarget); 2846 } 2847 2848 SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, 2849 SelectionDAG &DAG) const { 2850 return lowerVectorIntrinsicSplats(Op, DAG, Subtarget); 2851 } 2852 2853 static MVT getLMUL1VT(MVT VT) { 2854 assert(VT.getVectorElementType().getSizeInBits() <= 64 && 2855 "Unexpected vector MVT"); 2856 return MVT::getScalableVectorVT( 2857 VT.getVectorElementType(), 2858 RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits()); 2859 } 2860 2861 static unsigned getRVVReductionOp(unsigned ISDOpcode) { 2862 switch (ISDOpcode) { 2863 default: 2864 llvm_unreachable("Unhandled reduction"); 2865 case ISD::VECREDUCE_ADD: 2866 return RISCVISD::VECREDUCE_ADD_VL; 2867 case ISD::VECREDUCE_UMAX: 2868 return RISCVISD::VECREDUCE_UMAX_VL; 2869 case ISD::VECREDUCE_SMAX: 2870 return RISCVISD::VECREDUCE_SMAX_VL; 2871 case ISD::VECREDUCE_UMIN: 2872 return RISCVISD::VECREDUCE_UMIN_VL; 2873 case ISD::VECREDUCE_SMIN: 2874 return RISCVISD::VECREDUCE_SMIN_VL; 2875 case ISD::VECREDUCE_AND: 2876 return RISCVISD::VECREDUCE_AND_VL; 2877 case ISD::VECREDUCE_OR: 2878 return RISCVISD::VECREDUCE_OR_VL; 2879 case ISD::VECREDUCE_XOR: 2880 return RISCVISD::VECREDUCE_XOR_VL; 2881 } 2882 } 2883 2884 SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op, 2885 SelectionDAG &DAG) const { 2886 SDLoc DL(Op); 2887 SDValue Vec = Op.getOperand(0); 2888 EVT VecEVT = Vec.getValueType(); 2889 2890 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode()); 2891 2892 // Due to ordering in legalize types we may have a vector type that needs to 2893 // be split. Do that manually so we can get down to a legal type. 2894 while (getTypeAction(*DAG.getContext(), VecEVT) == 2895 TargetLowering::TypeSplitVector) { 2896 SDValue Lo, Hi; 2897 std::tie(Lo, Hi) = DAG.SplitVector(Vec, DL); 2898 VecEVT = Lo.getValueType(); 2899 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi); 2900 } 2901 2902 // TODO: The type may need to be widened rather than split. Or widened before 2903 // it can be split. 
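  // For example (illustrative only), an nxv16i64 VECREDUCE_ADD is illegal
  // (the largest i64 RVV type is nxv8i64), so the loop above splits it into
  // two nxv8i64 halves combined with ADD before the single VL reduction.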
2904 if (!isTypeLegal(VecEVT)) 2905 return SDValue(); 2906 2907 MVT VecVT = VecEVT.getSimpleVT(); 2908 MVT VecEltVT = VecVT.getVectorElementType(); 2909 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode()); 2910 2911 MVT ContainerVT = VecVT; 2912 if (VecVT.isFixedLengthVector()) { 2913 ContainerVT = getContainerForFixedLengthVector(VecVT); 2914 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 2915 } 2916 2917 MVT M1VT = getLMUL1VT(ContainerVT); 2918 2919 SDValue Mask, VL; 2920 std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 2921 2922 // FIXME: This is a VLMAX splat which might be too large and can prevent 2923 // vsetvli removal. 2924 SDValue NeutralElem = 2925 DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags()); 2926 SDValue IdentitySplat = DAG.getSplatVector(M1VT, DL, NeutralElem); 2927 SDValue Reduction = 2928 DAG.getNode(RVVOpcode, DL, M1VT, Vec, IdentitySplat, Mask, VL); 2929 SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction, 2930 DAG.getConstant(0, DL, Subtarget.getXLenVT())); 2931 return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType()); 2932 } 2933 2934 // Given a reduction op, this function returns the matching reduction opcode, 2935 // the vector SDValue and the scalar SDValue required to lower this to a 2936 // RISCVISD node. 2937 static std::tuple<unsigned, SDValue, SDValue> 2938 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) { 2939 SDLoc DL(Op); 2940 switch (Op.getOpcode()) { 2941 default: 2942 llvm_unreachable("Unhandled reduction"); 2943 case ISD::VECREDUCE_FADD: 2944 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), 2945 DAG.getConstantFP(0.0, DL, EltVT)); 2946 case ISD::VECREDUCE_SEQ_FADD: 2947 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1), 2948 Op.getOperand(0)); 2949 } 2950 } 2951 2952 SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op, 2953 SelectionDAG &DAG) const { 2954 SDLoc DL(Op); 2955 MVT VecEltVT = Op.getSimpleValueType(); 2956 2957 unsigned RVVOpcode; 2958 SDValue VectorVal, ScalarVal; 2959 std::tie(RVVOpcode, VectorVal, ScalarVal) = 2960 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT); 2961 MVT VecVT = VectorVal.getSimpleValueType(); 2962 2963 MVT ContainerVT = VecVT; 2964 if (VecVT.isFixedLengthVector()) { 2965 ContainerVT = getContainerForFixedLengthVector(VecVT); 2966 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget); 2967 } 2968 2969 MVT M1VT = getLMUL1VT(VectorVal.getSimpleValueType()); 2970 2971 SDValue Mask, VL; 2972 std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 2973 2974 // FIXME: This is a VLMAX splat which might be too large and can prevent 2975 // vsetvli removal. 
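  // (The RVV reduction instructions only read element 0 of the start-value
  // vector, so a VL=1 splat would be sufficient here.)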
2976 SDValue ScalarSplat = DAG.getSplatVector(M1VT, DL, ScalarVal); 2977 SDValue Reduction = 2978 DAG.getNode(RVVOpcode, DL, M1VT, VectorVal, ScalarSplat, Mask, VL); 2979 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction, 2980 DAG.getConstant(0, DL, Subtarget.getXLenVT())); 2981 } 2982 2983 SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op, 2984 SelectionDAG &DAG) const { 2985 SDValue Vec = Op.getOperand(0); 2986 SDValue SubVec = Op.getOperand(1); 2987 MVT VecVT = Vec.getSimpleValueType(); 2988 MVT SubVecVT = SubVec.getSimpleValueType(); 2989 2990 SDLoc DL(Op); 2991 MVT XLenVT = Subtarget.getXLenVT(); 2992 unsigned OrigIdx = Op.getConstantOperandVal(2); 2993 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); 2994 2995 // We don't have the ability to slide mask vectors up indexed by their i1 2996 // elements; the smallest we can do is i8. Often we are able to bitcast to 2997 // equivalent i8 vectors. Note that when inserting a fixed-length vector 2998 // into a scalable one, we might not necessarily have enough scalable 2999 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid. 3000 if (SubVecVT.getVectorElementType() == MVT::i1 && 3001 (OrigIdx != 0 || !Vec.isUndef())) { 3002 if (VecVT.getVectorMinNumElements() >= 8 && 3003 SubVecVT.getVectorMinNumElements() >= 8) { 3004 assert(OrigIdx % 8 == 0 && "Invalid index"); 3005 assert(VecVT.getVectorMinNumElements() % 8 == 0 && 3006 SubVecVT.getVectorMinNumElements() % 8 == 0 && 3007 "Unexpected mask vector lowering"); 3008 OrigIdx /= 8; 3009 SubVecVT = 3010 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8, 3011 SubVecVT.isScalableVector()); 3012 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8, 3013 VecVT.isScalableVector()); 3014 Vec = DAG.getBitcast(VecVT, Vec); 3015 SubVec = DAG.getBitcast(SubVecVT, SubVec); 3016 } else { 3017 // We can't slide this mask vector up indexed by its i1 elements. 3018 // This poses a problem when we wish to insert a scalable vector which 3019 // can't be re-expressed as a larger type. Just choose the slow path and 3020 // extend to a larger type, then truncate back down. 3021 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8); 3022 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8); 3023 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec); 3024 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec); 3025 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec, 3026 Op.getOperand(2)); 3027 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT); 3028 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE); 3029 } 3030 } 3031 3032 // If the subvector vector is a fixed-length type, we cannot use subregister 3033 // manipulation to simplify the codegen; we don't know which register of a 3034 // LMUL group contains the specific subvector as we only know the minimum 3035 // register size. Therefore we must slide the vector group up the full 3036 // amount. 
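  // For example, inserting a v2i32 subvector at index 2 of a v8i32 vector is
  // done below with a vslideup of 2 whose VL is limited to 4 (index plus
  // subvector length), leaving the remaining elements undisturbed.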
3037 if (SubVecVT.isFixedLengthVector()) { 3038 if (OrigIdx == 0 && Vec.isUndef()) 3039 return Op; 3040 MVT ContainerVT = VecVT; 3041 if (VecVT.isFixedLengthVector()) { 3042 ContainerVT = getContainerForFixedLengthVector(VecVT); 3043 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 3044 } 3045 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, 3046 DAG.getUNDEF(ContainerVT), SubVec, 3047 DAG.getConstant(0, DL, XLenVT)); 3048 SDValue Mask = 3049 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first; 3050 // Set the vector length to only the number of elements we care about. Note 3051 // that for slideup this includes the offset. 3052 SDValue VL = 3053 DAG.getConstant(OrigIdx + SubVecVT.getVectorNumElements(), DL, XLenVT); 3054 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT); 3055 SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec, 3056 SubVec, SlideupAmt, Mask, VL); 3057 if (VecVT.isFixedLengthVector()) 3058 Slideup = convertFromScalableVector(VecVT, Slideup, DAG, Subtarget); 3059 return DAG.getBitcast(Op.getValueType(), Slideup); 3060 } 3061 3062 unsigned SubRegIdx, RemIdx; 3063 std::tie(SubRegIdx, RemIdx) = 3064 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 3065 VecVT, SubVecVT, OrigIdx, TRI); 3066 3067 RISCVVLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT); 3068 bool IsSubVecPartReg = SubVecLMUL == RISCVVLMUL::LMUL_F2 || 3069 SubVecLMUL == RISCVVLMUL::LMUL_F4 || 3070 SubVecLMUL == RISCVVLMUL::LMUL_F8; 3071 3072 // 1. If the Idx has been completely eliminated and this subvector's size is 3073 // a vector register or a multiple thereof, or the surrounding elements are 3074 // undef, then this is a subvector insert which naturally aligns to a vector 3075 // register. These can easily be handled using subregister manipulation. 3076 // 2. If the subvector is smaller than a vector register, then the insertion 3077 // must preserve the undisturbed elements of the register. We do this by 3078 // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type 3079 // (which resolves to a subregister copy), performing a VSLIDEUP to place the 3080 // subvector within the vector register, and an INSERT_SUBVECTOR of that 3081 // LMUL=1 type back into the larger vector (resolving to another subregister 3082 // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type 3083 // to avoid allocating a large register group to hold our subvector. 3084 if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef())) 3085 return Op; 3086 3087 // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements 3088 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy 3089 // (in our case undisturbed). This means we can set up a subvector insertion 3090 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the 3091 // size of the subvector. 3092 MVT InterSubVT = VecVT; 3093 SDValue AlignedExtract = Vec; 3094 unsigned AlignedIdx = OrigIdx - RemIdx; 3095 if (VecVT.bitsGT(getLMUL1VT(VecVT))) { 3096 InterSubVT = getLMUL1VT(VecVT); 3097 // Extract a subvector equal to the nearest full vector register type. This 3098 // should resolve to a EXTRACT_SUBREG instruction. 3099 AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec, 3100 DAG.getConstant(AlignedIdx, DL, XLenVT)); 3101 } 3102 3103 SDValue SlideupAmt = DAG.getConstant(RemIdx, DL, XLenVT); 3104 // For scalable vectors this must be further multiplied by vscale. 
3105 SlideupAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlideupAmt); 3106 3107 SDValue Mask, VL; 3108 std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget); 3109 3110 // Construct the vector length corresponding to RemIdx + length(SubVecVT). 3111 VL = DAG.getConstant(SubVecVT.getVectorMinNumElements(), DL, XLenVT); 3112 VL = DAG.getNode(ISD::VSCALE, DL, XLenVT, VL); 3113 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL); 3114 3115 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT, 3116 DAG.getUNDEF(InterSubVT), SubVec, 3117 DAG.getConstant(0, DL, XLenVT)); 3118 3119 SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, InterSubVT, 3120 AlignedExtract, SubVec, SlideupAmt, Mask, VL); 3121 3122 // If required, insert this subvector back into the correct vector register. 3123 // This should resolve to an INSERT_SUBREG instruction. 3124 if (VecVT.bitsGT(InterSubVT)) 3125 Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, Slideup, 3126 DAG.getConstant(AlignedIdx, DL, XLenVT)); 3127 3128 // We might have bitcast from a mask type: cast back to the original type if 3129 // required. 3130 return DAG.getBitcast(Op.getSimpleValueType(), Slideup); 3131 } 3132 3133 SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op, 3134 SelectionDAG &DAG) const { 3135 SDValue Vec = Op.getOperand(0); 3136 MVT SubVecVT = Op.getSimpleValueType(); 3137 MVT VecVT = Vec.getSimpleValueType(); 3138 3139 SDLoc DL(Op); 3140 MVT XLenVT = Subtarget.getXLenVT(); 3141 unsigned OrigIdx = Op.getConstantOperandVal(1); 3142 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); 3143 3144 // We don't have the ability to slide mask vectors down indexed by their i1 3145 // elements; the smallest we can do is i8. Often we are able to bitcast to 3146 // equivalent i8 vectors. Note that when extracting a fixed-length vector 3147 // from a scalable one, we might not necessarily have enough scalable 3148 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid. 3149 if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) { 3150 if (VecVT.getVectorMinNumElements() >= 8 && 3151 SubVecVT.getVectorMinNumElements() >= 8) { 3152 assert(OrigIdx % 8 == 0 && "Invalid index"); 3153 assert(VecVT.getVectorMinNumElements() % 8 == 0 && 3154 SubVecVT.getVectorMinNumElements() % 8 == 0 && 3155 "Unexpected mask vector lowering"); 3156 OrigIdx /= 8; 3157 SubVecVT = 3158 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8, 3159 SubVecVT.isScalableVector()); 3160 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8, 3161 VecVT.isScalableVector()); 3162 Vec = DAG.getBitcast(VecVT, Vec); 3163 } else { 3164 // We can't slide this mask vector down, indexed by its i1 elements. 3165 // This poses a problem when we wish to extract a scalable vector which 3166 // can't be re-expressed as a larger type. Just choose the slow path and 3167 // extend to a larger type, then truncate back down. 3168 // TODO: We could probably improve this when extracting certain fixed 3169 // from fixed, where we can extract as i8 and shift the correct element 3170 // right to reach the desired subvector? 
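      // The final conversion back to i1 is done as a compare-with-zero, which
      // is correct here because the extended values were zero-extended from i1
      // and so are either 0 or 1.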
3171 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8); 3172 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8); 3173 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec); 3174 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec, 3175 Op.getOperand(1)); 3176 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT); 3177 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE); 3178 } 3179 } 3180 3181 // If the subvector vector is a fixed-length type, we cannot use subregister 3182 // manipulation to simplify the codegen; we don't know which register of a 3183 // LMUL group contains the specific subvector as we only know the minimum 3184 // register size. Therefore we must slide the vector group down the full 3185 // amount. 3186 if (SubVecVT.isFixedLengthVector()) { 3187 // With an index of 0 this is a cast-like subvector, which can be performed 3188 // with subregister operations. 3189 if (OrigIdx == 0) 3190 return Op; 3191 MVT ContainerVT = VecVT; 3192 if (VecVT.isFixedLengthVector()) { 3193 ContainerVT = getContainerForFixedLengthVector(VecVT); 3194 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 3195 } 3196 SDValue Mask = 3197 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first; 3198 // Set the vector length to only the number of elements we care about. This 3199 // avoids sliding down elements we're going to discard straight away. 3200 SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT); 3201 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT); 3202 SDValue Slidedown = 3203 DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT, 3204 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL); 3205 // Now we can use a cast-like subvector extract to get the result. 3206 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown, 3207 DAG.getConstant(0, DL, XLenVT)); 3208 return DAG.getBitcast(Op.getValueType(), Slidedown); 3209 } 3210 3211 unsigned SubRegIdx, RemIdx; 3212 std::tie(SubRegIdx, RemIdx) = 3213 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 3214 VecVT, SubVecVT, OrigIdx, TRI); 3215 3216 // If the Idx has been completely eliminated then this is a subvector extract 3217 // which naturally aligns to a vector register. These can easily be handled 3218 // using subregister manipulation. 3219 if (RemIdx == 0) 3220 return Op; 3221 3222 // Else we must shift our vector register directly to extract the subvector. 3223 // Do this using VSLIDEDOWN. 3224 3225 // If the vector type is an LMUL-group type, extract a subvector equal to the 3226 // nearest full vector register type. This should resolve to a EXTRACT_SUBREG 3227 // instruction. 3228 MVT InterSubVT = VecVT; 3229 if (VecVT.bitsGT(getLMUL1VT(VecVT))) { 3230 InterSubVT = getLMUL1VT(VecVT); 3231 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec, 3232 DAG.getConstant(OrigIdx - RemIdx, DL, XLenVT)); 3233 } 3234 3235 // Slide this vector register down by the desired number of elements in order 3236 // to place the desired subvector starting at element 0. 3237 SDValue SlidedownAmt = DAG.getConstant(RemIdx, DL, XLenVT); 3238 // For scalable vectors this must be further multiplied by vscale. 
  SlidedownAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlidedownAmt);

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
  SDValue Slidedown =
      DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, InterSubVT,
                  DAG.getUNDEF(InterSubVT), Vec, SlidedownAmt, Mask, VL);

  // Now the vector is in the right position, extract our final subvector. This
  // should resolve to a COPY.
  Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
                          DAG.getConstant(0, DL, XLenVT));

  // We might have bitcast from a mask type: cast back to the original type if
  // required.
  return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
}

// Implement vector_reverse using vrgather.vv with indices determined by
// subtracting the id of each element from (VLMAX-1). This will convert
// the indices like so:
// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  unsigned EltSize = VecVT.getScalarSizeInBits();
  unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();

  unsigned MaxVLMAX = 0;
  unsigned VectorBitsMax = Subtarget.getMaxRVVVectorSizeInBits();
  if (VectorBitsMax != 0)
    MaxVLMAX = ((VectorBitsMax / EltSize) * MinSize) / RISCV::RVVBitsPerBlock;

  unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
  MVT IntVT = VecVT.changeVectorElementTypeToInteger();

  // If this is SEW=8 and VLMAX is unknown or more than 256, we need
  // to use vrgatherei16.vv.
  // TODO: It's also possible to use vrgatherei16.vv for other types to
  // decrease register width for the index calculation.
  if ((MaxVLMAX == 0 || MaxVLMAX > 256) && EltSize == 8) {
    // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
    // Reverse each half, then reassemble them in reverse order.
    // NOTE: It's also possible that, after splitting, VLMAX no longer
    // requires vrgatherei16.vv.
    if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
      SDValue Lo, Hi;
      std::tie(Lo, Hi) = DAG.SplitVectorOperand(Op.getNode(), 0);
      EVT LoVT, HiVT;
      std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT);
      Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
      Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
      // Reassemble the low and high pieces reversed.
      // FIXME: This is a CONCAT_VECTORS.
      SDValue Res =
          DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
                      DAG.getIntPtrConstant(0, DL));
      return DAG.getNode(
          ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
          DAG.getIntPtrConstant(LoVT.getVectorMinNumElements(), DL));
    }

    // Just promote the int type to i16 which will double the LMUL.
    IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount());
    GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
  }

  MVT XLenVT = Subtarget.getXLenVT();
  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);

  // Calculate VLMAX-1 for the desired SEW.
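  // (VLMAX here is vscale * MinNumElts; e.g. for nxv4i32 the value splatted
  // below is (vscale * 4) - 1.)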
3313 unsigned MinElts = VecVT.getVectorMinNumElements(); 3314 SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT, 3315 DAG.getConstant(MinElts, DL, XLenVT)); 3316 SDValue VLMinus1 = 3317 DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DAG.getConstant(1, DL, XLenVT)); 3318 3319 // Splat VLMAX-1 taking care to handle SEW==64 on RV32. 3320 bool IsRV32E64 = 3321 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64; 3322 SDValue SplatVL; 3323 if (!IsRV32E64) 3324 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1); 3325 else 3326 SplatVL = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, IntVT, VLMinus1); 3327 3328 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL); 3329 SDValue Indices = 3330 DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID, Mask, VL); 3331 3332 return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices, Mask, VL); 3333 } 3334 3335 SDValue 3336 RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op, 3337 SelectionDAG &DAG) const { 3338 auto *Load = cast<LoadSDNode>(Op); 3339 3340 SDLoc DL(Op); 3341 MVT VT = Op.getSimpleValueType(); 3342 MVT ContainerVT = getContainerForFixedLengthVector(VT); 3343 3344 SDValue VL = 3345 DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT()); 3346 3347 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 3348 SDValue NewLoad = DAG.getMemIntrinsicNode( 3349 RISCVISD::VLE_VL, DL, VTs, {Load->getChain(), Load->getBasePtr(), VL}, 3350 Load->getMemoryVT(), Load->getMemOperand()); 3351 3352 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget); 3353 return DAG.getMergeValues({Result, Load->getChain()}, DL); 3354 } 3355 3356 SDValue 3357 RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op, 3358 SelectionDAG &DAG) const { 3359 auto *Store = cast<StoreSDNode>(Op); 3360 3361 SDLoc DL(Op); 3362 MVT VT = Store->getValue().getSimpleValueType(); 3363 3364 // FIXME: We probably need to zero any extra bits in a byte for mask stores. 3365 // This is tricky to do. 
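  // (For example, a v4i1 store still writes a full byte, of which only the
  // low four bits are meaningful.)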
3366 3367 MVT ContainerVT = getContainerForFixedLengthVector(VT); 3368 3369 SDValue VL = 3370 DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT()); 3371 3372 SDValue NewValue = 3373 convertToScalableVector(ContainerVT, Store->getValue(), DAG, Subtarget); 3374 return DAG.getMemIntrinsicNode( 3375 RISCVISD::VSE_VL, DL, DAG.getVTList(MVT::Other), 3376 {Store->getChain(), NewValue, Store->getBasePtr(), VL}, 3377 Store->getMemoryVT(), Store->getMemOperand()); 3378 } 3379 3380 SDValue RISCVTargetLowering::lowerMLOAD(SDValue Op, SelectionDAG &DAG) const { 3381 auto *Load = cast<MaskedLoadSDNode>(Op); 3382 3383 SDLoc DL(Op); 3384 MVT VT = Op.getSimpleValueType(); 3385 MVT XLenVT = Subtarget.getXLenVT(); 3386 3387 SDValue Mask = Load->getMask(); 3388 SDValue PassThru = Load->getPassThru(); 3389 SDValue VL; 3390 3391 MVT ContainerVT = VT; 3392 if (VT.isFixedLengthVector()) { 3393 ContainerVT = getContainerForFixedLengthVector(VT); 3394 MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 3395 3396 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 3397 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget); 3398 VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT); 3399 } else 3400 VL = DAG.getRegister(RISCV::X0, XLenVT); 3401 3402 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 3403 SDValue IntID = DAG.getTargetConstant(Intrinsic::riscv_vle_mask, DL, XLenVT); 3404 SDValue Ops[] = {Load->getChain(), IntID, PassThru, 3405 Load->getBasePtr(), Mask, VL}; 3406 SDValue Result = 3407 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, 3408 Load->getMemoryVT(), Load->getMemOperand()); 3409 SDValue Chain = Result.getValue(1); 3410 3411 if (VT.isFixedLengthVector()) 3412 Result = convertFromScalableVector(VT, Result, DAG, Subtarget); 3413 3414 return DAG.getMergeValues({Result, Chain}, DL); 3415 } 3416 3417 SDValue RISCVTargetLowering::lowerMSTORE(SDValue Op, SelectionDAG &DAG) const { 3418 auto *Store = cast<MaskedStoreSDNode>(Op); 3419 3420 SDLoc DL(Op); 3421 SDValue Val = Store->getValue(); 3422 SDValue Mask = Store->getMask(); 3423 MVT VT = Val.getSimpleValueType(); 3424 MVT XLenVT = Subtarget.getXLenVT(); 3425 SDValue VL; 3426 3427 MVT ContainerVT = VT; 3428 if (VT.isFixedLengthVector()) { 3429 ContainerVT = getContainerForFixedLengthVector(VT); 3430 MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 3431 3432 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget); 3433 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 3434 VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT); 3435 } else 3436 VL = DAG.getRegister(RISCV::X0, XLenVT); 3437 3438 SDValue IntID = DAG.getTargetConstant(Intrinsic::riscv_vse_mask, DL, XLenVT); 3439 return DAG.getMemIntrinsicNode( 3440 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), 3441 {Store->getChain(), IntID, Val, Store->getBasePtr(), Mask, VL}, 3442 Store->getMemoryVT(), Store->getMemOperand()); 3443 } 3444 3445 SDValue 3446 RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op, 3447 SelectionDAG &DAG) const { 3448 MVT InVT = Op.getOperand(0).getSimpleValueType(); 3449 MVT ContainerVT = getContainerForFixedLengthVector(InVT); 3450 3451 MVT VT = Op.getSimpleValueType(); 3452 3453 SDValue Op1 = 3454 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget); 3455 SDValue Op2 = 3456 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget); 3457 3458 SDLoc DL(Op); 3459 SDValue VL = 3460 
DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT()); 3461 3462 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); 3463 3464 bool Invert = false; 3465 Optional<unsigned> LogicOpc; 3466 if (ContainerVT.isFloatingPoint()) { 3467 bool Swap = false; 3468 switch (CC) { 3469 default: 3470 break; 3471 case ISD::SETULE: 3472 case ISD::SETULT: 3473 Swap = true; 3474 LLVM_FALLTHROUGH; 3475 case ISD::SETUGE: 3476 case ISD::SETUGT: 3477 CC = getSetCCInverse(CC, ContainerVT); 3478 Invert = true; 3479 break; 3480 case ISD::SETOGE: 3481 case ISD::SETOGT: 3482 case ISD::SETGE: 3483 case ISD::SETGT: 3484 Swap = true; 3485 break; 3486 case ISD::SETUEQ: 3487 // Use !((OLT Op1, Op2) || (OLT Op2, Op1)) 3488 Invert = true; 3489 LogicOpc = RISCVISD::VMOR_VL; 3490 CC = ISD::SETOLT; 3491 break; 3492 case ISD::SETONE: 3493 // Use ((OLT Op1, Op2) || (OLT Op2, Op1)) 3494 LogicOpc = RISCVISD::VMOR_VL; 3495 CC = ISD::SETOLT; 3496 break; 3497 case ISD::SETO: 3498 // Use (OEQ Op1, Op1) && (OEQ Op2, Op2) 3499 LogicOpc = RISCVISD::VMAND_VL; 3500 CC = ISD::SETOEQ; 3501 break; 3502 case ISD::SETUO: 3503 // Use (UNE Op1, Op1) || (UNE Op2, Op2) 3504 LogicOpc = RISCVISD::VMOR_VL; 3505 CC = ISD::SETUNE; 3506 break; 3507 } 3508 3509 if (Swap) { 3510 CC = getSetCCSwappedOperands(CC); 3511 std::swap(Op1, Op2); 3512 } 3513 } 3514 3515 MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 3516 SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); 3517 3518 // There are 3 cases we need to emit. 3519 // 1. For (OEQ Op1, Op1) && (OEQ Op2, Op2) or (UNE Op1, Op1) || (UNE Op2, Op2) 3520 // we need to compare each operand with itself. 3521 // 2. For (OLT Op1, Op2) || (OLT Op2, Op1) we need to compare Op1 and Op2 in 3522 // both orders. 3523 // 3. For any other case we just need one compare with Op1 and Op2. 3524 SDValue Cmp; 3525 if (LogicOpc && (CC == ISD::SETOEQ || CC == ISD::SETUNE)) { 3526 Cmp = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op1, Op1, 3527 DAG.getCondCode(CC), Mask, VL); 3528 SDValue Cmp2 = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op2, Op2, 3529 DAG.getCondCode(CC), Mask, VL); 3530 Cmp = DAG.getNode(*LogicOpc, DL, MaskVT, Cmp, Cmp2, VL); 3531 } else { 3532 Cmp = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op1, Op2, 3533 DAG.getCondCode(CC), Mask, VL); 3534 if (LogicOpc) { 3535 SDValue Cmp2 = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op2, Op1, 3536 DAG.getCondCode(CC), Mask, VL); 3537 Cmp = DAG.getNode(*LogicOpc, DL, MaskVT, Cmp, Cmp2, VL); 3538 } 3539 } 3540 3541 if (Invert) { 3542 SDValue AllOnes = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); 3543 Cmp = DAG.getNode(RISCVISD::VMXOR_VL, DL, MaskVT, Cmp, AllOnes, VL); 3544 } 3545 3546 return convertFromScalableVector(VT, Cmp, DAG, Subtarget); 3547 } 3548 3549 SDValue RISCVTargetLowering::lowerFixedLengthVectorLogicOpToRVV( 3550 SDValue Op, SelectionDAG &DAG, unsigned MaskOpc, unsigned VecOpc) const { 3551 MVT VT = Op.getSimpleValueType(); 3552 3553 if (VT.getVectorElementType() == MVT::i1) 3554 return lowerToScalableOp(Op, DAG, MaskOpc, /*HasMask*/ false); 3555 3556 return lowerToScalableOp(Op, DAG, VecOpc, /*HasMask*/ true); 3557 } 3558 3559 // Lower vector ABS to smax(X, sub(0, X)). 
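// For a fixed-length vector this typically selects to a vrsub.vi (computing
// 0 - X) followed by a vmax.vv, so no dedicated vector ABS instruction is
// needed.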
SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  SDValue X = Op.getOperand(0);

  assert(VT.isFixedLengthVector() && "Unexpected type");

  MVT ContainerVT = getContainerForFixedLengthVector(VT);
  X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  SDValue SplatZero =
      DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                  DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
  SDValue NegX =
      DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X, Mask, VL);
  SDValue Max =
      DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX, Mask, VL);

  return convertFromScalableVector(VT, Max, DAG, Subtarget);
}

SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
    SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  SDValue Mag = Op.getOperand(0);
  SDValue Sign = Op.getOperand(1);
  assert(Mag.getValueType() == Sign.getValueType() &&
         "Can only handle COPYSIGN with matching types.");

  MVT ContainerVT = getContainerForFixedLengthVector(VT);
  Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
  Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  SDValue CopySign =
      DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag, Sign, Mask, VL);

  return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
}

SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
    SDValue Op, SelectionDAG &DAG) const {
  MVT VT = Op.getSimpleValueType();
  MVT ContainerVT = getContainerForFixedLengthVector(VT);

  MVT I1ContainerVT =
      MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());

  SDValue CC =
      convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
  SDValue Op1 =
      convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
  SDValue Op2 =
      convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);

  SDLoc DL(Op);
  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  SDValue Select =
      DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, Op1, Op2, VL);

  return convertFromScalableVector(VT, Select, DAG, Subtarget);
}

SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, SelectionDAG &DAG,
                                               unsigned NewOpc,
                                               bool HasMask) const {
  MVT VT = Op.getSimpleValueType();
  assert(useRVVForFixedLengthVectorVT(VT) &&
         "Only expected to lower fixed length vector operation!");
  MVT ContainerVT = getContainerForFixedLengthVector(VT);

  // Create list of operands by converting existing ones to scalable types.
  SmallVector<SDValue, 6> Ops;
  for (const SDValue &V : Op->op_values()) {
    assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");

    // Pass through non-vector operands.
    if (!V.getValueType().isVector()) {
      Ops.push_back(V);
      continue;
    }

    // "cast" fixed length vector to a scalable vector.
3651 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) && 3652 "Only fixed length vectors are supported!"); 3653 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget)); 3654 } 3655 3656 SDLoc DL(Op); 3657 SDValue Mask, VL; 3658 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 3659 if (HasMask) 3660 Ops.push_back(Mask); 3661 Ops.push_back(VL); 3662 3663 SDValue ScalableRes = DAG.getNode(NewOpc, DL, ContainerVT, Ops); 3664 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget); 3665 } 3666 3667 // Custom lower MGATHER to a legalized form for RVV. It will then be matched to 3668 // a RVV indexed load. The RVV indexed load instructions only support the 3669 // "unsigned unscaled" addressing mode; indices are implicitly zero-extended or 3670 // truncated to XLEN and are treated as byte offsets. Any signed or scaled 3671 // indexing is extended to the XLEN value type and scaled accordingly. 3672 SDValue RISCVTargetLowering::lowerMGATHER(SDValue Op, SelectionDAG &DAG) const { 3673 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode()); 3674 SDLoc DL(Op); 3675 3676 SDValue Index = MGN->getIndex(); 3677 SDValue Mask = MGN->getMask(); 3678 SDValue PassThru = MGN->getPassThru(); 3679 3680 MVT VT = Op.getSimpleValueType(); 3681 MVT IndexVT = Index.getSimpleValueType(); 3682 MVT XLenVT = Subtarget.getXLenVT(); 3683 3684 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 3685 "Unexpected VTs!"); 3686 assert(MGN->getBasePtr().getSimpleValueType() == XLenVT && 3687 "Unexpected pointer type"); 3688 // Targets have to explicitly opt-in for extending vector loads. 3689 assert(MGN->getExtensionType() == ISD::NON_EXTLOAD && 3690 "Unexpected extending MGATHER"); 3691 3692 // If the mask is known to be all ones, optimize to an unmasked intrinsic; 3693 // the selection of the masked intrinsics doesn't do this for us. 3694 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 3695 3696 SDValue VL; 3697 MVT ContainerVT = VT; 3698 if (VT.isFixedLengthVector()) { 3699 // We need to use the larger of the result and index type to determine the 3700 // scalable type to use so we don't increase LMUL for any operand/result. 3701 if (VT.bitsGE(IndexVT)) { 3702 ContainerVT = getContainerForFixedLengthVector(VT); 3703 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), 3704 ContainerVT.getVectorElementCount()); 3705 } else { 3706 IndexVT = getContainerForFixedLengthVector(IndexVT); 3707 ContainerVT = MVT::getVectorVT(ContainerVT.getVectorElementType(), 3708 IndexVT.getVectorElementCount()); 3709 } 3710 3711 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget); 3712 3713 if (!IsUnmasked) { 3714 MVT MaskVT = 3715 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 3716 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 3717 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget); 3718 } 3719 3720 VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT); 3721 } else 3722 VL = DAG.getRegister(RISCV::X0, XLenVT); 3723 3724 unsigned IntID = 3725 IsUnmasked ? 
Intrinsic::riscv_vloxei : Intrinsic::riscv_vloxei_mask; 3726 SmallVector<SDValue, 8> Ops{MGN->getChain(), 3727 DAG.getTargetConstant(IntID, DL, XLenVT)}; 3728 if (!IsUnmasked) 3729 Ops.push_back(PassThru); 3730 Ops.push_back(MGN->getBasePtr()); 3731 Ops.push_back(Index); 3732 if (!IsUnmasked) 3733 Ops.push_back(Mask); 3734 Ops.push_back(VL); 3735 3736 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 3737 SDValue Result = 3738 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, 3739 MGN->getMemoryVT(), MGN->getMemOperand()); 3740 SDValue Chain = Result.getValue(1); 3741 3742 if (VT.isFixedLengthVector()) 3743 Result = convertFromScalableVector(VT, Result, DAG, Subtarget); 3744 3745 return DAG.getMergeValues({Result, Chain}, DL); 3746 } 3747 3748 // Custom lower MSCATTER to a legalized form for RVV. It will then be matched to 3749 // a RVV indexed store. The RVV indexed store instructions only support the 3750 // "unsigned unscaled" addressing mode; indices are implicitly zero-extended or 3751 // truncated to XLEN and are treated as byte offsets. Any signed or scaled 3752 // indexing is extended to the XLEN value type and scaled accordingly. 3753 SDValue RISCVTargetLowering::lowerMSCATTER(SDValue Op, 3754 SelectionDAG &DAG) const { 3755 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode()); 3756 SDLoc DL(Op); 3757 SDValue Index = MSN->getIndex(); 3758 SDValue Mask = MSN->getMask(); 3759 SDValue Val = MSN->getValue(); 3760 3761 MVT VT = Val.getSimpleValueType(); 3762 MVT IndexVT = Index.getSimpleValueType(); 3763 MVT XLenVT = Subtarget.getXLenVT(); 3764 3765 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 3766 "Unexpected VTs!"); 3767 assert(MSN->getBasePtr().getSimpleValueType() == XLenVT && 3768 "Unexpected pointer type"); 3769 // Targets have to explicitly opt-in for extending vector loads and 3770 // truncating vector stores. 3771 assert(!MSN->isTruncatingStore() && "Unexpected extending MSCATTER"); 3772 3773 // If the mask is known to be all ones, optimize to an unmasked intrinsic; 3774 // the selection of the masked intrinsics doesn't do this for us. 3775 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 3776 3777 SDValue VL; 3778 if (VT.isFixedLengthVector()) { 3779 // We need to use the larger of the value and index type to determine the 3780 // scalable type to use so we don't increase LMUL for any operand/result. 3781 if (VT.bitsGE(IndexVT)) { 3782 VT = getContainerForFixedLengthVector(VT); 3783 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), 3784 VT.getVectorElementCount()); 3785 } else { 3786 IndexVT = getContainerForFixedLengthVector(IndexVT); 3787 VT = MVT::getVectorVT(VT.getVectorElementType(), 3788 IndexVT.getVectorElementCount()); 3789 } 3790 3791 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget); 3792 Val = convertToScalableVector(VT, Val, DAG, Subtarget); 3793 3794 if (!IsUnmasked) { 3795 MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount()); 3796 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 3797 } 3798 3799 VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT); 3800 } else 3801 VL = DAG.getRegister(RISCV::X0, XLenVT); 3802 3803 unsigned IntID = 3804 IsUnmasked ? 
Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask; 3805 SmallVector<SDValue, 8> Ops{MSN->getChain(), 3806 DAG.getTargetConstant(IntID, DL, XLenVT)}; 3807 Ops.push_back(Val); 3808 Ops.push_back(MSN->getBasePtr()); 3809 Ops.push_back(Index); 3810 if (!IsUnmasked) 3811 Ops.push_back(Mask); 3812 Ops.push_back(VL); 3813 3814 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, MSN->getVTList(), Ops, 3815 MSN->getMemoryVT(), MSN->getMemOperand()); 3816 } 3817 3818 // Returns the opcode of the target-specific SDNode that implements the 32-bit 3819 // form of the given Opcode. 3820 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) { 3821 switch (Opcode) { 3822 default: 3823 llvm_unreachable("Unexpected opcode"); 3824 case ISD::SHL: 3825 return RISCVISD::SLLW; 3826 case ISD::SRA: 3827 return RISCVISD::SRAW; 3828 case ISD::SRL: 3829 return RISCVISD::SRLW; 3830 case ISD::SDIV: 3831 return RISCVISD::DIVW; 3832 case ISD::UDIV: 3833 return RISCVISD::DIVUW; 3834 case ISD::UREM: 3835 return RISCVISD::REMUW; 3836 case ISD::ROTL: 3837 return RISCVISD::ROLW; 3838 case ISD::ROTR: 3839 return RISCVISD::RORW; 3840 case RISCVISD::GREVI: 3841 return RISCVISD::GREVIW; 3842 case RISCVISD::GORCI: 3843 return RISCVISD::GORCIW; 3844 } 3845 } 3846 3847 // Converts the given 32-bit operation to a target-specific SelectionDAG node. 3848 // Because i32 isn't a legal type for RV64, these operations would otherwise 3849 // be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W 3850 // later one because the fact the operation was originally of type i32 is 3851 // lost. 3852 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, 3853 unsigned ExtOpc = ISD::ANY_EXTEND) { 3854 SDLoc DL(N); 3855 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode()); 3856 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0)); 3857 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1)); 3858 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1); 3859 // ReplaceNodeResults requires we maintain the same type for the return value. 3860 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes); 3861 } 3862 3863 // Converts the given 32-bit operation to a i64 operation with signed extension 3864 // semantic to reduce the signed extension instructions. 3865 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) { 3866 SDLoc DL(N); 3867 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 3868 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 3869 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1); 3870 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp, 3871 DAG.getValueType(MVT::i32)); 3872 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes); 3873 } 3874 3875 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, 3876 SmallVectorImpl<SDValue> &Results, 3877 SelectionDAG &DAG) const { 3878 SDLoc DL(N); 3879 switch (N->getOpcode()) { 3880 default: 3881 llvm_unreachable("Don't know how to custom type legalize this operation!"); 3882 case ISD::STRICT_FP_TO_SINT: 3883 case ISD::STRICT_FP_TO_UINT: 3884 case ISD::FP_TO_SINT: 3885 case ISD::FP_TO_UINT: { 3886 bool IsStrict = N->isStrictFPOpcode(); 3887 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 3888 "Unexpected custom legalisation"); 3889 SDValue Op0 = IsStrict ? 
N->getOperand(1) : N->getOperand(0); 3890 // If the FP type needs to be softened, emit a library call using the 'si' 3891 // version. If we left it to default legalization we'd end up with 'di'. If 3892 // the FP type doesn't need to be softened just let generic type 3893 // legalization promote the result type. 3894 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) != 3895 TargetLowering::TypeSoftenFloat) 3896 return; 3897 RTLIB::Libcall LC; 3898 if (N->getOpcode() == ISD::FP_TO_SINT || 3899 N->getOpcode() == ISD::STRICT_FP_TO_SINT) 3900 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0)); 3901 else 3902 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0)); 3903 MakeLibCallOptions CallOptions; 3904 EVT OpVT = Op0.getValueType(); 3905 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true); 3906 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); 3907 SDValue Result; 3908 std::tie(Result, Chain) = 3909 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain); 3910 Results.push_back(Result); 3911 if (IsStrict) 3912 Results.push_back(Chain); 3913 break; 3914 } 3915 case ISD::READCYCLECOUNTER: { 3916 assert(!Subtarget.is64Bit() && 3917 "READCYCLECOUNTER only has custom type legalization on riscv32"); 3918 3919 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); 3920 SDValue RCW = 3921 DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0)); 3922 3923 Results.push_back( 3924 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1))); 3925 Results.push_back(RCW.getValue(2)); 3926 break; 3927 } 3928 case ISD::ADD: 3929 case ISD::SUB: 3930 case ISD::MUL: 3931 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 3932 "Unexpected custom legalisation"); 3933 if (N->getOperand(1).getOpcode() == ISD::Constant) 3934 return; 3935 Results.push_back(customLegalizeToWOpWithSExt(N, DAG)); 3936 break; 3937 case ISD::SHL: 3938 case ISD::SRA: 3939 case ISD::SRL: 3940 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 3941 "Unexpected custom legalisation"); 3942 if (N->getOperand(1).getOpcode() == ISD::Constant) 3943 return; 3944 Results.push_back(customLegalizeToWOp(N, DAG)); 3945 break; 3946 case ISD::ROTL: 3947 case ISD::ROTR: 3948 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 3949 "Unexpected custom legalisation"); 3950 Results.push_back(customLegalizeToWOp(N, DAG)); 3951 break; 3952 case ISD::SDIV: 3953 case ISD::UDIV: 3954 case ISD::UREM: { 3955 MVT VT = N->getSimpleValueType(0); 3956 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) && 3957 Subtarget.is64Bit() && Subtarget.hasStdExtM() && 3958 "Unexpected custom legalisation"); 3959 if (N->getOperand(0).getOpcode() == ISD::Constant || 3960 N->getOperand(1).getOpcode() == ISD::Constant) 3961 return; 3962 3963 // If the input is i32, use ANY_EXTEND since the W instructions don't read 3964 // the upper 32 bits. For other types we need to sign or zero extend 3965 // based on the opcode. 3966 unsigned ExtOpc = ISD::ANY_EXTEND; 3967 if (VT != MVT::i32) 3968 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND 3969 : ISD::ZERO_EXTEND; 3970 3971 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc)); 3972 break; 3973 } 3974 case ISD::UADDO: 3975 case ISD::USUBO: { 3976 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 3977 "Unexpected custom legalisation"); 3978 bool IsAdd = N->getOpcode() == ISD::UADDO; 3979 // Create an ADDW or SUBW. 
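    // (We emit a plain i64 ADD/SUB followed by a sign_extend_inreg from i32;
    // instruction selection then matches that pair as addw/subw.)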
3980 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 3981 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 3982 SDValue Res = 3983 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS); 3984 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res, 3985 DAG.getValueType(MVT::i32)); 3986 3987 // Sign extend the LHS and perform an unsigned compare with the ADDW result. 3988 // Since the inputs are sign extended from i32, this is equivalent to 3989 // comparing the lower 32 bits. 3990 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0)); 3991 SDValue Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS, 3992 IsAdd ? ISD::SETULT : ISD::SETUGT); 3993 3994 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 3995 Results.push_back(Overflow); 3996 return; 3997 } 3998 case ISD::UADDSAT: 3999 case ISD::USUBSAT: { 4000 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 4001 "Unexpected custom legalisation"); 4002 if (Subtarget.hasStdExtZbb()) { 4003 // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using 4004 // sign extend allows overflow of the lower 32 bits to be detected on 4005 // the promoted size. 4006 SDValue LHS = 4007 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0)); 4008 SDValue RHS = 4009 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1)); 4010 SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS); 4011 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 4012 return; 4013 } 4014 4015 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom 4016 // promotion for UADDO/USUBO. 4017 Results.push_back(expandAddSubSat(N, DAG)); 4018 return; 4019 } 4020 case ISD::BITCAST: { 4021 assert(((N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 4022 Subtarget.hasStdExtF()) || 4023 (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh())) && 4024 "Unexpected custom legalisation"); 4025 SDValue Op0 = N->getOperand(0); 4026 if (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh()) { 4027 if (Op0.getValueType() != MVT::f16) 4028 return; 4029 SDValue FPConv = 4030 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Op0); 4031 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv)); 4032 } else if (N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 4033 Subtarget.hasStdExtF()) { 4034 if (Op0.getValueType() != MVT::f32) 4035 return; 4036 SDValue FPConv = 4037 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0); 4038 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv)); 4039 } 4040 break; 4041 } 4042 case RISCVISD::GREVI: 4043 case RISCVISD::GORCI: { 4044 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 4045 "Unexpected custom legalisation"); 4046 // This is similar to customLegalizeToWOp, except that we pass the second 4047 // operand (a TargetConstant) straight through: it is already of type 4048 // XLenVT. 4049 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode()); 4050 SDValue NewOp0 = 4051 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 4052 SDValue NewRes = 4053 DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, N->getOperand(1)); 4054 // ReplaceNodeResults requires we maintain the same type for the return 4055 // value. 
4056 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes)); 4057 break; 4058 } 4059 case RISCVISD::SHFLI: { 4060 // There is no SHFLIW instruction, but we can just promote the operation. 4061 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 4062 "Unexpected custom legalisation"); 4063 SDValue NewOp0 = 4064 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 4065 SDValue NewRes = 4066 DAG.getNode(RISCVISD::SHFLI, DL, MVT::i64, NewOp0, N->getOperand(1)); 4067 // ReplaceNodeResults requires we maintain the same type for the return 4068 // value. 4069 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes)); 4070 break; 4071 } 4072 case ISD::BSWAP: 4073 case ISD::BITREVERSE: { 4074 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 4075 Subtarget.hasStdExtZbp() && "Unexpected custom legalisation"); 4076 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, 4077 N->getOperand(0)); 4078 unsigned Imm = N->getOpcode() == ISD::BITREVERSE ? 31 : 24; 4079 SDValue GREVIW = DAG.getNode(RISCVISD::GREVIW, DL, MVT::i64, NewOp0, 4080 DAG.getTargetConstant(Imm, DL, 4081 Subtarget.getXLenVT())); 4082 // ReplaceNodeResults requires we maintain the same type for the return 4083 // value. 4084 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, GREVIW)); 4085 break; 4086 } 4087 case ISD::FSHL: 4088 case ISD::FSHR: { 4089 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 4090 Subtarget.hasStdExtZbt() && "Unexpected custom legalisation"); 4091 SDValue NewOp0 = 4092 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 4093 SDValue NewOp1 = 4094 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 4095 SDValue NewOp2 = 4096 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); 4097 // FSLW/FSRW take a 6 bit shift amount but i32 FSHL/FSHR only use 5 bits. 4098 // Mask the shift amount to 5 bits. 4099 NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2, 4100 DAG.getConstant(0x1f, DL, MVT::i64)); 4101 unsigned Opc = 4102 N->getOpcode() == ISD::FSHL ? RISCVISD::FSLW : RISCVISD::FSRW; 4103 SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewOp2); 4104 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp)); 4105 break; 4106 } 4107 case ISD::EXTRACT_VECTOR_ELT: { 4108 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element 4109 // type is illegal (currently only vXi64 RV32). 4110 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are 4111 // transferred to the destination register. We issue two of these from the 4112 // upper- and lower- halves of the SEW-bit vector element, slid down to the 4113 // first element. 4114 SDValue Vec = N->getOperand(0); 4115 SDValue Idx = N->getOperand(1); 4116 4117 // The vector type hasn't been legalized yet so we can't issue target 4118 // specific nodes if it needs legalization. 4119 // FIXME: We would manually legalize if it's important. 4120 if (!isTypeLegal(Vec.getValueType())) 4121 return; 4122 4123 MVT VecVT = Vec.getSimpleValueType(); 4124 4125 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 && 4126 VecVT.getVectorElementType() == MVT::i64 && 4127 "Unexpected EXTRACT_VECTOR_ELT legalization"); 4128 4129 // If this is a fixed vector, we need to convert it to a scalable vector. 
4130 MVT ContainerVT = VecVT; 4131 if (VecVT.isFixedLengthVector()) { 4132 ContainerVT = getContainerForFixedLengthVector(VecVT); 4133 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 4134 } 4135 4136 MVT XLenVT = Subtarget.getXLenVT(); 4137 4138 // Use a VL of 1 to avoid processing more elements than we need. 4139 MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount()); 4140 SDValue VL = DAG.getConstant(1, DL, XLenVT); 4141 SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); 4142 4143 // Unless the index is known to be 0, we must slide the vector down to get 4144 // the desired element into index 0. 4145 if (!isNullConstant(Idx)) { 4146 Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT, 4147 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL); 4148 } 4149 4150 // Extract the lower XLEN bits of the correct vector element. 4151 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec); 4152 4153 // To extract the upper XLEN bits of the vector element, shift the first 4154 // element right by 32 bits and re-extract the lower XLEN bits. 4155 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 4156 DAG.getConstant(32, DL, XLenVT), VL); 4157 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, 4158 ThirtyTwoV, Mask, VL); 4159 4160 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32); 4161 4162 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi)); 4163 break; 4164 } 4165 case ISD::INTRINSIC_WO_CHAIN: { 4166 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 4167 switch (IntNo) { 4168 default: 4169 llvm_unreachable( 4170 "Don't know how to custom type legalize this intrinsic!"); 4171 case Intrinsic::riscv_vmv_x_s: { 4172 EVT VT = N->getValueType(0); 4173 MVT XLenVT = Subtarget.getXLenVT(); 4174 if (VT.bitsLT(XLenVT)) { 4175 // Simple case just extract using vmv.x.s and truncate. 4176 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL, 4177 Subtarget.getXLenVT(), N->getOperand(1)); 4178 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract)); 4179 return; 4180 } 4181 4182 assert(VT == MVT::i64 && !Subtarget.is64Bit() && 4183 "Unexpected custom legalization"); 4184 4185 // We need to do the move in two steps. 4186 SDValue Vec = N->getOperand(1); 4187 MVT VecVT = Vec.getSimpleValueType(); 4188 4189 // First extract the lower XLEN bits of the element. 4190 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec); 4191 4192 // To extract the upper XLEN bits of the vector element, shift the first 4193 // element right by 32 bits and re-extract the lower XLEN bits. 
      SDValue VL = DAG.getConstant(1, DL, XLenVT);
      MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount());
      SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
      SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT,
                                       DAG.getConstant(32, DL, XLenVT), VL);
      SDValue LShr32 =
          DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV, Mask, VL);
      SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);

      Results.push_back(
          DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
      break;
    }
    }
    break;
  }
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_UMIN:
    if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
      Results.push_back(V);
    break;
  }
}

// A structure to hold one of the bit-manipulation patterns below. Together, a
// SHL and non-SHL pattern may form a bit-manipulation pair on a single source:
//   (or (and (shl x, 1), 0xAAAAAAAA),
//       (and (srl x, 1), 0x55555555))
struct RISCVBitmanipPat {
  SDValue Op;
  unsigned ShAmt;
  bool IsSHL;

  bool formsPairWith(const RISCVBitmanipPat &Other) const {
    return Op == Other.Op && ShAmt == Other.ShAmt && IsSHL != Other.IsSHL;
  }
};

// Matches patterns of the form
//   (and (shl x, C2), (C1 << C2))
//   (and (srl x, C2), C1)
//   (shl (and x, C1), C2)
//   (srl (and x, (C1 << C2)), C2)
// Where C2 is a power of 2 and C1 has at least that many leading zeroes.
// The expected masks for each shift amount are specified in BitmanipMasks where
// BitmanipMasks[log2(C2)] specifies the expected C1 value.
// The max allowed shift amount is either XLen/2 or XLen/4 determined by whether
// BitmanipMasks contains 6 or 5 entries assuming that the maximum possible
// XLen is 64.
static Optional<RISCVBitmanipPat>
matchRISCVBitmanipPat(SDValue Op, ArrayRef<uint64_t> BitmanipMasks) {
  assert((BitmanipMasks.size() == 5 || BitmanipMasks.size() == 6) &&
         "Unexpected number of masks");
  Optional<uint64_t> Mask;
  // Optionally consume a mask around the shift operation.
  if (Op.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op.getOperand(1))) {
    Mask = Op.getConstantOperandVal(1);
    Op = Op.getOperand(0);
  }
  if (Op.getOpcode() != ISD::SHL && Op.getOpcode() != ISD::SRL)
    return None;
  bool IsSHL = Op.getOpcode() == ISD::SHL;

  if (!isa<ConstantSDNode>(Op.getOperand(1)))
    return None;
  uint64_t ShAmt = Op.getConstantOperandVal(1);

  unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
  if (ShAmt >= Width || !isPowerOf2_64(ShAmt))
    return None;
  // If we don't have enough masks for 64 bit, then we must be trying to
  // match SHFL so we're only allowed to shift 1/4 of the width.
  if (BitmanipMasks.size() == 5 && ShAmt >= (Width / 2))
    return None;

  SDValue Src = Op.getOperand(0);

  // The expected mask is shifted left when the AND is found around SHL
  // patterns.
  //   ((x >> 1) & 0x55555555)
  //   ((x << 1) & 0xAAAAAAAA)
  bool SHLExpMask = IsSHL;

  if (!Mask) {
    // Sometimes LLVM keeps the mask as an operand of the shift, typically when
    // the mask is all ones: consume that now.
4286 if (Src.getOpcode() == ISD::AND && isa<ConstantSDNode>(Src.getOperand(1))) {
4287 Mask = Src.getConstantOperandVal(1);
4288 Src = Src.getOperand(0);
4289 // The expected mask is now in fact shifted left for SRL, so reverse the
4290 // decision.
4291 // ((x & 0xAAAAAAAA) >> 1)
4292 // ((x & 0x55555555) << 1)
4293 SHLExpMask = !SHLExpMask;
4294 } else {
4295 // Use a default shifted mask of all-ones if there's no AND, truncated
4296 // down to the expected width. This simplifies the logic later on.
4297 Mask = maskTrailingOnes<uint64_t>(Width);
4298 *Mask &= (IsSHL ? *Mask << ShAmt : *Mask >> ShAmt);
4299 }
4300 }
4301
4302 unsigned MaskIdx = Log2_32(ShAmt);
4303 uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);
4304
4305 if (SHLExpMask)
4306 ExpMask <<= ShAmt;
4307
4308 if (Mask != ExpMask)
4309 return None;
4310
4311 return RISCVBitmanipPat{Src, (unsigned)ShAmt, IsSHL};
4312 }
4313
4314 // Matches any of the following bit-manipulation patterns:
4315 // (and (shl x, 1), (0x55555555 << 1))
4316 // (and (srl x, 1), 0x55555555)
4317 // (shl (and x, 0x55555555), 1)
4318 // (srl (and x, (0x55555555 << 1)), 1)
4319 // where the shift amount and mask may vary thus:
4320 // [1] = 0x55555555 / 0xAAAAAAAA
4321 // [2] = 0x33333333 / 0xCCCCCCCC
4322 // [4] = 0x0F0F0F0F / 0xF0F0F0F0
4323 // [8] = 0x00FF00FF / 0xFF00FF00
4324 // [16] = 0x0000FFFF / 0xFFFF0000
4325 // [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64)
4326 static Optional<RISCVBitmanipPat> matchGREVIPat(SDValue Op) {
4327 // These are the unshifted masks which we use to match bit-manipulation
4328 // patterns. They may be shifted left in certain circumstances.
4329 static const uint64_t BitmanipMasks[] = {
4330 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
4331 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
4332
4333 return matchRISCVBitmanipPat(Op, BitmanipMasks);
4334 }
4335
4336 // Match the following pattern as a GREVI(W) operation
4337 // (or (BITMANIP_SHL x), (BITMANIP_SRL x))
4338 static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG,
4339 const RISCVSubtarget &Subtarget) {
4340 assert(Subtarget.hasStdExtZbp() && "Expected Zbp extension");
4341 EVT VT = Op.getValueType();
4342
4343 if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
4344 auto LHS = matchGREVIPat(Op.getOperand(0));
4345 auto RHS = matchGREVIPat(Op.getOperand(1));
4346 if (LHS && RHS && LHS->formsPairWith(*RHS)) {
4347 SDLoc DL(Op);
4348 return DAG.getNode(
4349 RISCVISD::GREVI, DL, VT, LHS->Op,
4350 DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT()));
4351 }
4352 }
4353 return SDValue();
4354 }
4355
4356 // Matches any of the following patterns as a GORCI(W) operation
4357 // 1. (or (GREVI x, shamt), x) if shamt is a power of 2
4358 // 2. (or x, (GREVI x, shamt)) if shamt is a power of 2
4359 // 3. (or (or (BITMANIP_SHL x), x), (BITMANIP_SRL x))
4360 // Note that with the variant of 3.,
4361 // (or (or (BITMANIP_SHL x), (BITMANIP_SRL x)), x)
4362 // the inner pattern will first be matched as GREVI and then the outer
4363 // pattern will be matched to GORC via the first rule above.
4364 // 4. (or (rotl/rotr x, bitwidth/2), x)
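// As an example of rule 4., on RV32 (or (rotl x, 16), x) computes
// x | rot16(x), which is exactly (GORCI x, 16), i.e. a rotate by half the
// bitwidth ORed with the original value.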
4365 static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG,
4366 const RISCVSubtarget &Subtarget) {
4367 assert(Subtarget.hasStdExtZbp() && "Expected Zbp extension");
4368 EVT VT = Op.getValueType();
4369
4370 if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
4371 SDLoc DL(Op);
4372 SDValue Op0 = Op.getOperand(0);
4373 SDValue Op1 = Op.getOperand(1);
4374
4375 auto MatchOROfReverse = [&](SDValue Reverse, SDValue X) {
4376 if (Reverse.getOpcode() == RISCVISD::GREVI && Reverse.getOperand(0) == X &&
4377 isPowerOf2_32(Reverse.getConstantOperandVal(1)))
4378 return DAG.getNode(RISCVISD::GORCI, DL, VT, X, Reverse.getOperand(1));
4379 // We can also form GORCI from ROTL/ROTR by half the bitwidth.
4380 if ((Reverse.getOpcode() == ISD::ROTL ||
4381 Reverse.getOpcode() == ISD::ROTR) &&
4382 Reverse.getOperand(0) == X &&
4383 isa<ConstantSDNode>(Reverse.getOperand(1))) {
4384 uint64_t RotAmt = Reverse.getConstantOperandVal(1);
4385 if (RotAmt == (VT.getSizeInBits() / 2))
4386 return DAG.getNode(
4387 RISCVISD::GORCI, DL, VT, X,
4388 DAG.getTargetConstant(RotAmt, DL, Subtarget.getXLenVT()));
4389 }
4390 return SDValue();
4391 };
4392
4393 // Check for either commutable permutation of (or (GREVI x, shamt), x)
4394 if (SDValue V = MatchOROfReverse(Op0, Op1))
4395 return V;
4396 if (SDValue V = MatchOROfReverse(Op1, Op0))
4397 return V;
4398
4399 // OR is commutable so canonicalize its OR operand to the left
4400 if (Op0.getOpcode() != ISD::OR && Op1.getOpcode() == ISD::OR)
4401 std::swap(Op0, Op1);
4402 if (Op0.getOpcode() != ISD::OR)
4403 return SDValue();
4404 SDValue OrOp0 = Op0.getOperand(0);
4405 SDValue OrOp1 = Op0.getOperand(1);
4406 auto LHS = matchGREVIPat(OrOp0);
4407 // OR is commutable so swap the operands and try again: x might have been
4408 // on the left.
4409 if (!LHS) {
4410 std::swap(OrOp0, OrOp1);
4411 LHS = matchGREVIPat(OrOp0);
4412 }
4413 auto RHS = matchGREVIPat(Op1);
4414 if (LHS && RHS && LHS->formsPairWith(*RHS) && LHS->Op == OrOp1) {
4415 return DAG.getNode(
4416 RISCVISD::GORCI, DL, VT, LHS->Op,
4417 DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT()));
4418 }
4419 }
4420 return SDValue();
4421 }
4422
4423 // Matches any of the following bit-manipulation patterns:
4424 // (and (shl x, 1), (0x22222222 << 1))
4425 // (and (srl x, 1), 0x22222222)
4426 // (shl (and x, 0x22222222), 1)
4427 // (srl (and x, (0x22222222 << 1)), 1)
4428 // where the shift amount and mask may vary thus:
4429 // [1] = 0x22222222 / 0x44444444
4430 // [2] = 0x0C0C0C0C / 0x30303030
4431 // [4] = 0x00F000F0 / 0x0F000F00
4432 // [8] = 0x0000FF00 / 0x00FF0000
4433 // [16] = 0x00000000FFFF0000 / 0x0000FFFF00000000 (for RV64)
4434 static Optional<RISCVBitmanipPat> matchSHFLPat(SDValue Op) {
4435 // These are the unshifted masks which we use to match bit-manipulation
4436 // patterns. They may be shifted left in certain circumstances.
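// As an illustration, with ShAmt == 8 on RV32 the matched pair is
// ((x << 8) & 0x00FF0000) and ((x >> 8) & 0x0000FF00), which together swap
// the two middle bytes; combineORToSHFL below additionally requires the
// (x & 0xFF0000FF) term that preserves the outer bytes.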
4437 static const uint64_t BitmanipMasks[] = {
4438 0x2222222222222222ULL, 0x0C0C0C0C0C0C0C0CULL, 0x00F000F000F000F0ULL,
4439 0x0000FF000000FF00ULL, 0x00000000FFFF0000ULL};
4440
4441 return matchRISCVBitmanipPat(Op, BitmanipMasks);
4442 }
4443
4444 // Match (or (or (SHFL_SHL x), (SHFL_SHR x)), (SHFL_AND x))
4445 static SDValue combineORToSHFL(SDValue Op, SelectionDAG &DAG,
4446 const RISCVSubtarget &Subtarget) {
4447 assert(Subtarget.hasStdExtZbp() && "Expected Zbp extension");
4448 EVT VT = Op.getValueType();
4449
4450 if (VT != MVT::i32 && VT != Subtarget.getXLenVT())
4451 return SDValue();
4452
4453 SDValue Op0 = Op.getOperand(0);
4454 SDValue Op1 = Op.getOperand(1);
4455
4456 // Or is commutable so canonicalize the second OR to the LHS.
4457 if (Op0.getOpcode() != ISD::OR)
4458 std::swap(Op0, Op1);
4459 if (Op0.getOpcode() != ISD::OR)
4460 return SDValue();
4461
4462 // We found an inner OR, so our operands are the operands of the inner OR
4463 // and the other operand of the outer OR.
4464 SDValue A = Op0.getOperand(0);
4465 SDValue B = Op0.getOperand(1);
4466 SDValue C = Op1;
4467
4468 auto Match1 = matchSHFLPat(A);
4469 auto Match2 = matchSHFLPat(B);
4470
4471 // If neither matched, we failed.
4472 if (!Match1 && !Match2)
4473 return SDValue();
4474
4475 // We had at least one match. If one failed, try the remaining C operand.
4476 if (!Match1) {
4477 std::swap(A, C);
4478 Match1 = matchSHFLPat(A);
4479 if (!Match1)
4480 return SDValue();
4481 } else if (!Match2) {
4482 std::swap(B, C);
4483 Match2 = matchSHFLPat(B);
4484 if (!Match2)
4485 return SDValue();
4486 }
4487 assert(Match1 && Match2);
4488
4489 // Make sure our matches pair up.
4490 if (!Match1->formsPairWith(*Match2))
4491 return SDValue();
4492
4493 // All that remains is to make sure C is an AND with the same input that
4494 // masks out the bits that are being shuffled.
4495 if (C.getOpcode() != ISD::AND || !isa<ConstantSDNode>(C.getOperand(1)) ||
4496 C.getOperand(0) != Match1->Op)
4497 return SDValue();
4498
4499 uint64_t Mask = C.getConstantOperandVal(1);
4500
4501 static const uint64_t BitmanipMasks[] = {
4502 0x9999999999999999ULL, 0xC3C3C3C3C3C3C3C3ULL, 0xF00FF00FF00FF00FULL,
4503 0xFF0000FFFF0000FFULL, 0xFFFF00000000FFFFULL,
4504 };
4505
4506 unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
4507 unsigned MaskIdx = Log2_32(Match1->ShAmt);
4508 uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);
4509
4510 if (Mask != ExpMask)
4511 return SDValue();
4512
4513 SDLoc DL(Op);
4514 return DAG.getNode(
4515 RISCVISD::SHFLI, DL, VT, Match1->Op,
4516 DAG.getTargetConstant(Match1->ShAmt, DL, Subtarget.getXLenVT()));
4517 }
4518
4519 // Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is
4520 // non-zero, and to x when C1^C2 is zero. Any repeated GREVI stage undoes
4521 // itself. Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). A repeated
4522 // GORCI stage does not undo itself, but it is redundant.
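// For example, (GREVI (GREVI x, 1), 2) becomes (GREVI x, 3),
// (GREVI (GREVI x, 3), 3) folds away to x, and
// (GORCI (GORCI x, 1), 2) becomes (GORCI x, 3).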
4523 static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) { 4524 unsigned ShAmt1 = N->getConstantOperandVal(1); 4525 SDValue Src = N->getOperand(0); 4526 4527 if (Src.getOpcode() != N->getOpcode()) 4528 return SDValue(); 4529 4530 unsigned ShAmt2 = Src.getConstantOperandVal(1); 4531 Src = Src.getOperand(0); 4532 4533 unsigned CombinedShAmt; 4534 if (N->getOpcode() == RISCVISD::GORCI || N->getOpcode() == RISCVISD::GORCIW) 4535 CombinedShAmt = ShAmt1 | ShAmt2; 4536 else 4537 CombinedShAmt = ShAmt1 ^ ShAmt2; 4538 4539 if (CombinedShAmt == 0) 4540 return Src; 4541 4542 SDLoc DL(N); 4543 return DAG.getNode(N->getOpcode(), DL, N->getValueType(0), Src, 4544 DAG.getTargetConstant(CombinedShAmt, DL, 4545 N->getOperand(1).getValueType())); 4546 } 4547 4548 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, 4549 DAGCombinerInfo &DCI) const { 4550 SelectionDAG &DAG = DCI.DAG; 4551 4552 switch (N->getOpcode()) { 4553 default: 4554 break; 4555 case RISCVISD::SplitF64: { 4556 SDValue Op0 = N->getOperand(0); 4557 // If the input to SplitF64 is just BuildPairF64 then the operation is 4558 // redundant. Instead, use BuildPairF64's operands directly. 4559 if (Op0->getOpcode() == RISCVISD::BuildPairF64) 4560 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1)); 4561 4562 SDLoc DL(N); 4563 4564 // It's cheaper to materialise two 32-bit integers than to load a double 4565 // from the constant pool and transfer it to integer registers through the 4566 // stack. 4567 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) { 4568 APInt V = C->getValueAPF().bitcastToAPInt(); 4569 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32); 4570 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32); 4571 return DCI.CombineTo(N, Lo, Hi); 4572 } 4573 4574 // This is a target-specific version of a DAGCombine performed in 4575 // DAGCombiner::visitBITCAST. It performs the equivalent of: 4576 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 4577 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 4578 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || 4579 !Op0.getNode()->hasOneUse()) 4580 break; 4581 SDValue NewSplitF64 = 4582 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), 4583 Op0.getOperand(0)); 4584 SDValue Lo = NewSplitF64.getValue(0); 4585 SDValue Hi = NewSplitF64.getValue(1); 4586 APInt SignBit = APInt::getSignMask(32); 4587 if (Op0.getOpcode() == ISD::FNEG) { 4588 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi, 4589 DAG.getConstant(SignBit, DL, MVT::i32)); 4590 return DCI.CombineTo(N, Lo, NewHi); 4591 } 4592 assert(Op0.getOpcode() == ISD::FABS); 4593 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi, 4594 DAG.getConstant(~SignBit, DL, MVT::i32)); 4595 return DCI.CombineTo(N, Lo, NewHi); 4596 } 4597 case RISCVISD::SLLW: 4598 case RISCVISD::SRAW: 4599 case RISCVISD::SRLW: 4600 case RISCVISD::ROLW: 4601 case RISCVISD::RORW: { 4602 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read. 
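// For example, in (sllw (and x, 0xffffffff), (and y, 31)) both ANDs can be
// removed once only the bits named above are demanded from the operands.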
4603 SDValue LHS = N->getOperand(0);
4604 SDValue RHS = N->getOperand(1);
4605 APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
4606 APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
4607 if (SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI) ||
4608 SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)) {
4609 if (N->getOpcode() != ISD::DELETED_NODE)
4610 DCI.AddToWorklist(N);
4611 return SDValue(N, 0);
4612 }
4613 break;
4614 }
4615 case RISCVISD::FSL:
4616 case RISCVISD::FSR: {
4617 // Only the lower log2(Bitwidth)+1 bits of the shift amount are read.
4618 SDValue ShAmt = N->getOperand(2);
4619 unsigned BitWidth = ShAmt.getValueSizeInBits();
4620 assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
4621 APInt ShAmtMask(BitWidth, (BitWidth * 2) - 1);
4622 if (SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
4623 if (N->getOpcode() != ISD::DELETED_NODE)
4624 DCI.AddToWorklist(N);
4625 return SDValue(N, 0);
4626 }
4627 break;
4628 }
4629 case RISCVISD::FSLW:
4630 case RISCVISD::FSRW: {
4631 // Only the lower 32 bits of the values and the lower 6 bits of the shift
4632 // amount are read.
4633 SDValue Op0 = N->getOperand(0);
4634 SDValue Op1 = N->getOperand(1);
4635 SDValue ShAmt = N->getOperand(2);
4636 APInt OpMask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
4637 APInt ShAmtMask = APInt::getLowBitsSet(ShAmt.getValueSizeInBits(), 6);
4638 if (SimplifyDemandedBits(Op0, OpMask, DCI) ||
4639 SimplifyDemandedBits(Op1, OpMask, DCI) ||
4640 SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
4641 if (N->getOpcode() != ISD::DELETED_NODE)
4642 DCI.AddToWorklist(N);
4643 return SDValue(N, 0);
4644 }
4645 break;
4646 }
4647 case RISCVISD::GREVIW:
4648 case RISCVISD::GORCIW: {
4649 // Only the lower 32 bits of the first operand are read.
4650 SDValue Op0 = N->getOperand(0);
4651 APInt Mask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
4652 if (SimplifyDemandedBits(Op0, Mask, DCI)) {
4653 if (N->getOpcode() != ISD::DELETED_NODE)
4654 DCI.AddToWorklist(N);
4655 return SDValue(N, 0);
4656 }
4657
4658 return combineGREVI_GORCI(N, DCI.DAG);
4659 }
4660 case RISCVISD::FMV_X_ANYEXTW_RV64: {
4661 SDLoc DL(N);
4662 SDValue Op0 = N->getOperand(0);
4663 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
4664 // conversion is unnecessary and can be replaced with an ANY_EXTEND
4665 // of the FMV_W_X_RV64 operand.
4666 if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) {
4667 assert(Op0.getOperand(0).getValueType() == MVT::i64 &&
4668 "Unexpected value type!");
4669 return Op0.getOperand(0);
4670 }
4671
4672 // This is a target-specific version of a DAGCombine performed in
4673 // DAGCombiner::visitBITCAST.
It performs the equivalent of: 4674 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 4675 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 4676 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || 4677 !Op0.getNode()->hasOneUse()) 4678 break; 4679 SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, 4680 Op0.getOperand(0)); 4681 APInt SignBit = APInt::getSignMask(32).sext(64); 4682 if (Op0.getOpcode() == ISD::FNEG) 4683 return DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV, 4684 DAG.getConstant(SignBit, DL, MVT::i64)); 4685 4686 assert(Op0.getOpcode() == ISD::FABS); 4687 return DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV, 4688 DAG.getConstant(~SignBit, DL, MVT::i64)); 4689 } 4690 case RISCVISD::GREVI: 4691 case RISCVISD::GORCI: 4692 return combineGREVI_GORCI(N, DCI.DAG); 4693 case ISD::OR: 4694 if (auto GREV = combineORToGREV(SDValue(N, 0), DCI.DAG, Subtarget)) 4695 return GREV; 4696 if (auto GORC = combineORToGORC(SDValue(N, 0), DCI.DAG, Subtarget)) 4697 return GORC; 4698 if (auto SHFL = combineORToSHFL(SDValue(N, 0), DCI.DAG, Subtarget)) 4699 return SHFL; 4700 break; 4701 case RISCVISD::SELECT_CC: { 4702 // Transform 4703 SDValue LHS = N->getOperand(0); 4704 SDValue RHS = N->getOperand(1); 4705 auto CCVal = static_cast<ISD::CondCode>(N->getConstantOperandVal(2)); 4706 if (!ISD::isIntEqualitySetCC(CCVal)) 4707 break; 4708 4709 // Fold (select_cc (setlt X, Y), 0, ne, trueV, falseV) -> 4710 // (select_cc X, Y, lt, trueV, falseV) 4711 // Sometimes the setcc is introduced after select_cc has been formed. 4712 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) && 4713 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) { 4714 // If we're looking for eq 0 instead of ne 0, we need to invert the 4715 // condition. 4716 bool Invert = CCVal == ISD::SETEQ; 4717 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get(); 4718 if (Invert) 4719 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); 4720 4721 SDLoc DL(N); 4722 RHS = LHS.getOperand(1); 4723 LHS = LHS.getOperand(0); 4724 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG); 4725 4726 SDValue TargetCC = DAG.getConstant(CCVal, DL, Subtarget.getXLenVT()); 4727 return DAG.getNode( 4728 RISCVISD::SELECT_CC, DL, N->getValueType(0), 4729 {LHS, RHS, TargetCC, N->getOperand(3), N->getOperand(4)}); 4730 } 4731 4732 // Fold (select_cc (xor X, Y), 0, eq/ne, trueV, falseV) -> 4733 // (select_cc X, Y, eq/ne, trueV, falseV) 4734 if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) 4735 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), N->getValueType(0), 4736 {LHS.getOperand(0), LHS.getOperand(1), 4737 N->getOperand(2), N->getOperand(3), 4738 N->getOperand(4)}); 4739 // (select_cc X, 1, setne, trueV, falseV) -> 4740 // (select_cc X, 0, seteq, trueV, falseV) if we can prove X is 0/1. 4741 // This can occur when legalizing some floating point comparisons. 
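// e.g. when X = (setcc oeq a, b) the value is known to be 0 or 1, so testing
// "X != 1" is equivalent to testing "X == 0", and zero is the cheaper
// comparison operand (it is simply x0).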
4742 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
4743 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
4744 SDLoc DL(N);
4745 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
4746 SDValue TargetCC = DAG.getConstant(CCVal, DL, Subtarget.getXLenVT());
4747 RHS = DAG.getConstant(0, DL, LHS.getValueType());
4748 return DAG.getNode(
4749 RISCVISD::SELECT_CC, DL, N->getValueType(0),
4750 {LHS, RHS, TargetCC, N->getOperand(3), N->getOperand(4)});
4751 }
4752
4753 break;
4754 }
4755 case RISCVISD::BR_CC: {
4756 SDValue LHS = N->getOperand(1);
4757 SDValue RHS = N->getOperand(2);
4758 ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(3))->get();
4759 if (!ISD::isIntEqualitySetCC(CCVal))
4760 break;
4761
4762 // Fold (br_cc (setlt X, Y), 0, ne, dest) ->
4763 // (br_cc X, Y, lt, dest)
4764 // Sometimes the setcc is introduced after br_cc has been formed.
4765 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
4766 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
4767 // If we're looking for eq 0 instead of ne 0, we need to invert the
4768 // condition.
4769 bool Invert = CCVal == ISD::SETEQ;
4770 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
4771 if (Invert)
4772 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
4773
4774 SDLoc DL(N);
4775 RHS = LHS.getOperand(1);
4776 LHS = LHS.getOperand(0);
4777 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
4778
4779 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
4780 N->getOperand(0), LHS, RHS, DAG.getCondCode(CCVal),
4781 N->getOperand(4));
4782 }
4783
4784 // Fold (br_cc (xor X, Y), 0, eq/ne, dest) ->
4785 // (br_cc X, Y, eq/ne, dest)
4786 if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS))
4787 return DAG.getNode(RISCVISD::BR_CC, SDLoc(N), N->getValueType(0),
4788 N->getOperand(0), LHS.getOperand(0), LHS.getOperand(1),
4789 N->getOperand(3), N->getOperand(4));
4790
4791 // (br_cc X, 1, setne, dest) ->
4792 // (br_cc X, 0, seteq, dest) if we can prove X is 0/1.
4793 // This can occur when legalizing some floating point comparisons.
4794 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
4795 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
4796 SDLoc DL(N);
4797 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
4798 SDValue TargetCC = DAG.getCondCode(CCVal);
4799 RHS = DAG.getConstant(0, DL, LHS.getValueType());
4800 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
4801 N->getOperand(0), LHS, RHS, TargetCC,
4802 N->getOperand(4));
4803 }
4804 break;
4805 }
4806 case ISD::FCOPYSIGN: {
4807 EVT VT = N->getValueType(0);
4808 if (!VT.isVector())
4809 break;
4810 // There is a form of VFSGNJ which injects the negated sign of its second
4811 // operand. Try and bubble any FNEG up after the extend/round to produce
4812 // this optimized pattern. Avoid modifying cases where the FP_ROUND has a
4813 // non-zero TRUNC operand.
4814 SDValue In2 = N->getOperand(1);
4815 // Avoid cases where the extend/round has multiple uses, as duplicating
4816 // those is typically more expensive than removing a fneg.
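// Sketch of the intended rewrite, assuming the operands pass the checks
// below:
// (fcopysign x, (fpext (fneg y))) -> (fcopysign x, (fneg (fpext y)))
// which instruction selection can then turn into the sign-negating VFSGNJN
// form.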
4817 if (!In2.hasOneUse()) 4818 break; 4819 if (In2.getOpcode() != ISD::FP_EXTEND && 4820 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0)) 4821 break; 4822 In2 = In2.getOperand(0); 4823 if (In2.getOpcode() != ISD::FNEG) 4824 break; 4825 SDLoc DL(N); 4826 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT); 4827 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0), 4828 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound)); 4829 } 4830 case ISD::MGATHER: 4831 case ISD::MSCATTER: { 4832 if (!DCI.isBeforeLegalize()) 4833 break; 4834 MaskedGatherScatterSDNode *MGSN = cast<MaskedGatherScatterSDNode>(N); 4835 SDValue Index = MGSN->getIndex(); 4836 EVT IndexVT = Index.getValueType(); 4837 MVT XLenVT = Subtarget.getXLenVT(); 4838 // RISCV indexed loads only support the "unsigned unscaled" addressing 4839 // mode, so anything else must be manually legalized. 4840 bool NeedsIdxLegalization = MGSN->isIndexScaled() || 4841 (MGSN->isIndexSigned() && 4842 IndexVT.getVectorElementType().bitsLT(XLenVT)); 4843 if (!NeedsIdxLegalization) 4844 break; 4845 4846 SDLoc DL(N); 4847 4848 // Any index legalization should first promote to XLenVT, so we don't lose 4849 // bits when scaling. This may create an illegal index type so we let 4850 // LLVM's legalization take care of the splitting. 4851 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) { 4852 IndexVT = IndexVT.changeVectorElementType(XLenVT); 4853 Index = DAG.getNode(MGSN->isIndexSigned() ? ISD::SIGN_EXTEND 4854 : ISD::ZERO_EXTEND, 4855 DL, IndexVT, Index); 4856 } 4857 4858 unsigned Scale = N->getConstantOperandVal(5); 4859 if (MGSN->isIndexScaled() && Scale != 1) { 4860 // Manually scale the indices by the element size. 4861 // TODO: Sanitize the scale operand here? 4862 assert(isPowerOf2_32(Scale) && "Expecting power-of-two types"); 4863 SDValue SplatScale = DAG.getConstant(Log2_32(Scale), DL, IndexVT); 4864 Index = DAG.getNode(ISD::SHL, DL, IndexVT, Index, SplatScale); 4865 } 4866 4867 ISD::MemIndexType NewIndexTy = ISD::UNSIGNED_UNSCALED; 4868 if (const auto *MGN = dyn_cast<MaskedGatherSDNode>(N)) { 4869 return DAG.getMaskedGather( 4870 N->getVTList(), MGSN->getMemoryVT(), DL, 4871 {MGSN->getChain(), MGN->getPassThru(), MGSN->getMask(), 4872 MGSN->getBasePtr(), Index, MGN->getScale()}, 4873 MGN->getMemOperand(), NewIndexTy, MGN->getExtensionType()); 4874 } 4875 const auto *MSN = cast<MaskedScatterSDNode>(N); 4876 return DAG.getMaskedScatter( 4877 N->getVTList(), MGSN->getMemoryVT(), DL, 4878 {MGSN->getChain(), MSN->getValue(), MGSN->getMask(), MGSN->getBasePtr(), 4879 Index, MGSN->getScale()}, 4880 MGSN->getMemOperand(), NewIndexTy, MSN->isTruncatingStore()); 4881 } 4882 } 4883 4884 return SDValue(); 4885 } 4886 4887 bool RISCVTargetLowering::isDesirableToCommuteWithShift( 4888 const SDNode *N, CombineLevel Level) const { 4889 // The following folds are only desirable if `(OP _, c1 << c2)` can be 4890 // materialised in fewer instructions than `(OP _, c1)`: 4891 // 4892 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) 4893 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2) 4894 SDValue N0 = N->getOperand(0); 4895 EVT Ty = N0.getValueType(); 4896 if (Ty.isScalarInteger() && 4897 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) { 4898 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 4899 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)); 4900 if (C1 && C2) { 4901 const APInt &C1Int = C1->getAPIntValue(); 4902 APInt ShiftedC1Int = C1Int << C2->getAPIntValue(); 4903 4904 // We 
can materialise `c1 << c2` into an add immediate, so it's "free", 4905 // and the combine should happen, to potentially allow further combines 4906 // later. 4907 if (ShiftedC1Int.getMinSignedBits() <= 64 && 4908 isLegalAddImmediate(ShiftedC1Int.getSExtValue())) 4909 return true; 4910 4911 // We can materialise `c1` in an add immediate, so it's "free", and the 4912 // combine should be prevented. 4913 if (C1Int.getMinSignedBits() <= 64 && 4914 isLegalAddImmediate(C1Int.getSExtValue())) 4915 return false; 4916 4917 // Neither constant will fit into an immediate, so find materialisation 4918 // costs. 4919 int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), 4920 Subtarget.is64Bit()); 4921 int ShiftedC1Cost = RISCVMatInt::getIntMatCost( 4922 ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit()); 4923 4924 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the 4925 // combine should be prevented. 4926 if (C1Cost < ShiftedC1Cost) 4927 return false; 4928 } 4929 } 4930 return true; 4931 } 4932 4933 bool RISCVTargetLowering::targetShrinkDemandedConstant( 4934 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, 4935 TargetLoweringOpt &TLO) const { 4936 // Delay this optimization as late as possible. 4937 if (!TLO.LegalOps) 4938 return false; 4939 4940 EVT VT = Op.getValueType(); 4941 if (VT.isVector()) 4942 return false; 4943 4944 // Only handle AND for now. 4945 if (Op.getOpcode() != ISD::AND) 4946 return false; 4947 4948 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); 4949 if (!C) 4950 return false; 4951 4952 const APInt &Mask = C->getAPIntValue(); 4953 4954 // Clear all non-demanded bits initially. 4955 APInt ShrunkMask = Mask & DemandedBits; 4956 4957 // Try to make a smaller immediate by setting undemanded bits. 4958 4959 APInt ExpandedMask = Mask | ~DemandedBits; 4960 4961 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool { 4962 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask); 4963 }; 4964 auto UseMask = [Mask, Op, VT, &TLO](const APInt &NewMask) -> bool { 4965 if (NewMask == Mask) 4966 return true; 4967 SDLoc DL(Op); 4968 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT); 4969 SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC); 4970 return TLO.CombineTo(Op, NewOp); 4971 }; 4972 4973 // If the shrunk mask fits in sign extended 12 bits, let the target 4974 // independent code apply it. 4975 if (ShrunkMask.isSignedIntN(12)) 4976 return false; 4977 4978 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern. 4979 if (VT == MVT::i64) { 4980 APInt NewMask = APInt(64, 0xffffffff); 4981 if (IsLegalMask(NewMask)) 4982 return UseMask(NewMask); 4983 } 4984 4985 // For the remaining optimizations, we need to be able to make a negative 4986 // number through a combination of mask and undemanded bits. 4987 if (!ExpandedMask.isNegative()) 4988 return false; 4989 4990 // What is the fewest number of bits we need to represent the negative number. 4991 unsigned MinSignedBits = ExpandedMask.getMinSignedBits(); 4992 4993 // Try to make a 12 bit negative immediate. If that fails try to make a 32 4994 // bit negative immediate unless the shrunk immediate already fits in 32 bits. 
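// Worked example with hypothetical values: for DemandedBits == 0x00000FFF and
// C == 0x00000F00, ShrunkMask == 0xF00 is not a simm12, but ExpandedMask ==
// 0xFFFFFF00 is -256 and needs only 9 signed bits, so the mask is widened to
// 0xFFFFFF00 and the AND can be selected as an ANDI with -256 instead of
// materialising the constant separately.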
4995 APInt NewMask = ShrunkMask; 4996 if (MinSignedBits <= 12) 4997 NewMask.setBitsFrom(11); 4998 else if (MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32)) 4999 NewMask.setBitsFrom(31); 5000 else 5001 return false; 5002 5003 // Sanity check that our new mask is a subset of the demanded mask. 5004 assert(IsLegalMask(NewMask)); 5005 return UseMask(NewMask); 5006 } 5007 5008 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, 5009 KnownBits &Known, 5010 const APInt &DemandedElts, 5011 const SelectionDAG &DAG, 5012 unsigned Depth) const { 5013 unsigned BitWidth = Known.getBitWidth(); 5014 unsigned Opc = Op.getOpcode(); 5015 assert((Opc >= ISD::BUILTIN_OP_END || 5016 Opc == ISD::INTRINSIC_WO_CHAIN || 5017 Opc == ISD::INTRINSIC_W_CHAIN || 5018 Opc == ISD::INTRINSIC_VOID) && 5019 "Should use MaskedValueIsZero if you don't know whether Op" 5020 " is a target node!"); 5021 5022 Known.resetAll(); 5023 switch (Opc) { 5024 default: break; 5025 case RISCVISD::SELECT_CC: { 5026 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1); 5027 // If we don't know any bits, early out. 5028 if (Known.isUnknown()) 5029 break; 5030 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1); 5031 5032 // Only known if known in both the LHS and RHS. 5033 Known = KnownBits::commonBits(Known, Known2); 5034 break; 5035 } 5036 case RISCVISD::REMUW: { 5037 KnownBits Known2; 5038 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 5039 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 5040 // We only care about the lower 32 bits. 5041 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32)); 5042 // Restore the original width by sign extending. 5043 Known = Known.sext(BitWidth); 5044 break; 5045 } 5046 case RISCVISD::DIVUW: { 5047 KnownBits Known2; 5048 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 5049 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 5050 // We only care about the lower 32 bits. 5051 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32)); 5052 // Restore the original width by sign extending. 5053 Known = Known.sext(BitWidth); 5054 break; 5055 } 5056 case RISCVISD::READ_VLENB: 5057 // We assume VLENB is at least 8 bytes. 5058 // FIXME: The 1.0 draft spec defines minimum VLEN as 128 bits. 5059 Known.Zero.setLowBits(3); 5060 break; 5061 } 5062 } 5063 5064 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( 5065 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, 5066 unsigned Depth) const { 5067 switch (Op.getOpcode()) { 5068 default: 5069 break; 5070 case RISCVISD::SLLW: 5071 case RISCVISD::SRAW: 5072 case RISCVISD::SRLW: 5073 case RISCVISD::DIVW: 5074 case RISCVISD::DIVUW: 5075 case RISCVISD::REMUW: 5076 case RISCVISD::ROLW: 5077 case RISCVISD::RORW: 5078 case RISCVISD::GREVIW: 5079 case RISCVISD::GORCIW: 5080 case RISCVISD::FSLW: 5081 case RISCVISD::FSRW: 5082 // TODO: As the result is sign-extended, this is conservatively correct. A 5083 // more precise answer could be calculated for SRAW depending on known 5084 // bits in the shift amount. 5085 return 33; 5086 case RISCVISD::SHFLI: { 5087 // There is no SHFLIW, but a i64 SHFLI with bit 4 of the control word 5088 // cleared doesn't affect bit 31. The upper 32 bits will be shuffled, but 5089 // will stay within the upper 32 bits. If there were more than 32 sign bits 5090 // before there will be at least 33 sign bits after. 
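// For example, with control value 8 the permutation only moves bits within
// each 32-bit half, so an input whose upper 33 bits are all equal still has
// its upper 33 bits all equal afterwards.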
5091 if (Op.getValueType() == MVT::i64 && 5092 (Op.getConstantOperandVal(1) & 0x10) == 0) { 5093 unsigned Tmp = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1); 5094 if (Tmp > 32) 5095 return 33; 5096 } 5097 break; 5098 } 5099 case RISCVISD::VMV_X_S: 5100 // The number of sign bits of the scalar result is computed by obtaining the 5101 // element type of the input vector operand, subtracting its width from the 5102 // XLEN, and then adding one (sign bit within the element type). If the 5103 // element type is wider than XLen, the least-significant XLEN bits are 5104 // taken. 5105 if (Op.getOperand(0).getScalarValueSizeInBits() > Subtarget.getXLen()) 5106 return 1; 5107 return Subtarget.getXLen() - Op.getOperand(0).getScalarValueSizeInBits() + 1; 5108 } 5109 5110 return 1; 5111 } 5112 5113 static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI, 5114 MachineBasicBlock *BB) { 5115 assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction"); 5116 5117 // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves. 5118 // Should the count have wrapped while it was being read, we need to try 5119 // again. 5120 // ... 5121 // read: 5122 // rdcycleh x3 # load high word of cycle 5123 // rdcycle x2 # load low word of cycle 5124 // rdcycleh x4 # load high word of cycle 5125 // bne x3, x4, read # check if high word reads match, otherwise try again 5126 // ... 5127 5128 MachineFunction &MF = *BB->getParent(); 5129 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 5130 MachineFunction::iterator It = ++BB->getIterator(); 5131 5132 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB); 5133 MF.insert(It, LoopMBB); 5134 5135 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB); 5136 MF.insert(It, DoneMBB); 5137 5138 // Transfer the remainder of BB and its successor edges to DoneMBB. 
5139 DoneMBB->splice(DoneMBB->begin(), BB, 5140 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 5141 DoneMBB->transferSuccessorsAndUpdatePHIs(BB); 5142 5143 BB->addSuccessor(LoopMBB); 5144 5145 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 5146 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 5147 Register LoReg = MI.getOperand(0).getReg(); 5148 Register HiReg = MI.getOperand(1).getReg(); 5149 DebugLoc DL = MI.getDebugLoc(); 5150 5151 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); 5152 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg) 5153 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) 5154 .addReg(RISCV::X0); 5155 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg) 5156 .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding) 5157 .addReg(RISCV::X0); 5158 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg) 5159 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) 5160 .addReg(RISCV::X0); 5161 5162 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE)) 5163 .addReg(HiReg) 5164 .addReg(ReadAgainReg) 5165 .addMBB(LoopMBB); 5166 5167 LoopMBB->addSuccessor(LoopMBB); 5168 LoopMBB->addSuccessor(DoneMBB); 5169 5170 MI.eraseFromParent(); 5171 5172 return DoneMBB; 5173 } 5174 5175 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI, 5176 MachineBasicBlock *BB) { 5177 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction"); 5178 5179 MachineFunction &MF = *BB->getParent(); 5180 DebugLoc DL = MI.getDebugLoc(); 5181 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 5182 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 5183 Register LoReg = MI.getOperand(0).getReg(); 5184 Register HiReg = MI.getOperand(1).getReg(); 5185 Register SrcReg = MI.getOperand(2).getReg(); 5186 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass; 5187 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); 5188 5189 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC, 5190 RI); 5191 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 5192 MachineMemOperand *MMOLo = 5193 MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8)); 5194 MachineMemOperand *MMOHi = MF.getMachineMemOperand( 5195 MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8)); 5196 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg) 5197 .addFrameIndex(FI) 5198 .addImm(0) 5199 .addMemOperand(MMOLo); 5200 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg) 5201 .addFrameIndex(FI) 5202 .addImm(4) 5203 .addMemOperand(MMOHi); 5204 MI.eraseFromParent(); // The pseudo instruction is gone now. 
5205 return BB; 5206 } 5207 5208 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI, 5209 MachineBasicBlock *BB) { 5210 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo && 5211 "Unexpected instruction"); 5212 5213 MachineFunction &MF = *BB->getParent(); 5214 DebugLoc DL = MI.getDebugLoc(); 5215 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 5216 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 5217 Register DstReg = MI.getOperand(0).getReg(); 5218 Register LoReg = MI.getOperand(1).getReg(); 5219 Register HiReg = MI.getOperand(2).getReg(); 5220 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass; 5221 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); 5222 5223 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 5224 MachineMemOperand *MMOLo = 5225 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8)); 5226 MachineMemOperand *MMOHi = MF.getMachineMemOperand( 5227 MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8)); 5228 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 5229 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill())) 5230 .addFrameIndex(FI) 5231 .addImm(0) 5232 .addMemOperand(MMOLo); 5233 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 5234 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill())) 5235 .addFrameIndex(FI) 5236 .addImm(4) 5237 .addMemOperand(MMOHi); 5238 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI); 5239 MI.eraseFromParent(); // The pseudo instruction is gone now. 5240 return BB; 5241 } 5242 5243 static bool isSelectPseudo(MachineInstr &MI) { 5244 switch (MI.getOpcode()) { 5245 default: 5246 return false; 5247 case RISCV::Select_GPR_Using_CC_GPR: 5248 case RISCV::Select_FPR16_Using_CC_GPR: 5249 case RISCV::Select_FPR32_Using_CC_GPR: 5250 case RISCV::Select_FPR64_Using_CC_GPR: 5251 return true; 5252 } 5253 } 5254 5255 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI, 5256 MachineBasicBlock *BB) { 5257 // To "insert" Select_* instructions, we actually have to insert the triangle 5258 // control-flow pattern. The incoming instructions know the destination vreg 5259 // to set, the condition code register to branch on, the true/false values to 5260 // select between, and the condcode to use to select the appropriate branch. 5261 // 5262 // We produce the following control flow: 5263 // HeadMBB 5264 // | \ 5265 // | IfFalseMBB 5266 // | / 5267 // TailMBB 5268 // 5269 // When we find a sequence of selects we attempt to optimize their emission 5270 // by sharing the control flow. Currently we only handle cases where we have 5271 // multiple selects with the exact same condition (same LHS, RHS and CC). 5272 // The selects may be interleaved with other instructions if the other 5273 // instructions meet some requirements we deem safe: 5274 // - They are debug instructions. Otherwise, 5275 // - They do not have side-effects, do not access memory and their inputs do 5276 // not depend on the results of the select pseudo-instructions. 5277 // The TrueV/FalseV operands of the selects cannot depend on the result of 5278 // previous selects in the sequence. 5279 // These conditions could be further relaxed. See the X86 target for a 5280 // related approach and more information. 
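// Illustrative expansion of a single select pseudo (register names are
// placeholders, and the branch opcode is chosen from CC):
// HeadMBB:
// BEQ %lhs, %rhs, TailMBB
// IfFalseMBB: ; fallthrough
// TailMBB:
// %dst = PHI [ %trueV, HeadMBB ], [ %falseV, IfFalseMBB ]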
5281 Register LHS = MI.getOperand(1).getReg(); 5282 Register RHS = MI.getOperand(2).getReg(); 5283 auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm()); 5284 5285 SmallVector<MachineInstr *, 4> SelectDebugValues; 5286 SmallSet<Register, 4> SelectDests; 5287 SelectDests.insert(MI.getOperand(0).getReg()); 5288 5289 MachineInstr *LastSelectPseudo = &MI; 5290 5291 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI); 5292 SequenceMBBI != E; ++SequenceMBBI) { 5293 if (SequenceMBBI->isDebugInstr()) 5294 continue; 5295 else if (isSelectPseudo(*SequenceMBBI)) { 5296 if (SequenceMBBI->getOperand(1).getReg() != LHS || 5297 SequenceMBBI->getOperand(2).getReg() != RHS || 5298 SequenceMBBI->getOperand(3).getImm() != CC || 5299 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) || 5300 SelectDests.count(SequenceMBBI->getOperand(5).getReg())) 5301 break; 5302 LastSelectPseudo = &*SequenceMBBI; 5303 SequenceMBBI->collectDebugValues(SelectDebugValues); 5304 SelectDests.insert(SequenceMBBI->getOperand(0).getReg()); 5305 } else { 5306 if (SequenceMBBI->hasUnmodeledSideEffects() || 5307 SequenceMBBI->mayLoadOrStore()) 5308 break; 5309 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) { 5310 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg()); 5311 })) 5312 break; 5313 } 5314 } 5315 5316 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); 5317 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 5318 DebugLoc DL = MI.getDebugLoc(); 5319 MachineFunction::iterator I = ++BB->getIterator(); 5320 5321 MachineBasicBlock *HeadMBB = BB; 5322 MachineFunction *F = BB->getParent(); 5323 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB); 5324 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB); 5325 5326 F->insert(I, IfFalseMBB); 5327 F->insert(I, TailMBB); 5328 5329 // Transfer debug instructions associated with the selects to TailMBB. 5330 for (MachineInstr *DebugInstr : SelectDebugValues) { 5331 TailMBB->push_back(DebugInstr->removeFromParent()); 5332 } 5333 5334 // Move all instructions after the sequence to TailMBB. 5335 TailMBB->splice(TailMBB->end(), HeadMBB, 5336 std::next(LastSelectPseudo->getIterator()), HeadMBB->end()); 5337 // Update machine-CFG edges by transferring all successors of the current 5338 // block to the new block which will contain the Phi nodes for the selects. 5339 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB); 5340 // Set the successors for HeadMBB. 5341 HeadMBB->addSuccessor(IfFalseMBB); 5342 HeadMBB->addSuccessor(TailMBB); 5343 5344 // Insert appropriate branch. 5345 unsigned Opcode = getBranchOpcodeForIntCondCode(CC); 5346 5347 BuildMI(HeadMBB, DL, TII.get(Opcode)) 5348 .addReg(LHS) 5349 .addReg(RHS) 5350 .addMBB(TailMBB); 5351 5352 // IfFalseMBB just falls through to TailMBB. 5353 IfFalseMBB->addSuccessor(TailMBB); 5354 5355 // Create PHIs for all of the select pseudo-instructions. 
5356 auto SelectMBBI = MI.getIterator(); 5357 auto SelectEnd = std::next(LastSelectPseudo->getIterator()); 5358 auto InsertionPoint = TailMBB->begin(); 5359 while (SelectMBBI != SelectEnd) { 5360 auto Next = std::next(SelectMBBI); 5361 if (isSelectPseudo(*SelectMBBI)) { 5362 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ] 5363 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(), 5364 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg()) 5365 .addReg(SelectMBBI->getOperand(4).getReg()) 5366 .addMBB(HeadMBB) 5367 .addReg(SelectMBBI->getOperand(5).getReg()) 5368 .addMBB(IfFalseMBB); 5369 SelectMBBI->eraseFromParent(); 5370 } 5371 SelectMBBI = Next; 5372 } 5373 5374 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs); 5375 return TailMBB; 5376 } 5377 5378 static MachineInstr *elideCopies(MachineInstr *MI, 5379 const MachineRegisterInfo &MRI) { 5380 while (true) { 5381 if (!MI->isFullCopy()) 5382 return MI; 5383 if (!Register::isVirtualRegister(MI->getOperand(1).getReg())) 5384 return nullptr; 5385 MI = MRI.getVRegDef(MI->getOperand(1).getReg()); 5386 if (!MI) 5387 return nullptr; 5388 } 5389 } 5390 5391 static MachineBasicBlock *addVSetVL(MachineInstr &MI, MachineBasicBlock *BB, 5392 int VLIndex, unsigned SEWIndex, 5393 RISCVVLMUL VLMul, bool ForceTailAgnostic) { 5394 MachineFunction &MF = *BB->getParent(); 5395 DebugLoc DL = MI.getDebugLoc(); 5396 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 5397 5398 unsigned SEW = MI.getOperand(SEWIndex).getImm(); 5399 assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW"); 5400 RISCVVSEW ElementWidth = static_cast<RISCVVSEW>(Log2_32(SEW / 8)); 5401 5402 MachineRegisterInfo &MRI = MF.getRegInfo(); 5403 5404 auto BuildVSETVLI = [&]() { 5405 if (VLIndex >= 0) { 5406 Register DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); 5407 Register VLReg = MI.getOperand(VLIndex).getReg(); 5408 5409 // VL might be a compile time constant, but isel would have to put it 5410 // in a register. See if VL comes from an ADDI X0, imm. 5411 if (VLReg.isVirtual()) { 5412 MachineInstr *Def = MRI.getVRegDef(VLReg); 5413 if (Def && Def->getOpcode() == RISCV::ADDI && 5414 Def->getOperand(1).getReg() == RISCV::X0 && 5415 Def->getOperand(2).isImm()) { 5416 uint64_t Imm = Def->getOperand(2).getImm(); 5417 // VSETIVLI allows a 5-bit zero extended immediate. 5418 if (isUInt<5>(Imm)) 5419 return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETIVLI)) 5420 .addReg(DestReg, RegState::Define | RegState::Dead) 5421 .addImm(Imm); 5422 } 5423 } 5424 5425 return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI)) 5426 .addReg(DestReg, RegState::Define | RegState::Dead) 5427 .addReg(VLReg); 5428 } 5429 5430 // With no VL operator in the pseudo, do not modify VL (rd = X0, rs1 = X0). 5431 return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI)) 5432 .addReg(RISCV::X0, RegState::Define | RegState::Dead) 5433 .addReg(RISCV::X0, RegState::Kill); 5434 }; 5435 5436 MachineInstrBuilder MIB = BuildVSETVLI(); 5437 5438 // Default to tail agnostic unless the destination is tied to a source. In 5439 // that case the user would have some control over the tail values. The tail 5440 // policy is also ignored on instructions that only update element 0 like 5441 // vmv.s.x or reductions so use agnostic there to match the common case. 5442 // FIXME: This is conservatively correct, but we might want to detect that 5443 // the input is undefined. 
5444 bool TailAgnostic = true; 5445 unsigned UseOpIdx; 5446 if (!ForceTailAgnostic && MI.isRegTiedToUseOperand(0, &UseOpIdx)) { 5447 TailAgnostic = false; 5448 // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic. 5449 const MachineOperand &UseMO = MI.getOperand(UseOpIdx); 5450 MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg()); 5451 if (UseMI) { 5452 UseMI = elideCopies(UseMI, MRI); 5453 if (UseMI && UseMI->isImplicitDef()) 5454 TailAgnostic = true; 5455 } 5456 } 5457 5458 // For simplicity we reuse the vtype representation here. 5459 MIB.addImm(RISCVVType::encodeVTYPE(VLMul, ElementWidth, 5460 /*TailAgnostic*/ TailAgnostic, 5461 /*MaskAgnostic*/ false)); 5462 5463 // Remove (now) redundant operands from pseudo 5464 if (VLIndex >= 0) { 5465 MI.getOperand(VLIndex).setReg(RISCV::NoRegister); 5466 MI.getOperand(VLIndex).setIsKill(false); 5467 } 5468 5469 return BB; 5470 } 5471 5472 MachineBasicBlock * 5473 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, 5474 MachineBasicBlock *BB) const { 5475 uint64_t TSFlags = MI.getDesc().TSFlags; 5476 5477 if (TSFlags & RISCVII::HasSEWOpMask) { 5478 unsigned NumOperands = MI.getNumExplicitOperands(); 5479 int VLIndex = (TSFlags & RISCVII::HasVLOpMask) ? NumOperands - 2 : -1; 5480 unsigned SEWIndex = NumOperands - 1; 5481 bool ForceTailAgnostic = TSFlags & RISCVII::ForceTailAgnosticMask; 5482 5483 RISCVVLMUL VLMul = static_cast<RISCVVLMUL>((TSFlags & RISCVII::VLMulMask) >> 5484 RISCVII::VLMulShift); 5485 return addVSetVL(MI, BB, VLIndex, SEWIndex, VLMul, ForceTailAgnostic); 5486 } 5487 5488 switch (MI.getOpcode()) { 5489 default: 5490 llvm_unreachable("Unexpected instr type to insert"); 5491 case RISCV::ReadCycleWide: 5492 assert(!Subtarget.is64Bit() && 5493 "ReadCycleWrite is only to be used on riscv32"); 5494 return emitReadCycleWidePseudo(MI, BB); 5495 case RISCV::Select_GPR_Using_CC_GPR: 5496 case RISCV::Select_FPR16_Using_CC_GPR: 5497 case RISCV::Select_FPR32_Using_CC_GPR: 5498 case RISCV::Select_FPR64_Using_CC_GPR: 5499 return emitSelectPseudo(MI, BB); 5500 case RISCV::BuildPairF64Pseudo: 5501 return emitBuildPairF64Pseudo(MI, BB); 5502 case RISCV::SplitF64Pseudo: 5503 return emitSplitF64Pseudo(MI, BB); 5504 } 5505 } 5506 5507 // Calling Convention Implementation. 5508 // The expectations for frontend ABI lowering vary from target to target. 5509 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI 5510 // details, but this is a longer term goal. For now, we simply try to keep the 5511 // role of the frontend as simple and well-defined as possible. The rules can 5512 // be summarised as: 5513 // * Never split up large scalar arguments. We handle them here. 5514 // * If a hardfloat calling convention is being used, and the struct may be 5515 // passed in a pair of registers (fp+fp, int+fp), and both registers are 5516 // available, then pass as two separate arguments. If either the GPRs or FPRs 5517 // are exhausted, then pass according to the rule below. 5518 // * If a struct could never be passed in registers or directly in a stack 5519 // slot (as it is larger than 2*XLEN and the floating point rules don't 5520 // apply), then pass it using a pointer with the byval attribute. 5521 // * If a struct is less than 2*XLEN, then coerce to either a two-element 5522 // word-sized array or a 2*XLEN scalar (depending on alignment). 5523 // * The frontend can determine whether a struct is returned by reference or 5524 // not based on its size and fields. 
If it will be returned by reference, the 5525 // frontend must modify the prototype so a pointer with the sret annotation is 5526 // passed as the first argument. This is not necessary for large scalar 5527 // returns. 5528 // * Struct return values and varargs should be coerced to structs containing 5529 // register-size fields in the same situations they would be for fixed 5530 // arguments. 5531 5532 static const MCPhysReg ArgGPRs[] = { 5533 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, 5534 RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17 5535 }; 5536 static const MCPhysReg ArgFPR16s[] = { 5537 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, 5538 RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H 5539 }; 5540 static const MCPhysReg ArgFPR32s[] = { 5541 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, 5542 RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F 5543 }; 5544 static const MCPhysReg ArgFPR64s[] = { 5545 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, 5546 RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D 5547 }; 5548 // This is an interim calling convention and it may be changed in the future. 5549 static const MCPhysReg ArgVRs[] = { 5550 RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13, 5551 RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19, 5552 RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23}; 5553 static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2, 5554 RISCV::V14M2, RISCV::V16M2, RISCV::V18M2, 5555 RISCV::V20M2, RISCV::V22M2}; 5556 static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4, 5557 RISCV::V20M4}; 5558 static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8}; 5559 5560 // Pass a 2*XLEN argument that has been split into two XLEN values through 5561 // registers or the stack as necessary. 5562 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, 5563 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, 5564 MVT ValVT2, MVT LocVT2, 5565 ISD::ArgFlagsTy ArgFlags2) { 5566 unsigned XLenInBytes = XLen / 8; 5567 if (Register Reg = State.AllocateReg(ArgGPRs)) { 5568 // At least one half can be passed via register. 5569 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg, 5570 VA1.getLocVT(), CCValAssign::Full)); 5571 } else { 5572 // Both halves must be passed on the stack, with proper alignment. 5573 Align StackAlign = 5574 std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign()); 5575 State.addLoc( 5576 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(), 5577 State.AllocateStack(XLenInBytes, StackAlign), 5578 VA1.getLocVT(), CCValAssign::Full)); 5579 State.addLoc(CCValAssign::getMem( 5580 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)), 5581 LocVT2, CCValAssign::Full)); 5582 return false; 5583 } 5584 5585 if (Register Reg = State.AllocateReg(ArgGPRs)) { 5586 // The second half can also be passed via register. 5587 State.addLoc( 5588 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full)); 5589 } else { 5590 // The second half is passed via the stack, without additional alignment. 5591 State.addLoc(CCValAssign::getMem( 5592 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)), 5593 LocVT2, CCValAssign::Full)); 5594 } 5595 5596 return false; 5597 } 5598 5599 // Implements the RISC-V calling convention. Returns true upon failure. 
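// For instance, under the lp64d ABI a fixed f64 argument is assigned to one of
// fa0-fa7 while FPRs remain available; a variadic f64, or one arriving after
// the FPRs are exhausted, instead follows the integer rules below.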
5600 static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, 5601 MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, 5602 ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, 5603 bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, 5604 Optional<unsigned> FirstMaskArgument) { 5605 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits(); 5606 assert(XLen == 32 || XLen == 64); 5607 MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64; 5608 5609 // Any return value split in to more than two values can't be returned 5610 // directly. Vectors are returned via the available vector registers. 5611 if (!LocVT.isVector() && IsRet && ValNo > 1) 5612 return true; 5613 5614 // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a 5615 // variadic argument, or if no F16/F32 argument registers are available. 5616 bool UseGPRForF16_F32 = true; 5617 // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a 5618 // variadic argument, or if no F64 argument registers are available. 5619 bool UseGPRForF64 = true; 5620 5621 switch (ABI) { 5622 default: 5623 llvm_unreachable("Unexpected ABI"); 5624 case RISCVABI::ABI_ILP32: 5625 case RISCVABI::ABI_LP64: 5626 break; 5627 case RISCVABI::ABI_ILP32F: 5628 case RISCVABI::ABI_LP64F: 5629 UseGPRForF16_F32 = !IsFixed; 5630 break; 5631 case RISCVABI::ABI_ILP32D: 5632 case RISCVABI::ABI_LP64D: 5633 UseGPRForF16_F32 = !IsFixed; 5634 UseGPRForF64 = !IsFixed; 5635 break; 5636 } 5637 5638 // FPR16, FPR32, and FPR64 alias each other. 5639 if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) { 5640 UseGPRForF16_F32 = true; 5641 UseGPRForF64 = true; 5642 } 5643 5644 // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and 5645 // similar local variables rather than directly checking against the target 5646 // ABI. 5647 5648 if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::f32)) { 5649 LocVT = XLenVT; 5650 LocInfo = CCValAssign::BCvt; 5651 } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) { 5652 LocVT = MVT::i64; 5653 LocInfo = CCValAssign::BCvt; 5654 } 5655 5656 // If this is a variadic argument, the RISC-V calling convention requires 5657 // that it is assigned an 'even' or 'aligned' register if it has 8-byte 5658 // alignment (RV32) or 16-byte alignment (RV64). An aligned register should 5659 // be used regardless of whether the original argument was split during 5660 // legalisation or not. The argument will not be passed by registers if the 5661 // original type is larger than 2*XLEN, so the register alignment rule does 5662 // not apply. 5663 unsigned TwoXLenInBytes = (2 * XLen) / 8; 5664 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes && 5665 DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) { 5666 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs); 5667 // Skip 'odd' register if necessary. 5668 if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1) 5669 State.AllocateReg(ArgGPRs); 5670 } 5671 5672 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs(); 5673 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags = 5674 State.getPendingArgFlags(); 5675 5676 assert(PendingLocs.size() == PendingArgFlags.size() && 5677 "PendingLocs and PendingArgFlags out of sync"); 5678 5679 // Handle passing f64 on RV32D with a soft float ABI or when floating point 5680 // registers are exhausted. 
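// e.g. if only a7 remains unallocated, the f64 is split: one half travels in
// a7 and the other half in the first stack slot.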
5681 if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) { 5682 assert(!ArgFlags.isSplit() && PendingLocs.empty() && 5683 "Can't lower f64 if it is split"); 5684 // Depending on available argument GPRS, f64 may be passed in a pair of 5685 // GPRs, split between a GPR and the stack, or passed completely on the 5686 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these 5687 // cases. 5688 Register Reg = State.AllocateReg(ArgGPRs); 5689 LocVT = MVT::i32; 5690 if (!Reg) { 5691 unsigned StackOffset = State.AllocateStack(8, Align(8)); 5692 State.addLoc( 5693 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 5694 return false; 5695 } 5696 if (!State.AllocateReg(ArgGPRs)) 5697 State.AllocateStack(4, Align(4)); 5698 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 5699 return false; 5700 } 5701 5702 // Fixed-length vectors are located in the corresponding scalable-vector 5703 // container types. 5704 if (ValVT.isFixedLengthVector()) 5705 LocVT = TLI.getContainerForFixedLengthVector(LocVT); 5706 5707 // Split arguments might be passed indirectly, so keep track of the pending 5708 // values. Split vectors are passed via a mix of registers and indirectly, so 5709 // treat them as we would any other argument. 5710 if (!LocVT.isVector() && (ArgFlags.isSplit() || !PendingLocs.empty())) { 5711 LocVT = XLenVT; 5712 LocInfo = CCValAssign::Indirect; 5713 PendingLocs.push_back( 5714 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); 5715 PendingArgFlags.push_back(ArgFlags); 5716 if (!ArgFlags.isSplitEnd()) { 5717 return false; 5718 } 5719 } 5720 5721 // If the split argument only had two elements, it should be passed directly 5722 // in registers or on the stack. 5723 if (!LocVT.isVector() && ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) { 5724 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()"); 5725 // Apply the normal calling convention rules to the first half of the 5726 // split argument. 5727 CCValAssign VA = PendingLocs[0]; 5728 ISD::ArgFlagsTy AF = PendingArgFlags[0]; 5729 PendingLocs.clear(); 5730 PendingArgFlags.clear(); 5731 return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT, 5732 ArgFlags); 5733 } 5734 5735 // Allocate to a register if possible, or else a stack slot. 5736 Register Reg; 5737 if (ValVT == MVT::f16 && !UseGPRForF16_F32) 5738 Reg = State.AllocateReg(ArgFPR16s); 5739 else if (ValVT == MVT::f32 && !UseGPRForF16_F32) 5740 Reg = State.AllocateReg(ArgFPR32s); 5741 else if (ValVT == MVT::f64 && !UseGPRForF64) 5742 Reg = State.AllocateReg(ArgFPR64s); 5743 else if (ValVT.isVector()) { 5744 const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT); 5745 if (RC == &RISCV::VRRegClass) { 5746 // Assign the first mask argument to V0. 5747 // This is an interim calling convention and it may be changed in the 5748 // future. 5749 if (FirstMaskArgument.hasValue() && 5750 ValNo == FirstMaskArgument.getValue()) { 5751 Reg = State.AllocateReg(RISCV::V0); 5752 } else { 5753 Reg = State.AllocateReg(ArgVRs); 5754 } 5755 } else if (RC == &RISCV::VRM2RegClass) { 5756 Reg = State.AllocateReg(ArgVRM2s); 5757 } else if (RC == &RISCV::VRM4RegClass) { 5758 Reg = State.AllocateReg(ArgVRM4s); 5759 } else if (RC == &RISCV::VRM8RegClass) { 5760 Reg = State.AllocateReg(ArgVRM8s); 5761 } else { 5762 llvm_unreachable("Unhandled class register for ValueType"); 5763 } 5764 if (!Reg) { 5765 // For return values, the vector must be passed fully via registers or 5766 // via the stack. 
5767 // FIXME: The proposed vector ABI only mandates v8-v15 for return values, 5768 // but we're using all of them. 5769 if (IsRet) 5770 return true; 5771 LocInfo = CCValAssign::Indirect; 5772 // Try using a GPR to pass the address 5773 Reg = State.AllocateReg(ArgGPRs); 5774 LocVT = XLenVT; 5775 } 5776 } else 5777 Reg = State.AllocateReg(ArgGPRs); 5778 unsigned StackOffset = 5779 Reg ? 0 : State.AllocateStack(XLen / 8, Align(XLen / 8)); 5780 5781 // If we reach this point and PendingLocs is non-empty, we must be at the 5782 // end of a split argument that must be passed indirectly. 5783 if (!PendingLocs.empty()) { 5784 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()"); 5785 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()"); 5786 5787 for (auto &It : PendingLocs) { 5788 if (Reg) 5789 It.convertToReg(Reg); 5790 else 5791 It.convertToMem(StackOffset); 5792 State.addLoc(It); 5793 } 5794 PendingLocs.clear(); 5795 PendingArgFlags.clear(); 5796 return false; 5797 } 5798 5799 assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT || 5800 (TLI.getSubtarget().hasStdExtV() && ValVT.isVector())) && 5801 "Expected an XLenVT or vector types at this stage"); 5802 5803 if (Reg) { 5804 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 5805 return false; 5806 } 5807 5808 // When a floating-point value is passed on the stack, no bit-conversion is 5809 // needed. 5810 if (ValVT.isFloatingPoint()) { 5811 LocVT = ValVT; 5812 LocInfo = CCValAssign::Full; 5813 } 5814 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 5815 return false; 5816 } 5817 5818 template <typename ArgTy> 5819 static Optional<unsigned> preAssignMask(const ArgTy &Args) { 5820 for (const auto &ArgIdx : enumerate(Args)) { 5821 MVT ArgVT = ArgIdx.value().VT; 5822 if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1) 5823 return ArgIdx.index(); 5824 } 5825 return None; 5826 } 5827 5828 void RISCVTargetLowering::analyzeInputArgs( 5829 MachineFunction &MF, CCState &CCInfo, 5830 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const { 5831 unsigned NumArgs = Ins.size(); 5832 FunctionType *FType = MF.getFunction().getFunctionType(); 5833 5834 Optional<unsigned> FirstMaskArgument; 5835 if (Subtarget.hasStdExtV()) 5836 FirstMaskArgument = preAssignMask(Ins); 5837 5838 for (unsigned i = 0; i != NumArgs; ++i) { 5839 MVT ArgVT = Ins[i].VT; 5840 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; 5841 5842 Type *ArgTy = nullptr; 5843 if (IsRet) 5844 ArgTy = FType->getReturnType(); 5845 else if (Ins[i].isOrigArg()) 5846 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex()); 5847 5848 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 5849 if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, 5850 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this, 5851 FirstMaskArgument)) { 5852 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " 5853 << EVT(ArgVT).getEVTString() << '\n'); 5854 llvm_unreachable(nullptr); 5855 } 5856 } 5857 } 5858 5859 void RISCVTargetLowering::analyzeOutputArgs( 5860 MachineFunction &MF, CCState &CCInfo, 5861 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet, 5862 CallLoweringInfo *CLI) const { 5863 unsigned NumArgs = Outs.size(); 5864 5865 Optional<unsigned> FirstMaskArgument; 5866 if (Subtarget.hasStdExtV()) 5867 FirstMaskArgument = preAssignMask(Outs); 5868 5869 for (unsigned i = 0; i != NumArgs; i++) { 5870 MVT ArgVT = Outs[i].VT; 5871 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 5872 
Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr; 5873 5874 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 5875 if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, 5876 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this, 5877 FirstMaskArgument)) { 5878 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " 5879 << EVT(ArgVT).getEVTString() << "\n"); 5880 llvm_unreachable(nullptr); 5881 } 5882 } 5883 } 5884 5885 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect 5886 // values. 5887 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, 5888 const CCValAssign &VA, const SDLoc &DL, 5889 const RISCVSubtarget &Subtarget) { 5890 switch (VA.getLocInfo()) { 5891 default: 5892 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 5893 case CCValAssign::Full: 5894 if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector()) 5895 Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget); 5896 break; 5897 case CCValAssign::BCvt: 5898 if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16) 5899 Val = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, Val); 5900 else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 5901 Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val); 5902 else 5903 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); 5904 break; 5905 } 5906 return Val; 5907 } 5908 5909 // The caller is responsible for loading the full value if the argument is 5910 // passed with CCValAssign::Indirect. 5911 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, 5912 const CCValAssign &VA, const SDLoc &DL, 5913 const RISCVTargetLowering &TLI) { 5914 MachineFunction &MF = DAG.getMachineFunction(); 5915 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 5916 EVT LocVT = VA.getLocVT(); 5917 SDValue Val; 5918 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT()); 5919 Register VReg = RegInfo.createVirtualRegister(RC); 5920 RegInfo.addLiveIn(VA.getLocReg(), VReg); 5921 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); 5922 5923 if (VA.getLocInfo() == CCValAssign::Indirect) 5924 return Val; 5925 5926 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget()); 5927 } 5928 5929 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, 5930 const CCValAssign &VA, const SDLoc &DL, 5931 const RISCVSubtarget &Subtarget) { 5932 EVT LocVT = VA.getLocVT(); 5933 5934 switch (VA.getLocInfo()) { 5935 default: 5936 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 5937 case CCValAssign::Full: 5938 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector()) 5939 Val = convertToScalableVector(LocVT, Val, DAG, Subtarget); 5940 break; 5941 case CCValAssign::BCvt: 5942 if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16) 5943 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val); 5944 else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 5945 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val); 5946 else 5947 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val); 5948 break; 5949 } 5950 return Val; 5951 } 5952 5953 // The caller is responsible for loading the full value if the argument is 5954 // passed with CCValAssign::Indirect. 
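// For example (illustrative, assuming RV32): an i128 argument is passed
// indirectly, so only a single XLEN-sized pointer arrives via a register or
// stack slot; LowerFormalArguments is the caller that then loads each part of
// the value through that pointer.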
5955 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
5956 const CCValAssign &VA, const SDLoc &DL) {
5957 MachineFunction &MF = DAG.getMachineFunction();
5958 MachineFrameInfo &MFI = MF.getFrameInfo();
5959 EVT LocVT = VA.getLocVT();
5960 EVT ValVT = VA.getValVT();
5961 EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
5962 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
5963 VA.getLocMemOffset(), /*Immutable=*/true);
5964 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
5965 SDValue Val;
5966
5967 ISD::LoadExtType ExtType;
5968 switch (VA.getLocInfo()) {
5969 default:
5970 llvm_unreachable("Unexpected CCValAssign::LocInfo");
5971 case CCValAssign::Full:
5972 case CCValAssign::Indirect:
5973 case CCValAssign::BCvt:
5974 ExtType = ISD::NON_EXTLOAD;
5975 break;
5976 }
5977 Val = DAG.getExtLoad(
5978 ExtType, DL, LocVT, Chain, FIN,
5979 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
5980 return Val;
5981 }
5982
5983 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
5984 const CCValAssign &VA, const SDLoc &DL) {
5985 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
5986 "Unexpected VA");
5987 MachineFunction &MF = DAG.getMachineFunction();
5988 MachineFrameInfo &MFI = MF.getFrameInfo();
5989 MachineRegisterInfo &RegInfo = MF.getRegInfo();
5990
5991 if (VA.isMemLoc()) {
5992 // f64 is passed on the stack.
5993 int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
5994 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
5995 return DAG.getLoad(MVT::f64, DL, Chain, FIN,
5996 MachinePointerInfo::getFixedStack(MF, FI));
5997 }
5998
5999 assert(VA.isRegLoc() && "Expected register VA assignment");
6000
6001 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
6002 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
6003 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
6004 SDValue Hi;
6005 if (VA.getLocReg() == RISCV::X17) {
6006 // Second half of f64 is passed on the stack.
6007 int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
6008 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
6009 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
6010 MachinePointerInfo::getFixedStack(MF, FI));
6011 } else {
6012 // Second half of f64 is passed in another GPR.
6013 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
6014 RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
6015 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
6016 }
6017 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
6018 }
6019
6020 // FastCC yields less than a 1% performance improvement on some particular
6021 // benchmarks, but in theory it may benefit other cases.
6022 static bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
6023 CCValAssign::LocInfo LocInfo,
6024 ISD::ArgFlagsTy ArgFlags, CCState &State) {
6025
6026 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
6027 // X5 and X6 might be used for the save-restore libcalls.
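// Added illustrative note (not from the ABI spec): with the 13 registers
// listed below, a fastcc function taking 14 XLEN-sized integer arguments
// would place the first 13 in a0-a7, t2 and t3-t6, and the 14th in a stack
// slot allocated further down in this routine.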
6028 static const MCPhysReg GPRList[] = { 6029 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, 6030 RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28, 6031 RISCV::X29, RISCV::X30, RISCV::X31}; 6032 if (unsigned Reg = State.AllocateReg(GPRList)) { 6033 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 6034 return false; 6035 } 6036 } 6037 6038 if (LocVT == MVT::f16) { 6039 static const MCPhysReg FPR16List[] = { 6040 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H, 6041 RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H, 6042 RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H, 6043 RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H}; 6044 if (unsigned Reg = State.AllocateReg(FPR16List)) { 6045 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 6046 return false; 6047 } 6048 } 6049 6050 if (LocVT == MVT::f32) { 6051 static const MCPhysReg FPR32List[] = { 6052 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F, 6053 RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F, 6054 RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F, 6055 RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F}; 6056 if (unsigned Reg = State.AllocateReg(FPR32List)) { 6057 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 6058 return false; 6059 } 6060 } 6061 6062 if (LocVT == MVT::f64) { 6063 static const MCPhysReg FPR64List[] = { 6064 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D, 6065 RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D, 6066 RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D, 6067 RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D}; 6068 if (unsigned Reg = State.AllocateReg(FPR64List)) { 6069 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 6070 return false; 6071 } 6072 } 6073 6074 if (LocVT == MVT::i32 || LocVT == MVT::f32) { 6075 unsigned Offset4 = State.AllocateStack(4, Align(4)); 6076 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo)); 6077 return false; 6078 } 6079 6080 if (LocVT == MVT::i64 || LocVT == MVT::f64) { 6081 unsigned Offset5 = State.AllocateStack(8, Align(8)); 6082 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo)); 6083 return false; 6084 } 6085 6086 return true; // CC didn't match. 6087 } 6088 6089 static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, 6090 CCValAssign::LocInfo LocInfo, 6091 ISD::ArgFlagsTy ArgFlags, CCState &State) { 6092 6093 if (LocVT == MVT::i32 || LocVT == MVT::i64) { 6094 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim 6095 // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11 6096 static const MCPhysReg GPRList[] = { 6097 RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22, 6098 RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27}; 6099 if (unsigned Reg = State.AllocateReg(GPRList)) { 6100 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 6101 return false; 6102 } 6103 } 6104 6105 if (LocVT == MVT::f32) { 6106 // Pass in STG registers: F1, ..., F6 6107 // fs0 ... 
fs5 6108 static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
6109 RISCV::F18_F, RISCV::F19_F,
6110 RISCV::F20_F, RISCV::F21_F};
6111 if (unsigned Reg = State.AllocateReg(FPR32List)) {
6112 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
6113 return false;
6114 }
6115 }
6116
6117 if (LocVT == MVT::f64) {
6118 // Pass in STG registers: D1, ..., D6
6119 // fs6 ... fs11
6120 static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
6121 RISCV::F24_D, RISCV::F25_D,
6122 RISCV::F26_D, RISCV::F27_D};
6123 if (unsigned Reg = State.AllocateReg(FPR64List)) {
6124 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
6125 return false;
6126 }
6127 }
6128
6129 report_fatal_error("No registers left in GHC calling convention");
6130 return true;
6131 }
6132
6133 // Transform physical registers into virtual registers.
6134 SDValue RISCVTargetLowering::LowerFormalArguments(
6135 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
6136 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
6137 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
6138
6139 MachineFunction &MF = DAG.getMachineFunction();
6140
6141 switch (CallConv) {
6142 default:
6143 report_fatal_error("Unsupported calling convention");
6144 case CallingConv::C:
6145 case CallingConv::Fast:
6146 break;
6147 case CallingConv::GHC:
6148 if (!MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtF] ||
6149 !MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtD])
6150 report_fatal_error(
6151 "GHC calling convention requires the F and D instruction set extensions");
6152 }
6153
6154 const Function &Func = MF.getFunction();
6155 if (Func.hasFnAttribute("interrupt")) {
6156 if (!Func.arg_empty())
6157 report_fatal_error(
6158 "Functions with the interrupt attribute cannot have arguments!");
6159
6160 StringRef Kind =
6161 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
6162
6163 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
6164 report_fatal_error(
6165 "Function interrupt attribute argument not supported!");
6166 }
6167
6168 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6169 MVT XLenVT = Subtarget.getXLenVT();
6170 unsigned XLenInBytes = Subtarget.getXLen() / 8;
6171 // Used with varargs to accumulate store chains.
6172 std::vector<SDValue> OutChains;
6173
6174 // Assign locations to all of the incoming arguments.
6175 SmallVector<CCValAssign, 16> ArgLocs;
6176 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
6177
6178 if (CallConv == CallingConv::Fast)
6179 CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_FastCC);
6180 else if (CallConv == CallingConv::GHC)
6181 CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
6182 else
6183 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);
6184
6185 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
6186 CCValAssign &VA = ArgLocs[i];
6187 SDValue ArgValue;
6188 // Passing f64 on RV32D with a soft float ABI must be handled as a special
6189 // case.
6190 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
6191 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
6192 else if (VA.isRegLoc())
6193 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
6194 else
6195 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
6196
6197 if (VA.getLocInfo() == CCValAssign::Indirect) {
6198 // If the original argument was split and passed by reference (e.g. i128
6199 // on RV32), we need to load all parts of it here (using the same
6200 // address).
Vectors may be partly split to registers and partly to the
6201 // stack, in which case the base address is partly offset and subsequent
6202 // stores are relative to that.
6203 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
6204 MachinePointerInfo()));
6205 unsigned ArgIndex = Ins[i].OrigArgIndex;
6206 unsigned ArgPartOffset = Ins[i].PartOffset;
6207 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
6208 while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
6209 CCValAssign &PartVA = ArgLocs[i + 1];
6210 unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
6211 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
6212 DAG.getIntPtrConstant(PartOffset, DL));
6213 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
6214 MachinePointerInfo()));
6215 ++i;
6216 }
6217 continue;
6218 }
6219 InVals.push_back(ArgValue);
6220 }
6221
6222 if (IsVarArg) {
6223 ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
6224 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
6225 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
6226 MachineFrameInfo &MFI = MF.getFrameInfo();
6227 MachineRegisterInfo &RegInfo = MF.getRegInfo();
6228 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
6229
6230 // Offset of the first variable argument from stack pointer, and size of
6231 // the vararg save area. For now, the varargs save area is either zero or
6232 // large enough to hold a0-a7.
6233 int VaArgOffset, VarArgsSaveSize;
6234
6235 // If all registers are allocated, then all varargs must be passed on the
6236 // stack and we don't need to save any argregs.
6237 if (ArgRegs.size() == Idx) {
6238 VaArgOffset = CCInfo.getNextStackOffset();
6239 VarArgsSaveSize = 0;
6240 } else {
6241 VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
6242 VaArgOffset = -VarArgsSaveSize;
6243 }
6244
6245 // Record the frame index of the first variable argument,
6246 // which is a value needed for VASTART.
6247 int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
6248 RVFI->setVarArgsFrameIndex(FI);
6249
6250 // If saving an odd number of registers, then create an extra stack slot to
6251 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
6252 // offsets to even-numbered registers remain 2*XLEN-aligned.
6253 if (Idx % 2) {
6254 MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
6255 VarArgsSaveSize += XLenInBytes;
6256 }
6257
6258 // Copy the integer registers that may have been used for passing varargs
6259 // to the vararg save area.
6260 for (unsigned I = Idx; I < ArgRegs.size();
6261 ++I, VaArgOffset += XLenInBytes) {
6262 const Register Reg = RegInfo.createVirtualRegister(RC);
6263 RegInfo.addLiveIn(ArgRegs[I], Reg);
6264 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
6265 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
6266 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
6267 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
6268 MachinePointerInfo::getFixedStack(MF, FI));
6269 cast<StoreSDNode>(Store.getNode())
6270 ->getMemOperand()
6271 ->setValue((Value *)nullptr);
6272 OutChains.push_back(Store);
6273 }
6274 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
6275 }
6276
6277 // All stores are grouped in one node to allow the matching between
6278 // the size of Ins and InVals. This only happens for vararg functions.
6279 if (!OutChains.empty()) { 6280 OutChains.push_back(Chain); 6281 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains); 6282 } 6283 6284 return Chain; 6285 } 6286 6287 /// isEligibleForTailCallOptimization - Check whether the call is eligible 6288 /// for tail call optimization. 6289 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization. 6290 bool RISCVTargetLowering::isEligibleForTailCallOptimization( 6291 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, 6292 const SmallVector<CCValAssign, 16> &ArgLocs) const { 6293 6294 auto &Callee = CLI.Callee; 6295 auto CalleeCC = CLI.CallConv; 6296 auto &Outs = CLI.Outs; 6297 auto &Caller = MF.getFunction(); 6298 auto CallerCC = Caller.getCallingConv(); 6299 6300 // Exception-handling functions need a special set of instructions to 6301 // indicate a return to the hardware. Tail-calling another function would 6302 // probably break this. 6303 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This 6304 // should be expanded as new function attributes are introduced. 6305 if (Caller.hasFnAttribute("interrupt")) 6306 return false; 6307 6308 // Do not tail call opt if the stack is used to pass parameters. 6309 if (CCInfo.getNextStackOffset() != 0) 6310 return false; 6311 6312 // Do not tail call opt if any parameters need to be passed indirectly. 6313 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are 6314 // passed indirectly. So the address of the value will be passed in a 6315 // register, or if not available, then the address is put on the stack. In 6316 // order to pass indirectly, space on the stack often needs to be allocated 6317 // in order to store the value. In this case the CCInfo.getNextStackOffset() 6318 // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs 6319 // are passed CCValAssign::Indirect. 6320 for (auto &VA : ArgLocs) 6321 if (VA.getLocInfo() == CCValAssign::Indirect) 6322 return false; 6323 6324 // Do not tail call opt if either caller or callee uses struct return 6325 // semantics. 6326 auto IsCallerStructRet = Caller.hasStructRetAttr(); 6327 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); 6328 if (IsCallerStructRet || IsCalleeStructRet) 6329 return false; 6330 6331 // Externally-defined functions with weak linkage should not be 6332 // tail-called. The behaviour of branch instructions in this situation (as 6333 // used for tail calls) is implementation-defined, so we cannot rely on the 6334 // linker replacing the tail call with a return. 6335 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 6336 const GlobalValue *GV = G->getGlobal(); 6337 if (GV->hasExternalWeakLinkage()) 6338 return false; 6339 } 6340 6341 // The callee has to preserve all registers the caller needs to preserve. 6342 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); 6343 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); 6344 if (CalleeCC != CallerCC) { 6345 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); 6346 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) 6347 return false; 6348 } 6349 6350 // Byval parameters hand the function a pointer directly into the stack area 6351 // we want to reuse during a tail call. Working around this *is* possible 6352 // but less efficient and uglier in LowerCall. 
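// For instance (illustrative): an argument carrying the IR 'byval' attribute,
// such as a large struct passed by value, is rejected here and the call is
// then lowered as a normal (non-tail) call.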
6353 for (auto &Arg : Outs) 6354 if (Arg.Flags.isByVal()) 6355 return false; 6356 6357 return true; 6358 } 6359 6360 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input 6361 // and output parameter nodes. 6362 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, 6363 SmallVectorImpl<SDValue> &InVals) const { 6364 SelectionDAG &DAG = CLI.DAG; 6365 SDLoc &DL = CLI.DL; 6366 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 6367 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 6368 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 6369 SDValue Chain = CLI.Chain; 6370 SDValue Callee = CLI.Callee; 6371 bool &IsTailCall = CLI.IsTailCall; 6372 CallingConv::ID CallConv = CLI.CallConv; 6373 bool IsVarArg = CLI.IsVarArg; 6374 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 6375 MVT XLenVT = Subtarget.getXLenVT(); 6376 6377 MachineFunction &MF = DAG.getMachineFunction(); 6378 6379 // Analyze the operands of the call, assigning locations to each operand. 6380 SmallVector<CCValAssign, 16> ArgLocs; 6381 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 6382 6383 if (CallConv == CallingConv::Fast) 6384 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC); 6385 else if (CallConv == CallingConv::GHC) 6386 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC); 6387 else 6388 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI); 6389 6390 // Check if it's really possible to do a tail call. 6391 if (IsTailCall) 6392 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs); 6393 6394 if (IsTailCall) 6395 ++NumTailCalls; 6396 else if (CLI.CB && CLI.CB->isMustTailCall()) 6397 report_fatal_error("failed to perform tail call elimination on a call " 6398 "site marked musttail"); 6399 6400 // Get a count of how many bytes are to be pushed on the stack. 6401 unsigned NumBytes = ArgCCInfo.getNextStackOffset(); 6402 6403 // Create local copies for byval args 6404 SmallVector<SDValue, 8> ByValArgs; 6405 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 6406 ISD::ArgFlagsTy Flags = Outs[i].Flags; 6407 if (!Flags.isByVal()) 6408 continue; 6409 6410 SDValue Arg = OutVals[i]; 6411 unsigned Size = Flags.getByValSize(); 6412 Align Alignment = Flags.getNonZeroByValAlign(); 6413 6414 int FI = 6415 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false); 6416 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 6417 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT); 6418 6419 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment, 6420 /*IsVolatile=*/false, 6421 /*AlwaysInline=*/false, IsTailCall, 6422 MachinePointerInfo(), MachinePointerInfo()); 6423 ByValArgs.push_back(FIPtr); 6424 } 6425 6426 if (!IsTailCall) 6427 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); 6428 6429 // Copy argument values to their designated locations. 6430 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass; 6431 SmallVector<SDValue, 8> MemOpChains; 6432 SDValue StackPtr; 6433 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) { 6434 CCValAssign &VA = ArgLocs[i]; 6435 SDValue ArgValue = OutVals[i]; 6436 ISD::ArgFlagsTy Flags = Outs[i].Flags; 6437 6438 // Handle passing f64 on RV32D with a soft float ABI as a special case. 
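// Sketch of the special case (explanatory comment, mirrors the code below):
// the f64 is split with RISCVISD::SplitF64 into lo/hi i32 halves; lo is
// copied into the assigned GPR, and hi either goes into the next GPR or, when
// lo landed in a7 (X17), is stored to the outgoing argument area on the stack.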
6439 bool IsF64OnRV32DSoftABI = 6440 VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64; 6441 if (IsF64OnRV32DSoftABI && VA.isRegLoc()) { 6442 SDValue SplitF64 = DAG.getNode( 6443 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue); 6444 SDValue Lo = SplitF64.getValue(0); 6445 SDValue Hi = SplitF64.getValue(1); 6446 6447 Register RegLo = VA.getLocReg(); 6448 RegsToPass.push_back(std::make_pair(RegLo, Lo)); 6449 6450 if (RegLo == RISCV::X17) { 6451 // Second half of f64 is passed on the stack. 6452 // Work out the address of the stack slot. 6453 if (!StackPtr.getNode()) 6454 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); 6455 // Emit the store. 6456 MemOpChains.push_back( 6457 DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo())); 6458 } else { 6459 // Second half of f64 is passed in another GPR. 6460 assert(RegLo < RISCV::X31 && "Invalid register pair"); 6461 Register RegHigh = RegLo + 1; 6462 RegsToPass.push_back(std::make_pair(RegHigh, Hi)); 6463 } 6464 continue; 6465 } 6466 6467 // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way 6468 // as any other MemLoc. 6469 6470 // Promote the value if needed. 6471 // For now, only handle fully promoted and indirect arguments. 6472 if (VA.getLocInfo() == CCValAssign::Indirect) { 6473 // Store the argument in a stack slot and pass its address. 6474 SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT); 6475 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); 6476 MemOpChains.push_back( 6477 DAG.getStore(Chain, DL, ArgValue, SpillSlot, 6478 MachinePointerInfo::getFixedStack(MF, FI))); 6479 // If the original argument was split (e.g. i128), we need 6480 // to store the required parts of it here (and pass just one address). 6481 // Vectors may be partly split to registers and partly to the stack, in 6482 // which case the base address is partly offset and subsequent stores are 6483 // relative to that. 6484 unsigned ArgIndex = Outs[i].OrigArgIndex; 6485 unsigned ArgPartOffset = Outs[i].PartOffset; 6486 assert(VA.getValVT().isVector() || ArgPartOffset == 0); 6487 while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) { 6488 SDValue PartValue = OutVals[i + 1]; 6489 unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset; 6490 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, 6491 DAG.getIntPtrConstant(PartOffset, DL)); 6492 MemOpChains.push_back( 6493 DAG.getStore(Chain, DL, PartValue, Address, 6494 MachinePointerInfo::getFixedStack(MF, FI))); 6495 ++i; 6496 } 6497 ArgValue = SpillSlot; 6498 } else { 6499 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget); 6500 } 6501 6502 // Use local copy if it is a byval arg. 6503 if (Flags.isByVal()) 6504 ArgValue = ByValArgs[j++]; 6505 6506 if (VA.isRegLoc()) { 6507 // Queue up the argument copies and emit them at the end. 6508 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); 6509 } else { 6510 assert(VA.isMemLoc() && "Argument not register or memory"); 6511 assert(!IsTailCall && "Tail call not allowed if stack is used " 6512 "for passing parameters"); 6513 6514 // Work out the address of the stack slot. 6515 if (!StackPtr.getNode()) 6516 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); 6517 SDValue Address = 6518 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, 6519 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL)); 6520 6521 // Emit the store. 
6522 MemOpChains.push_back( 6523 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo())); 6524 } 6525 } 6526 6527 // Join the stores, which are independent of one another. 6528 if (!MemOpChains.empty()) 6529 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); 6530 6531 SDValue Glue; 6532 6533 // Build a sequence of copy-to-reg nodes, chained and glued together. 6534 for (auto &Reg : RegsToPass) { 6535 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue); 6536 Glue = Chain.getValue(1); 6537 } 6538 6539 // Validate that none of the argument registers have been marked as 6540 // reserved, if so report an error. Do the same for the return address if this 6541 // is not a tailcall. 6542 validateCCReservedRegs(RegsToPass, MF); 6543 if (!IsTailCall && 6544 MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1)) 6545 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 6546 MF.getFunction(), 6547 "Return address register required, but has been reserved."}); 6548 6549 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a 6550 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't 6551 // split it and then direct call can be matched by PseudoCALL. 6552 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) { 6553 const GlobalValue *GV = S->getGlobal(); 6554 6555 unsigned OpFlags = RISCVII::MO_CALL; 6556 if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)) 6557 OpFlags = RISCVII::MO_PLT; 6558 6559 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags); 6560 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 6561 unsigned OpFlags = RISCVII::MO_CALL; 6562 6563 if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(), 6564 nullptr)) 6565 OpFlags = RISCVII::MO_PLT; 6566 6567 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags); 6568 } 6569 6570 // The first call operand is the chain and the second is the target address. 6571 SmallVector<SDValue, 8> Ops; 6572 Ops.push_back(Chain); 6573 Ops.push_back(Callee); 6574 6575 // Add argument registers to the end of the list so that they are 6576 // known live into the call. 6577 for (auto &Reg : RegsToPass) 6578 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType())); 6579 6580 if (!IsTailCall) { 6581 // Add a register mask operand representing the call-preserved registers. 6582 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 6583 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); 6584 assert(Mask && "Missing call preserved mask for calling convention"); 6585 Ops.push_back(DAG.getRegisterMask(Mask)); 6586 } 6587 6588 // Glue the call to the argument copies, if any. 6589 if (Glue.getNode()) 6590 Ops.push_back(Glue); 6591 6592 // Emit the call. 6593 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 6594 6595 if (IsTailCall) { 6596 MF.getFrameInfo().setHasTailCall(); 6597 return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops); 6598 } 6599 6600 Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops); 6601 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); 6602 Glue = Chain.getValue(1); 6603 6604 // Mark the end of the call, which is glued to the call itself. 6605 Chain = DAG.getCALLSEQ_END(Chain, 6606 DAG.getConstant(NumBytes, DL, PtrVT, true), 6607 DAG.getConstant(0, DL, PtrVT, true), 6608 Glue, DL); 6609 Glue = Chain.getValue(1); 6610 6611 // Assign locations to each value returned by this call. 
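// Explanatory note: return values are analysed with the same CC_RISCV routine
// (IsRet=true), so, for example, an f64 returned when GPRs must be used on
// RV32 comes back as two i32 halves in a0/a1 and is reassembled with
// RISCVISD::BuildPairF64 in the loop below.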
6612 SmallVector<CCValAssign, 16> RVLocs; 6613 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); 6614 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true); 6615 6616 // Copy all of the result registers out of their specified physreg. 6617 for (auto &VA : RVLocs) { 6618 // Copy the value out 6619 SDValue RetValue = 6620 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue); 6621 // Glue the RetValue to the end of the call sequence 6622 Chain = RetValue.getValue(1); 6623 Glue = RetValue.getValue(2); 6624 6625 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { 6626 assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment"); 6627 SDValue RetValue2 = 6628 DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue); 6629 Chain = RetValue2.getValue(1); 6630 Glue = RetValue2.getValue(2); 6631 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue, 6632 RetValue2); 6633 } 6634 6635 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget); 6636 6637 InVals.push_back(RetValue); 6638 } 6639 6640 return Chain; 6641 } 6642 6643 bool RISCVTargetLowering::CanLowerReturn( 6644 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, 6645 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { 6646 SmallVector<CCValAssign, 16> RVLocs; 6647 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); 6648 6649 Optional<unsigned> FirstMaskArgument; 6650 if (Subtarget.hasStdExtV()) 6651 FirstMaskArgument = preAssignMask(Outs); 6652 6653 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 6654 MVT VT = Outs[i].VT; 6655 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 6656 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 6657 if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full, 6658 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr, 6659 *this, FirstMaskArgument)) 6660 return false; 6661 } 6662 return true; 6663 } 6664 6665 SDValue 6666 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, 6667 bool IsVarArg, 6668 const SmallVectorImpl<ISD::OutputArg> &Outs, 6669 const SmallVectorImpl<SDValue> &OutVals, 6670 const SDLoc &DL, SelectionDAG &DAG) const { 6671 const MachineFunction &MF = DAG.getMachineFunction(); 6672 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>(); 6673 6674 // Stores the assignment of the return value to a location. 6675 SmallVector<CCValAssign, 16> RVLocs; 6676 6677 // Info about the registers and stack slot. 6678 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, 6679 *DAG.getContext()); 6680 6681 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true, 6682 nullptr); 6683 6684 if (CallConv == CallingConv::GHC && !RVLocs.empty()) 6685 report_fatal_error("GHC functions return void only"); 6686 6687 SDValue Glue; 6688 SmallVector<SDValue, 4> RetOps(1, Chain); 6689 6690 // Copy the result values into the output registers. 6691 for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) { 6692 SDValue Val = OutVals[i]; 6693 CCValAssign &VA = RVLocs[i]; 6694 assert(VA.isRegLoc() && "Can only return in registers!"); 6695 6696 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { 6697 // Handle returning f64 on RV32D with a soft float ABI. 
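// Sketch (explanatory, mirrors the code below): the f64 is split with
// RISCVISD::SplitF64 and the two i32 halves are copied into a GPR pair
// starting at the assigned register (a0/a1 in the common single-value case),
// matching what the caller reassembles with BuildPairF64.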
6698 assert(VA.isRegLoc() && "Expected return via registers"); 6699 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL, 6700 DAG.getVTList(MVT::i32, MVT::i32), Val); 6701 SDValue Lo = SplitF64.getValue(0); 6702 SDValue Hi = SplitF64.getValue(1); 6703 Register RegLo = VA.getLocReg(); 6704 assert(RegLo < RISCV::X31 && "Invalid register pair"); 6705 Register RegHi = RegLo + 1; 6706 6707 if (STI.isRegisterReservedByUser(RegLo) || 6708 STI.isRegisterReservedByUser(RegHi)) 6709 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 6710 MF.getFunction(), 6711 "Return value register required, but has been reserved."}); 6712 6713 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue); 6714 Glue = Chain.getValue(1); 6715 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32)); 6716 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue); 6717 Glue = Chain.getValue(1); 6718 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32)); 6719 } else { 6720 // Handle a 'normal' return. 6721 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget); 6722 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue); 6723 6724 if (STI.isRegisterReservedByUser(VA.getLocReg())) 6725 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 6726 MF.getFunction(), 6727 "Return value register required, but has been reserved."}); 6728 6729 // Guarantee that all emitted copies are stuck together. 6730 Glue = Chain.getValue(1); 6731 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 6732 } 6733 } 6734 6735 RetOps[0] = Chain; // Update chain. 6736 6737 // Add the glue node if we have it. 6738 if (Glue.getNode()) { 6739 RetOps.push_back(Glue); 6740 } 6741 6742 // Interrupt service routines use different return instructions. 6743 const Function &Func = DAG.getMachineFunction().getFunction(); 6744 if (Func.hasFnAttribute("interrupt")) { 6745 if (!Func.getReturnType()->isVoidTy()) 6746 report_fatal_error( 6747 "Functions with the interrupt attribute must have void return type!"); 6748 6749 MachineFunction &MF = DAG.getMachineFunction(); 6750 StringRef Kind = 6751 MF.getFunction().getFnAttribute("interrupt").getValueAsString(); 6752 6753 unsigned RetOpc; 6754 if (Kind == "user") 6755 RetOpc = RISCVISD::URET_FLAG; 6756 else if (Kind == "supervisor") 6757 RetOpc = RISCVISD::SRET_FLAG; 6758 else 6759 RetOpc = RISCVISD::MRET_FLAG; 6760 6761 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps); 6762 } 6763 6764 return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps); 6765 } 6766 6767 void RISCVTargetLowering::validateCCReservedRegs( 6768 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs, 6769 MachineFunction &MF) const { 6770 const Function &F = MF.getFunction(); 6771 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>(); 6772 6773 if (llvm::any_of(Regs, [&STI](auto Reg) { 6774 return STI.isRegisterReservedByUser(Reg.first); 6775 })) 6776 F.getContext().diagnose(DiagnosticInfoUnsupported{ 6777 F, "Argument register required, but has been reserved."}); 6778 } 6779 6780 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { 6781 return CI->isTailCall(); 6782 } 6783 6784 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { 6785 #define NODE_NAME_CASE(NODE) \ 6786 case RISCVISD::NODE: \ 6787 return "RISCVISD::" #NODE; 6788 // clang-format off 6789 switch ((RISCVISD::NodeType)Opcode) { 6790 case RISCVISD::FIRST_NUMBER: 6791 break; 6792 NODE_NAME_CASE(RET_FLAG) 6793 NODE_NAME_CASE(URET_FLAG) 6794 NODE_NAME_CASE(SRET_FLAG) 6795 
NODE_NAME_CASE(MRET_FLAG) 6796 NODE_NAME_CASE(CALL) 6797 NODE_NAME_CASE(SELECT_CC) 6798 NODE_NAME_CASE(BR_CC) 6799 NODE_NAME_CASE(BuildPairF64) 6800 NODE_NAME_CASE(SplitF64) 6801 NODE_NAME_CASE(TAIL) 6802 NODE_NAME_CASE(SLLW) 6803 NODE_NAME_CASE(SRAW) 6804 NODE_NAME_CASE(SRLW) 6805 NODE_NAME_CASE(DIVW) 6806 NODE_NAME_CASE(DIVUW) 6807 NODE_NAME_CASE(REMUW) 6808 NODE_NAME_CASE(ROLW) 6809 NODE_NAME_CASE(RORW) 6810 NODE_NAME_CASE(FSLW) 6811 NODE_NAME_CASE(FSRW) 6812 NODE_NAME_CASE(FSL) 6813 NODE_NAME_CASE(FSR) 6814 NODE_NAME_CASE(FMV_H_X) 6815 NODE_NAME_CASE(FMV_X_ANYEXTH) 6816 NODE_NAME_CASE(FMV_W_X_RV64) 6817 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64) 6818 NODE_NAME_CASE(READ_CYCLE_WIDE) 6819 NODE_NAME_CASE(GREVI) 6820 NODE_NAME_CASE(GREVIW) 6821 NODE_NAME_CASE(GORCI) 6822 NODE_NAME_CASE(GORCIW) 6823 NODE_NAME_CASE(SHFLI) 6824 NODE_NAME_CASE(VMV_V_X_VL) 6825 NODE_NAME_CASE(VFMV_V_F_VL) 6826 NODE_NAME_CASE(VMV_X_S) 6827 NODE_NAME_CASE(VMV_S_XF_VL) 6828 NODE_NAME_CASE(SPLAT_VECTOR_I64) 6829 NODE_NAME_CASE(READ_VLENB) 6830 NODE_NAME_CASE(TRUNCATE_VECTOR_VL) 6831 NODE_NAME_CASE(VLEFF) 6832 NODE_NAME_CASE(VLEFF_MASK) 6833 NODE_NAME_CASE(VSLIDEUP_VL) 6834 NODE_NAME_CASE(VSLIDE1UP_VL) 6835 NODE_NAME_CASE(VSLIDEDOWN_VL) 6836 NODE_NAME_CASE(VID_VL) 6837 NODE_NAME_CASE(VFNCVT_ROD_VL) 6838 NODE_NAME_CASE(VECREDUCE_ADD_VL) 6839 NODE_NAME_CASE(VECREDUCE_UMAX_VL) 6840 NODE_NAME_CASE(VECREDUCE_SMAX_VL) 6841 NODE_NAME_CASE(VECREDUCE_UMIN_VL) 6842 NODE_NAME_CASE(VECREDUCE_SMIN_VL) 6843 NODE_NAME_CASE(VECREDUCE_AND_VL) 6844 NODE_NAME_CASE(VECREDUCE_OR_VL) 6845 NODE_NAME_CASE(VECREDUCE_XOR_VL) 6846 NODE_NAME_CASE(VECREDUCE_FADD_VL) 6847 NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL) 6848 NODE_NAME_CASE(ADD_VL) 6849 NODE_NAME_CASE(AND_VL) 6850 NODE_NAME_CASE(MUL_VL) 6851 NODE_NAME_CASE(OR_VL) 6852 NODE_NAME_CASE(SDIV_VL) 6853 NODE_NAME_CASE(SHL_VL) 6854 NODE_NAME_CASE(SREM_VL) 6855 NODE_NAME_CASE(SRA_VL) 6856 NODE_NAME_CASE(SRL_VL) 6857 NODE_NAME_CASE(SUB_VL) 6858 NODE_NAME_CASE(UDIV_VL) 6859 NODE_NAME_CASE(UREM_VL) 6860 NODE_NAME_CASE(XOR_VL) 6861 NODE_NAME_CASE(FADD_VL) 6862 NODE_NAME_CASE(FSUB_VL) 6863 NODE_NAME_CASE(FMUL_VL) 6864 NODE_NAME_CASE(FDIV_VL) 6865 NODE_NAME_CASE(FNEG_VL) 6866 NODE_NAME_CASE(FABS_VL) 6867 NODE_NAME_CASE(FSQRT_VL) 6868 NODE_NAME_CASE(FMA_VL) 6869 NODE_NAME_CASE(FCOPYSIGN_VL) 6870 NODE_NAME_CASE(SMIN_VL) 6871 NODE_NAME_CASE(SMAX_VL) 6872 NODE_NAME_CASE(UMIN_VL) 6873 NODE_NAME_CASE(UMAX_VL) 6874 NODE_NAME_CASE(MULHS_VL) 6875 NODE_NAME_CASE(MULHU_VL) 6876 NODE_NAME_CASE(FP_TO_SINT_VL) 6877 NODE_NAME_CASE(FP_TO_UINT_VL) 6878 NODE_NAME_CASE(SINT_TO_FP_VL) 6879 NODE_NAME_CASE(UINT_TO_FP_VL) 6880 NODE_NAME_CASE(FP_EXTEND_VL) 6881 NODE_NAME_CASE(FP_ROUND_VL) 6882 NODE_NAME_CASE(SETCC_VL) 6883 NODE_NAME_CASE(VSELECT_VL) 6884 NODE_NAME_CASE(VMAND_VL) 6885 NODE_NAME_CASE(VMOR_VL) 6886 NODE_NAME_CASE(VMXOR_VL) 6887 NODE_NAME_CASE(VMCLR_VL) 6888 NODE_NAME_CASE(VMSET_VL) 6889 NODE_NAME_CASE(VRGATHER_VX_VL) 6890 NODE_NAME_CASE(VRGATHER_VV_VL) 6891 NODE_NAME_CASE(VRGATHEREI16_VV_VL) 6892 NODE_NAME_CASE(VSEXT_VL) 6893 NODE_NAME_CASE(VZEXT_VL) 6894 NODE_NAME_CASE(VLE_VL) 6895 NODE_NAME_CASE(VSE_VL) 6896 } 6897 // clang-format on 6898 return nullptr; 6899 #undef NODE_NAME_CASE 6900 } 6901 6902 /// getConstraintType - Given a constraint letter, return the type of 6903 /// constraint it is for this target. 
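/// Illustrative usage (added example): in inline assembly such as
///   asm volatile("addi %0, %1, %2" : "=r"(Out) : "r"(In), "I"(12));
/// 'I' is classified below as an immediate constraint (a 12-bit signed
/// immediate), 'f' and 'v' as register-class constraints, and 'A' as a
/// memory (address held in a register) constraint.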
6904 RISCVTargetLowering::ConstraintType 6905 RISCVTargetLowering::getConstraintType(StringRef Constraint) const { 6906 if (Constraint.size() == 1) { 6907 switch (Constraint[0]) { 6908 default: 6909 break; 6910 case 'f': 6911 case 'v': 6912 return C_RegisterClass; 6913 case 'I': 6914 case 'J': 6915 case 'K': 6916 return C_Immediate; 6917 case 'A': 6918 return C_Memory; 6919 } 6920 } 6921 return TargetLowering::getConstraintType(Constraint); 6922 } 6923 6924 std::pair<unsigned, const TargetRegisterClass *> 6925 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, 6926 StringRef Constraint, 6927 MVT VT) const { 6928 // First, see if this is a constraint that directly corresponds to a 6929 // RISCV register class. 6930 if (Constraint.size() == 1) { 6931 switch (Constraint[0]) { 6932 case 'r': 6933 return std::make_pair(0U, &RISCV::GPRRegClass); 6934 case 'f': 6935 if (Subtarget.hasStdExtZfh() && VT == MVT::f16) 6936 return std::make_pair(0U, &RISCV::FPR16RegClass); 6937 if (Subtarget.hasStdExtF() && VT == MVT::f32) 6938 return std::make_pair(0U, &RISCV::FPR32RegClass); 6939 if (Subtarget.hasStdExtD() && VT == MVT::f64) 6940 return std::make_pair(0U, &RISCV::FPR64RegClass); 6941 break; 6942 case 'v': 6943 for (const auto *RC : 6944 {&RISCV::VMRegClass, &RISCV::VRRegClass, &RISCV::VRM2RegClass, 6945 &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) { 6946 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) 6947 return std::make_pair(0U, RC); 6948 } 6949 break; 6950 default: 6951 break; 6952 } 6953 } 6954 6955 // Clang will correctly decode the usage of register name aliases into their 6956 // official names. However, other frontends like `rustc` do not. This allows 6957 // users of these frontends to use the ABI names for registers in LLVM-style 6958 // register constraints. 6959 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower()) 6960 .Case("{zero}", RISCV::X0) 6961 .Case("{ra}", RISCV::X1) 6962 .Case("{sp}", RISCV::X2) 6963 .Case("{gp}", RISCV::X3) 6964 .Case("{tp}", RISCV::X4) 6965 .Case("{t0}", RISCV::X5) 6966 .Case("{t1}", RISCV::X6) 6967 .Case("{t2}", RISCV::X7) 6968 .Cases("{s0}", "{fp}", RISCV::X8) 6969 .Case("{s1}", RISCV::X9) 6970 .Case("{a0}", RISCV::X10) 6971 .Case("{a1}", RISCV::X11) 6972 .Case("{a2}", RISCV::X12) 6973 .Case("{a3}", RISCV::X13) 6974 .Case("{a4}", RISCV::X14) 6975 .Case("{a5}", RISCV::X15) 6976 .Case("{a6}", RISCV::X16) 6977 .Case("{a7}", RISCV::X17) 6978 .Case("{s2}", RISCV::X18) 6979 .Case("{s3}", RISCV::X19) 6980 .Case("{s4}", RISCV::X20) 6981 .Case("{s5}", RISCV::X21) 6982 .Case("{s6}", RISCV::X22) 6983 .Case("{s7}", RISCV::X23) 6984 .Case("{s8}", RISCV::X24) 6985 .Case("{s9}", RISCV::X25) 6986 .Case("{s10}", RISCV::X26) 6987 .Case("{s11}", RISCV::X27) 6988 .Case("{t3}", RISCV::X28) 6989 .Case("{t4}", RISCV::X29) 6990 .Case("{t5}", RISCV::X30) 6991 .Case("{t6}", RISCV::X31) 6992 .Default(RISCV::NoRegister); 6993 if (XRegFromAlias != RISCV::NoRegister) 6994 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass); 6995 6996 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the 6997 // TableGen record rather than the AsmName to choose registers for InlineAsm 6998 // constraints, plus we want to match those names to the widest floating point 6999 // register type available, manually select floating point registers here. 7000 // 7001 // The second case is the ABI name of the register, so that frontends can also 7002 // use the ABI names in register constraint lists. 
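// For example (illustrative): both "{f10}" and "{fa0}" resolve to F10_F in
// the table below, and are widened to F10_D when the D extension is available
// so the operand can hold either a float or a double.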
7003 if (Subtarget.hasStdExtF()) { 7004 unsigned FReg = StringSwitch<unsigned>(Constraint.lower()) 7005 .Cases("{f0}", "{ft0}", RISCV::F0_F) 7006 .Cases("{f1}", "{ft1}", RISCV::F1_F) 7007 .Cases("{f2}", "{ft2}", RISCV::F2_F) 7008 .Cases("{f3}", "{ft3}", RISCV::F3_F) 7009 .Cases("{f4}", "{ft4}", RISCV::F4_F) 7010 .Cases("{f5}", "{ft5}", RISCV::F5_F) 7011 .Cases("{f6}", "{ft6}", RISCV::F6_F) 7012 .Cases("{f7}", "{ft7}", RISCV::F7_F) 7013 .Cases("{f8}", "{fs0}", RISCV::F8_F) 7014 .Cases("{f9}", "{fs1}", RISCV::F9_F) 7015 .Cases("{f10}", "{fa0}", RISCV::F10_F) 7016 .Cases("{f11}", "{fa1}", RISCV::F11_F) 7017 .Cases("{f12}", "{fa2}", RISCV::F12_F) 7018 .Cases("{f13}", "{fa3}", RISCV::F13_F) 7019 .Cases("{f14}", "{fa4}", RISCV::F14_F) 7020 .Cases("{f15}", "{fa5}", RISCV::F15_F) 7021 .Cases("{f16}", "{fa6}", RISCV::F16_F) 7022 .Cases("{f17}", "{fa7}", RISCV::F17_F) 7023 .Cases("{f18}", "{fs2}", RISCV::F18_F) 7024 .Cases("{f19}", "{fs3}", RISCV::F19_F) 7025 .Cases("{f20}", "{fs4}", RISCV::F20_F) 7026 .Cases("{f21}", "{fs5}", RISCV::F21_F) 7027 .Cases("{f22}", "{fs6}", RISCV::F22_F) 7028 .Cases("{f23}", "{fs7}", RISCV::F23_F) 7029 .Cases("{f24}", "{fs8}", RISCV::F24_F) 7030 .Cases("{f25}", "{fs9}", RISCV::F25_F) 7031 .Cases("{f26}", "{fs10}", RISCV::F26_F) 7032 .Cases("{f27}", "{fs11}", RISCV::F27_F) 7033 .Cases("{f28}", "{ft8}", RISCV::F28_F) 7034 .Cases("{f29}", "{ft9}", RISCV::F29_F) 7035 .Cases("{f30}", "{ft10}", RISCV::F30_F) 7036 .Cases("{f31}", "{ft11}", RISCV::F31_F) 7037 .Default(RISCV::NoRegister); 7038 if (FReg != RISCV::NoRegister) { 7039 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg"); 7040 if (Subtarget.hasStdExtD()) { 7041 unsigned RegNo = FReg - RISCV::F0_F; 7042 unsigned DReg = RISCV::F0_D + RegNo; 7043 return std::make_pair(DReg, &RISCV::FPR64RegClass); 7044 } 7045 return std::make_pair(FReg, &RISCV::FPR32RegClass); 7046 } 7047 } 7048 7049 if (Subtarget.hasStdExtV()) { 7050 Register VReg = StringSwitch<Register>(Constraint.lower()) 7051 .Case("{v0}", RISCV::V0) 7052 .Case("{v1}", RISCV::V1) 7053 .Case("{v2}", RISCV::V2) 7054 .Case("{v3}", RISCV::V3) 7055 .Case("{v4}", RISCV::V4) 7056 .Case("{v5}", RISCV::V5) 7057 .Case("{v6}", RISCV::V6) 7058 .Case("{v7}", RISCV::V7) 7059 .Case("{v8}", RISCV::V8) 7060 .Case("{v9}", RISCV::V9) 7061 .Case("{v10}", RISCV::V10) 7062 .Case("{v11}", RISCV::V11) 7063 .Case("{v12}", RISCV::V12) 7064 .Case("{v13}", RISCV::V13) 7065 .Case("{v14}", RISCV::V14) 7066 .Case("{v15}", RISCV::V15) 7067 .Case("{v16}", RISCV::V16) 7068 .Case("{v17}", RISCV::V17) 7069 .Case("{v18}", RISCV::V18) 7070 .Case("{v19}", RISCV::V19) 7071 .Case("{v20}", RISCV::V20) 7072 .Case("{v21}", RISCV::V21) 7073 .Case("{v22}", RISCV::V22) 7074 .Case("{v23}", RISCV::V23) 7075 .Case("{v24}", RISCV::V24) 7076 .Case("{v25}", RISCV::V25) 7077 .Case("{v26}", RISCV::V26) 7078 .Case("{v27}", RISCV::V27) 7079 .Case("{v28}", RISCV::V28) 7080 .Case("{v29}", RISCV::V29) 7081 .Case("{v30}", RISCV::V30) 7082 .Case("{v31}", RISCV::V31) 7083 .Default(RISCV::NoRegister); 7084 if (VReg != RISCV::NoRegister) { 7085 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy)) 7086 return std::make_pair(VReg, &RISCV::VMRegClass); 7087 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy)) 7088 return std::make_pair(VReg, &RISCV::VRRegClass); 7089 for (const auto *RC : 7090 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) { 7091 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) { 7092 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC); 7093 return 
std::make_pair(VReg, RC); 7094 } 7095 } 7096 } 7097 } 7098 7099 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); 7100 } 7101 7102 unsigned 7103 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const { 7104 // Currently only support length 1 constraints. 7105 if (ConstraintCode.size() == 1) { 7106 switch (ConstraintCode[0]) { 7107 case 'A': 7108 return InlineAsm::Constraint_A; 7109 default: 7110 break; 7111 } 7112 } 7113 7114 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); 7115 } 7116 7117 void RISCVTargetLowering::LowerAsmOperandForConstraint( 7118 SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops, 7119 SelectionDAG &DAG) const { 7120 // Currently only support length 1 constraints. 7121 if (Constraint.length() == 1) { 7122 switch (Constraint[0]) { 7123 case 'I': 7124 // Validate & create a 12-bit signed immediate operand. 7125 if (auto *C = dyn_cast<ConstantSDNode>(Op)) { 7126 uint64_t CVal = C->getSExtValue(); 7127 if (isInt<12>(CVal)) 7128 Ops.push_back( 7129 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT())); 7130 } 7131 return; 7132 case 'J': 7133 // Validate & create an integer zero operand. 7134 if (auto *C = dyn_cast<ConstantSDNode>(Op)) 7135 if (C->getZExtValue() == 0) 7136 Ops.push_back( 7137 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT())); 7138 return; 7139 case 'K': 7140 // Validate & create a 5-bit unsigned immediate operand. 7141 if (auto *C = dyn_cast<ConstantSDNode>(Op)) { 7142 uint64_t CVal = C->getZExtValue(); 7143 if (isUInt<5>(CVal)) 7144 Ops.push_back( 7145 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT())); 7146 } 7147 return; 7148 default: 7149 break; 7150 } 7151 } 7152 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 7153 } 7154 7155 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder, 7156 Instruction *Inst, 7157 AtomicOrdering Ord) const { 7158 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent) 7159 return Builder.CreateFence(Ord); 7160 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord)) 7161 return Builder.CreateFence(AtomicOrdering::Release); 7162 return nullptr; 7163 } 7164 7165 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder, 7166 Instruction *Inst, 7167 AtomicOrdering Ord) const { 7168 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord)) 7169 return Builder.CreateFence(AtomicOrdering::Acquire); 7170 return nullptr; 7171 } 7172 7173 TargetLowering::AtomicExpansionKind 7174 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { 7175 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating 7176 // point operations can't be used in an lr/sc sequence without breaking the 7177 // forward-progress guarantee. 
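// For example (illustrative IR): 'atomicrmw fadd float* %p, float 1.0
// seq_cst' is expanded by the AtomicExpand pass into a load/fadd/cmpxchg
// retry loop rather than an LR/SC sequence.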
7178 if (AI->isFloatingPointOperation()) 7179 return AtomicExpansionKind::CmpXChg; 7180 7181 unsigned Size = AI->getType()->getPrimitiveSizeInBits(); 7182 if (Size == 8 || Size == 16) 7183 return AtomicExpansionKind::MaskedIntrinsic; 7184 return AtomicExpansionKind::None; 7185 } 7186 7187 static Intrinsic::ID 7188 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) { 7189 if (XLen == 32) { 7190 switch (BinOp) { 7191 default: 7192 llvm_unreachable("Unexpected AtomicRMW BinOp"); 7193 case AtomicRMWInst::Xchg: 7194 return Intrinsic::riscv_masked_atomicrmw_xchg_i32; 7195 case AtomicRMWInst::Add: 7196 return Intrinsic::riscv_masked_atomicrmw_add_i32; 7197 case AtomicRMWInst::Sub: 7198 return Intrinsic::riscv_masked_atomicrmw_sub_i32; 7199 case AtomicRMWInst::Nand: 7200 return Intrinsic::riscv_masked_atomicrmw_nand_i32; 7201 case AtomicRMWInst::Max: 7202 return Intrinsic::riscv_masked_atomicrmw_max_i32; 7203 case AtomicRMWInst::Min: 7204 return Intrinsic::riscv_masked_atomicrmw_min_i32; 7205 case AtomicRMWInst::UMax: 7206 return Intrinsic::riscv_masked_atomicrmw_umax_i32; 7207 case AtomicRMWInst::UMin: 7208 return Intrinsic::riscv_masked_atomicrmw_umin_i32; 7209 } 7210 } 7211 7212 if (XLen == 64) { 7213 switch (BinOp) { 7214 default: 7215 llvm_unreachable("Unexpected AtomicRMW BinOp"); 7216 case AtomicRMWInst::Xchg: 7217 return Intrinsic::riscv_masked_atomicrmw_xchg_i64; 7218 case AtomicRMWInst::Add: 7219 return Intrinsic::riscv_masked_atomicrmw_add_i64; 7220 case AtomicRMWInst::Sub: 7221 return Intrinsic::riscv_masked_atomicrmw_sub_i64; 7222 case AtomicRMWInst::Nand: 7223 return Intrinsic::riscv_masked_atomicrmw_nand_i64; 7224 case AtomicRMWInst::Max: 7225 return Intrinsic::riscv_masked_atomicrmw_max_i64; 7226 case AtomicRMWInst::Min: 7227 return Intrinsic::riscv_masked_atomicrmw_min_i64; 7228 case AtomicRMWInst::UMax: 7229 return Intrinsic::riscv_masked_atomicrmw_umax_i64; 7230 case AtomicRMWInst::UMin: 7231 return Intrinsic::riscv_masked_atomicrmw_umin_i64; 7232 } 7233 } 7234 7235 llvm_unreachable("Unexpected XLen\n"); 7236 } 7237 7238 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic( 7239 IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, 7240 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const { 7241 unsigned XLen = Subtarget.getXLen(); 7242 Value *Ordering = 7243 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering())); 7244 Type *Tys[] = {AlignedAddr->getType()}; 7245 Function *LrwOpScwLoop = Intrinsic::getDeclaration( 7246 AI->getModule(), 7247 getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys); 7248 7249 if (XLen == 64) { 7250 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty()); 7251 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); 7252 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty()); 7253 } 7254 7255 Value *Result; 7256 7257 // Must pass the shift amount needed to sign extend the loaded value prior 7258 // to performing a signed comparison for min/max. ShiftAmt is the number of 7259 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which 7260 // is the number of bits to left+right shift the value in order to 7261 // sign-extend. 
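// Worked example (illustrative): with XLen=32, an i8 atomic whose byte sits
// at bit offset ShiftAmt=8 within the aligned word gives
// SextShamt = (32 - 8) - 8 = 16; the expansion can then shift left by 16 and
// arithmetic-shift right by 16 to sign-extend the field in place before the
// signed min/max comparison.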
  if (AI->getOperation() == AtomicRMWInst::Min ||
      AI->getOperation() == AtomicRMWInst::Max) {
    const DataLayout &DL = AI->getModule()->getDataLayout();
    unsigned ValWidth =
        DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
    Value *SextShamt =
        Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
    Result = Builder.CreateCall(LrwOpScwLoop,
                                {AlignedAddr, Incr, Mask, SextShamt, Ordering});
  } else {
    Result =
        Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
  }

  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}

TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
    AtomicCmpXchgInst *CI) const {
  unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
    IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
  Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
  if (XLen == 64) {
    CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
    NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
  }
  Type *Tys[] = {AlignedAddr->getType()};
  Function *MaskedCmpXchg =
      Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
  Value *Result = Builder.CreateCall(
      MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}

bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const {
  return false;
}

bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                                     EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f16:
    return Subtarget.hasStdExtZfh();
  case MVT::f32:
    return Subtarget.hasStdExtF();
  case MVT::f64:
    return Subtarget.hasStdExtD();
  default:
    break;
  }

  return false;
}

Register RISCVTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X10;
}

Register RISCVTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X11;
}

bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
  // Return false to suppress the unnecessary extensions if the LibCall
  // arguments or return value is f32 type for LP64 ABI.
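  // Illustrative example (assumed libcall): under LP64, an f32 argument to a
  // soft-float routine such as __addsf3 is already handled as a 32-bit value,
  // so extending it to i64 around the call would only add unnecessary
  // extension instructions.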
  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32))
    return false;

  return true;
}

bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type,
                                                        bool IsSigned) const {
  if (Subtarget.is64Bit() && Type == MVT::i32)
    return true;

  return IsSigned;
}

bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
                                                 SDValue C) const {
  // Check integral scalar types.
  if (VT.isScalarInteger()) {
    // Omit the optimization if the subtarget has the M extension and the data
    // size exceeds XLen.
    if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen())
      return false;
    if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
      // Break the MUL to a SLLI and an ADD/SUB.
      const APInt &Imm = ConstNode->getAPIntValue();
      if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
          (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
        return true;
      // Omit the following optimization if the subtarget has the M extension
      // and the data size >= XLen.
      if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen())
        return false;
      // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
      // a pair of LUI/ADDI.
      if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) {
        APInt ImmS = Imm.ashr(Imm.countTrailingZeros());
        if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
            (1 - ImmS).isPowerOf2())
          return true;
      }
    }
  }

  return false;
}

bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
  if (!Subtarget.useRVVForFixedLengthVectors())
    return false;

  if (!VT.isFixedLengthVector())
    return false;

  // Don't use RVV for vectors we cannot scalarize if required.
  switch (VT.getVectorElementType().SimpleTy) {
  // i1 is supported but has different rules.
  default:
    return false;
  case MVT::i1:
    // Masks can only use a single register.
    if (VT.getVectorNumElements() > Subtarget.getMinRVVVectorSizeInBits())
      return false;
    break;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
    break;
  case MVT::f16:
    if (!Subtarget.hasStdExtZfh())
      return false;
    break;
  case MVT::f32:
    if (!Subtarget.hasStdExtF())
      return false;
    break;
  case MVT::f64:
    if (!Subtarget.hasStdExtD())
      return false;
    break;
  }

  unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
  // Don't use RVV for types that don't fit.
  if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
    return false;

  // TODO: Perhaps an artificial restriction, but worth having whilst getting
  // the base fixed length RVV support in place.
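  // Illustrative example: a non-power-of-two type such as v3i32 is rejected by
  // the check below, even though it could conceivably be widened to v4i32 and
  // still fit in a vector register.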
  if (!VT.isPow2VectorType())
    return false;

  return true;
}

bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
    bool *Fast) const {
  if (!VT.isScalableVector())
    return false;

  EVT ElemVT = VT.getVectorElementType();
  if (Alignment >= ElemVT.getStoreSize()) {
    if (Fast)
      *Fast = true;
    return true;
  }

  return false;
}

bool RISCVTargetLowering::splitValueIntoRegisterParts(
    SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
    unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
  EVT ValueVT = Val.getValueType();
  if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
    LLVMContext &Context = *DAG.getContext();
    EVT ValueEltVT = ValueVT.getVectorElementType();
    EVT PartEltVT = PartVT.getVectorElementType();
    unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinSize();
    unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinSize();
    if (PartVTBitSize % ValueVTBitSize == 0) {
      // If the element types are different, bitcast to the same element type of
      // PartVT first.
      if (ValueEltVT != PartEltVT) {
        unsigned Count = ValueVTBitSize / PartEltVT.getSizeInBits();
        assert(Count != 0 && "The number of elements should not be zero.");
        EVT SameEltTypeVT =
            EVT::getVectorVT(Context, PartEltVT, Count, /*IsScalable=*/true);
        Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
      }
      Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
                        Val, DAG.getConstant(0, DL, Subtarget.getXLenVT()));
      Parts[0] = Val;
      return true;
    }
  }
  return false;
}

SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
    SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
    MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const {
  if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
    LLVMContext &Context = *DAG.getContext();
    SDValue Val = Parts[0];
    EVT ValueEltVT = ValueVT.getVectorElementType();
    EVT PartEltVT = PartVT.getVectorElementType();
    unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinSize();
    unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinSize();
    if (PartVTBitSize % ValueVTBitSize == 0) {
      EVT SameEltTypeVT = ValueVT;
      // If the element types are different, convert it to the same element type
      // of PartVT.
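      // Illustrative example (assumed VT pairing): joining an nxv2i16 value
      // from an nxv2i32 part gives Count = 32 / 32 = 1, so an nxv1i32
      // subvector is extracted below and then bitcast back to nxv2i16.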
      if (ValueEltVT != PartEltVT) {
        unsigned Count = ValueVTBitSize / PartEltVT.getSizeInBits();
        assert(Count != 0 && "The number of elements should not be zero.");
        SameEltTypeVT =
            EVT::getVectorVT(Context, PartEltVT, Count, /*IsScalable=*/true);
      }
      Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SameEltTypeVT, Val,
                        DAG.getConstant(0, DL, Subtarget.getXLenVT()));
      if (ValueEltVT != PartEltVT)
        Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
      return Val;
    }
  }
  return SDValue();
}

#define GET_REGISTER_MATCHER
#include "RISCVGenAsmMatcher.inc"

Register
RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                       const MachineFunction &MF) const {
  Register Reg = MatchRegisterAltName(RegName);
  if (Reg == RISCV::NoRegister)
    Reg = MatchRegisterName(RegName);
  if (Reg == RISCV::NoRegister)
    report_fatal_error(
        Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
  BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
  if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
    report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
                             StringRef(RegName) + "\"."));
  return Reg;
}

namespace llvm {
namespace RISCVVIntrinsicsTable {

#define GET_RISCVVIntrinsicsTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // namespace RISCVVIntrinsicsTable

} // namespace llvm