//===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISCV uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "RISCVISelLowering.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVRegisterInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-lower"

STATISTIC(NumTailCalls, "Number of tail calls");

RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                         const RISCVSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  if (Subtarget.isRV32E())
    report_fatal_error("Codegen not yet implemented for RV32E");

  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");

  if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
      !Subtarget.hasStdExtF()) {
    errs() << "Hard-float 'f' ABI can't be used for a target that "
              "doesn't support the F instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
             !Subtarget.hasStdExtD()) {
    errs() << "Hard-float 'd' ABI can't be used for a target that "
              "doesn't support the D instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  }

  switch (ABI) {
  default:
    report_fatal_error("Don't know how to lower this ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64:
  case RISCVABI::ABI_LP64F:
  case RISCVABI::ABI_LP64D:
    break;
  }

  MVT XLenVT = Subtarget.getXLenVT();

  // Set up the register classes.
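  // XLenVT is i32 for RV32 and i64 for RV64, so integer scalars always live
  // in GPRs; each floating-point extension (Zfh, F, D) contributes its own
  // register file for the matching scalar FP type below.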
  addRegisterClass(XLenVT, &RISCV::GPRRegClass);

  if (Subtarget.hasStdExtZfh())
    addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
  if (Subtarget.hasStdExtF())
    addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
  if (Subtarget.hasStdExtD())
    addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);

  static const MVT::SimpleValueType BoolVecVTs[] = {
      MVT::nxv1i1,  MVT::nxv2i1,  MVT::nxv4i1, MVT::nxv8i1,
      MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
  static const MVT::SimpleValueType IntVecVTs[] = {
      MVT::nxv1i8,  MVT::nxv2i8,   MVT::nxv4i8,   MVT::nxv8i8,  MVT::nxv16i8,
      MVT::nxv32i8, MVT::nxv64i8,  MVT::nxv1i16,  MVT::nxv2i16, MVT::nxv4i16,
      MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
      MVT::nxv4i32, MVT::nxv8i32,  MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
      MVT::nxv4i64, MVT::nxv8i64};
  static const MVT::SimpleValueType F16VecVTs[] = {
      MVT::nxv1f16, MVT::nxv2f16,  MVT::nxv4f16,
      MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
  static const MVT::SimpleValueType F32VecVTs[] = {
      MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
  static const MVT::SimpleValueType F64VecVTs[] = {
      MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};

  if (Subtarget.hasStdExtV()) {
    auto addRegClassForRVV = [this](MVT VT) {
      unsigned Size = VT.getSizeInBits().getKnownMinValue();
      assert(Size <= 512 && isPowerOf2_32(Size));
      const TargetRegisterClass *RC;
      if (Size <= 64)
        RC = &RISCV::VRRegClass;
      else if (Size == 128)
        RC = &RISCV::VRM2RegClass;
      else if (Size == 256)
        RC = &RISCV::VRM4RegClass;
      else
        RC = &RISCV::VRM8RegClass;

      addRegisterClass(VT, RC);
    };

    for (MVT VT : BoolVecVTs)
      addRegClassForRVV(VT);
    for (MVT VT : IntVecVTs)
      addRegClassForRVV(VT);

    if (Subtarget.hasStdExtZfh())
      for (MVT VT : F16VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasStdExtF())
      for (MVT VT : F32VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasStdExtD())
      for (MVT VT : F64VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.useRVVForFixedLengthVectors()) {
      auto addRegClassForFixedVectors = [this](MVT VT) {
        unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
        const TargetRegisterClass *RC;
        if (LMul == 1 || VT.getVectorElementType() == MVT::i1)
          RC = &RISCV::VRRegClass;
        else if (LMul == 2)
          RC = &RISCV::VRM2RegClass;
        else if (LMul == 4)
          RC = &RISCV::VRM4RegClass;
        else if (LMul == 8)
          RC = &RISCV::VRM8RegClass;
        else
          llvm_unreachable("Unexpected LMul!");

        addRegisterClass(VT, RC);
      };
      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);

      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);
    }
  }

  // Compute derived properties from the register classes.
  computeRegisterProperties(STI.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(RISCV::X2);

  for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
    setLoadExtAction(N, XLenVT, MVT::i1, Promote);

  // TODO: add all necessary setOperationAction calls.
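  // A rough guide to the actions used throughout this constructor: Legal
  // nodes are matched directly by isel patterns, Expand asks the legalizer to
  // open-code the operation from other nodes, Promote widens the type first,
  // and Custom routes the node through LowerOperation/ReplaceNodeResults.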
  setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, XLenVT, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::SELECT_CC, XLenVT, Expand);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  if (!Subtarget.hasStdExtZbb()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  }

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::ADD, MVT::i32, Custom);
    setOperationAction(ISD::SUB, MVT::i32, Custom);
    setOperationAction(ISD::SHL, MVT::i32, Custom);
    setOperationAction(ISD::SRA, MVT::i32, Custom);
    setOperationAction(ISD::SRL, MVT::i32, Custom);

    setOperationAction(ISD::UADDO, MVT::i32, Custom);
    setOperationAction(ISD::USUBO, MVT::i32, Custom);
    setOperationAction(ISD::UADDSAT, MVT::i32, Custom);
    setOperationAction(ISD::USUBSAT, MVT::i32, Custom);
  }

  if (!Subtarget.hasStdExtM()) {
    setOperationAction(ISD::MUL, XLenVT, Expand);
    setOperationAction(ISD::MULHS, XLenVT, Expand);
    setOperationAction(ISD::MULHU, XLenVT, Expand);
    setOperationAction(ISD::SDIV, XLenVT, Expand);
    setOperationAction(ISD::UDIV, XLenVT, Expand);
    setOperationAction(ISD::SREM, XLenVT, Expand);
    setOperationAction(ISD::UREM, XLenVT, Expand);
  }

  if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) {
    setOperationAction(ISD::MUL, MVT::i32, Custom);

    setOperationAction(ISD::SDIV, MVT::i8, Custom);
    setOperationAction(ISD::UDIV, MVT::i8, Custom);
    setOperationAction(ISD::UREM, MVT::i8, Custom);
    setOperationAction(ISD::SDIV, MVT::i16, Custom);
    setOperationAction(ISD::UDIV, MVT::i16, Custom);
    setOperationAction(ISD::UREM, MVT::i16, Custom);
    setOperationAction(ISD::SDIV, MVT::i32, Custom);
    setOperationAction(ISD::UDIV, MVT::i32, Custom);
    setOperationAction(ISD::UREM, MVT::i32, Custom);
  }

  setOperationAction(ISD::SDIVREM, XLenVT, Expand);
  setOperationAction(ISD::UDIVREM, XLenVT, Expand);
  setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
  setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);

  setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
  setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
  setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);

  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::ROTL, MVT::i32, Custom);
      setOperationAction(ISD::ROTR, MVT::i32, Custom);
    }
  } else {
    setOperationAction(ISD::ROTL, XLenVT, Expand);
    setOperationAction(ISD::ROTR, XLenVT, Expand);
  }

  if (Subtarget.hasStdExtZbp()) {
    // Custom lower bswap/bitreverse so we can convert them to GREVI to enable
    // more combining.
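    // For reference, rev8 and full bit-reversal are themselves GREV
    // operations (grevi with shift amounts 56 and 63 on RV64), so funnelling
    // BSWAP/BITREVERSE through RISCVISD::GREVI lets neighbouring GREVIs fold
    // together during DAG combining.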
    setOperationAction(ISD::BITREVERSE, XLenVT, Custom);
    setOperationAction(ISD::BSWAP, XLenVT, Custom);

    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
      setOperationAction(ISD::BSWAP, MVT::i32, Custom);
    }
  } else {
    // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
    // pattern match it directly in isel.
    setOperationAction(ISD::BSWAP, XLenVT,
                       Subtarget.hasStdExtZbb() ? Legal : Expand);
  }

  if (Subtarget.hasStdExtZbb()) {
    setOperationAction(ISD::SMIN, XLenVT, Legal);
    setOperationAction(ISD::SMAX, XLenVT, Legal);
    setOperationAction(ISD::UMIN, XLenVT, Legal);
    setOperationAction(ISD::UMAX, XLenVT, Legal);
  } else {
    setOperationAction(ISD::CTTZ, XLenVT, Expand);
    setOperationAction(ISD::CTLZ, XLenVT, Expand);
    setOperationAction(ISD::CTPOP, XLenVT, Expand);
  }

  if (Subtarget.hasStdExtZbt()) {
    setOperationAction(ISD::FSHL, XLenVT, Custom);
    setOperationAction(ISD::FSHR, XLenVT, Custom);
    setOperationAction(ISD::SELECT, XLenVT, Legal);

    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::FSHL, MVT::i32, Custom);
      setOperationAction(ISD::FSHR, MVT::i32, Custom);
    }
  } else {
    setOperationAction(ISD::SELECT, XLenVT, Custom);
  }

  ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
      ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
      ISD::SETGE,  ISD::SETNE,  ISD::SETO,   ISD::SETUO};

  ISD::NodeType FPOpToExpand[] = {
      ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM,
      ISD::FP16_TO_FP, ISD::FP_TO_FP16};

  if (Subtarget.hasStdExtZfh())
    setOperationAction(ISD::BITCAST, MVT::i16, Custom);

  if (Subtarget.hasStdExtZfh()) {
    setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f16, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
    setOperationAction(ISD::SELECT, MVT::f16, Custom);
    setOperationAction(ISD::BR_CC, MVT::f16, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f16, Expand);
  }

  if (Subtarget.hasStdExtF()) {
    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Custom);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  }

  if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);

  if (Subtarget.hasStdExtD()) {
    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Custom);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f64,
                         Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
  }

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
  }

  setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
  setOperationAction(ISD::BlockAddress, XLenVT, Custom);
  setOperationAction(ISD::ConstantPool, XLenVT, Custom);
  setOperationAction(ISD::JumpTable, XLenVT, Custom);

  setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);

  // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
  // Unfortunately this can't be determined just from the ISA naming string.
  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
                     Subtarget.is64Bit() ? Legal : Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);
  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  if (Subtarget.hasStdExtA()) {
    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
    setMinCmpXchgSizeInBits(32);
  } else {
    setMaxAtomicSizeInBitsSupported(0);
  }

  setBooleanContents(ZeroOrOneBooleanContent);

  if (Subtarget.hasStdExtV()) {
    setBooleanVectorContents(ZeroOrOneBooleanContent);

    setOperationAction(ISD::VSCALE, XLenVT, Custom);

    // RVV intrinsics may have illegal operands.
    // We also need to custom legalize vmv.x.s.
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);

    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);

    if (!Subtarget.is64Bit()) {
      // We must custom-lower certain vXi64 operations on RV32 due to the
      // vector element type being illegal.
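      // There is no 64-bit GPR on RV32, so moving an i64 element between the
      // vector and scalar register files generally needs two 32-bit transfers
      // (roughly: a vmv.x.s for the low word and a vector shift right by 32
      // followed by another vmv.x.s for the high word); the custom lowerings
      // registered below take care of building such sequences.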
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i64, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::i64, Custom);

      setOperationAction(ISD::VECREDUCE_ADD, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_AND, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_OR, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_XOR, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_SMAX, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_SMIN, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_UMAX, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_UMIN, MVT::i64, Custom);
    }

    for (MVT VT : BoolVecVTs) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);

      // Mask VTs are custom-expanded into a series of standard nodes
      setOperationAction(ISD::TRUNCATE, VT, Custom);
      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    }

    for (MVT VT : IntVecVTs) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
      setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);

      setOperationAction(ISD::SMIN, VT, Legal);
      setOperationAction(ISD::SMAX, VT, Legal);
      setOperationAction(ISD::UMIN, VT, Legal);
      setOperationAction(ISD::UMAX, VT, Legal);

      setOperationAction(ISD::ROTL, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      // Custom-lower extensions and truncations from/to mask types.
      setOperationAction(ISD::ANY_EXTEND, VT, Custom);
      setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
      setOperationAction(ISD::ZERO_EXTEND, VT, Custom);

      // RVV has native int->float & float->int conversions where the
      // element type sizes are within one power-of-two of each other. Any
      // wider distances between type sizes have to be lowered as sequences
      // which progressively narrow the gap in stages.
      setOperationAction(ISD::SINT_TO_FP, VT, Custom);
      setOperationAction(ISD::UINT_TO_FP, VT, Custom);
      setOperationAction(ISD::FP_TO_SINT, VT, Custom);
      setOperationAction(ISD::FP_TO_UINT, VT, Custom);

      // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
      // nodes which truncate by one power of two at a time.
      setOperationAction(ISD::TRUNCATE, VT, Custom);

      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);

      // Custom-lower reduction operations to set up the corresponding custom
      // nodes' operands.
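      // RVV reduction instructions take an extra vector operand whose element
      // 0 supplies the start value and write their scalar result to element 0
      // of the destination, so the custom lowering (lowerVECREDUCE) builds
      // that operand and then extracts the final element.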
      setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
      setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
      setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
      setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
      setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
      setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
      setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
      setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);

      setOperationAction(ISD::MGATHER, VT, Custom);
      setOperationAction(ISD::MSCATTER, VT, Custom);

      setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

      setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
    }

    // Expand various CCs to best match the RVV ISA, which natively supports
    // UNE but no other unordered comparisons, and supports all ordered
    // comparisons except ONE. Additionally, we expand GT,OGT,GE,OGE for
    // optimization purposes; they are expanded to their swapped-operand CCs
    // (LT,OLT,LE,OLE), and we pattern-match those back to the "original",
    // swapping operands once more. This way we catch both operations and both
    // "vf" and "fv" forms with fewer patterns.
    ISD::CondCode VFPCCToExpand[] = {
        ISD::SETO,   ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
        ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
        ISD::SETGT,  ISD::SETOGT, ISD::SETGE,  ISD::SETOGE,
    };

    // Sets common operation actions on RVV floating-point vector types.
    const auto SetCommonVFPActions = [&](MVT VT) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
      // RVV has native FP_ROUND & FP_EXTEND conversions where the element
      // type sizes are within one power-of-two of each other. Therefore
      // conversions between vXf16 and vXf64 must be lowered as sequences
      // which convert via vXf32.
      setOperationAction(ISD::FP_ROUND, VT, Custom);
      setOperationAction(ISD::FP_EXTEND, VT, Custom);
      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
      // Expand various condition codes (explained above).
      for (auto CC : VFPCCToExpand)
        setCondCodeAction(CC, VT, Expand);

      setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
      setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
      setOperationAction(ISD::FCOPYSIGN, VT, Legal);

      setOperationAction(ISD::MGATHER, VT, Custom);
      setOperationAction(ISD::MSCATTER, VT, Custom);

      setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

      setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
    };

    if (Subtarget.hasStdExtZfh())
      for (MVT VT : F16VecVTs)
        SetCommonVFPActions(VT);

    if (Subtarget.hasStdExtF())
      for (MVT VT : F32VecVTs)
        SetCommonVFPActions(VT);

    if (Subtarget.hasStdExtD())
      for (MVT VT : F64VecVTs)
        SetCommonVFPActions(VT);

    if (Subtarget.useRVVForFixedLengthVectors()) {
      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
        if (!useRVVForFixedLengthVectorVT(VT))
          continue;

        // By default everything must be expanded.
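        // Only the operations explicitly flagged Custom further down are
        // supported for fixed-length vectors; everything else should be
        // scalarized or otherwise expanded by the legalizer rather than
        // reaching isel without a pattern.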
        for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
          setOperationAction(Op, VT, Expand);
        for (MVT OtherVT : MVT::fixedlen_vector_valuetypes())
          setTruncStoreAction(VT, OtherVT, Expand);

        // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
        setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
        setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

        setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
        setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);

        setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);

        setOperationAction(ISD::LOAD, VT, Custom);
        setOperationAction(ISD::STORE, VT, Custom);

        setOperationAction(ISD::SETCC, VT, Custom);

        setOperationAction(ISD::TRUNCATE, VT, Custom);

        setOperationAction(ISD::BITCAST, VT, Custom);

        // The operations below differ between mask vectors and other vectors.
        if (VT.getVectorElementType() == MVT::i1) {
          setOperationAction(ISD::AND, VT, Custom);
          setOperationAction(ISD::OR, VT, Custom);
          setOperationAction(ISD::XOR, VT, Custom);
          continue;
        }

        setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
        setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);

        setOperationAction(ISD::MLOAD, VT, Custom);
        setOperationAction(ISD::MSTORE, VT, Custom);
        setOperationAction(ISD::ADD, VT, Custom);
        setOperationAction(ISD::MUL, VT, Custom);
        setOperationAction(ISD::SUB, VT, Custom);
        setOperationAction(ISD::AND, VT, Custom);
        setOperationAction(ISD::OR, VT, Custom);
        setOperationAction(ISD::XOR, VT, Custom);
        setOperationAction(ISD::SDIV, VT, Custom);
        setOperationAction(ISD::SREM, VT, Custom);
        setOperationAction(ISD::UDIV, VT, Custom);
        setOperationAction(ISD::UREM, VT, Custom);
        setOperationAction(ISD::SHL, VT, Custom);
        setOperationAction(ISD::SRA, VT, Custom);
        setOperationAction(ISD::SRL, VT, Custom);

        setOperationAction(ISD::SMIN, VT, Custom);
        setOperationAction(ISD::SMAX, VT, Custom);
        setOperationAction(ISD::UMIN, VT, Custom);
        setOperationAction(ISD::UMAX, VT, Custom);
        setOperationAction(ISD::ABS, VT, Custom);

        setOperationAction(ISD::MULHS, VT, Custom);
        setOperationAction(ISD::MULHU, VT, Custom);

        setOperationAction(ISD::SINT_TO_FP, VT, Custom);
        setOperationAction(ISD::UINT_TO_FP, VT, Custom);
        setOperationAction(ISD::FP_TO_SINT, VT, Custom);
        setOperationAction(ISD::FP_TO_UINT, VT, Custom);

        setOperationAction(ISD::VSELECT, VT, Custom);

        setOperationAction(ISD::ANY_EXTEND, VT, Custom);
        setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
        setOperationAction(ISD::ZERO_EXTEND, VT, Custom);

        // Custom-lower reduction operations to set up the corresponding
        // custom nodes' operands.
        setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
        setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
        setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
        setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
        setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
        setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
        setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
        setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
      }

      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
        if (!useRVVForFixedLengthVectorVT(VT))
          continue;

        // By default everything must be expanded.
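        // As with the integer vectors above, start from "everything expands"
        // and then opt individual FP operations back in as Custom lowerings
        // onto the equivalent scalable container type.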
        for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
          setOperationAction(Op, VT, Expand);
        for (MVT OtherVT : MVT::fp_fixedlen_vector_valuetypes()) {
          setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
          setTruncStoreAction(VT, OtherVT, Expand);
        }

        // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
        setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
        setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

        setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
        setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
        setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);

        setOperationAction(ISD::LOAD, VT, Custom);
        setOperationAction(ISD::STORE, VT, Custom);
        setOperationAction(ISD::MLOAD, VT, Custom);
        setOperationAction(ISD::MSTORE, VT, Custom);
        setOperationAction(ISD::FADD, VT, Custom);
        setOperationAction(ISD::FSUB, VT, Custom);
        setOperationAction(ISD::FMUL, VT, Custom);
        setOperationAction(ISD::FDIV, VT, Custom);
        setOperationAction(ISD::FNEG, VT, Custom);
        setOperationAction(ISD::FABS, VT, Custom);
        setOperationAction(ISD::FCOPYSIGN, VT, Custom);
        setOperationAction(ISD::FSQRT, VT, Custom);
        setOperationAction(ISD::FMA, VT, Custom);

        setOperationAction(ISD::FP_ROUND, VT, Custom);
        setOperationAction(ISD::FP_EXTEND, VT, Custom);

        for (auto CC : VFPCCToExpand)
          setCondCodeAction(CC, VT, Expand);

        setOperationAction(ISD::VSELECT, VT, Custom);

        setOperationAction(ISD::BITCAST, VT, Custom);

        setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
        setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
      }
    }
  }

  // Function alignments.
  const Align FunctionAlignment(Subtarget.hasStdExtC() ?
                                                        2 : 4);
  setMinFunctionAlignment(FunctionAlignment);
  setPrefFunctionAlignment(FunctionAlignment);

  setMinimumJumpTableEntries(5);

  // Jumps are expensive, compared to logic
  setJumpIsExpensive();

  // We can use any register for comparisons
  setHasMultipleConditionRegisters();

  if (Subtarget.hasStdExtZbp()) {
    setTargetDAGCombine(ISD::OR);
  }
  if (Subtarget.hasStdExtV()) {
    setTargetDAGCombine(ISD::FCOPYSIGN);
    setTargetDAGCombine(ISD::MGATHER);
    setTargetDAGCombine(ISD::MSCATTER);
  }
}

EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
                                            LLVMContext &Context,
                                            EVT VT) const {
  if (!VT.isVector())
    return getPointerTy(DL);
  if (Subtarget.hasStdExtV() &&
      (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
    return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
  return VT.changeVectorElementTypeToInteger();
}

bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                             const CallInst &I,
                                             MachineFunction &MF,
                                             unsigned Intrinsic) const {
  switch (Intrinsic) {
  default:
    return false;
  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
  case Intrinsic::riscv_masked_atomicrmw_add_i32:
  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
  case Intrinsic::riscv_masked_atomicrmw_max_i32:
  case Intrinsic::riscv_masked_atomicrmw_min_i32:
  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
  case Intrinsic::riscv_masked_cmpxchg_i32:
    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::getVT(PtrTy->getElementType());
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    return true;
  }
}

bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                const AddrMode &AM, Type *Ty,
                                                unsigned AS,
                                                Instruction *I) const {
  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // Require a 12-bit signed offset.
  if (!isInt<12>(AM.BaseOffs))
    return false;

  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (!AM.HasBaseReg) // allow "r+i".
      break;
    return false; // disallow "r+r" or "r+r+i".
  default:
    return false;
  }

  return true;
}

bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

// On RV32, 64-bit integers are split into their high and low parts and held
// in two different registers, so the trunc is free since the low register can
// just be used.
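// For example, with i64 %x held in the register pair {lo, hi}, a
// (trunc i64 %x to i32) simply uses lo and needs no instruction.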
bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
  if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
    return false;
  unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
  unsigned DestBits = DstTy->getPrimitiveSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
  if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
      !SrcVT.isInteger() || !DstVT.isInteger())
    return false;
  unsigned SrcBits = SrcVT.getSizeInBits();
  unsigned DestBits = DstVT.getSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Zexts are free if they can be combined with a load.
  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
         (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  return TargetLowering::isZExtFree(Val, VT2);
}

bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}

bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
  return Subtarget.hasStdExtZbb();
}

bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
  return Subtarget.hasStdExtZbb();
}

bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                       bool ForCodeSize) const {
  if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
    return false;
  if (VT == MVT::f32 && !Subtarget.hasStdExtF())
    return false;
  if (VT == MVT::f64 && !Subtarget.hasStdExtD())
    return false;
  if (Imm.isNegZero())
    return false;
  return Imm.isZero();
}

bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
  return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) ||
         (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
         (VT == MVT::f64 && Subtarget.hasStdExtD());
}

// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly by branches
// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
// with 1/-1.
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
                                    ISD::CondCode &CC, SelectionDAG &DAG) {
  // Convert X > -1 to X >= 0.
  if (CC == ISD::SETGT && isAllOnesConstant(RHS)) {
    RHS = DAG.getConstant(0, DL, RHS.getValueType());
    CC = ISD::SETGE;
    return;
  }
  // Convert X < 1 to 0 >= X.
  if (CC == ISD::SETLT && isOneConstant(RHS)) {
    RHS = LHS;
    LHS = DAG.getConstant(0, DL, RHS.getValueType());
    CC = ISD::SETGE;
    return;
  }

  switch (CC) {
  default:
    break;
  case ISD::SETGT:
  case ISD::SETLE:
  case ISD::SETUGT:
  case ISD::SETULE:
    CC = ISD::getSetCCSwappedOperands(CC);
    std::swap(LHS, RHS);
    break;
  }
}

// Return the RISC-V branch opcode that matches the given DAG integer
// condition code. The CondCode must be one of those supported by the RISC-V
// ISA (see translateSetCCForBranch).
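// RISC-V only provides BEQ, BNE, BLT, BGE, BLTU and BGEU; the "greater-than"
// and "less-or-equal" forms are obtained beforehand by swapping operands in
// translateSetCCForBranch rather than by distinct branch opcodes.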
static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported CondCode");
  case ISD::SETEQ:
    return RISCV::BEQ;
  case ISD::SETNE:
    return RISCV::BNE;
  case ISD::SETLT:
    return RISCV::BLT;
  case ISD::SETGE:
    return RISCV::BGE;
  case ISD::SETULT:
    return RISCV::BLTU;
  case ISD::SETUGE:
    return RISCV::BGEU;
  }
}

RISCVVLMUL RISCVTargetLowering::getLMUL(MVT VT) {
  assert(VT.isScalableVector() && "Expecting a scalable vector type");
  unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
  if (VT.getVectorElementType() == MVT::i1)
    KnownSize *= 8;

  switch (KnownSize) {
  default:
    llvm_unreachable("Invalid LMUL.");
  case 8:
    return RISCVVLMUL::LMUL_F8;
  case 16:
    return RISCVVLMUL::LMUL_F4;
  case 32:
    return RISCVVLMUL::LMUL_F2;
  case 64:
    return RISCVVLMUL::LMUL_1;
  case 128:
    return RISCVVLMUL::LMUL_2;
  case 256:
    return RISCVVLMUL::LMUL_4;
  case 512:
    return RISCVVLMUL::LMUL_8;
  }
}

unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVVLMUL LMul) {
  switch (LMul) {
  default:
    llvm_unreachable("Invalid LMUL.");
  case RISCVVLMUL::LMUL_F8:
  case RISCVVLMUL::LMUL_F4:
  case RISCVVLMUL::LMUL_F2:
  case RISCVVLMUL::LMUL_1:
    return RISCV::VRRegClassID;
  case RISCVVLMUL::LMUL_2:
    return RISCV::VRM2RegClassID;
  case RISCVVLMUL::LMUL_4:
    return RISCV::VRM4RegClassID;
  case RISCVVLMUL::LMUL_8:
    return RISCV::VRM8RegClassID;
  }
}

unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
  RISCVVLMUL LMUL = getLMUL(VT);
  if (LMUL == RISCVVLMUL::LMUL_F8 || LMUL == RISCVVLMUL::LMUL_F4 ||
      LMUL == RISCVVLMUL::LMUL_F2 || LMUL == RISCVVLMUL::LMUL_1) {
    static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm1_0 + Index;
  }
  if (LMUL == RISCVVLMUL::LMUL_2) {
    static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm2_0 + Index;
  }
  if (LMUL == RISCVVLMUL::LMUL_4) {
    static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm4_0 + Index;
  }
  llvm_unreachable("Invalid vector type.");
}

unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
  if (VT.getVectorElementType() == MVT::i1)
    return RISCV::VRRegClassID;
  return getRegClassIDForLMUL(getLMUL(VT));
}

// Attempt to decompose a subvector insert/extract between VecVT and
// SubVecVT via subregister indices. Returns the subregister index that
// can perform the subvector insert/extract with the given element index, as
// well as the index corresponding to any leftover subvectors that must be
// further inserted/extracted within the register class for SubVecVT.
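// For instance, under these rules extracting nxv1i32 at element index 3 from
// nxv4i32 should decompose to sub_vrm1_1 (the upper VR of the VRM2 register)
// with a leftover element index of 1 inside that VR.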
std::pair<unsigned, unsigned>
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
    MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
    const RISCVRegisterInfo *TRI) {
  static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
                 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
                 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
                "Register classes not ordered");
  unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
  unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
  // Try to compose a subregister index that takes us from the incoming
  // LMUL>1 register class down to the outgoing one. At each step we halve the
  // LMUL:
  //   nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
  // Note that this is not guaranteed to find a subregister index, such as
  // when we are extracting from one VR type to another.
  unsigned SubRegIdx = RISCV::NoSubRegister;
  for (const unsigned RCID :
       {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
    if (VecRegClassID > RCID && SubRegClassID <= RCID) {
      VecVT = VecVT.getHalfNumVectorElementsVT();
      bool IsHi =
          InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
      SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
                                            getSubregIndexByMVT(VecVT, IsHi));
      if (IsHi)
        InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
    }
  return {SubRegIdx, InsertExtractIdx};
}

// Return the largest legal scalable vector type that matches VT's element
// type.
MVT RISCVTargetLowering::getContainerForFixedLengthVector(
    const TargetLowering &TLI, MVT VT, const RISCVSubtarget &Subtarget) {
  assert(VT.isFixedLengthVector() && TLI.isTypeLegal(VT) &&
         "Expected legal fixed length vector!");

  unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
  assert(LMul <= 8 && isPowerOf2_32(LMul) && "Unexpected LMUL!");

  MVT EltVT = VT.getVectorElementType();
  switch (EltVT.SimpleTy) {
  default:
    llvm_unreachable("unexpected element type for RVV container");
  case MVT::i1: {
    // Masks are calculated assuming 8-bit elements since that's when we need
    // the most elements.
    unsigned EltsPerBlock = RISCV::RVVBitsPerBlock / 8;
    return MVT::getScalableVectorVT(MVT::i1, LMul * EltsPerBlock);
  }
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
  case MVT::f16:
  case MVT::f32:
  case MVT::f64: {
    unsigned EltsPerBlock = RISCV::RVVBitsPerBlock / EltVT.getSizeInBits();
    return MVT::getScalableVectorVT(EltVT, LMul * EltsPerBlock);
  }
  }
}

MVT RISCVTargetLowering::getContainerForFixedLengthVector(
    SelectionDAG &DAG, MVT VT, const RISCVSubtarget &Subtarget) {
  return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
                                          Subtarget);
}

MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
  return getContainerForFixedLengthVector(*this, VT, getSubtarget());
}

// Grow V to consume an entire RVV register.
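// The fixed-length value is inserted at index 0 of an undef scalable
// container; the elements past the original length are never read because
// later operations use a VL equal to the fixed-length element count.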
static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
                                       const RISCVSubtarget &Subtarget) {
  assert(VT.isScalableVector() &&
         "Expected to convert into a scalable vector!");
  assert(V.getValueType().isFixedLengthVector() &&
         "Expected a fixed length vector operand!");
  SDLoc DL(V);
  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
}

// Shrink V so it's just big enough to maintain a VT's worth of data.
static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
                                         const RISCVSubtarget &Subtarget) {
  assert(VT.isFixedLengthVector() &&
         "Expected to convert into a fixed length vector!");
  assert(V.getValueType().isScalableVector() &&
         "Expected a scalable vector operand!");
  SDLoc DL(V);
  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
}

// Gets the two common "VL" operands: an all-ones mask and the vector length.
// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
// the vector type that it is contained in.
static std::pair<SDValue, SDValue>
getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
                const RISCVSubtarget &Subtarget) {
  assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
  MVT XLenVT = Subtarget.getXLenVT();
  SDValue VL = VecVT.isFixedLengthVector()
                   ? DAG.getConstant(VecVT.getVectorNumElements(), DL, XLenVT)
                   : DAG.getRegister(RISCV::X0, XLenVT);
  MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
  SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
  return {Mask, VL};
}

// As above but assuming the given type is a scalable vector type.
static std::pair<SDValue, SDValue>
getDefaultScalableVLOps(MVT VecVT, SDLoc DL, SelectionDAG &DAG,
                        const RISCVSubtarget &Subtarget) {
  assert(VecVT.isScalableVector() && "Expecting a scalable vector");
  return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
}

// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
// of either are (currently) supported. This can get us into an infinite loop
// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
// as a ..., etc.
// Until either (or both) of these can reliably lower any node, reporting that
// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
// which is not desirable.
bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
    EVT VT, unsigned DefinedValues) const {
  return false;
}

bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
  // Only splats are currently supported.
  if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
    return true;

  return false;
}

static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  MVT VT = Op.getSimpleValueType();
  assert(VT.isFixedLengthVector() && "Unexpected vector!");

  MVT ContainerVT =
      RISCVTargetLowering::getContainerForFixedLengthVector(DAG, VT, Subtarget);

  SDLoc DL(Op);
  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  if (VT.getVectorElementType() == MVT::i1) {
    if (ISD::isBuildVectorAllZeros(Op.getNode())) {
      SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
      return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
    }

    if (ISD::isBuildVectorAllOnes(Op.getNode())) {
      SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
      return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
    }

    return SDValue();
  }

  if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
    unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
                                        : RISCVISD::VMV_V_X_VL;
    Splat = DAG.getNode(Opc, DL, ContainerVT, Splat, VL);
    return convertFromScalableVector(VT, Splat, DAG, Subtarget);
  }

  unsigned NumElts = Op.getNumOperands();

  // Try and match an index sequence, which we can lower directly to the vid
  // instruction. An all-undef vector is matched by getSplatValue, above.
  if (VT.isInteger()) {
    bool IsVID = true;
    for (unsigned I = 0; I < NumElts && IsVID; I++)
      IsVID &= Op.getOperand(I).isUndef() ||
               (isa<ConstantSDNode>(Op.getOperand(I)) &&
                Op.getConstantOperandVal(I) == I);

    if (IsVID) {
      SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
      return convertFromScalableVector(VT, VID, DAG, Subtarget);
    }
  }

  // Try and optimize BUILD_VECTORs with "dominant values" - these are values
  // which constitute a large proportion of the elements. In such cases we can
  // splat a vector with the dominant element and make up the shortfall with
  // INSERT_VECTOR_ELTs.
  // Note that this includes vectors of 2 elements by association. The
  // upper-most element is the "dominant" one, allowing us to use a splat to
  // "insert" the upper element, and an insert of the lower element at position
  // 0, which improves codegen.
  SDValue DominantValue;
  DenseMap<SDValue, unsigned> ValueCounts;
  // Use a fairly conservative threshold. A future optimization could be to use
  // multiple vmerge.vi/vmerge.vx instructions on "partially-dominant"
  // elements with more relaxed thresholds.
  unsigned NumUndefElts =
      count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
  unsigned NumDefElts = NumElts - NumUndefElts;
  unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;

  for (SDValue V : Op->op_values()) {
    if (V.isUndef())
      continue;

    ValueCounts.insert(std::make_pair(V, 0));
    unsigned &Count = ValueCounts[V];

    // Is this value dominant?
    if (++Count > DominantValueCountThreshold)
      DominantValue = V;
  }

  // Don't perform this optimization when optimizing for size, since
  // materializing elements and inserting them tends to cause code bloat.
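  // When it does apply, a v4i32 build_vector <4, 4, 4, 7> becomes, roughly, a
  // vmv.v.x splat of 4 plus a single element insert of 7, rather than four
  // separate element inserts.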
  if (DominantValue && !DAG.shouldOptForSize()) {
    unsigned Opc =
        VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
    SDValue Vec = DAG.getNode(Opc, DL, ContainerVT, DominantValue, VL);

    if (ValueCounts.size() != 1) {
      MVT XLenVT = Subtarget.getXLenVT();
      for (unsigned I = 0; I < NumElts; ++I) {
        if (!Op.getOperand(I).isUndef() && Op.getOperand(I) != DominantValue)
          Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ContainerVT, Vec,
                            Op.getOperand(I), DAG.getConstant(I, DL, XLenVT));
      }
    }

    return convertFromScalableVector(VT, Vec, DAG, Subtarget);
  }

  return SDValue();
}

static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
                                   const RISCVSubtarget &Subtarget) {
  SDValue V1 = Op.getOperand(0);
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());

  if (SVN->isSplat()) {
    int Lane = SVN->getSplatIndex();
    if (Lane >= 0) {
      MVT ContainerVT = RISCVTargetLowering::getContainerForFixedLengthVector(
          DAG, VT, Subtarget);

      V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
      assert(Lane < (int)VT.getVectorNumElements() && "Unexpected lane!");

      SDValue Mask, VL;
      std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
      MVT XLenVT = Subtarget.getXLenVT();
      SDValue Gather =
          DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, V1,
                      DAG.getConstant(Lane, DL, XLenVT), Mask, VL);
      return convertFromScalableVector(VT, Gather, DAG, Subtarget);
    }
  }

  return SDValue();
}

static SDValue getRVVFPExtendOrRound(SDValue Op, MVT VT, MVT ContainerVT,
                                     SDLoc DL, SelectionDAG &DAG,
                                     const RISCVSubtarget &Subtarget) {
  if (VT.isScalableVector())
    return DAG.getFPExtendOrRound(Op, DL, VT);
  assert(VT.isFixedLengthVector() &&
         "Unexpected value type for RVV FP extend/round lowering");
  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
  unsigned RVVOpc = ContainerVT.bitsGT(Op.getSimpleValueType())
                        ?
                          RISCVISD::FP_EXTEND_VL
                        : RISCVISD::FP_ROUND_VL;
  return DAG.getNode(RVVOpc, DL, ContainerVT, Op, Mask, VL);
}

SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
                                            SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default:
    report_fatal_error("unimplemented operand");
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::JumpTable:
    return lowerJumpTable(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::SELECT:
    return lowerSELECT(Op, DAG);
  case ISD::BRCOND:
    return lowerBRCOND(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, true);
  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, false);
  case ISD::BITCAST: {
    SDValue Op0 = Op.getOperand(0);
    // We can handle fixed length vector bitcasts with a simple replacement
    // in isel.
    if (Op.getValueType().isFixedLengthVector()) {
      if (Op0.getValueType().isFixedLengthVector())
        return Op;
      return SDValue();
    }
    assert(((Subtarget.is64Bit() && Subtarget.hasStdExtF()) ||
            Subtarget.hasStdExtZfh()) &&
           "Unexpected custom legalisation");
    SDLoc DL(Op);
    if (Op.getValueType() == MVT::f16 && Subtarget.hasStdExtZfh()) {
      if (Op0.getValueType() != MVT::i16)
        return SDValue();
      SDValue NewOp0 =
          DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Op0);
      SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
      return FPConv;
    } else if (Op.getValueType() == MVT::f32 && Subtarget.is64Bit() &&
               Subtarget.hasStdExtF()) {
      if (Op0.getValueType() != MVT::i32)
        return SDValue();
      SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
      SDValue FPConv =
          DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
      return FPConv;
    }
    return SDValue();
  }
  case ISD::INTRINSIC_WO_CHAIN:
    return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return LowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::BSWAP:
  case ISD::BITREVERSE: {
    // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
    assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
    MVT VT = Op.getSimpleValueType();
    SDLoc DL(Op);
    // Start with the maximum immediate value which is the bitwidth - 1.
    unsigned Imm = VT.getSizeInBits() - 1;
    // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
    if (Op.getOpcode() == ISD::BSWAP)
      Imm &= ~0x7U;
    return DAG.getNode(RISCVISD::GREVI, DL, VT, Op.getOperand(0),
                       DAG.getTargetConstant(Imm, DL, Subtarget.getXLenVT()));
  }
  case ISD::FSHL:
  case ISD::FSHR: {
    MVT VT = Op.getSimpleValueType();
    assert(VT == Subtarget.getXLenVT() && "Unexpected custom legalization");
    SDLoc DL(Op);
    // FSL/FSR take a log2(XLen)+1 bit shift amount but XLenVT FSHL/FSHR only
    // use log2(XLen) bits. Mask the shift amount accordingly.
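    // On RV64, for instance, the AND below masks the amount with 63 so the
    // 7-bit FSL/FSR shift amount can never reach 64, which matches FSHL/FSHR
    // semantics where the amount is taken modulo XLen.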
    unsigned ShAmtWidth = Subtarget.getXLen() - 1;
    SDValue ShAmt = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(2),
                                DAG.getConstant(ShAmtWidth, DL, VT));
    unsigned Opc = Op.getOpcode() == ISD::FSHL ? RISCVISD::FSL : RISCVISD::FSR;
    return DAG.getNode(Opc, DL, VT, Op.getOperand(0), Op.getOperand(1), ShAmt);
  }
  case ISD::TRUNCATE: {
    SDLoc DL(Op);
    MVT VT = Op.getSimpleValueType();
    // Only custom-lower vector truncates
    if (!VT.isVector())
      return Op;

    // Truncates to mask types are handled differently
    if (VT.getVectorElementType() == MVT::i1)
      return lowerVectorMaskTrunc(Op, DAG);

    // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
    // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
    // truncate by one power of two at a time.
    MVT DstEltVT = VT.getVectorElementType();

    SDValue Src = Op.getOperand(0);
    MVT SrcVT = Src.getSimpleValueType();
    MVT SrcEltVT = SrcVT.getVectorElementType();

    assert(DstEltVT.bitsLT(SrcEltVT) &&
           isPowerOf2_64(DstEltVT.getSizeInBits()) &&
           isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
           "Unexpected vector truncate lowering");

    MVT ContainerVT = SrcVT;
    if (SrcVT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(SrcVT);
      Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
    }

    SDValue Result = Src;
    SDValue Mask, VL;
    std::tie(Mask, VL) =
        getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
    LLVMContext &Context = *DAG.getContext();
    const ElementCount Count = ContainerVT.getVectorElementCount();
    do {
      SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
      EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
      Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
                           Mask, VL);
    } while (SrcEltVT != DstEltVT);

    if (SrcVT.isFixedLengthVector())
      Result = convertFromScalableVector(VT, Result, DAG, Subtarget);

    return Result;
  }
  case ISD::ANY_EXTEND:
  case ISD::ZERO_EXTEND:
    if (Op.getOperand(0).getValueType().isVector() &&
        Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
      return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
    return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
  case ISD::SIGN_EXTEND:
    if (Op.getOperand(0).getValueType().isVector() &&
        Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
      return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
    return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
  case ISD::SPLAT_VECTOR_PARTS:
    return lowerSPLAT_VECTOR_PARTS(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::VSCALE: {
    MVT VT = Op.getSimpleValueType();
    SDLoc DL(Op);
    SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
    // We define our scalable vector types for lmul=1 to use a 64 bit known
    // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
    // vscale as VLENB / 8.
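    // Put differently: vscale = VLEN / 64 and VLENB = VLEN / 8, so
    // vscale = VLENB / 8, which is the shift-right-by-3 emitted below.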
    SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
                                 DAG.getConstant(3, DL, VT));
    return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
  }
  case ISD::FP_EXTEND: {
    // RVV can only do fp_extend to types double the size of the source. We
    // custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going
    // via f32.
    SDLoc DL(Op);
    MVT VT = Op.getSimpleValueType();
    SDValue Src = Op.getOperand(0);
    MVT SrcVT = Src.getSimpleValueType();

    // Prepare any fixed-length vector operands.
    MVT ContainerVT = VT;
    if (SrcVT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VT);
      MVT SrcContainerVT =
          ContainerVT.changeVectorElementType(SrcVT.getVectorElementType());
      Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
    }

    if (!VT.isVector() || VT.getVectorElementType() != MVT::f64 ||
        SrcVT.getVectorElementType() != MVT::f16) {
      // For scalable vectors, we only need to close the gap between
      // vXf16->vXf64.
      if (!VT.isFixedLengthVector())
        return Op;
      // For fixed-length vectors, lower the FP_EXTEND to a custom "VL"
      // version.
      Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
      return convertFromScalableVector(VT, Src, DAG, Subtarget);
    }

    MVT InterVT = VT.changeVectorElementType(MVT::f32);
    MVT InterContainerVT = ContainerVT.changeVectorElementType(MVT::f32);
    SDValue IntermediateExtend = getRVVFPExtendOrRound(
        Src, InterVT, InterContainerVT, DL, DAG, Subtarget);

    SDValue Extend = getRVVFPExtendOrRound(IntermediateExtend, VT, ContainerVT,
                                           DL, DAG, Subtarget);
    if (VT.isFixedLengthVector())
      return convertFromScalableVector(VT, Extend, DAG, Subtarget);
    return Extend;
  }
  case ISD::FP_ROUND: {
    // RVV can only do fp_round to types half the size of the source. We
    // custom-lower f64->f16 rounds via RVV's round-to-odd float
    // conversion instruction.
    SDLoc DL(Op);
    MVT VT = Op.getSimpleValueType();
    SDValue Src = Op.getOperand(0);
    MVT SrcVT = Src.getSimpleValueType();

    // Prepare any fixed-length vector operands.
    MVT ContainerVT = VT;
    if (VT.isFixedLengthVector()) {
      MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
      ContainerVT =
          SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
      Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
    }

    if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 ||
        SrcVT.getVectorElementType() != MVT::f64) {
      // For scalable vectors, we only need to close the gap between
      // vXf64<->vXf16.
      if (!VT.isFixedLengthVector())
        return Op;
      // For fixed-length vectors, lower the FP_ROUND to a custom "VL" version.
1496 Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget); 1497 return convertFromScalableVector(VT, Src, DAG, Subtarget); 1498 } 1499 1500 SDValue Mask, VL; 1501 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 1502 1503 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32); 1504 SDValue IntermediateRound = 1505 DAG.getNode(RISCVISD::VFNCVT_ROD_VL, DL, InterVT, Src, Mask, VL); 1506 SDValue Round = getRVVFPExtendOrRound(IntermediateRound, VT, ContainerVT, 1507 DL, DAG, Subtarget); 1508 1509 if (VT.isFixedLengthVector()) 1510 return convertFromScalableVector(VT, Round, DAG, Subtarget); 1511 return Round; 1512 } 1513 case ISD::FP_TO_SINT: 1514 case ISD::FP_TO_UINT: 1515 case ISD::SINT_TO_FP: 1516 case ISD::UINT_TO_FP: { 1517 // RVV can only do fp<->int conversions to types half/double the size as 1518 // the source. We custom-lower any conversions that do two hops into 1519 // sequences. 1520 MVT VT = Op.getSimpleValueType(); 1521 if (!VT.isVector()) 1522 return Op; 1523 SDLoc DL(Op); 1524 SDValue Src = Op.getOperand(0); 1525 MVT EltVT = VT.getVectorElementType(); 1526 MVT SrcVT = Src.getSimpleValueType(); 1527 MVT SrcEltVT = SrcVT.getVectorElementType(); 1528 unsigned EltSize = EltVT.getSizeInBits(); 1529 unsigned SrcEltSize = SrcEltVT.getSizeInBits(); 1530 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) && 1531 "Unexpected vector element types"); 1532 1533 bool IsInt2FP = SrcEltVT.isInteger(); 1534 // Widening conversions 1535 if (EltSize > SrcEltSize && (EltSize / SrcEltSize >= 4)) { 1536 if (IsInt2FP) { 1537 // Do a regular integer sign/zero extension then convert to float. 1538 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltVT.getSizeInBits()), 1539 VT.getVectorElementCount()); 1540 unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP 1541 ? ISD::ZERO_EXTEND 1542 : ISD::SIGN_EXTEND; 1543 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src); 1544 return DAG.getNode(Op.getOpcode(), DL, VT, Ext); 1545 } 1546 // FP2Int 1547 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering"); 1548 // Do one doubling fp_extend then complete the operation by converting 1549 // to int. 1550 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 1551 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT); 1552 return DAG.getNode(Op.getOpcode(), DL, VT, FExt); 1553 } 1554 1555 // Narrowing conversions 1556 if (SrcEltSize > EltSize && (SrcEltSize / EltSize >= 4)) { 1557 if (IsInt2FP) { 1558 // One narrowing int_to_fp, then an fp_round. 1559 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering"); 1560 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 1561 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src); 1562 return DAG.getFPExtendOrRound(Int2FP, DL, VT); 1563 } 1564 // FP2Int 1565 // One narrowing fp_to_int, then truncate the integer. If the float isn't 1566 // representable by the integer, the result is poison. 1567 MVT IVecVT = 1568 MVT::getVectorVT(MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2), 1569 VT.getVectorElementCount()); 1570 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src); 1571 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int); 1572 } 1573 1574 // Scalable vectors can exit here. Patterns will handle equally-sized 1575 // conversions halving/doubling ones. 1576 if (!VT.isFixedLengthVector()) 1577 return Op; 1578 1579 // For fixed-length vectors we lower to a custom "VL" node. 
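    // For example (illustrative): a v4f32 -> v4i32 FP_TO_SINT becomes an
    // FP_TO_SINT_VL on the scalable container types, with the result
    // converted back to the fixed-length v4i32 type.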
1580 unsigned RVVOpc = 0; 1581 switch (Op.getOpcode()) { 1582 default: 1583 llvm_unreachable("Impossible opcode"); 1584 case ISD::FP_TO_SINT: 1585 RVVOpc = RISCVISD::FP_TO_SINT_VL; 1586 break; 1587 case ISD::FP_TO_UINT: 1588 RVVOpc = RISCVISD::FP_TO_UINT_VL; 1589 break; 1590 case ISD::SINT_TO_FP: 1591 RVVOpc = RISCVISD::SINT_TO_FP_VL; 1592 break; 1593 case ISD::UINT_TO_FP: 1594 RVVOpc = RISCVISD::UINT_TO_FP_VL; 1595 break; 1596 } 1597 1598 MVT ContainerVT, SrcContainerVT; 1599 // Derive the reference container type from the larger vector type. 1600 if (SrcEltSize > EltSize) { 1601 SrcContainerVT = getContainerForFixedLengthVector(SrcVT); 1602 ContainerVT = 1603 SrcContainerVT.changeVectorElementType(VT.getVectorElementType()); 1604 } else { 1605 ContainerVT = getContainerForFixedLengthVector(VT); 1606 SrcContainerVT = ContainerVT.changeVectorElementType(SrcEltVT); 1607 } 1608 1609 SDValue Mask, VL; 1610 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 1611 1612 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); 1613 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL); 1614 return convertFromScalableVector(VT, Src, DAG, Subtarget); 1615 } 1616 case ISD::VECREDUCE_ADD: 1617 case ISD::VECREDUCE_UMAX: 1618 case ISD::VECREDUCE_SMAX: 1619 case ISD::VECREDUCE_UMIN: 1620 case ISD::VECREDUCE_SMIN: 1621 case ISD::VECREDUCE_AND: 1622 case ISD::VECREDUCE_OR: 1623 case ISD::VECREDUCE_XOR: 1624 return lowerVECREDUCE(Op, DAG); 1625 case ISD::VECREDUCE_FADD: 1626 case ISD::VECREDUCE_SEQ_FADD: 1627 return lowerFPVECREDUCE(Op, DAG); 1628 case ISD::INSERT_SUBVECTOR: 1629 return lowerINSERT_SUBVECTOR(Op, DAG); 1630 case ISD::EXTRACT_SUBVECTOR: 1631 return lowerEXTRACT_SUBVECTOR(Op, DAG); 1632 case ISD::VECTOR_REVERSE: 1633 return lowerVECTOR_REVERSE(Op, DAG); 1634 case ISD::BUILD_VECTOR: 1635 return lowerBUILD_VECTOR(Op, DAG, Subtarget); 1636 case ISD::VECTOR_SHUFFLE: 1637 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget); 1638 case ISD::CONCAT_VECTORS: { 1639 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is 1640 // better than going through the stack, as the default expansion does. 
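    // For example (illustrative): (concat_vectors v4i32:a, v4i32:b) becomes
    // (insert_subvector (insert_subvector undef, a, 0), b, 4).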
1641 SDLoc DL(Op); 1642 MVT VT = Op.getSimpleValueType(); 1643 unsigned NumOpElts = 1644 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements(); 1645 SDValue Vec = DAG.getUNDEF(VT); 1646 for (const auto &OpIdx : enumerate(Op->ops())) 1647 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, OpIdx.value(), 1648 DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL)); 1649 return Vec; 1650 } 1651 case ISD::LOAD: 1652 return lowerFixedLengthVectorLoadToRVV(Op, DAG); 1653 case ISD::STORE: 1654 return lowerFixedLengthVectorStoreToRVV(Op, DAG); 1655 case ISD::MLOAD: 1656 return lowerFixedLengthVectorMaskedLoadToRVV(Op, DAG); 1657 case ISD::MSTORE: 1658 return lowerFixedLengthVectorMaskedStoreToRVV(Op, DAG); 1659 case ISD::SETCC: 1660 return lowerFixedLengthVectorSetccToRVV(Op, DAG); 1661 case ISD::ADD: 1662 return lowerToScalableOp(Op, DAG, RISCVISD::ADD_VL); 1663 case ISD::SUB: 1664 return lowerToScalableOp(Op, DAG, RISCVISD::SUB_VL); 1665 case ISD::MUL: 1666 return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL); 1667 case ISD::MULHS: 1668 return lowerToScalableOp(Op, DAG, RISCVISD::MULHS_VL); 1669 case ISD::MULHU: 1670 return lowerToScalableOp(Op, DAG, RISCVISD::MULHU_VL); 1671 case ISD::AND: 1672 return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMAND_VL, 1673 RISCVISD::AND_VL); 1674 case ISD::OR: 1675 return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMOR_VL, 1676 RISCVISD::OR_VL); 1677 case ISD::XOR: 1678 return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMXOR_VL, 1679 RISCVISD::XOR_VL); 1680 case ISD::SDIV: 1681 return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL); 1682 case ISD::SREM: 1683 return lowerToScalableOp(Op, DAG, RISCVISD::SREM_VL); 1684 case ISD::UDIV: 1685 return lowerToScalableOp(Op, DAG, RISCVISD::UDIV_VL); 1686 case ISD::UREM: 1687 return lowerToScalableOp(Op, DAG, RISCVISD::UREM_VL); 1688 case ISD::SHL: 1689 return lowerToScalableOp(Op, DAG, RISCVISD::SHL_VL); 1690 case ISD::SRA: 1691 return lowerToScalableOp(Op, DAG, RISCVISD::SRA_VL); 1692 case ISD::SRL: 1693 return lowerToScalableOp(Op, DAG, RISCVISD::SRL_VL); 1694 case ISD::FADD: 1695 return lowerToScalableOp(Op, DAG, RISCVISD::FADD_VL); 1696 case ISD::FSUB: 1697 return lowerToScalableOp(Op, DAG, RISCVISD::FSUB_VL); 1698 case ISD::FMUL: 1699 return lowerToScalableOp(Op, DAG, RISCVISD::FMUL_VL); 1700 case ISD::FDIV: 1701 return lowerToScalableOp(Op, DAG, RISCVISD::FDIV_VL); 1702 case ISD::FNEG: 1703 return lowerToScalableOp(Op, DAG, RISCVISD::FNEG_VL); 1704 case ISD::FABS: 1705 return lowerToScalableOp(Op, DAG, RISCVISD::FABS_VL); 1706 case ISD::FSQRT: 1707 return lowerToScalableOp(Op, DAG, RISCVISD::FSQRT_VL); 1708 case ISD::FMA: 1709 return lowerToScalableOp(Op, DAG, RISCVISD::FMA_VL); 1710 case ISD::SMIN: 1711 return lowerToScalableOp(Op, DAG, RISCVISD::SMIN_VL); 1712 case ISD::SMAX: 1713 return lowerToScalableOp(Op, DAG, RISCVISD::SMAX_VL); 1714 case ISD::UMIN: 1715 return lowerToScalableOp(Op, DAG, RISCVISD::UMIN_VL); 1716 case ISD::UMAX: 1717 return lowerToScalableOp(Op, DAG, RISCVISD::UMAX_VL); 1718 case ISD::ABS: 1719 return lowerABS(Op, DAG); 1720 case ISD::VSELECT: 1721 return lowerFixedLengthVectorSelectToRVV(Op, DAG); 1722 case ISD::FCOPYSIGN: 1723 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG); 1724 case ISD::MGATHER: 1725 case ISD::MSCATTER: 1726 return lowerMGATHERMSCATTER(Op, DAG); 1727 } 1728 } 1729 1730 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty, 1731 SelectionDAG &DAG, unsigned Flags) { 1732 return 
DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags); 1733 } 1734 1735 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty, 1736 SelectionDAG &DAG, unsigned Flags) { 1737 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(), 1738 Flags); 1739 } 1740 1741 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty, 1742 SelectionDAG &DAG, unsigned Flags) { 1743 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(), 1744 N->getOffset(), Flags); 1745 } 1746 1747 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty, 1748 SelectionDAG &DAG, unsigned Flags) { 1749 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags); 1750 } 1751 1752 template <class NodeTy> 1753 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, 1754 bool IsLocal) const { 1755 SDLoc DL(N); 1756 EVT Ty = getPointerTy(DAG.getDataLayout()); 1757 1758 if (isPositionIndependent()) { 1759 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); 1760 if (IsLocal) 1761 // Use PC-relative addressing to access the symbol. This generates the 1762 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym)) 1763 // %pcrel_lo(auipc)). 1764 return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0); 1765 1766 // Use PC-relative addressing to access the GOT for this symbol, then load 1767 // the address from the GOT. This generates the pattern (PseudoLA sym), 1768 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))). 1769 return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0); 1770 } 1771 1772 switch (getTargetMachine().getCodeModel()) { 1773 default: 1774 report_fatal_error("Unsupported code model for lowering"); 1775 case CodeModel::Small: { 1776 // Generate a sequence for accessing addresses within the first 2 GiB of 1777 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)). 1778 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI); 1779 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO); 1780 SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0); 1781 return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0); 1782 } 1783 case CodeModel::Medium: { 1784 // Generate a sequence for accessing addresses within any 2GiB range within 1785 // the address space. This generates the pattern (PseudoLLA sym), which 1786 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)). 1787 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); 1788 return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0); 1789 } 1790 } 1791 } 1792 1793 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op, 1794 SelectionDAG &DAG) const { 1795 SDLoc DL(Op); 1796 EVT Ty = Op.getValueType(); 1797 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 1798 int64_t Offset = N->getOffset(); 1799 MVT XLenVT = Subtarget.getXLenVT(); 1800 1801 const GlobalValue *GV = N->getGlobal(); 1802 bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV); 1803 SDValue Addr = getAddr(N, DAG, IsLocal); 1804 1805 // In order to maximise the opportunity for common subexpression elimination, 1806 // emit a separate ADD node for the global address offset instead of folding 1807 // it in the global address node. Later peephole optimisations may choose to 1808 // fold it back in when profitable. 
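  // For example (illustrative): an access to @g+8 is emitted as
  // (add (getAddr @g), 8) rather than folding the offset into the hi/lo
  // relocations, so other accesses to @g can reuse the materialised address.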
1809 if (Offset != 0) 1810 return DAG.getNode(ISD::ADD, DL, Ty, Addr, 1811 DAG.getConstant(Offset, DL, XLenVT)); 1812 return Addr; 1813 } 1814 1815 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op, 1816 SelectionDAG &DAG) const { 1817 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op); 1818 1819 return getAddr(N, DAG); 1820 } 1821 1822 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op, 1823 SelectionDAG &DAG) const { 1824 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); 1825 1826 return getAddr(N, DAG); 1827 } 1828 1829 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op, 1830 SelectionDAG &DAG) const { 1831 JumpTableSDNode *N = cast<JumpTableSDNode>(Op); 1832 1833 return getAddr(N, DAG); 1834 } 1835 1836 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, 1837 SelectionDAG &DAG, 1838 bool UseGOT) const { 1839 SDLoc DL(N); 1840 EVT Ty = getPointerTy(DAG.getDataLayout()); 1841 const GlobalValue *GV = N->getGlobal(); 1842 MVT XLenVT = Subtarget.getXLenVT(); 1843 1844 if (UseGOT) { 1845 // Use PC-relative addressing to access the GOT for this TLS symbol, then 1846 // load the address from the GOT and add the thread pointer. This generates 1847 // the pattern (PseudoLA_TLS_IE sym), which expands to 1848 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)). 1849 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); 1850 SDValue Load = 1851 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0); 1852 1853 // Add the thread pointer. 1854 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); 1855 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg); 1856 } 1857 1858 // Generate a sequence for accessing the address relative to the thread 1859 // pointer, with the appropriate adjustment for the thread pointer offset. 1860 // This generates the pattern 1861 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym)) 1862 SDValue AddrHi = 1863 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI); 1864 SDValue AddrAdd = 1865 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD); 1866 SDValue AddrLo = 1867 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO); 1868 1869 SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0); 1870 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); 1871 SDValue MNAdd = SDValue( 1872 DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd), 1873 0); 1874 return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0); 1875 } 1876 1877 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, 1878 SelectionDAG &DAG) const { 1879 SDLoc DL(N); 1880 EVT Ty = getPointerTy(DAG.getDataLayout()); 1881 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits()); 1882 const GlobalValue *GV = N->getGlobal(); 1883 1884 // Use a PC-relative addressing mode to access the global dynamic GOT address. 1885 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to 1886 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)). 1887 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); 1888 SDValue Load = 1889 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0); 1890 1891 // Prepare argument list to generate call. 1892 ArgListTy Args; 1893 ArgListEntry Entry; 1894 Entry.Node = Load; 1895 Entry.Ty = CallTy; 1896 Args.push_back(Entry); 1897 1898 // Setup call to __tls_get_addr. 
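  // The overall sequence is therefore roughly (illustrative):
  //   auipc a0, %tls_gd_pcrel_hi(sym)
  //   addi  a0, a0, %pcrel_lo(...)
  //   call  __tls_get_addr
  // with the call's return value used as the symbol's address.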
1899 TargetLowering::CallLoweringInfo CLI(DAG); 1900 CLI.setDebugLoc(DL) 1901 .setChain(DAG.getEntryNode()) 1902 .setLibCallee(CallingConv::C, CallTy, 1903 DAG.getExternalSymbol("__tls_get_addr", Ty), 1904 std::move(Args)); 1905 1906 return LowerCallTo(CLI).first; 1907 } 1908 1909 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op, 1910 SelectionDAG &DAG) const { 1911 SDLoc DL(Op); 1912 EVT Ty = Op.getValueType(); 1913 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 1914 int64_t Offset = N->getOffset(); 1915 MVT XLenVT = Subtarget.getXLenVT(); 1916 1917 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal()); 1918 1919 if (DAG.getMachineFunction().getFunction().getCallingConv() == 1920 CallingConv::GHC) 1921 report_fatal_error("In GHC calling convention TLS is not supported"); 1922 1923 SDValue Addr; 1924 switch (Model) { 1925 case TLSModel::LocalExec: 1926 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false); 1927 break; 1928 case TLSModel::InitialExec: 1929 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true); 1930 break; 1931 case TLSModel::LocalDynamic: 1932 case TLSModel::GeneralDynamic: 1933 Addr = getDynamicTLSAddr(N, DAG); 1934 break; 1935 } 1936 1937 // In order to maximise the opportunity for common subexpression elimination, 1938 // emit a separate ADD node for the global address offset instead of folding 1939 // it in the global address node. Later peephole optimisations may choose to 1940 // fold it back in when profitable. 1941 if (Offset != 0) 1942 return DAG.getNode(ISD::ADD, DL, Ty, Addr, 1943 DAG.getConstant(Offset, DL, XLenVT)); 1944 return Addr; 1945 } 1946 1947 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { 1948 SDValue CondV = Op.getOperand(0); 1949 SDValue TrueV = Op.getOperand(1); 1950 SDValue FalseV = Op.getOperand(2); 1951 SDLoc DL(Op); 1952 MVT XLenVT = Subtarget.getXLenVT(); 1953 1954 // If the result type is XLenVT and CondV is the output of a SETCC node 1955 // which also operated on XLenVT inputs, then merge the SETCC node into the 1956 // lowered RISCVISD::SELECT_CC to take advantage of the integer 1957 // compare+branch instructions. 
i.e.: 1958 // (select (setcc lhs, rhs, cc), truev, falsev) 1959 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev) 1960 if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC && 1961 CondV.getOperand(0).getSimpleValueType() == XLenVT) { 1962 SDValue LHS = CondV.getOperand(0); 1963 SDValue RHS = CondV.getOperand(1); 1964 auto CC = cast<CondCodeSDNode>(CondV.getOperand(2)); 1965 ISD::CondCode CCVal = CC->get(); 1966 1967 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG); 1968 1969 SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT); 1970 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV}; 1971 return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops); 1972 } 1973 1974 // Otherwise: 1975 // (select condv, truev, falsev) 1976 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev) 1977 SDValue Zero = DAG.getConstant(0, DL, XLenVT); 1978 SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT); 1979 1980 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV}; 1981 1982 return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops); 1983 } 1984 1985 SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const { 1986 SDValue CondV = Op.getOperand(1); 1987 SDLoc DL(Op); 1988 MVT XLenVT = Subtarget.getXLenVT(); 1989 1990 if (CondV.getOpcode() == ISD::SETCC && 1991 CondV.getOperand(0).getValueType() == XLenVT) { 1992 SDValue LHS = CondV.getOperand(0); 1993 SDValue RHS = CondV.getOperand(1); 1994 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get(); 1995 1996 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG); 1997 1998 SDValue TargetCC = DAG.getCondCode(CCVal); 1999 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0), 2000 LHS, RHS, TargetCC, Op.getOperand(2)); 2001 } 2002 2003 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0), 2004 CondV, DAG.getConstant(0, DL, XLenVT), 2005 DAG.getCondCode(ISD::SETNE), Op.getOperand(2)); 2006 } 2007 2008 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { 2009 MachineFunction &MF = DAG.getMachineFunction(); 2010 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>(); 2011 2012 SDLoc DL(Op); 2013 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), 2014 getPointerTy(MF.getDataLayout())); 2015 2016 // vastart just stores the address of the VarArgsFrameIndex slot into the 2017 // memory location argument. 
2018 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 2019 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1), 2020 MachinePointerInfo(SV)); 2021 } 2022 2023 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op, 2024 SelectionDAG &DAG) const { 2025 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 2026 MachineFunction &MF = DAG.getMachineFunction(); 2027 MachineFrameInfo &MFI = MF.getFrameInfo(); 2028 MFI.setFrameAddressIsTaken(true); 2029 Register FrameReg = RI.getFrameRegister(MF); 2030 int XLenInBytes = Subtarget.getXLen() / 8; 2031 2032 EVT VT = Op.getValueType(); 2033 SDLoc DL(Op); 2034 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT); 2035 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 2036 while (Depth--) { 2037 int Offset = -(XLenInBytes * 2); 2038 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr, 2039 DAG.getIntPtrConstant(Offset, DL)); 2040 FrameAddr = 2041 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo()); 2042 } 2043 return FrameAddr; 2044 } 2045 2046 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op, 2047 SelectionDAG &DAG) const { 2048 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 2049 MachineFunction &MF = DAG.getMachineFunction(); 2050 MachineFrameInfo &MFI = MF.getFrameInfo(); 2051 MFI.setReturnAddressIsTaken(true); 2052 MVT XLenVT = Subtarget.getXLenVT(); 2053 int XLenInBytes = Subtarget.getXLen() / 8; 2054 2055 if (verifyReturnAddressArgumentIsConstant(Op, DAG)) 2056 return SDValue(); 2057 2058 EVT VT = Op.getValueType(); 2059 SDLoc DL(Op); 2060 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 2061 if (Depth) { 2062 int Off = -XLenInBytes; 2063 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG); 2064 SDValue Offset = DAG.getConstant(Off, DL, VT); 2065 return DAG.getLoad(VT, DL, DAG.getEntryNode(), 2066 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), 2067 MachinePointerInfo()); 2068 } 2069 2070 // Return the value of the return address register, marking it an implicit 2071 // live-in. 
2072 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT)); 2073 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT); 2074 } 2075 2076 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op, 2077 SelectionDAG &DAG) const { 2078 SDLoc DL(Op); 2079 SDValue Lo = Op.getOperand(0); 2080 SDValue Hi = Op.getOperand(1); 2081 SDValue Shamt = Op.getOperand(2); 2082 EVT VT = Lo.getValueType(); 2083 2084 // if Shamt-XLEN < 0: // Shamt < XLEN 2085 // Lo = Lo << Shamt 2086 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt)) 2087 // else: 2088 // Lo = 0 2089 // Hi = Lo << (Shamt-XLEN) 2090 2091 SDValue Zero = DAG.getConstant(0, DL, VT); 2092 SDValue One = DAG.getConstant(1, DL, VT); 2093 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); 2094 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); 2095 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); 2096 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); 2097 2098 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); 2099 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One); 2100 SDValue ShiftRightLo = 2101 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt); 2102 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); 2103 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); 2104 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen); 2105 2106 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); 2107 2108 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero); 2109 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 2110 2111 SDValue Parts[2] = {Lo, Hi}; 2112 return DAG.getMergeValues(Parts, DL); 2113 } 2114 2115 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, 2116 bool IsSRA) const { 2117 SDLoc DL(Op); 2118 SDValue Lo = Op.getOperand(0); 2119 SDValue Hi = Op.getOperand(1); 2120 SDValue Shamt = Op.getOperand(2); 2121 EVT VT = Lo.getValueType(); 2122 2123 // SRA expansion: 2124 // if Shamt-XLEN < 0: // Shamt < XLEN 2125 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt)) 2126 // Hi = Hi >>s Shamt 2127 // else: 2128 // Lo = Hi >>s (Shamt-XLEN); 2129 // Hi = Hi >>s (XLEN-1) 2130 // 2131 // SRL expansion: 2132 // if Shamt-XLEN < 0: // Shamt < XLEN 2133 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt)) 2134 // Hi = Hi >>u Shamt 2135 // else: 2136 // Lo = Hi >>u (Shamt-XLEN); 2137 // Hi = 0; 2138 2139 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL; 2140 2141 SDValue Zero = DAG.getConstant(0, DL, VT); 2142 SDValue One = DAG.getConstant(1, DL, VT); 2143 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); 2144 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); 2145 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); 2146 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); 2147 2148 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt); 2149 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One); 2150 SDValue ShiftLeftHi = 2151 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt); 2152 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi); 2153 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt); 2154 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen); 2155 SDValue HiFalse = 2156 IsSRA ? 
DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero; 2157 2158 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); 2159 2160 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse); 2161 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 2162 2163 SDValue Parts[2] = {Lo, Hi}; 2164 return DAG.getMergeValues(Parts, DL); 2165 } 2166 2167 // Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is 2168 // illegal (currently only vXi64 RV32). 2169 // FIXME: We could also catch non-constant sign-extended i32 values and lower 2170 // them to SPLAT_VECTOR_I64 2171 SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op, 2172 SelectionDAG &DAG) const { 2173 SDLoc DL(Op); 2174 EVT VecVT = Op.getValueType(); 2175 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 && 2176 "Unexpected SPLAT_VECTOR_PARTS lowering"); 2177 2178 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!"); 2179 SDValue Lo = Op.getOperand(0); 2180 SDValue Hi = Op.getOperand(1); 2181 2182 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) { 2183 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue(); 2184 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue(); 2185 // If Hi constant is all the same sign bit as Lo, lower this as a custom 2186 // node in order to try and match RVV vector/scalar instructions. 2187 if ((LoC >> 31) == HiC) 2188 return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo); 2189 } 2190 2191 // Else, on RV32 we lower an i64-element SPLAT_VECTOR thus, being careful not 2192 // to accidentally sign-extend the 32-bit halves to the e64 SEW: 2193 // vmv.v.x vX, hi 2194 // vsll.vx vX, vX, /*32*/ 2195 // vmv.v.x vY, lo 2196 // vsll.vx vY, vY, /*32*/ 2197 // vsrl.vx vY, vY, /*32*/ 2198 // vor.vv vX, vX, vY 2199 SDValue ThirtyTwoV = DAG.getConstant(32, DL, VecVT); 2200 2201 Lo = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo); 2202 Lo = DAG.getNode(ISD::SHL, DL, VecVT, Lo, ThirtyTwoV); 2203 Lo = DAG.getNode(ISD::SRL, DL, VecVT, Lo, ThirtyTwoV); 2204 2205 if (isNullConstant(Hi)) 2206 return Lo; 2207 2208 Hi = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Hi); 2209 Hi = DAG.getNode(ISD::SHL, DL, VecVT, Hi, ThirtyTwoV); 2210 2211 return DAG.getNode(ISD::OR, DL, VecVT, Lo, Hi); 2212 } 2213 2214 // Custom-lower extensions from mask vectors by using a vselect either with 1 2215 // for zero/any-extension or -1 for sign-extension: 2216 // (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0) 2217 // Note that any-extension is lowered identically to zero-extension. 2218 SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG, 2219 int64_t ExtTrueVal) const { 2220 SDLoc DL(Op); 2221 MVT VecVT = Op.getSimpleValueType(); 2222 SDValue Src = Op.getOperand(0); 2223 // Only custom-lower extensions from mask types 2224 assert(Src.getValueType().isVector() && 2225 Src.getValueType().getVectorElementType() == MVT::i1); 2226 2227 MVT XLenVT = Subtarget.getXLenVT(); 2228 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT); 2229 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT); 2230 2231 if (VecVT.isScalableVector()) { 2232 // Be careful not to introduce illegal scalar types at this stage, and be 2233 // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is 2234 // illegal and must be expanded. Since we know that the constants are 2235 // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly. 
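    // For example (illustrative): sign-extending nxv2i1 to nxv2i64 on RV32
    // becomes (vselect mask, (SPLAT_VECTOR_I64 -1), (SPLAT_VECTOR_I64 0));
    // on RV64, or for narrower element types, plain SPLAT_VECTOR splats are
    // used instead.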
2236 bool IsRV32E64 = 2237 !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64; 2238 2239 if (!IsRV32E64) { 2240 SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero); 2241 SplatTrueVal = DAG.getSplatVector(VecVT, DL, SplatTrueVal); 2242 } else { 2243 SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero); 2244 SplatTrueVal = 2245 DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatTrueVal); 2246 } 2247 2248 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero); 2249 } 2250 2251 MVT ContainerVT = getContainerForFixedLengthVector(VecVT); 2252 MVT I1ContainerVT = 2253 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 2254 2255 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget); 2256 2257 SDValue Mask, VL; 2258 std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 2259 2260 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero, VL); 2261 SplatTrueVal = 2262 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatTrueVal, VL); 2263 SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, 2264 SplatTrueVal, SplatZero, VL); 2265 2266 return convertFromScalableVector(VecVT, Select, DAG, Subtarget); 2267 } 2268 2269 SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV( 2270 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const { 2271 MVT ExtVT = Op.getSimpleValueType(); 2272 // Only custom-lower extensions from fixed-length vector types. 2273 if (!ExtVT.isFixedLengthVector()) 2274 return Op; 2275 MVT VT = Op.getOperand(0).getSimpleValueType(); 2276 // Grab the canonical container type for the extended type. Infer the smaller 2277 // type from that to ensure the same number of vector elements, as we know 2278 // the LMUL will be sufficient to hold the smaller type. 2279 MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT); 2280 // Get the extended container type manually to ensure the same number of 2281 // vector elements between source and dest. 2282 MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(), 2283 ContainerExtVT.getVectorElementCount()); 2284 2285 SDValue Op1 = 2286 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget); 2287 2288 SDLoc DL(Op); 2289 SDValue Mask, VL; 2290 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 2291 2292 SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL); 2293 2294 return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget); 2295 } 2296 2297 // Custom-lower truncations from vectors to mask vectors by using a mask and a 2298 // setcc operation: 2299 // (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne) 2300 SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op, 2301 SelectionDAG &DAG) const { 2302 SDLoc DL(Op); 2303 EVT MaskVT = Op.getValueType(); 2304 // Only expect to custom-lower truncations to mask types 2305 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 && 2306 "Unexpected type for vector mask lowering"); 2307 SDValue Src = Op.getOperand(0); 2308 MVT VecVT = Src.getSimpleValueType(); 2309 2310 // If this is a fixed vector, we need to convert it to a scalable vector. 
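  // For example (illustrative): (v4i1 = trunc v4i32 x) is lowered on the
  // container type as (setcc (and x, splat(1)), splat(0), ne) using the
  // VL-aware AND_VL/SETCC_VL nodes, then converted back to a fixed-length
  // mask vector.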
2311 MVT ContainerVT = VecVT; 2312 if (VecVT.isFixedLengthVector()) { 2313 ContainerVT = getContainerForFixedLengthVector(VecVT); 2314 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); 2315 } 2316 2317 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT()); 2318 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); 2319 2320 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatOne); 2321 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero); 2322 2323 if (VecVT.isScalableVector()) { 2324 SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne); 2325 return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE); 2326 } 2327 2328 SDValue Mask, VL; 2329 std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 2330 2331 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1); 2332 SDValue Trunc = 2333 DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne, Mask, VL); 2334 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, Trunc, SplatZero, 2335 DAG.getCondCode(ISD::SETNE), Mask, VL); 2336 return convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget); 2337 } 2338 2339 // Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the 2340 // first position of a vector, and that vector is slid up to the insert index. 2341 // By limiting the active vector length to index+1 and merging with the 2342 // original vector (with an undisturbed tail policy for elements >= VL), we 2343 // achieve the desired result of leaving all elements untouched except the one 2344 // at VL-1, which is replaced with the desired value. 2345 SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, 2346 SelectionDAG &DAG) const { 2347 SDLoc DL(Op); 2348 MVT VecVT = Op.getSimpleValueType(); 2349 SDValue Vec = Op.getOperand(0); 2350 SDValue Val = Op.getOperand(1); 2351 SDValue Idx = Op.getOperand(2); 2352 2353 MVT ContainerVT = VecVT; 2354 // If the operand is a fixed-length vector, convert to a scalable one. 2355 if (VecVT.isFixedLengthVector()) { 2356 ContainerVT = getContainerForFixedLengthVector(VecVT); 2357 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 2358 } 2359 2360 MVT XLenVT = Subtarget.getXLenVT(); 2361 2362 SDValue Zero = DAG.getConstant(0, DL, XLenVT); 2363 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64; 2364 // Even i64-element vectors on RV32 can be lowered without scalar 2365 // legalization if the most-significant 32 bits of the value are not affected 2366 // by the sign-extension of the lower 32 bits. 2367 // TODO: We could also catch sign extensions of a 32-bit value. 2368 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) { 2369 const auto *CVal = cast<ConstantSDNode>(Val); 2370 if (isInt<32>(CVal->getSExtValue())) { 2371 IsLegalInsert = true; 2372 Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32); 2373 } 2374 } 2375 2376 SDValue Mask, VL; 2377 std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 2378 2379 SDValue ValInVec; 2380 2381 if (IsLegalInsert) { 2382 if (isNullConstant(Idx)) 2383 return DAG.getNode(RISCVISD::VMV_S_XF_VL, DL, ContainerVT, Vec, Val, VL); 2384 ValInVec = DAG.getNode(RISCVISD::VMV_S_XF_VL, DL, ContainerVT, 2385 DAG.getUNDEF(ContainerVT), Val, VL); 2386 } else { 2387 // On RV32, i64-element vectors must be specially handled to place the 2388 // value at element 0, by using two vslide1up instructions in sequence on 2389 // the i32 split lo/hi value. 
Use an equivalently-sized i32 vector for 2390 // this. 2391 SDValue One = DAG.getConstant(1, DL, XLenVT); 2392 SDValue ValLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, Zero); 2393 SDValue ValHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, One); 2394 MVT I32ContainerVT = 2395 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2); 2396 SDValue I32Mask = 2397 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first; 2398 // Limit the active VL to two. 2399 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT); 2400 // Note: We can't pass a UNDEF to the first VSLIDE1UP_VL since an untied 2401 // undef doesn't obey the earlyclobber constraint. Just splat a zero value. 2402 ValInVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, I32ContainerVT, Zero, 2403 InsertI64VL); 2404 // First slide in the hi value, then the lo in underneath it. 2405 ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec, 2406 ValHi, I32Mask, InsertI64VL); 2407 ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec, 2408 ValLo, I32Mask, InsertI64VL); 2409 // Bitcast back to the right container type. 2410 ValInVec = DAG.getBitcast(ContainerVT, ValInVec); 2411 } 2412 2413 // Now that the value is in a vector, slide it into position. 2414 SDValue InsertVL = 2415 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT)); 2416 SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec, 2417 ValInVec, Idx, Mask, InsertVL); 2418 if (!VecVT.isFixedLengthVector()) 2419 return Slideup; 2420 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget); 2421 } 2422 2423 // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then 2424 // extract the first element: (extractelt (slidedown vec, idx), 0). For integer 2425 // types this is done using VMV_X_S to allow us to glean information about the 2426 // sign bits of the result. 2427 SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, 2428 SelectionDAG &DAG) const { 2429 SDLoc DL(Op); 2430 SDValue Idx = Op.getOperand(1); 2431 SDValue Vec = Op.getOperand(0); 2432 EVT EltVT = Op.getValueType(); 2433 MVT VecVT = Vec.getSimpleValueType(); 2434 MVT XLenVT = Subtarget.getXLenVT(); 2435 2436 if (VecVT.getVectorElementType() == MVT::i1) { 2437 // FIXME: For now we just promote to an i8 vector and extract from that, 2438 // but this is probably not optimal. 2439 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount()); 2440 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec); 2441 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx); 2442 } 2443 2444 // If this is a fixed vector, we need to convert it to a scalable vector. 2445 MVT ContainerVT = VecVT; 2446 if (VecVT.isFixedLengthVector()) { 2447 ContainerVT = getContainerForFixedLengthVector(VecVT); 2448 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 2449 } 2450 2451 // If the index is 0, the vector is already in the right position. 2452 if (!isNullConstant(Idx)) { 2453 // Use a VL of 1 to avoid processing more elements than we need. 2454 SDValue VL = DAG.getConstant(1, DL, XLenVT); 2455 MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 2456 SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); 2457 Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT, 2458 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL); 2459 } 2460 2461 if (!EltVT.isInteger()) { 2462 // Floating-point extracts are handled in TableGen. 
2463 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, 2464 DAG.getConstant(0, DL, XLenVT)); 2465 } 2466 2467 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec); 2468 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0); 2469 } 2470 2471 // Called by type legalization to handle splat of i64 on RV32. 2472 // FIXME: We can optimize this when the type has sign or zero bits in one 2473 // of the halves. 2474 static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Scalar, 2475 SDValue VL, SelectionDAG &DAG) { 2476 SDValue ThirtyTwoV = DAG.getConstant(32, DL, VT); 2477 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar, 2478 DAG.getConstant(0, DL, MVT::i32)); 2479 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar, 2480 DAG.getConstant(1, DL, MVT::i32)); 2481 2482 // vmv.v.x vX, hi 2483 // vsll.vx vX, vX, /*32*/ 2484 // vmv.v.x vY, lo 2485 // vsll.vx vY, vY, /*32*/ 2486 // vsrl.vx vY, vY, /*32*/ 2487 // vor.vv vX, vX, vY 2488 MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount()); 2489 SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); 2490 Lo = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Lo, VL); 2491 Lo = DAG.getNode(RISCVISD::SHL_VL, DL, VT, Lo, ThirtyTwoV, Mask, VL); 2492 Lo = DAG.getNode(RISCVISD::SRL_VL, DL, VT, Lo, ThirtyTwoV, Mask, VL); 2493 2494 Hi = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Hi, VL); 2495 Hi = DAG.getNode(RISCVISD::SHL_VL, DL, VT, Hi, ThirtyTwoV, Mask, VL); 2496 2497 return DAG.getNode(RISCVISD::OR_VL, DL, VT, Lo, Hi, Mask, VL); 2498 } 2499 2500 // Some RVV intrinsics may claim that they want an integer operand to be 2501 // promoted or expanded. 2502 static SDValue lowerVectorIntrinsicSplats(SDValue Op, SelectionDAG &DAG, 2503 const RISCVSubtarget &Subtarget) { 2504 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || 2505 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) && 2506 "Unexpected opcode"); 2507 2508 if (!Subtarget.hasStdExtV()) 2509 return SDValue(); 2510 2511 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN; 2512 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0); 2513 SDLoc DL(Op); 2514 2515 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II = 2516 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo); 2517 if (!II || !II->SplatOperand) 2518 return SDValue(); 2519 2520 unsigned SplatOp = II->SplatOperand + HasChain; 2521 assert(SplatOp < Op.getNumOperands()); 2522 2523 SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end()); 2524 SDValue &ScalarOp = Operands[SplatOp]; 2525 MVT OpVT = ScalarOp.getSimpleValueType(); 2526 MVT VT = Op.getSimpleValueType(); 2527 MVT XLenVT = Subtarget.getXLenVT(); 2528 2529 // If this isn't a scalar, or its type is XLenVT we're done. 2530 if (!OpVT.isScalarInteger() || OpVT == XLenVT) 2531 return SDValue(); 2532 2533 // Simplest case is that the operand needs to be promoted to XLenVT. 2534 if (OpVT.bitsLT(XLenVT)) { 2535 // If the operand is a constant, sign extend to increase our chances 2536 // of being able to use a .vi instruction. ANY_EXTEND would become a 2537 // a zero extend and the simm5 check in isel would fail. 2538 // FIXME: Should we ignore the upper bits in isel instead? 2539 unsigned ExtOpc = 2540 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND; 2541 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp); 2542 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands); 2543 } 2544 2545 // The more complex case is when the scalar is larger than XLenVT. 
2546 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
2547 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
2548
2549 // If this is a sign-extended 32-bit constant, we can truncate it and rely
2550 // on the instruction to sign-extend since SEW>XLEN.
2551 if (auto *CVal = dyn_cast<ConstantSDNode>(ScalarOp)) {
2552 if (isInt<32>(CVal->getSExtValue())) {
2553 ScalarOp = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
2554 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
2555 }
2556 }
2557
2558 // We need to convert the scalar to a splat vector.
2559 // FIXME: Can we implicitly truncate the scalar if it is known to
2560 // be sign extended?
2561 // VL should be the last operand.
2562 SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
2563 assert(VL.getValueType() == XLenVT);
2564 ScalarOp = splatSplitI64WithVL(DL, VT, ScalarOp, VL, DAG);
2565 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
2566 }
2567
2568 SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
2569 SelectionDAG &DAG) const {
2570 unsigned IntNo = Op.getConstantOperandVal(0);
2571 SDLoc DL(Op);
2572 MVT XLenVT = Subtarget.getXLenVT();
2573
2574 switch (IntNo) {
2575 default:
2576 break; // Don't custom lower most intrinsics.
2577 case Intrinsic::thread_pointer: {
2578 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2579 return DAG.getRegister(RISCV::X4, PtrVT);
2580 }
2581 case Intrinsic::riscv_vmv_x_s:
2582 assert(Op.getValueType() == XLenVT && "Unexpected VT!");
2583 return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
2584 Op.getOperand(1));
2585 case Intrinsic::riscv_vmv_v_x: {
2586 SDValue Scalar = Op.getOperand(1);
2587 if (Scalar.getValueType().bitsLE(XLenVT)) {
2588 unsigned ExtOpc =
2589 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
2590 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
2591 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, Op.getValueType(), Scalar,
2592 Op.getOperand(2));
2593 }
2594
2595 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
2596
2597 // If this is a sign-extended 32-bit constant, we can truncate it and rely
2598 // on the instruction to sign-extend since SEW>XLEN.
2599 if (auto *CVal = dyn_cast<ConstantSDNode>(Scalar)) {
2600 if (isInt<32>(CVal->getSExtValue()))
2601 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, Op.getValueType(),
2602 DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32),
2603 Op.getOperand(2));
2604 }
2605
2606 // Otherwise use the more complicated splatting algorithm.
2607 return splatSplitI64WithVL(DL, Op.getSimpleValueType(), Scalar,
2608 Op.getOperand(2), DAG);
2609 }
2610 case Intrinsic::riscv_vfmv_v_f:
2611 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
2612 Op.getOperand(1), Op.getOperand(2));
2613 case Intrinsic::riscv_vmv_s_x: {
2614 SDValue Scalar = Op.getOperand(2);
2615
2616 if (Scalar.getValueType().bitsLE(XLenVT)) {
2617 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
2618 return DAG.getNode(RISCVISD::VMV_S_XF_VL, DL, Op.getValueType(),
2619 Op.getOperand(1), Scalar, Op.getOperand(3));
2620 }
2621
2622 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
2623
2624 // This is an i64 value that lives in two scalar registers. We have to
2625 // insert this in a convoluted way. First we build a vXi64 splat containing
2626 // the two values that we assemble using some bit math. Next we'll use
2627 // vid.v and vmseq to build a mask with bit 0 set.
Then we'll use that mask 2628 // to merge element 0 from our splat into the source vector. 2629 // FIXME: This is probably not the best way to do this, but it is 2630 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting 2631 // point. 2632 // vmv.v.x vX, hi 2633 // vsll.vx vX, vX, /*32*/ 2634 // vmv.v.x vY, lo 2635 // vsll.vx vY, vY, /*32*/ 2636 // vsrl.vx vY, vY, /*32*/ 2637 // vor.vv vX, vX, vY 2638 // 2639 // vid.v vVid 2640 // vmseq.vx mMask, vVid, 0 2641 // vmerge.vvm vDest, vSrc, vVal, mMask 2642 MVT VT = Op.getSimpleValueType(); 2643 SDValue Vec = Op.getOperand(1); 2644 SDValue VL = Op.getOperand(3); 2645 2646 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, Scalar, VL, DAG); 2647 SDValue SplattedIdx = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, 2648 DAG.getConstant(0, DL, MVT::i32), VL); 2649 2650 MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount()); 2651 SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); 2652 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL); 2653 SDValue SelectCond = 2654 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, VID, SplattedIdx, 2655 DAG.getCondCode(ISD::SETEQ), Mask, VL); 2656 return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal, 2657 Vec, VL); 2658 } 2659 } 2660 2661 return lowerVectorIntrinsicSplats(Op, DAG, Subtarget); 2662 } 2663 2664 SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, 2665 SelectionDAG &DAG) const { 2666 return lowerVectorIntrinsicSplats(Op, DAG, Subtarget); 2667 } 2668 2669 static MVT getLMUL1VT(MVT VT) { 2670 assert(VT.getVectorElementType().getSizeInBits() <= 64 && 2671 "Unexpected vector MVT"); 2672 return MVT::getScalableVectorVT( 2673 VT.getVectorElementType(), 2674 RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits()); 2675 } 2676 2677 static unsigned getRVVReductionOp(unsigned ISDOpcode) { 2678 switch (ISDOpcode) { 2679 default: 2680 llvm_unreachable("Unhandled reduction"); 2681 case ISD::VECREDUCE_ADD: 2682 return RISCVISD::VECREDUCE_ADD_VL; 2683 case ISD::VECREDUCE_UMAX: 2684 return RISCVISD::VECREDUCE_UMAX_VL; 2685 case ISD::VECREDUCE_SMAX: 2686 return RISCVISD::VECREDUCE_SMAX_VL; 2687 case ISD::VECREDUCE_UMIN: 2688 return RISCVISD::VECREDUCE_UMIN_VL; 2689 case ISD::VECREDUCE_SMIN: 2690 return RISCVISD::VECREDUCE_SMIN_VL; 2691 case ISD::VECREDUCE_AND: 2692 return RISCVISD::VECREDUCE_AND_VL; 2693 case ISD::VECREDUCE_OR: 2694 return RISCVISD::VECREDUCE_OR_VL; 2695 case ISD::VECREDUCE_XOR: 2696 return RISCVISD::VECREDUCE_XOR_VL; 2697 } 2698 } 2699 2700 SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op, 2701 SelectionDAG &DAG) const { 2702 SDLoc DL(Op); 2703 SDValue Vec = Op.getOperand(0); 2704 EVT VecEVT = Vec.getValueType(); 2705 2706 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode()); 2707 2708 // Due to ordering in legalize types we may have a vector type that needs to 2709 // be split. Do that manually so we can get down to a legal type. 2710 while (getTypeAction(*DAG.getContext(), VecEVT) == 2711 TargetLowering::TypeSplitVector) { 2712 SDValue Lo, Hi; 2713 std::tie(Lo, Hi) = DAG.SplitVector(Vec, DL); 2714 VecEVT = Lo.getValueType(); 2715 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi); 2716 } 2717 2718 // TODO: The type may need to be widened rather than split. Or widened before 2719 // it can be split. 
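  // For example (illustrative): a VECREDUCE_ADD over v16i32 that needs
  // splitting is first reduced to an ADD of its two v8i32 halves above; the
  // RVV reduction below then operates on the remaining legal vector.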
2720 if (!isTypeLegal(VecEVT)) 2721 return SDValue(); 2722 2723 MVT VecVT = VecEVT.getSimpleVT(); 2724 MVT VecEltVT = VecVT.getVectorElementType(); 2725 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode()); 2726 2727 MVT ContainerVT = VecVT; 2728 if (VecVT.isFixedLengthVector()) { 2729 ContainerVT = getContainerForFixedLengthVector(VecVT); 2730 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 2731 } 2732 2733 MVT M1VT = getLMUL1VT(ContainerVT); 2734 2735 SDValue Mask, VL; 2736 std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 2737 2738 // FIXME: This is a VLMAX splat which might be too large and can prevent 2739 // vsetvli removal. 2740 SDValue NeutralElem = 2741 DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags()); 2742 SDValue IdentitySplat = DAG.getSplatVector(M1VT, DL, NeutralElem); 2743 SDValue Reduction = 2744 DAG.getNode(RVVOpcode, DL, M1VT, Vec, IdentitySplat, Mask, VL); 2745 SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction, 2746 DAG.getConstant(0, DL, Subtarget.getXLenVT())); 2747 return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType()); 2748 } 2749 2750 // Given a reduction op, this function returns the matching reduction opcode, 2751 // the vector SDValue and the scalar SDValue required to lower this to a 2752 // RISCVISD node. 2753 static std::tuple<unsigned, SDValue, SDValue> 2754 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) { 2755 SDLoc DL(Op); 2756 switch (Op.getOpcode()) { 2757 default: 2758 llvm_unreachable("Unhandled reduction"); 2759 case ISD::VECREDUCE_FADD: 2760 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), 2761 DAG.getConstantFP(0.0, DL, EltVT)); 2762 case ISD::VECREDUCE_SEQ_FADD: 2763 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1), 2764 Op.getOperand(0)); 2765 } 2766 } 2767 2768 SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op, 2769 SelectionDAG &DAG) const { 2770 SDLoc DL(Op); 2771 MVT VecEltVT = Op.getSimpleValueType(); 2772 2773 unsigned RVVOpcode; 2774 SDValue VectorVal, ScalarVal; 2775 std::tie(RVVOpcode, VectorVal, ScalarVal) = 2776 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT); 2777 MVT VecVT = VectorVal.getSimpleValueType(); 2778 2779 MVT ContainerVT = VecVT; 2780 if (VecVT.isFixedLengthVector()) { 2781 ContainerVT = getContainerForFixedLengthVector(VecVT); 2782 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget); 2783 } 2784 2785 MVT M1VT = getLMUL1VT(VectorVal.getSimpleValueType()); 2786 2787 SDValue Mask, VL; 2788 std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 2789 2790 // FIXME: This is a VLMAX splat which might be too large and can prevent 2791 // vsetvli removal. 
2792 SDValue ScalarSplat = DAG.getSplatVector(M1VT, DL, ScalarVal);
2793 SDValue Reduction =
2794 DAG.getNode(RVVOpcode, DL, M1VT, VectorVal, ScalarSplat, Mask, VL);
2795 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
2796 DAG.getConstant(0, DL, Subtarget.getXLenVT()));
2797 }
2798
2799 SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
2800 SelectionDAG &DAG) const {
2801 SDValue Vec = Op.getOperand(0);
2802 SDValue SubVec = Op.getOperand(1);
2803 MVT VecVT = Vec.getSimpleValueType();
2804 MVT SubVecVT = SubVec.getSimpleValueType();
2805
2806 SDLoc DL(Op);
2807 MVT XLenVT = Subtarget.getXLenVT();
2808 unsigned OrigIdx = Op.getConstantOperandVal(2);
2809 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
2810
2811 // We don't have the ability to slide mask vectors up indexed by their i1
2812 // elements; the smallest we can do is i8. Often we are able to bitcast to
2813 // equivalent i8 vectors. Note that when inserting a fixed-length vector
2814 // into a scalable one, we might not necessarily have enough scalable
2815 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
2816 if (SubVecVT.getVectorElementType() == MVT::i1 &&
2817 (OrigIdx != 0 || !Vec.isUndef())) {
2818 if (VecVT.getVectorMinNumElements() >= 8 &&
2819 SubVecVT.getVectorMinNumElements() >= 8) {
2820 assert(OrigIdx % 8 == 0 && "Invalid index");
2821 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
2822 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
2823 "Unexpected mask vector lowering");
2824 OrigIdx /= 8;
2825 SubVecVT =
2826 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
2827 SubVecVT.isScalableVector());
2828 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
2829 VecVT.isScalableVector());
2830 Vec = DAG.getBitcast(VecVT, Vec);
2831 SubVec = DAG.getBitcast(SubVecVT, SubVec);
2832 } else {
2833 // We can't slide this mask vector up indexed by its i1 elements.
2834 // This poses a problem when we wish to insert a scalable vector which
2835 // can't be re-expressed as a larger type. Just choose the slow path and
2836 // extend to a larger type, then truncate back down.
2837 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
2838 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
2839 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
2840 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
2841 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
2842 Op.getOperand(2));
2843 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
2844 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
2845 }
2846 }
2847
2848 // If the subvector is a fixed-length type, we cannot use subregister
2849 // manipulation to simplify the codegen; we don't know which register of a
2850 // LMUL group contains the specific subvector as we only know the minimum
2851 // register size. Therefore we must slide the vector group up the full
2852 // amount.
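  // For example (illustrative): inserting a v2i32 subvector at index 2 is
  // done with a vslideup of slide amount 2 and VL=4, leaving elements 0 and 1
  // of the destination vector undisturbed.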
2853 if (SubVecVT.isFixedLengthVector()) { 2854 if (OrigIdx == 0 && Vec.isUndef()) 2855 return Op; 2856 MVT ContainerVT = VecVT; 2857 if (VecVT.isFixedLengthVector()) { 2858 ContainerVT = getContainerForFixedLengthVector(VecVT); 2859 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 2860 } 2861 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, 2862 DAG.getUNDEF(ContainerVT), SubVec, 2863 DAG.getConstant(0, DL, XLenVT)); 2864 SDValue Mask = 2865 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first; 2866 // Set the vector length to only the number of elements we care about. Note 2867 // that for slideup this includes the offset. 2868 SDValue VL = 2869 DAG.getConstant(OrigIdx + SubVecVT.getVectorNumElements(), DL, XLenVT); 2870 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT); 2871 SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec, 2872 SubVec, SlideupAmt, Mask, VL); 2873 if (!VecVT.isFixedLengthVector()) 2874 return Slideup; 2875 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget); 2876 } 2877 2878 unsigned SubRegIdx, RemIdx; 2879 std::tie(SubRegIdx, RemIdx) = 2880 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 2881 VecVT, SubVecVT, OrigIdx, TRI); 2882 2883 RISCVVLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT); 2884 bool IsSubVecPartReg = SubVecLMUL == RISCVVLMUL::LMUL_F2 || 2885 SubVecLMUL == RISCVVLMUL::LMUL_F4 || 2886 SubVecLMUL == RISCVVLMUL::LMUL_F8; 2887 2888 // 1. If the Idx has been completely eliminated and this subvector's size is 2889 // a vector register or a multiple thereof, or the surrounding elements are 2890 // undef, then this is a subvector insert which naturally aligns to a vector 2891 // register. These can easily be handled using subregister manipulation. 2892 // 2. If the subvector is smaller than a vector register, then the insertion 2893 // must preserve the undisturbed elements of the register. We do this by 2894 // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type 2895 // (which resolves to a subregister copy), performing a VSLIDEUP to place the 2896 // subvector within the vector register, and an INSERT_SUBVECTOR of that 2897 // LMUL=1 type back into the larger vector (resolving to another subregister 2898 // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type 2899 // to avoid allocating a large register group to hold our subvector. 2900 if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef())) 2901 return Op; 2902 2903 // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements 2904 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy 2905 // (in our case undisturbed). This means we can set up a subvector insertion 2906 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the 2907 // size of the subvector. 2908 MVT InterSubVT = VecVT; 2909 SDValue AlignedExtract = Vec; 2910 unsigned AlignedIdx = OrigIdx - RemIdx; 2911 if (VecVT.bitsGT(getLMUL1VT(VecVT))) { 2912 InterSubVT = getLMUL1VT(VecVT); 2913 // Extract a subvector equal to the nearest full vector register type. This 2914 // should resolve to a EXTRACT_SUBREG instruction. 2915 AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec, 2916 DAG.getConstant(AlignedIdx, DL, XLenVT)); 2917 } 2918 2919 SDValue SlideupAmt = DAG.getConstant(RemIdx, DL, XLenVT); 2920 // For scalable vectors this must be further multiplied by vscale. 
2921 SlideupAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlideupAmt); 2922 2923 SDValue Mask, VL; 2924 std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget); 2925 2926 // Construct the vector length corresponding to RemIdx + length(SubVecVT). 2927 VL = DAG.getConstant(SubVecVT.getVectorMinNumElements(), DL, XLenVT); 2928 VL = DAG.getNode(ISD::VSCALE, DL, XLenVT, VL); 2929 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL); 2930 2931 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT, 2932 DAG.getUNDEF(InterSubVT), SubVec, 2933 DAG.getConstant(0, DL, XLenVT)); 2934 2935 SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, InterSubVT, 2936 AlignedExtract, SubVec, SlideupAmt, Mask, VL); 2937 2938 // If required, insert this subvector back into the correct vector register. 2939 // This should resolve to an INSERT_SUBREG instruction. 2940 if (VecVT.bitsGT(InterSubVT)) 2941 Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, Slideup, 2942 DAG.getConstant(AlignedIdx, DL, XLenVT)); 2943 2944 // We might have bitcast from a mask type: cast back to the original type if 2945 // required. 2946 return DAG.getBitcast(Op.getSimpleValueType(), Slideup); 2947 } 2948 2949 SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op, 2950 SelectionDAG &DAG) const { 2951 SDValue Vec = Op.getOperand(0); 2952 MVT SubVecVT = Op.getSimpleValueType(); 2953 MVT VecVT = Vec.getSimpleValueType(); 2954 2955 SDLoc DL(Op); 2956 MVT XLenVT = Subtarget.getXLenVT(); 2957 unsigned OrigIdx = Op.getConstantOperandVal(1); 2958 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); 2959 2960 // We don't have the ability to slide mask vectors down indexed by their i1 2961 // elements; the smallest we can do is i8. Often we are able to bitcast to 2962 // equivalent i8 vectors. Note that when extracting a fixed-length vector 2963 // from a scalable one, we might not necessarily have enough scalable 2964 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid. 2965 if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) { 2966 if (VecVT.getVectorMinNumElements() >= 8 && 2967 SubVecVT.getVectorMinNumElements() >= 8) { 2968 assert(OrigIdx % 8 == 0 && "Invalid index"); 2969 assert(VecVT.getVectorMinNumElements() % 8 == 0 && 2970 SubVecVT.getVectorMinNumElements() % 8 == 0 && 2971 "Unexpected mask vector lowering"); 2972 OrigIdx /= 8; 2973 SubVecVT = 2974 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8, 2975 SubVecVT.isScalableVector()); 2976 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8, 2977 VecVT.isScalableVector()); 2978 Vec = DAG.getBitcast(VecVT, Vec); 2979 } else { 2980 // We can't slide this mask vector down, indexed by its i1 elements. 2981 // This poses a problem when we wish to extract a scalable vector which 2982 // can't be re-expressed as a larger type. Just choose the slow path and 2983 // extend to a larger type, then truncate back down. 2984 // TODO: We could probably improve this when extracting certain fixed 2985 // from fixed, where we can extract as i8 and shift the correct element 2986 // right to reach the desired subvector? 
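// e.g. nxv1i1 = extract nxv2i1, 1 takes this path, as neither type has
// enough minimum elements to be reinterpreted as an i8 vector.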
2987 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8); 2988 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8); 2989 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec); 2990 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec, 2991 Op.getOperand(1)); 2992 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT); 2993 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE); 2994 } 2995 } 2996 2997 // If the subvector vector is a fixed-length type, we cannot use subregister 2998 // manipulation to simplify the codegen; we don't know which register of a 2999 // LMUL group contains the specific subvector as we only know the minimum 3000 // register size. Therefore we must slide the vector group down the full 3001 // amount. 3002 if (SubVecVT.isFixedLengthVector()) { 3003 // With an index of 0 this is a cast-like subvector, which can be performed 3004 // with subregister operations. 3005 if (OrigIdx == 0) 3006 return Op; 3007 MVT ContainerVT = VecVT; 3008 if (VecVT.isFixedLengthVector()) { 3009 ContainerVT = getContainerForFixedLengthVector(VecVT); 3010 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 3011 } 3012 SDValue Mask = 3013 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first; 3014 // Set the vector length to only the number of elements we care about. This 3015 // avoids sliding down elements we're going to discard straight away. 3016 SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT); 3017 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT); 3018 SDValue Slidedown = 3019 DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT, 3020 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL); 3021 // Now we can use a cast-like subvector extract to get the result. 3022 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown, 3023 DAG.getConstant(0, DL, XLenVT)); 3024 } 3025 3026 unsigned SubRegIdx, RemIdx; 3027 std::tie(SubRegIdx, RemIdx) = 3028 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 3029 VecVT, SubVecVT, OrigIdx, TRI); 3030 3031 // If the Idx has been completely eliminated then this is a subvector extract 3032 // which naturally aligns to a vector register. These can easily be handled 3033 // using subregister manipulation. 3034 if (RemIdx == 0) 3035 return Op; 3036 3037 // Else we must shift our vector register directly to extract the subvector. 3038 // Do this using VSLIDEDOWN. 3039 3040 // If the vector type is an LMUL-group type, extract a subvector equal to the 3041 // nearest full vector register type. This should resolve to a EXTRACT_SUBREG 3042 // instruction. 3043 MVT InterSubVT = VecVT; 3044 if (VecVT.bitsGT(getLMUL1VT(VecVT))) { 3045 InterSubVT = getLMUL1VT(VecVT); 3046 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec, 3047 DAG.getConstant(OrigIdx - RemIdx, DL, XLenVT)); 3048 } 3049 3050 // Slide this vector register down by the desired number of elements in order 3051 // to place the desired subvector starting at element 0. 3052 SDValue SlidedownAmt = DAG.getConstant(RemIdx, DL, XLenVT); 3053 // For scalable vectors this must be further multiplied by vscale. 
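// As in the insert lowering above, RemIdx is a count of the type's minimum
// elements, so the slide distance is RemIdx * vscale.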
3054 SlidedownAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlidedownAmt); 3055 3056 SDValue Mask, VL; 3057 std::tie(Mask, VL) = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget); 3058 SDValue Slidedown = 3059 DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, InterSubVT, 3060 DAG.getUNDEF(InterSubVT), Vec, SlidedownAmt, Mask, VL); 3061 3062 // Now the vector is in the right position, extract our final subvector. This 3063 // should resolve to a COPY. 3064 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown, 3065 DAG.getConstant(0, DL, XLenVT)); 3066 3067 // We might have bitcast from a mask type: cast back to the original type if 3068 // required. 3069 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown); 3070 } 3071 3072 // Implement vector_reverse using vrgather.vv with indices determined by 3073 // subtracting the id of each element from (VLMAX-1). This will convert 3074 // the indices like so: 3075 // (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0). 3076 // TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16. 3077 SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op, 3078 SelectionDAG &DAG) const { 3079 SDLoc DL(Op); 3080 MVT VecVT = Op.getSimpleValueType(); 3081 unsigned EltSize = VecVT.getScalarSizeInBits(); 3082 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue(); 3083 3084 unsigned MaxVLMAX = 0; 3085 unsigned VectorBitsMax = Subtarget.getMaxRVVVectorSizeInBits(); 3086 if (VectorBitsMax != 0) 3087 MaxVLMAX = ((VectorBitsMax / EltSize) * MinSize) / RISCV::RVVBitsPerBlock; 3088 3089 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL; 3090 MVT IntVT = VecVT.changeVectorElementTypeToInteger(); 3091 3092 // If this is SEW=8 and VLMAX is unknown or more than 256, we need 3093 // to use vrgatherei16.vv. 3094 // TODO: It's also possible to use vrgatherei16.vv for other types to 3095 // decrease register width for the index calculation. 3096 if ((MaxVLMAX == 0 || MaxVLMAX > 256) && EltSize == 8) { 3097 // If this is LMUL=8, we have to split before can use vrgatherei16.vv. 3098 // Reverse each half, then reassemble them in reverse order. 3099 // NOTE: It's also possible that after splitting that VLMAX no longer 3100 // requires vrgatherei16.vv. 3101 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) { 3102 SDValue Lo, Hi; 3103 std::tie(Lo, Hi) = DAG.SplitVectorOperand(Op.getNode(), 0); 3104 EVT LoVT, HiVT; 3105 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT); 3106 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo); 3107 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi); 3108 // Reassemble the low and high pieces reversed. 3109 // FIXME: This is a CONCAT_VECTORS. 3110 SDValue Res = 3111 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi, 3112 DAG.getIntPtrConstant(0, DL)); 3113 return DAG.getNode( 3114 ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo, 3115 DAG.getIntPtrConstant(LoVT.getVectorMinNumElements(), DL)); 3116 } 3117 3118 // Just promote the int type to i16 which will double the LMUL. 3119 IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount()); 3120 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL; 3121 } 3122 3123 MVT XLenVT = Subtarget.getXLenVT(); 3124 SDValue Mask, VL; 3125 std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget); 3126 3127 // Calculate VLMAX-1 for the desired SEW. 
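// VLMAX here is vscale * the type's minimum element count.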
3128 unsigned MinElts = VecVT.getVectorMinNumElements(); 3129 SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT, 3130 DAG.getConstant(MinElts, DL, XLenVT)); 3131 SDValue VLMinus1 = 3132 DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DAG.getConstant(1, DL, XLenVT)); 3133 3134 // Splat VLMAX-1 taking care to handle SEW==64 on RV32. 3135 bool IsRV32E64 = 3136 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64; 3137 SDValue SplatVL; 3138 if (!IsRV32E64) 3139 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1); 3140 else 3141 SplatVL = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, IntVT, VLMinus1); 3142 3143 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL); 3144 SDValue Indices = 3145 DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID, Mask, VL); 3146 3147 return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices, Mask, VL); 3148 } 3149 3150 SDValue 3151 RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op, 3152 SelectionDAG &DAG) const { 3153 auto *Load = cast<LoadSDNode>(Op); 3154 3155 SDLoc DL(Op); 3156 MVT VT = Op.getSimpleValueType(); 3157 MVT ContainerVT = getContainerForFixedLengthVector(VT); 3158 3159 SDValue VL = 3160 DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT()); 3161 3162 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 3163 SDValue NewLoad = DAG.getMemIntrinsicNode( 3164 RISCVISD::VLE_VL, DL, VTs, {Load->getChain(), Load->getBasePtr(), VL}, 3165 Load->getMemoryVT(), Load->getMemOperand()); 3166 3167 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget); 3168 return DAG.getMergeValues({Result, Load->getChain()}, DL); 3169 } 3170 3171 SDValue 3172 RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op, 3173 SelectionDAG &DAG) const { 3174 auto *Store = cast<StoreSDNode>(Op); 3175 3176 SDLoc DL(Op); 3177 MVT VT = Store->getValue().getSimpleValueType(); 3178 3179 // FIXME: We probably need to zero any extra bits in a byte for mask stores. 3180 // This is tricky to do. 
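// (A v4i1 store, for example, writes a whole byte, and nothing here
// guarantees the top four bits of that byte are zero.)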
3181 3182 MVT ContainerVT = getContainerForFixedLengthVector(VT); 3183 3184 SDValue VL = 3185 DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT()); 3186 3187 SDValue NewValue = 3188 convertToScalableVector(ContainerVT, Store->getValue(), DAG, Subtarget); 3189 return DAG.getMemIntrinsicNode( 3190 RISCVISD::VSE_VL, DL, DAG.getVTList(MVT::Other), 3191 {Store->getChain(), NewValue, Store->getBasePtr(), VL}, 3192 Store->getMemoryVT(), Store->getMemOperand()); 3193 } 3194 3195 SDValue RISCVTargetLowering::lowerFixedLengthVectorMaskedLoadToRVV( 3196 SDValue Op, SelectionDAG &DAG) const { 3197 auto *Load = cast<MaskedLoadSDNode>(Op); 3198 3199 SDLoc DL(Op); 3200 MVT VT = Op.getSimpleValueType(); 3201 MVT ContainerVT = getContainerForFixedLengthVector(VT); 3202 MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 3203 MVT XLenVT = Subtarget.getXLenVT(); 3204 3205 SDValue Mask = 3206 convertToScalableVector(MaskVT, Load->getMask(), DAG, Subtarget); 3207 SDValue PassThru = 3208 convertToScalableVector(ContainerVT, Load->getPassThru(), DAG, Subtarget); 3209 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT); 3210 3211 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 3212 SDValue IntID = DAG.getTargetConstant(Intrinsic::riscv_vle_mask, DL, XLenVT); 3213 SDValue Ops[] = {Load->getChain(), IntID, PassThru, 3214 Load->getBasePtr(), Mask, VL}; 3215 SDValue NewLoad = 3216 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, 3217 Load->getMemoryVT(), Load->getMemOperand()); 3218 3219 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget); 3220 return DAG.getMergeValues({Result, Load->getChain()}, DL); 3221 } 3222 3223 SDValue RISCVTargetLowering::lowerFixedLengthVectorMaskedStoreToRVV( 3224 SDValue Op, SelectionDAG &DAG) const { 3225 auto *Store = cast<MaskedStoreSDNode>(Op); 3226 3227 SDLoc DL(Op); 3228 SDValue Val = Store->getValue(); 3229 MVT VT = Val.getSimpleValueType(); 3230 MVT ContainerVT = getContainerForFixedLengthVector(VT); 3231 MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 3232 MVT XLenVT = Subtarget.getXLenVT(); 3233 3234 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget); 3235 SDValue Mask = 3236 convertToScalableVector(MaskVT, Store->getMask(), DAG, Subtarget); 3237 3238 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT); 3239 3240 SDValue IntID = DAG.getTargetConstant(Intrinsic::riscv_vse_mask, DL, XLenVT); 3241 return DAG.getMemIntrinsicNode( 3242 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), 3243 {Store->getChain(), IntID, Val, Store->getBasePtr(), Mask, VL}, 3244 Store->getMemoryVT(), Store->getMemOperand()); 3245 } 3246 3247 SDValue 3248 RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op, 3249 SelectionDAG &DAG) const { 3250 MVT InVT = Op.getOperand(0).getSimpleValueType(); 3251 MVT ContainerVT = getContainerForFixedLengthVector(InVT); 3252 3253 MVT VT = Op.getSimpleValueType(); 3254 3255 SDValue Op1 = 3256 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget); 3257 SDValue Op2 = 3258 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget); 3259 3260 SDLoc DL(Op); 3261 SDValue VL = 3262 DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT()); 3263 3264 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); 3265 3266 bool Invert = false; 3267 Optional<unsigned> LogicOpc; 3268 if (ContainerVT.isFloatingPoint()) { 3269 bool Swap = false; 3270 switch (CC) { 3271 
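// Map comparisons RVV cannot express directly onto ones it can, recording
// whether the operands must be swapped, the result inverted, or a second
// compare combined in.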
default: 3272 break; 3273 case ISD::SETULE: 3274 case ISD::SETULT: 3275 Swap = true; 3276 LLVM_FALLTHROUGH; 3277 case ISD::SETUGE: 3278 case ISD::SETUGT: 3279 CC = getSetCCInverse(CC, ContainerVT); 3280 Invert = true; 3281 break; 3282 case ISD::SETOGE: 3283 case ISD::SETOGT: 3284 case ISD::SETGE: 3285 case ISD::SETGT: 3286 Swap = true; 3287 break; 3288 case ISD::SETUEQ: 3289 // Use !((OLT Op1, Op2) || (OLT Op2, Op1)) 3290 Invert = true; 3291 LogicOpc = RISCVISD::VMOR_VL; 3292 CC = ISD::SETOLT; 3293 break; 3294 case ISD::SETONE: 3295 // Use ((OLT Op1, Op2) || (OLT Op2, Op1)) 3296 LogicOpc = RISCVISD::VMOR_VL; 3297 CC = ISD::SETOLT; 3298 break; 3299 case ISD::SETO: 3300 // Use (OEQ Op1, Op1) && (OEQ Op2, Op2) 3301 LogicOpc = RISCVISD::VMAND_VL; 3302 CC = ISD::SETOEQ; 3303 break; 3304 case ISD::SETUO: 3305 // Use (UNE Op1, Op1) || (UNE Op2, Op2) 3306 LogicOpc = RISCVISD::VMOR_VL; 3307 CC = ISD::SETUNE; 3308 break; 3309 } 3310 3311 if (Swap) { 3312 CC = getSetCCSwappedOperands(CC); 3313 std::swap(Op1, Op2); 3314 } 3315 } 3316 3317 MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 3318 SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); 3319 3320 // There are 3 cases we need to emit. 3321 // 1. For (OEQ Op1, Op1) && (OEQ Op2, Op2) or (UNE Op1, Op1) || (UNE Op2, Op2) 3322 // we need to compare each operand with itself. 3323 // 2. For (OLT Op1, Op2) || (OLT Op2, Op1) we need to compare Op1 and Op2 in 3324 // both orders. 3325 // 3. For any other case we just need one compare with Op1 and Op2. 3326 SDValue Cmp; 3327 if (LogicOpc && (CC == ISD::SETOEQ || CC == ISD::SETUNE)) { 3328 Cmp = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op1, Op1, 3329 DAG.getCondCode(CC), Mask, VL); 3330 SDValue Cmp2 = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op2, Op2, 3331 DAG.getCondCode(CC), Mask, VL); 3332 Cmp = DAG.getNode(*LogicOpc, DL, MaskVT, Cmp, Cmp2, VL); 3333 } else { 3334 Cmp = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op1, Op2, 3335 DAG.getCondCode(CC), Mask, VL); 3336 if (LogicOpc) { 3337 SDValue Cmp2 = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op2, Op1, 3338 DAG.getCondCode(CC), Mask, VL); 3339 Cmp = DAG.getNode(*LogicOpc, DL, MaskVT, Cmp, Cmp2, VL); 3340 } 3341 } 3342 3343 if (Invert) { 3344 SDValue AllOnes = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); 3345 Cmp = DAG.getNode(RISCVISD::VMXOR_VL, DL, MaskVT, Cmp, AllOnes, VL); 3346 } 3347 3348 return convertFromScalableVector(VT, Cmp, DAG, Subtarget); 3349 } 3350 3351 SDValue RISCVTargetLowering::lowerFixedLengthVectorLogicOpToRVV( 3352 SDValue Op, SelectionDAG &DAG, unsigned MaskOpc, unsigned VecOpc) const { 3353 MVT VT = Op.getSimpleValueType(); 3354 3355 if (VT.getVectorElementType() == MVT::i1) 3356 return lowerToScalableOp(Op, DAG, MaskOpc, /*HasMask*/ false); 3357 3358 return lowerToScalableOp(Op, DAG, VecOpc, /*HasMask*/ true); 3359 } 3360 3361 // Lower vector ABS to smax(X, sub(0, X)). 
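// RVV has no single vector abs instruction, so it is built from a zero
// splat, a vector subtract, and a signed max.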
3362 SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const { 3363 SDLoc DL(Op); 3364 MVT VT = Op.getSimpleValueType(); 3365 SDValue X = Op.getOperand(0); 3366 3367 assert(VT.isFixedLengthVector() && "Unexpected type"); 3368 3369 MVT ContainerVT = getContainerForFixedLengthVector(VT); 3370 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget); 3371 3372 SDValue Mask, VL; 3373 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 3374 3375 SDValue SplatZero = 3376 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 3377 DAG.getConstant(0, DL, Subtarget.getXLenVT())); 3378 SDValue NegX = 3379 DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X, Mask, VL); 3380 SDValue Max = 3381 DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX, Mask, VL); 3382 3383 return convertFromScalableVector(VT, Max, DAG, Subtarget); 3384 } 3385 3386 SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV( 3387 SDValue Op, SelectionDAG &DAG) const { 3388 SDLoc DL(Op); 3389 MVT VT = Op.getSimpleValueType(); 3390 SDValue Mag = Op.getOperand(0); 3391 SDValue Sign = Op.getOperand(1); 3392 assert(Mag.getValueType() == Sign.getValueType() && 3393 "Can only handle COPYSIGN with matching types."); 3394 3395 MVT ContainerVT = getContainerForFixedLengthVector(VT); 3396 Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget); 3397 Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget); 3398 3399 SDValue Mask, VL; 3400 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 3401 3402 SDValue CopySign = 3403 DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag, Sign, Mask, VL); 3404 3405 return convertFromScalableVector(VT, CopySign, DAG, Subtarget); 3406 } 3407 3408 SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV( 3409 SDValue Op, SelectionDAG &DAG) const { 3410 MVT VT = Op.getSimpleValueType(); 3411 MVT ContainerVT = getContainerForFixedLengthVector(VT); 3412 3413 MVT I1ContainerVT = 3414 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 3415 3416 SDValue CC = 3417 convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget); 3418 SDValue Op1 = 3419 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget); 3420 SDValue Op2 = 3421 convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget); 3422 3423 SDLoc DL(Op); 3424 SDValue Mask, VL; 3425 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 3426 3427 SDValue Select = 3428 DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, Op1, Op2, VL); 3429 3430 return convertFromScalableVector(VT, Select, DAG, Subtarget); 3431 } 3432 3433 SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, SelectionDAG &DAG, 3434 unsigned NewOpc, 3435 bool HasMask) const { 3436 MVT VT = Op.getSimpleValueType(); 3437 assert(useRVVForFixedLengthVectorVT(VT) && 3438 "Only expected to lower fixed length vector operation!"); 3439 MVT ContainerVT = getContainerForFixedLengthVector(VT); 3440 3441 // Create list of operands by converting existing ones to scalable types. 3442 SmallVector<SDValue, 6> Ops; 3443 for (const SDValue &V : Op->op_values()) { 3444 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!"); 3445 3446 // Pass through non-vector operands. 3447 if (!V.getValueType().isVector()) { 3448 Ops.push_back(V); 3449 continue; 3450 } 3451 3452 // "cast" fixed length vector to a scalable vector. 
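// (convertToScalableVector inserts the fixed-length value into an undef
// scalable container at index 0.)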
3453 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) && 3454 "Only fixed length vectors are supported!"); 3455 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget)); 3456 } 3457 3458 SDLoc DL(Op); 3459 SDValue Mask, VL; 3460 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 3461 if (HasMask) 3462 Ops.push_back(Mask); 3463 Ops.push_back(VL); 3464 3465 SDValue ScalableRes = DAG.getNode(NewOpc, DL, ContainerVT, Ops); 3466 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget); 3467 } 3468 3469 // Custom lower MGATHER to a legalized form for RVV. It will then be matched to 3470 // a RVV indexed load. The RVV indexed load/store instructions only support the 3471 // "unsigned unscaled" addressing mode; indices are implicitly zero-extended or 3472 // truncated to XLEN and are treated as byte offsets. Any signed or scaled 3473 // indexing is extended to the XLEN value type and scaled accordingly. 3474 SDValue RISCVTargetLowering::lowerMGATHERMSCATTER(SDValue Op, 3475 SelectionDAG &DAG) const { 3476 auto *N = cast<MaskedGatherScatterSDNode>(Op.getNode()); 3477 SDLoc DL(Op); 3478 SDValue Index = N->getIndex(); 3479 SDValue Mask = N->getMask(); 3480 3481 MVT XLenVT = Subtarget.getXLenVT(); 3482 assert(N->getBasePtr().getSimpleValueType() == XLenVT && 3483 "Unexpected pointer type"); 3484 // Targets have to explicitly opt-in for extending vector loads and 3485 // truncating vector stores. 3486 const auto *MGN = dyn_cast<MaskedGatherSDNode>(N); 3487 const auto *MSN = dyn_cast<MaskedScatterSDNode>(N); 3488 assert((!MGN || MGN->getExtensionType() == ISD::NON_EXTLOAD) && 3489 "Unexpected extending MGATHER"); 3490 assert((!MSN || !MSN->isTruncatingStore()) && 3491 "Unexpected extending MSCATTER"); 3492 3493 // If the mask is known to be all ones, optimize to an unmasked intrinsic; 3494 // the selection of the masked intrinsics doesn't do this for us. 3495 unsigned IntID = 0; 3496 MVT IndexVT = Index.getSimpleValueType(); 3497 SDValue VL = getDefaultVLOps(IndexVT, IndexVT, DL, DAG, Subtarget).second; 3498 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 3499 3500 if (IsUnmasked) 3501 IntID = MGN ? Intrinsic::riscv_vloxei : Intrinsic::riscv_vsoxei; 3502 else 3503 IntID = MGN ? Intrinsic::riscv_vloxei_mask : Intrinsic::riscv_vsoxei_mask; 3504 SmallVector<SDValue, 8> Ops{N->getChain(), 3505 DAG.getTargetConstant(IntID, DL, XLenVT)}; 3506 if (MSN) 3507 Ops.push_back(MSN->getValue()); 3508 else if (!IsUnmasked) 3509 Ops.push_back(MGN->getPassThru()); 3510 Ops.push_back(N->getBasePtr()); 3511 Ops.push_back(Index); 3512 if (!IsUnmasked) 3513 Ops.push_back(Mask); 3514 Ops.push_back(VL); 3515 return DAG.getMemIntrinsicNode( 3516 MGN ? ISD::INTRINSIC_W_CHAIN : ISD::INTRINSIC_VOID, DL, N->getVTList(), 3517 Ops, N->getMemoryVT(), N->getMemOperand()); 3518 } 3519 3520 // Returns the opcode of the target-specific SDNode that implements the 32-bit 3521 // form of the given Opcode. 
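// e.g. ISD::SRA becomes RISCVISD::SRAW, which corresponds to the RV64 sraw
// instruction operating on the low 32 bits.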
3522 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
3523 switch (Opcode) {
3524 default:
3525 llvm_unreachable("Unexpected opcode");
3526 case ISD::SHL:
3527 return RISCVISD::SLLW;
3528 case ISD::SRA:
3529 return RISCVISD::SRAW;
3530 case ISD::SRL:
3531 return RISCVISD::SRLW;
3532 case ISD::SDIV:
3533 return RISCVISD::DIVW;
3534 case ISD::UDIV:
3535 return RISCVISD::DIVUW;
3536 case ISD::UREM:
3537 return RISCVISD::REMUW;
3538 case ISD::ROTL:
3539 return RISCVISD::ROLW;
3540 case ISD::ROTR:
3541 return RISCVISD::RORW;
3542 case RISCVISD::GREVI:
3543 return RISCVISD::GREVIW;
3544 case RISCVISD::GORCI:
3545 return RISCVISD::GORCIW;
3546 }
3547 }
3548
3549 // Converts the given 32-bit operation to a target-specific SelectionDAG node.
3550 // Because i32 isn't a legal type for RV64, these operations would otherwise
3551 // be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
3552 // later on, because the fact that the operation was originally of type i32 is
3553 // lost.
3554 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
3555 unsigned ExtOpc = ISD::ANY_EXTEND) {
3556 SDLoc DL(N);
3557 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
3558 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
3559 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
3560 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
3561 // ReplaceNodeResults requires we maintain the same type for the return value.
3562 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
3563 }
3564
3565 // Converts the given 32-bit operation to an i64 operation with sign-extension
3566 // semantics in order to reduce the number of sign-extension instructions.
3567 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
3568 SDLoc DL(N);
3569 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
3570 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
3571 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
3572 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
3573 DAG.getValueType(MVT::i32));
3574 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
3575 }
3576
3577 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
3578 SmallVectorImpl<SDValue> &Results,
3579 SelectionDAG &DAG) const {
3580 SDLoc DL(N);
3581 switch (N->getOpcode()) {
3582 default:
3583 llvm_unreachable("Don't know how to custom type legalize this operation!");
3584 case ISD::STRICT_FP_TO_SINT:
3585 case ISD::STRICT_FP_TO_UINT:
3586 case ISD::FP_TO_SINT:
3587 case ISD::FP_TO_UINT: {
3588 bool IsStrict = N->isStrictFPOpcode();
3589 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
3590 "Unexpected custom legalisation");
3591 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
3592 // If the FP type needs to be softened, emit a library call using the 'si'
3593 // version. If we left it to default legalization we'd end up with 'di'. If
3594 // the FP type doesn't need to be softened just let generic type
3595 // legalization promote the result type.
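// (A hard-float f32/f64 operand is not softened, so it hits the early return
// below and the result is promoted by the generic legalizer.)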
3596 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) != 3597 TargetLowering::TypeSoftenFloat) 3598 return; 3599 RTLIB::Libcall LC; 3600 if (N->getOpcode() == ISD::FP_TO_SINT || 3601 N->getOpcode() == ISD::STRICT_FP_TO_SINT) 3602 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0)); 3603 else 3604 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0)); 3605 MakeLibCallOptions CallOptions; 3606 EVT OpVT = Op0.getValueType(); 3607 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true); 3608 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); 3609 SDValue Result; 3610 std::tie(Result, Chain) = 3611 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain); 3612 Results.push_back(Result); 3613 if (IsStrict) 3614 Results.push_back(Chain); 3615 break; 3616 } 3617 case ISD::READCYCLECOUNTER: { 3618 assert(!Subtarget.is64Bit() && 3619 "READCYCLECOUNTER only has custom type legalization on riscv32"); 3620 3621 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); 3622 SDValue RCW = 3623 DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0)); 3624 3625 Results.push_back( 3626 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1))); 3627 Results.push_back(RCW.getValue(2)); 3628 break; 3629 } 3630 case ISD::ADD: 3631 case ISD::SUB: 3632 case ISD::MUL: 3633 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 3634 "Unexpected custom legalisation"); 3635 if (N->getOperand(1).getOpcode() == ISD::Constant) 3636 return; 3637 Results.push_back(customLegalizeToWOpWithSExt(N, DAG)); 3638 break; 3639 case ISD::SHL: 3640 case ISD::SRA: 3641 case ISD::SRL: 3642 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 3643 "Unexpected custom legalisation"); 3644 if (N->getOperand(1).getOpcode() == ISD::Constant) 3645 return; 3646 Results.push_back(customLegalizeToWOp(N, DAG)); 3647 break; 3648 case ISD::ROTL: 3649 case ISD::ROTR: 3650 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 3651 "Unexpected custom legalisation"); 3652 Results.push_back(customLegalizeToWOp(N, DAG)); 3653 break; 3654 case ISD::SDIV: 3655 case ISD::UDIV: 3656 case ISD::UREM: { 3657 MVT VT = N->getSimpleValueType(0); 3658 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) && 3659 Subtarget.is64Bit() && Subtarget.hasStdExtM() && 3660 "Unexpected custom legalisation"); 3661 if (N->getOperand(0).getOpcode() == ISD::Constant || 3662 N->getOperand(1).getOpcode() == ISD::Constant) 3663 return; 3664 3665 // If the input is i32, use ANY_EXTEND since the W instructions don't read 3666 // the upper 32 bits. For other types we need to sign or zero extend 3667 // based on the opcode. 3668 unsigned ExtOpc = ISD::ANY_EXTEND; 3669 if (VT != MVT::i32) 3670 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND 3671 : ISD::ZERO_EXTEND; 3672 3673 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc)); 3674 break; 3675 } 3676 case ISD::UADDO: 3677 case ISD::USUBO: { 3678 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 3679 "Unexpected custom legalisation"); 3680 bool IsAdd = N->getOpcode() == ISD::UADDO; 3681 SDLoc DL(N); 3682 // Create an ADDW or SUBW. 3683 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 3684 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 3685 SDValue Res = 3686 DAG.getNode(IsAdd ? 
ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS); 3687 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res, 3688 DAG.getValueType(MVT::i32)); 3689 3690 // Sign extend the LHS and perform an unsigned compare with the ADDW result. 3691 // Since the inputs are sign extended from i32, this is equivalent to 3692 // comparing the lower 32 bits. 3693 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0)); 3694 SDValue Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS, 3695 IsAdd ? ISD::SETULT : ISD::SETUGT); 3696 3697 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 3698 Results.push_back(Overflow); 3699 return; 3700 } 3701 case ISD::UADDSAT: 3702 case ISD::USUBSAT: { 3703 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 3704 "Unexpected custom legalisation"); 3705 SDLoc DL(N); 3706 if (Subtarget.hasStdExtZbb()) { 3707 // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using 3708 // sign extend allows overflow of the lower 32 bits to be detected on 3709 // the promoted size. 3710 SDValue LHS = 3711 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0)); 3712 SDValue RHS = 3713 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1)); 3714 SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS); 3715 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 3716 return; 3717 } 3718 3719 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom 3720 // promotion for UADDO/USUBO. 3721 Results.push_back(expandAddSubSat(N, DAG)); 3722 return; 3723 } 3724 case ISD::BITCAST: { 3725 assert(((N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 3726 Subtarget.hasStdExtF()) || 3727 (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh())) && 3728 "Unexpected custom legalisation"); 3729 SDValue Op0 = N->getOperand(0); 3730 if (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh()) { 3731 if (Op0.getValueType() != MVT::f16) 3732 return; 3733 SDValue FPConv = 3734 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Op0); 3735 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv)); 3736 } else if (N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 3737 Subtarget.hasStdExtF()) { 3738 if (Op0.getValueType() != MVT::f32) 3739 return; 3740 SDValue FPConv = 3741 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0); 3742 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv)); 3743 } 3744 break; 3745 } 3746 case RISCVISD::GREVI: 3747 case RISCVISD::GORCI: { 3748 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 3749 "Unexpected custom legalisation"); 3750 // This is similar to customLegalizeToWOp, except that we pass the second 3751 // operand (a TargetConstant) straight through: it is already of type 3752 // XLenVT. 3753 SDLoc DL(N); 3754 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode()); 3755 SDValue NewOp0 = 3756 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 3757 SDValue NewRes = 3758 DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, N->getOperand(1)); 3759 // ReplaceNodeResults requires we maintain the same type for the return 3760 // value. 3761 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes)); 3762 break; 3763 } 3764 case RISCVISD::SHFLI: { 3765 // There is no SHFLIW instruction, but we can just promote the operation. 
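// The input is any-extended to i64, shuffled with the same control constant,
// and the result truncated back to i32.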
3766 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 3767 "Unexpected custom legalisation"); 3768 SDLoc DL(N); 3769 SDValue NewOp0 = 3770 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 3771 SDValue NewRes = 3772 DAG.getNode(RISCVISD::SHFLI, DL, MVT::i64, NewOp0, N->getOperand(1)); 3773 // ReplaceNodeResults requires we maintain the same type for the return 3774 // value. 3775 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes)); 3776 break; 3777 } 3778 case ISD::BSWAP: 3779 case ISD::BITREVERSE: { 3780 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 3781 Subtarget.hasStdExtZbp() && "Unexpected custom legalisation"); 3782 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, 3783 N->getOperand(0)); 3784 unsigned Imm = N->getOpcode() == ISD::BITREVERSE ? 31 : 24; 3785 SDValue GREVIW = DAG.getNode(RISCVISD::GREVIW, DL, MVT::i64, NewOp0, 3786 DAG.getTargetConstant(Imm, DL, 3787 Subtarget.getXLenVT())); 3788 // ReplaceNodeResults requires we maintain the same type for the return 3789 // value. 3790 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, GREVIW)); 3791 break; 3792 } 3793 case ISD::FSHL: 3794 case ISD::FSHR: { 3795 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 3796 Subtarget.hasStdExtZbt() && "Unexpected custom legalisation"); 3797 SDValue NewOp0 = 3798 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 3799 SDValue NewOp1 = 3800 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 3801 SDValue NewOp2 = 3802 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); 3803 // FSLW/FSRW take a 6 bit shift amount but i32 FSHL/FSHR only use 5 bits. 3804 // Mask the shift amount to 5 bits. 3805 NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2, 3806 DAG.getConstant(0x1f, DL, MVT::i64)); 3807 unsigned Opc = 3808 N->getOpcode() == ISD::FSHL ? RISCVISD::FSLW : RISCVISD::FSRW; 3809 SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewOp2); 3810 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp)); 3811 break; 3812 } 3813 case ISD::EXTRACT_VECTOR_ELT: { 3814 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element 3815 // type is illegal (currently only vXi64 RV32). 3816 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are 3817 // transferred to the destination register. We issue two of these from the 3818 // upper- and lower- halves of the SEW-bit vector element, slid down to the 3819 // first element. 3820 SDLoc DL(N); 3821 SDValue Vec = N->getOperand(0); 3822 SDValue Idx = N->getOperand(1); 3823 3824 // The vector type hasn't been legalized yet so we can't issue target 3825 // specific nodes if it needs legalization. 3826 // FIXME: We would manually legalize if it's important. 3827 if (!isTypeLegal(Vec.getValueType())) 3828 return; 3829 3830 MVT VecVT = Vec.getSimpleValueType(); 3831 3832 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 && 3833 VecVT.getVectorElementType() == MVT::i64 && 3834 "Unexpected EXTRACT_VECTOR_ELT legalization"); 3835 3836 // If this is a fixed vector, we need to convert it to a scalable vector. 3837 MVT ContainerVT = VecVT; 3838 if (VecVT.isFixedLengthVector()) { 3839 ContainerVT = getContainerForFixedLengthVector(VecVT); 3840 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 3841 } 3842 3843 MVT XLenVT = Subtarget.getXLenVT(); 3844 3845 // Use a VL of 1 to avoid processing more elements than we need. 
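// Only element 0 is ultimately read by vmv.x.s, so the slidedown and the
// 32-bit shift below need not process the rest of the register group.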
3846 MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount()); 3847 SDValue VL = DAG.getConstant(1, DL, XLenVT); 3848 SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); 3849 3850 // Unless the index is known to be 0, we must slide the vector down to get 3851 // the desired element into index 0. 3852 if (!isNullConstant(Idx)) { 3853 Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT, 3854 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL); 3855 } 3856 3857 // Extract the lower XLEN bits of the correct vector element. 3858 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec); 3859 3860 // To extract the upper XLEN bits of the vector element, shift the first 3861 // element right by 32 bits and re-extract the lower XLEN bits. 3862 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 3863 DAG.getConstant(32, DL, XLenVT), VL); 3864 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, 3865 ThirtyTwoV, Mask, VL); 3866 3867 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32); 3868 3869 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi)); 3870 break; 3871 } 3872 case ISD::INTRINSIC_WO_CHAIN: { 3873 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 3874 switch (IntNo) { 3875 default: 3876 llvm_unreachable( 3877 "Don't know how to custom type legalize this intrinsic!"); 3878 case Intrinsic::riscv_vmv_x_s: { 3879 EVT VT = N->getValueType(0); 3880 MVT XLenVT = Subtarget.getXLenVT(); 3881 if (VT.bitsLT(XLenVT)) { 3882 // Simple case just extract using vmv.x.s and truncate. 3883 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL, 3884 Subtarget.getXLenVT(), N->getOperand(1)); 3885 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract)); 3886 return; 3887 } 3888 3889 assert(VT == MVT::i64 && !Subtarget.is64Bit() && 3890 "Unexpected custom legalization"); 3891 3892 // We need to do the move in two steps. 3893 SDValue Vec = N->getOperand(1); 3894 MVT VecVT = Vec.getSimpleValueType(); 3895 3896 // First extract the lower XLEN bits of the element. 3897 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec); 3898 3899 // To extract the upper XLEN bits of the vector element, shift the first 3900 // element right by 32 bits and re-extract the lower XLEN bits. 3901 SDValue VL = DAG.getConstant(1, DL, XLenVT); 3902 MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount()); 3903 SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); 3904 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, 3905 DAG.getConstant(32, DL, XLenVT), VL); 3906 SDValue LShr32 = 3907 DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV, Mask, VL); 3908 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32); 3909 3910 Results.push_back( 3911 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi)); 3912 break; 3913 } 3914 } 3915 break; 3916 } 3917 case ISD::VECREDUCE_ADD: 3918 case ISD::VECREDUCE_AND: 3919 case ISD::VECREDUCE_OR: 3920 case ISD::VECREDUCE_XOR: 3921 case ISD::VECREDUCE_SMAX: 3922 case ISD::VECREDUCE_UMAX: 3923 case ISD::VECREDUCE_SMIN: 3924 case ISD::VECREDUCE_UMIN: 3925 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG)) 3926 Results.push_back(V); 3927 break; 3928 } 3929 } 3930 3931 // A structure to hold one of the bit-manipulation patterns below. 
Together, a 3932 // SHL and non-SHL pattern may form a bit-manipulation pair on a single source: 3933 // (or (and (shl x, 1), 0xAAAAAAAA), 3934 // (and (srl x, 1), 0x55555555)) 3935 struct RISCVBitmanipPat { 3936 SDValue Op; 3937 unsigned ShAmt; 3938 bool IsSHL; 3939 3940 bool formsPairWith(const RISCVBitmanipPat &Other) const { 3941 return Op == Other.Op && ShAmt == Other.ShAmt && IsSHL != Other.IsSHL; 3942 } 3943 }; 3944 3945 // Matches patterns of the form 3946 // (and (shl x, C2), (C1 << C2)) 3947 // (and (srl x, C2), C1) 3948 // (shl (and x, C1), C2) 3949 // (srl (and x, (C1 << C2)), C2) 3950 // Where C2 is a power of 2 and C1 has at least that many leading zeroes. 3951 // The expected masks for each shift amount are specified in BitmanipMasks where 3952 // BitmanipMasks[log2(C2)] specifies the expected C1 value. 3953 // The max allowed shift amount is either XLen/2 or XLen/4 determined by whether 3954 // BitmanipMasks contains 6 or 5 entries assuming that the maximum possible 3955 // XLen is 64. 3956 static Optional<RISCVBitmanipPat> 3957 matchRISCVBitmanipPat(SDValue Op, ArrayRef<uint64_t> BitmanipMasks) { 3958 assert((BitmanipMasks.size() == 5 || BitmanipMasks.size() == 6) && 3959 "Unexpected number of masks"); 3960 Optional<uint64_t> Mask; 3961 // Optionally consume a mask around the shift operation. 3962 if (Op.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op.getOperand(1))) { 3963 Mask = Op.getConstantOperandVal(1); 3964 Op = Op.getOperand(0); 3965 } 3966 if (Op.getOpcode() != ISD::SHL && Op.getOpcode() != ISD::SRL) 3967 return None; 3968 bool IsSHL = Op.getOpcode() == ISD::SHL; 3969 3970 if (!isa<ConstantSDNode>(Op.getOperand(1))) 3971 return None; 3972 uint64_t ShAmt = Op.getConstantOperandVal(1); 3973 3974 unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32; 3975 if (ShAmt >= Width && !isPowerOf2_64(ShAmt)) 3976 return None; 3977 // If we don't have enough masks for 64 bit, then we must be trying to 3978 // match SHFL so we're only allowed to shift 1/4 of the width. 3979 if (BitmanipMasks.size() == 5 && ShAmt >= (Width / 2)) 3980 return None; 3981 3982 SDValue Src = Op.getOperand(0); 3983 3984 // The expected mask is shifted left when the AND is found around SHL 3985 // patterns. 3986 // ((x >> 1) & 0x55555555) 3987 // ((x << 1) & 0xAAAAAAAA) 3988 bool SHLExpMask = IsSHL; 3989 3990 if (!Mask) { 3991 // Sometimes LLVM keeps the mask as an operand of the shift, typically when 3992 // the mask is all ones: consume that now. 3993 if (Src.getOpcode() == ISD::AND && isa<ConstantSDNode>(Src.getOperand(1))) { 3994 Mask = Src.getConstantOperandVal(1); 3995 Src = Src.getOperand(0); 3996 // The expected mask is now in fact shifted left for SRL, so reverse the 3997 // decision. 3998 // ((x & 0xAAAAAAAA) >> 1) 3999 // ((x & 0x55555555) << 1) 4000 SHLExpMask = !SHLExpMask; 4001 } else { 4002 // Use a default shifted mask of all-ones if there's no AND, truncated 4003 // down to the expected width. This simplifies the logic later on. 4004 Mask = maskTrailingOnes<uint64_t>(Width); 4005 *Mask &= (IsSHL ? 
*Mask << ShAmt : *Mask >> ShAmt); 4006 } 4007 } 4008 4009 unsigned MaskIdx = Log2_32(ShAmt); 4010 uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width); 4011 4012 if (SHLExpMask) 4013 ExpMask <<= ShAmt; 4014 4015 if (Mask != ExpMask) 4016 return None; 4017 4018 return RISCVBitmanipPat{Src, (unsigned)ShAmt, IsSHL}; 4019 } 4020 4021 // Matches any of the following bit-manipulation patterns: 4022 // (and (shl x, 1), (0x55555555 << 1)) 4023 // (and (srl x, 1), 0x55555555) 4024 // (shl (and x, 0x55555555), 1) 4025 // (srl (and x, (0x55555555 << 1)), 1) 4026 // where the shift amount and mask may vary thus: 4027 // [1] = 0x55555555 / 0xAAAAAAAA 4028 // [2] = 0x33333333 / 0xCCCCCCCC 4029 // [4] = 0x0F0F0F0F / 0xF0F0F0F0 4030 // [8] = 0x00FF00FF / 0xFF00FF00 4031 // [16] = 0x0000FFFF / 0xFFFFFFFF 4032 // [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64) 4033 static Optional<RISCVBitmanipPat> matchGREVIPat(SDValue Op) { 4034 // These are the unshifted masks which we use to match bit-manipulation 4035 // patterns. They may be shifted left in certain circumstances. 4036 static const uint64_t BitmanipMasks[] = { 4037 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL, 4038 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL}; 4039 4040 return matchRISCVBitmanipPat(Op, BitmanipMasks); 4041 } 4042 4043 // Match the following pattern as a GREVI(W) operation 4044 // (or (BITMANIP_SHL x), (BITMANIP_SRL x)) 4045 static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG, 4046 const RISCVSubtarget &Subtarget) { 4047 assert(Subtarget.hasStdExtZbp() && "Expected Zbp extenson"); 4048 EVT VT = Op.getValueType(); 4049 4050 if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) { 4051 auto LHS = matchGREVIPat(Op.getOperand(0)); 4052 auto RHS = matchGREVIPat(Op.getOperand(1)); 4053 if (LHS && RHS && LHS->formsPairWith(*RHS)) { 4054 SDLoc DL(Op); 4055 return DAG.getNode( 4056 RISCVISD::GREVI, DL, VT, LHS->Op, 4057 DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT())); 4058 } 4059 } 4060 return SDValue(); 4061 } 4062 4063 // Matches any the following pattern as a GORCI(W) operation 4064 // 1. (or (GREVI x, shamt), x) if shamt is a power of 2 4065 // 2. (or x, (GREVI x, shamt)) if shamt is a power of 2 4066 // 3. (or (or (BITMANIP_SHL x), x), (BITMANIP_SRL x)) 4067 // Note that with the variant of 3., 4068 // (or (or (BITMANIP_SHL x), (BITMANIP_SRL x)), x) 4069 // the inner pattern will first be matched as GREVI and then the outer 4070 // pattern will be matched to GORC via the first rule above. 4071 // 4. (or (rotl/rotr x, bitwidth/2), x) 4072 static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG, 4073 const RISCVSubtarget &Subtarget) { 4074 assert(Subtarget.hasStdExtZbp() && "Expected Zbp extenson"); 4075 EVT VT = Op.getValueType(); 4076 4077 if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) { 4078 SDLoc DL(Op); 4079 SDValue Op0 = Op.getOperand(0); 4080 SDValue Op1 = Op.getOperand(1); 4081 4082 auto MatchOROfReverse = [&](SDValue Reverse, SDValue X) { 4083 if (Reverse.getOpcode() == RISCVISD::GREVI && Reverse.getOperand(0) == X && 4084 isPowerOf2_32(Reverse.getConstantOperandVal(1))) 4085 return DAG.getNode(RISCVISD::GORCI, DL, VT, X, Reverse.getOperand(1)); 4086 // We can also form GORCI from ROTL/ROTR by half the bitwidth. 
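// A rotate by half the bitwidth swaps the two halves of x, and OR-ing that
// with x is exactly what gorci computes when only its top stage bit is set.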
4087 if ((Reverse.getOpcode() == ISD::ROTL || 4088 Reverse.getOpcode() == ISD::ROTR) && 4089 Reverse.getOperand(0) == X && 4090 isa<ConstantSDNode>(Reverse.getOperand(1))) { 4091 uint64_t RotAmt = Reverse.getConstantOperandVal(1); 4092 if (RotAmt == (VT.getSizeInBits() / 2)) 4093 return DAG.getNode( 4094 RISCVISD::GORCI, DL, VT, X, 4095 DAG.getTargetConstant(RotAmt, DL, Subtarget.getXLenVT())); 4096 } 4097 return SDValue(); 4098 }; 4099 4100 // Check for either commutable permutation of (or (GREVI x, shamt), x) 4101 if (SDValue V = MatchOROfReverse(Op0, Op1)) 4102 return V; 4103 if (SDValue V = MatchOROfReverse(Op1, Op0)) 4104 return V; 4105 4106 // OR is commutable so canonicalize its OR operand to the left 4107 if (Op0.getOpcode() != ISD::OR && Op1.getOpcode() == ISD::OR) 4108 std::swap(Op0, Op1); 4109 if (Op0.getOpcode() != ISD::OR) 4110 return SDValue(); 4111 SDValue OrOp0 = Op0.getOperand(0); 4112 SDValue OrOp1 = Op0.getOperand(1); 4113 auto LHS = matchGREVIPat(OrOp0); 4114 // OR is commutable so swap the operands and try again: x might have been 4115 // on the left 4116 if (!LHS) { 4117 std::swap(OrOp0, OrOp1); 4118 LHS = matchGREVIPat(OrOp0); 4119 } 4120 auto RHS = matchGREVIPat(Op1); 4121 if (LHS && RHS && LHS->formsPairWith(*RHS) && LHS->Op == OrOp1) { 4122 return DAG.getNode( 4123 RISCVISD::GORCI, DL, VT, LHS->Op, 4124 DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT())); 4125 } 4126 } 4127 return SDValue(); 4128 } 4129 4130 // Matches any of the following bit-manipulation patterns: 4131 // (and (shl x, 1), (0x22222222 << 1)) 4132 // (and (srl x, 1), 0x22222222) 4133 // (shl (and x, 0x22222222), 1) 4134 // (srl (and x, (0x22222222 << 1)), 1) 4135 // where the shift amount and mask may vary thus: 4136 // [1] = 0x22222222 / 0x44444444 4137 // [2] = 0x0C0C0C0C / 0x3C3C3C3C 4138 // [4] = 0x00F000F0 / 0x0F000F00 4139 // [8] = 0x0000FF00 / 0x00FF0000 4140 // [16] = 0x00000000FFFF0000 / 0x0000FFFF00000000 (for RV64) 4141 static Optional<RISCVBitmanipPat> matchSHFLPat(SDValue Op) { 4142 // These are the unshifted masks which we use to match bit-manipulation 4143 // patterns. They may be shifted left in certain circumstances. 4144 static const uint64_t BitmanipMasks[] = { 4145 0x2222222222222222ULL, 0x0C0C0C0C0C0C0C0CULL, 0x00F000F000F000F0ULL, 4146 0x0000FF000000FF00ULL, 0x00000000FFFF0000ULL}; 4147 4148 return matchRISCVBitmanipPat(Op, BitmanipMasks); 4149 } 4150 4151 // Match (or (or (SHFL_SHL x), (SHFL_SHR x)), (SHFL_AND x) 4152 static SDValue combineORToSHFL(SDValue Op, SelectionDAG &DAG, 4153 const RISCVSubtarget &Subtarget) { 4154 assert(Subtarget.hasStdExtZbp() && "Expected Zbp extenson"); 4155 EVT VT = Op.getValueType(); 4156 4157 if (VT != MVT::i32 && VT != Subtarget.getXLenVT()) 4158 return SDValue(); 4159 4160 SDValue Op0 = Op.getOperand(0); 4161 SDValue Op1 = Op.getOperand(1); 4162 4163 // Or is commutable so canonicalize the second OR to the LHS. 4164 if (Op0.getOpcode() != ISD::OR) 4165 std::swap(Op0, Op1); 4166 if (Op0.getOpcode() != ISD::OR) 4167 return SDValue(); 4168 4169 // We found an inner OR, so our operands are the operands of the inner OR 4170 // and the other operand of the outer OR. 4171 SDValue A = Op0.getOperand(0); 4172 SDValue B = Op0.getOperand(1); 4173 SDValue C = Op1; 4174 4175 auto Match1 = matchSHFLPat(A); 4176 auto Match2 = matchSHFLPat(B); 4177 4178 // If neither matched, we failed. 4179 if (!Match1 && !Match2) 4180 return SDValue(); 4181 4182 // We had at least one match. if one failed, try the remaining C operand. 
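// (C may have been one half of the shifted pair; whatever is left over after
// the swap must then be the masking AND checked further below.)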
4183 if (!Match1) { 4184 std::swap(A, C); 4185 Match1 = matchSHFLPat(A); 4186 if (!Match1) 4187 return SDValue(); 4188 } else if (!Match2) { 4189 std::swap(B, C); 4190 Match2 = matchSHFLPat(B); 4191 if (!Match2) 4192 return SDValue(); 4193 } 4194 assert(Match1 && Match2); 4195 4196 // Make sure our matches pair up. 4197 if (!Match1->formsPairWith(*Match2)) 4198 return SDValue(); 4199 4200 // All the remains is to make sure C is an AND with the same input, that masks 4201 // out the bits that are being shuffled. 4202 if (C.getOpcode() != ISD::AND || !isa<ConstantSDNode>(C.getOperand(1)) || 4203 C.getOperand(0) != Match1->Op) 4204 return SDValue(); 4205 4206 uint64_t Mask = C.getConstantOperandVal(1); 4207 4208 static const uint64_t BitmanipMasks[] = { 4209 0x9999999999999999ULL, 0xC3C3C3C3C3C3C3C3ULL, 0xF00FF00FF00FF00FULL, 4210 0xFF0000FFFF0000FFULL, 0xFFFF00000000FFFFULL, 4211 }; 4212 4213 unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32; 4214 unsigned MaskIdx = Log2_32(Match1->ShAmt); 4215 uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width); 4216 4217 if (Mask != ExpMask) 4218 return SDValue(); 4219 4220 SDLoc DL(Op); 4221 return DAG.getNode( 4222 RISCVISD::SHFLI, DL, VT, Match1->Op, 4223 DAG.getTargetConstant(Match1->ShAmt, DL, Subtarget.getXLenVT())); 4224 } 4225 4226 // Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is 4227 // non-zero, and to x when it is. Any repeated GREVI stage undoes itself. 4228 // Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). Repeated stage does 4229 // not undo itself, but they are redundant. 4230 static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) { 4231 unsigned ShAmt1 = N->getConstantOperandVal(1); 4232 SDValue Src = N->getOperand(0); 4233 4234 if (Src.getOpcode() != N->getOpcode()) 4235 return SDValue(); 4236 4237 unsigned ShAmt2 = Src.getConstantOperandVal(1); 4238 Src = Src.getOperand(0); 4239 4240 unsigned CombinedShAmt; 4241 if (N->getOpcode() == RISCVISD::GORCI || N->getOpcode() == RISCVISD::GORCIW) 4242 CombinedShAmt = ShAmt1 | ShAmt2; 4243 else 4244 CombinedShAmt = ShAmt1 ^ ShAmt2; 4245 4246 if (CombinedShAmt == 0) 4247 return Src; 4248 4249 SDLoc DL(N); 4250 return DAG.getNode(N->getOpcode(), DL, N->getValueType(0), Src, 4251 DAG.getTargetConstant(CombinedShAmt, DL, 4252 N->getOperand(1).getValueType())); 4253 } 4254 4255 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, 4256 DAGCombinerInfo &DCI) const { 4257 SelectionDAG &DAG = DCI.DAG; 4258 4259 switch (N->getOpcode()) { 4260 default: 4261 break; 4262 case RISCVISD::SplitF64: { 4263 SDValue Op0 = N->getOperand(0); 4264 // If the input to SplitF64 is just BuildPairF64 then the operation is 4265 // redundant. Instead, use BuildPairF64's operands directly. 4266 if (Op0->getOpcode() == RISCVISD::BuildPairF64) 4267 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1)); 4268 4269 SDLoc DL(N); 4270 4271 // It's cheaper to materialise two 32-bit integers than to load a double 4272 // from the constant pool and transfer it to integer registers through the 4273 // stack. 4274 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) { 4275 APInt V = C->getValueAPF().bitcastToAPInt(); 4276 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32); 4277 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32); 4278 return DCI.CombineTo(N, Lo, Hi); 4279 } 4280 4281 // This is a target-specific version of a DAGCombine performed in 4282 // DAGCombiner::visitBITCAST. 
It performs the equivalent of: 4283 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 4284 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 4285 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || 4286 !Op0.getNode()->hasOneUse()) 4287 break; 4288 SDValue NewSplitF64 = 4289 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), 4290 Op0.getOperand(0)); 4291 SDValue Lo = NewSplitF64.getValue(0); 4292 SDValue Hi = NewSplitF64.getValue(1); 4293 APInt SignBit = APInt::getSignMask(32); 4294 if (Op0.getOpcode() == ISD::FNEG) { 4295 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi, 4296 DAG.getConstant(SignBit, DL, MVT::i32)); 4297 return DCI.CombineTo(N, Lo, NewHi); 4298 } 4299 assert(Op0.getOpcode() == ISD::FABS); 4300 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi, 4301 DAG.getConstant(~SignBit, DL, MVT::i32)); 4302 return DCI.CombineTo(N, Lo, NewHi); 4303 } 4304 case RISCVISD::SLLW: 4305 case RISCVISD::SRAW: 4306 case RISCVISD::SRLW: 4307 case RISCVISD::ROLW: 4308 case RISCVISD::RORW: { 4309 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read. 4310 SDValue LHS = N->getOperand(0); 4311 SDValue RHS = N->getOperand(1); 4312 APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32); 4313 APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5); 4314 if (SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI) || 4315 SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)) { 4316 if (N->getOpcode() != ISD::DELETED_NODE) 4317 DCI.AddToWorklist(N); 4318 return SDValue(N, 0); 4319 } 4320 break; 4321 } 4322 case RISCVISD::FSL: 4323 case RISCVISD::FSR: { 4324 // Only the lower log2(Bitwidth)+1 bits of the the shift amount are read. 4325 SDValue ShAmt = N->getOperand(2); 4326 unsigned BitWidth = ShAmt.getValueSizeInBits(); 4327 assert(isPowerOf2_32(BitWidth) && "Unexpected bit width"); 4328 APInt ShAmtMask(BitWidth, (BitWidth * 2) - 1); 4329 if (SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) { 4330 if (N->getOpcode() != ISD::DELETED_NODE) 4331 DCI.AddToWorklist(N); 4332 return SDValue(N, 0); 4333 } 4334 break; 4335 } 4336 case RISCVISD::FSLW: 4337 case RISCVISD::FSRW: { 4338 // Only the lower 32 bits of Values and lower 6 bits of shift amount are 4339 // read. 4340 SDValue Op0 = N->getOperand(0); 4341 SDValue Op1 = N->getOperand(1); 4342 SDValue ShAmt = N->getOperand(2); 4343 APInt OpMask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32); 4344 APInt ShAmtMask = APInt::getLowBitsSet(ShAmt.getValueSizeInBits(), 6); 4345 if (SimplifyDemandedBits(Op0, OpMask, DCI) || 4346 SimplifyDemandedBits(Op1, OpMask, DCI) || 4347 SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) { 4348 if (N->getOpcode() != ISD::DELETED_NODE) 4349 DCI.AddToWorklist(N); 4350 return SDValue(N, 0); 4351 } 4352 break; 4353 } 4354 case RISCVISD::GREVIW: 4355 case RISCVISD::GORCIW: { 4356 // Only the lower 32 bits of the first operand are read 4357 SDValue Op0 = N->getOperand(0); 4358 APInt Mask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32); 4359 if (SimplifyDemandedBits(Op0, Mask, DCI)) { 4360 if (N->getOpcode() != ISD::DELETED_NODE) 4361 DCI.AddToWorklist(N); 4362 return SDValue(N, 0); 4363 } 4364 4365 return combineGREVI_GORCI(N, DCI.DAG); 4366 } 4367 case RISCVISD::FMV_X_ANYEXTW_RV64: { 4368 SDLoc DL(N); 4369 SDValue Op0 = N->getOperand(0); 4370 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the 4371 // conversion is unnecessary and can be replaced with an ANY_EXTEND 4372 // of the FMV_W_X_RV64 operand. 
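// (That operand is already i64 with don't-care upper bits, so it can simply
// be returned.)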
4373 if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) { 4374 assert(Op0.getOperand(0).getValueType() == MVT::i64 && 4375 "Unexpected value type!"); 4376 return Op0.getOperand(0); 4377 } 4378 4379 // This is a target-specific version of a DAGCombine performed in 4380 // DAGCombiner::visitBITCAST. It performs the equivalent of: 4381 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 4382 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 4383 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || 4384 !Op0.getNode()->hasOneUse()) 4385 break; 4386 SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, 4387 Op0.getOperand(0)); 4388 APInt SignBit = APInt::getSignMask(32).sext(64); 4389 if (Op0.getOpcode() == ISD::FNEG) 4390 return DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV, 4391 DAG.getConstant(SignBit, DL, MVT::i64)); 4392 4393 assert(Op0.getOpcode() == ISD::FABS); 4394 return DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV, 4395 DAG.getConstant(~SignBit, DL, MVT::i64)); 4396 } 4397 case RISCVISD::GREVI: 4398 case RISCVISD::GORCI: 4399 return combineGREVI_GORCI(N, DCI.DAG); 4400 case ISD::OR: 4401 if (auto GREV = combineORToGREV(SDValue(N, 0), DCI.DAG, Subtarget)) 4402 return GREV; 4403 if (auto GORC = combineORToGORC(SDValue(N, 0), DCI.DAG, Subtarget)) 4404 return GORC; 4405 if (auto SHFL = combineORToSHFL(SDValue(N, 0), DCI.DAG, Subtarget)) 4406 return SHFL; 4407 break; 4408 case RISCVISD::SELECT_CC: { 4409 // Transform 4410 SDValue LHS = N->getOperand(0); 4411 SDValue RHS = N->getOperand(1); 4412 auto CCVal = static_cast<ISD::CondCode>(N->getConstantOperandVal(2)); 4413 if (!ISD::isIntEqualitySetCC(CCVal)) 4414 break; 4415 4416 // Fold (select_cc (setlt X, Y), 0, ne, trueV, falseV) -> 4417 // (select_cc X, Y, lt, trueV, falseV) 4418 // Sometimes the setcc is introduced after select_cc has been formed. 4419 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) && 4420 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) { 4421 // If we're looking for eq 0 instead of ne 0, we need to invert the 4422 // condition. 4423 bool Invert = CCVal == ISD::SETEQ; 4424 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get(); 4425 if (Invert) 4426 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); 4427 4428 SDLoc DL(N); 4429 RHS = LHS.getOperand(1); 4430 LHS = LHS.getOperand(0); 4431 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG); 4432 4433 SDValue TargetCC = DAG.getConstant(CCVal, DL, Subtarget.getXLenVT()); 4434 return DAG.getNode( 4435 RISCVISD::SELECT_CC, DL, N->getValueType(0), 4436 {LHS, RHS, TargetCC, N->getOperand(3), N->getOperand(4)}); 4437 } 4438 4439 // Fold (select_cc (xor X, Y), 0, eq/ne, trueV, falseV) -> 4440 // (select_cc X, Y, eq/ne, trueV, falseV) 4441 if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) 4442 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), N->getValueType(0), 4443 {LHS.getOperand(0), LHS.getOperand(1), 4444 N->getOperand(2), N->getOperand(3), 4445 N->getOperand(4)}); 4446 // (select_cc X, 1, setne, trueV, falseV) -> 4447 // (select_cc X, 0, seteq, trueV, falseV) if we can prove X is 0/1. 4448 // This can occur when legalizing some floating point comparisons. 
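// Reasoning sketch (illustrative, not from the original source): when X is
// known to be 0 or 1, "X != 1" holds exactly when "X == 0" (and "X == 1"
// exactly when "X != 0"), so the comparison against 1 can be rewritten as a
// comparison against 0, whose RHS can typically come from x0 instead of a
// materialised constant.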
4449 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1); 4450 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) { 4451 SDLoc DL(N); 4452 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); 4453 SDValue TargetCC = DAG.getConstant(CCVal, DL, Subtarget.getXLenVT()); 4454 RHS = DAG.getConstant(0, DL, LHS.getValueType()); 4455 return DAG.getNode( 4456 RISCVISD::SELECT_CC, DL, N->getValueType(0), 4457 {LHS, RHS, TargetCC, N->getOperand(3), N->getOperand(4)}); 4458 } 4459 4460 break; 4461 } 4462 case RISCVISD::BR_CC: { 4463 SDValue LHS = N->getOperand(1); 4464 SDValue RHS = N->getOperand(2); 4465 ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(3))->get(); 4466 if (!ISD::isIntEqualitySetCC(CCVal)) 4467 break; 4468 4469 // Fold (br_cc (setlt X, Y), 0, ne, dest) -> 4470 // (br_cc X, Y, lt, dest) 4471 // Sometimes the setcc is introduced after br_cc has been formed. 4472 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) && 4473 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) { 4474 // If we're looking for eq 0 instead of ne 0, we need to invert the 4475 // condition. 4476 bool Invert = CCVal == ISD::SETEQ; 4477 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get(); 4478 if (Invert) 4479 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); 4480 4481 SDLoc DL(N); 4482 RHS = LHS.getOperand(1); 4483 LHS = LHS.getOperand(0); 4484 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG); 4485 4486 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0), 4487 N->getOperand(0), LHS, RHS, DAG.getCondCode(CCVal), 4488 N->getOperand(4)); 4489 } 4490 4491 // Fold (br_cc (xor X, Y), 0, eq/ne, dest) -> 4492 // (br_cc X, Y, eq/ne, dest) 4493 if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) 4494 return DAG.getNode(RISCVISD::BR_CC, SDLoc(N), N->getValueType(0), 4495 N->getOperand(0), LHS.getOperand(0), LHS.getOperand(1), 4496 N->getOperand(3), N->getOperand(4)); 4497 4498 // (br_cc X, 1, setne, dest) -> 4499 // (br_cc X, 0, seteq, dest) if we can prove X is 0/1. 4500 // This can occur when legalizing some floating point comparisons. 4501 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1); 4502 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) { 4503 SDLoc DL(N); 4504 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); 4505 SDValue TargetCC = DAG.getCondCode(CCVal); 4506 RHS = DAG.getConstant(0, DL, LHS.getValueType()); 4507 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0), 4508 N->getOperand(0), LHS, RHS, TargetCC, 4509 N->getOperand(4)); 4510 } 4511 break; 4512 } 4513 case ISD::FCOPYSIGN: { 4514 EVT VT = N->getValueType(0); 4515 if (!VT.isVector()) 4516 break; 4517 // There is a form of VFSGNJ which injects the negated sign of its second 4518 // operand. Try to bubble any FNEG up after the extend/round to produce 4519 // this optimized pattern. Avoid modifying FP_ROUND cases where 4520 // TRUNC=1. 4521 SDValue In2 = N->getOperand(1); 4522 // Avoid cases where the extend/round has multiple uses, as duplicating 4523 // those is typically more expensive than removing a fneg.
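// Hypothetical illustration of the rewrite (not from the original source):
//   (fcopysign x, (fp_extend (fneg y))) -> (fcopysign x, (fneg (fp_extend y)))
// leaving the sign operand rooted at an FNEG so that a negated
// sign-injection (vfsgnjn-style) pattern can pick it up.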
4524 if (!In2.hasOneUse()) 4525 break; 4526 if (In2.getOpcode() != ISD::FP_EXTEND && 4527 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0)) 4528 break; 4529 In2 = In2.getOperand(0); 4530 if (In2.getOpcode() != ISD::FNEG) 4531 break; 4532 SDLoc DL(N); 4533 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT); 4534 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0), 4535 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound)); 4536 } 4537 case ISD::MGATHER: 4538 case ISD::MSCATTER: { 4539 if (!DCI.isBeforeLegalize()) 4540 break; 4541 MaskedGatherScatterSDNode *MGSN = cast<MaskedGatherScatterSDNode>(N); 4542 SDValue Index = MGSN->getIndex(); 4543 EVT IndexVT = Index.getValueType(); 4544 MVT XLenVT = Subtarget.getXLenVT(); 4545 // RISCV indexed loads only support the "unsigned unscaled" addressing 4546 // mode, so anything else must be manually legalized. 4547 bool NeedsIdxLegalization = MGSN->isIndexScaled() || 4548 (MGSN->isIndexSigned() && 4549 IndexVT.getVectorElementType().bitsLT(XLenVT)); 4550 if (!NeedsIdxLegalization) 4551 break; 4552 4553 SDLoc DL(N); 4554 4555 // Any index legalization should first promote to XLenVT, so we don't lose 4556 // bits when scaling. This may create an illegal index type so we let 4557 // LLVM's legalization take care of the splitting. 4558 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) { 4559 IndexVT = IndexVT.changeVectorElementType(XLenVT); 4560 Index = DAG.getNode(MGSN->isIndexSigned() ? ISD::SIGN_EXTEND 4561 : ISD::ZERO_EXTEND, 4562 DL, IndexVT, Index); 4563 } 4564 4565 unsigned Scale = N->getConstantOperandVal(5); 4566 if (MGSN->isIndexScaled() && Scale != 1) { 4567 // Manually scale the indices by the element size. 4568 // TODO: Sanitize the scale operand here? 4569 assert(isPowerOf2_32(Scale) && "Expecting power-of-two types"); 4570 SDValue SplatScale = DAG.getConstant(Log2_32(Scale), DL, IndexVT); 4571 Index = DAG.getNode(ISD::SHL, DL, IndexVT, Index, SplatScale); 4572 } 4573 4574 ISD::MemIndexType NewIndexTy = ISD::UNSIGNED_UNSCALED; 4575 if (const auto *MGN = dyn_cast<MaskedGatherSDNode>(N)) { 4576 return DAG.getMaskedGather( 4577 N->getVTList(), MGSN->getMemoryVT(), DL, 4578 {MGSN->getChain(), MGN->getPassThru(), MGSN->getMask(), 4579 MGSN->getBasePtr(), Index, MGN->getScale()}, 4580 MGN->getMemOperand(), NewIndexTy, MGN->getExtensionType()); 4581 } 4582 const auto *MSN = cast<MaskedScatterSDNode>(N); 4583 return DAG.getMaskedScatter( 4584 N->getVTList(), MGSN->getMemoryVT(), DL, 4585 {MGSN->getChain(), MSN->getValue(), MGSN->getMask(), MGSN->getBasePtr(), 4586 Index, MGSN->getScale()}, 4587 MGSN->getMemOperand(), NewIndexTy, MSN->isTruncatingStore()); 4588 } 4589 } 4590 4591 return SDValue(); 4592 } 4593 4594 bool RISCVTargetLowering::isDesirableToCommuteWithShift( 4595 const SDNode *N, CombineLevel Level) const { 4596 // The following folds are only desirable if `(OP _, c1 << c2)` can be 4597 // materialised in fewer instructions than `(OP _, c1)`: 4598 // 4599 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) 4600 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2) 4601 SDValue N0 = N->getOperand(0); 4602 EVT Ty = N0.getValueType(); 4603 if (Ty.isScalarInteger() && 4604 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) { 4605 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 4606 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)); 4607 if (C1 && C2) { 4608 const APInt &C1Int = C1->getAPIntValue(); 4609 APInt ShiftedC1Int = C1Int << C2->getAPIntValue(); 4610 4611 // We 
can materialise `c1 << c2` into an add immediate, so it's "free", 4612 // and the combine should happen, to potentially allow further combines 4613 // later. 4614 if (ShiftedC1Int.getMinSignedBits() <= 64 && 4615 isLegalAddImmediate(ShiftedC1Int.getSExtValue())) 4616 return true; 4617 4618 // We can materialise `c1` in an add immediate, so it's "free", and the 4619 // combine should be prevented. 4620 if (C1Int.getMinSignedBits() <= 64 && 4621 isLegalAddImmediate(C1Int.getSExtValue())) 4622 return false; 4623 4624 // Neither constant will fit into an immediate, so find materialisation 4625 // costs. 4626 int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), 4627 Subtarget.is64Bit()); 4628 int ShiftedC1Cost = RISCVMatInt::getIntMatCost( 4629 ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit()); 4630 4631 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the 4632 // combine should be prevented. 4633 if (C1Cost < ShiftedC1Cost) 4634 return false; 4635 } 4636 } 4637 return true; 4638 } 4639 4640 bool RISCVTargetLowering::targetShrinkDemandedConstant( 4641 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, 4642 TargetLoweringOpt &TLO) const { 4643 // Delay this optimization as late as possible. 4644 if (!TLO.LegalOps) 4645 return false; 4646 4647 EVT VT = Op.getValueType(); 4648 if (VT.isVector()) 4649 return false; 4650 4651 // Only handle AND for now. 4652 if (Op.getOpcode() != ISD::AND) 4653 return false; 4654 4655 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); 4656 if (!C) 4657 return false; 4658 4659 const APInt &Mask = C->getAPIntValue(); 4660 4661 // Clear all non-demanded bits initially. 4662 APInt ShrunkMask = Mask & DemandedBits; 4663 4664 // If the shrunk mask fits in sign extended 12 bits, let the target 4665 // independent code apply it. 4666 if (ShrunkMask.isSignedIntN(12)) 4667 return false; 4668 4669 // Try to make a smaller immediate by setting undemanded bits. 4670 4671 // We need to be able to make a negative number through a combination of mask 4672 // and undemanded bits. 4673 APInt ExpandedMask = Mask | ~DemandedBits; 4674 if (!ExpandedMask.isNegative()) 4675 return false; 4676 4677 // What is the fewest number of bits we need to represent the negative number. 4678 unsigned MinSignedBits = ExpandedMask.getMinSignedBits(); 4679 4680 // Try to make a 12 bit negative immediate. If that fails try to make a 32 4681 // bit negative immediate unless the shrunk immediate already fits in 32 bits. 4682 APInt NewMask = ShrunkMask; 4683 if (MinSignedBits <= 12) 4684 NewMask.setBitsFrom(11); 4685 else if (MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32)) 4686 NewMask.setBitsFrom(31); 4687 else 4688 return false; 4689 4690 // Sanity check that our new mask is a subset of the demanded mask. 4691 assert(NewMask.isSubsetOf(ExpandedMask)); 4692 4693 // If we aren't changing the mask, just return true to keep it and prevent 4694 // the caller from optimizing. 4695 if (NewMask == Mask) 4696 return true; 4697 4698 // Replace the constant with the new mask. 
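// Worked example (hypothetical, assuming RV64): for (and X, 0xffffffff) with
// only the low 16 bits demanded, ShrunkMask is 0xffff (not a simm12), the
// expanded mask is all ones, and NewMask becomes -1 (all ones), which fits an
// ANDI immediate and lets the AND be folded away entirely later.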
4699 SDLoc DL(Op); 4700 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT); 4701 SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC); 4702 return TLO.CombineTo(Op, NewOp); 4703 } 4704 4705 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, 4706 KnownBits &Known, 4707 const APInt &DemandedElts, 4708 const SelectionDAG &DAG, 4709 unsigned Depth) const { 4710 unsigned BitWidth = Known.getBitWidth(); 4711 unsigned Opc = Op.getOpcode(); 4712 assert((Opc >= ISD::BUILTIN_OP_END || 4713 Opc == ISD::INTRINSIC_WO_CHAIN || 4714 Opc == ISD::INTRINSIC_W_CHAIN || 4715 Opc == ISD::INTRINSIC_VOID) && 4716 "Should use MaskedValueIsZero if you don't know whether Op" 4717 " is a target node!"); 4718 4719 Known.resetAll(); 4720 switch (Opc) { 4721 default: break; 4722 case RISCVISD::SELECT_CC: { 4723 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1); 4724 // If we don't know any bits, early out. 4725 if (Known.isUnknown()) 4726 break; 4727 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1); 4728 4729 // Only known if known in both the LHS and RHS. 4730 Known = KnownBits::commonBits(Known, Known2); 4731 break; 4732 } 4733 case RISCVISD::REMUW: { 4734 KnownBits Known2; 4735 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4736 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 4737 // We only care about the lower 32 bits. 4738 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32)); 4739 // Restore the original width by sign extending. 4740 Known = Known.sext(BitWidth); 4741 break; 4742 } 4743 case RISCVISD::DIVUW: { 4744 KnownBits Known2; 4745 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4746 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 4747 // We only care about the lower 32 bits. 4748 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32)); 4749 // Restore the original width by sign extending. 4750 Known = Known.sext(BitWidth); 4751 break; 4752 } 4753 case RISCVISD::READ_VLENB: 4754 // We assume VLENB is at least 8 bytes. 4755 // FIXME: The 1.0 draft spec defines minimum VLEN as 128 bits. 4756 Known.Zero.setLowBits(3); 4757 break; 4758 } 4759 } 4760 4761 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( 4762 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, 4763 unsigned Depth) const { 4764 switch (Op.getOpcode()) { 4765 default: 4766 break; 4767 case RISCVISD::SLLW: 4768 case RISCVISD::SRAW: 4769 case RISCVISD::SRLW: 4770 case RISCVISD::DIVW: 4771 case RISCVISD::DIVUW: 4772 case RISCVISD::REMUW: 4773 case RISCVISD::ROLW: 4774 case RISCVISD::RORW: 4775 case RISCVISD::GREVIW: 4776 case RISCVISD::GORCIW: 4777 case RISCVISD::FSLW: 4778 case RISCVISD::FSRW: 4779 // TODO: As the result is sign-extended, this is conservatively correct. A 4780 // more precise answer could be calculated for SRAW depending on known 4781 // bits in the shift amount. 4782 return 33; 4783 case RISCVISD::SHFLI: { 4784 // There is no SHFLIW, but a i64 SHFLI with bit 4 of the control word 4785 // cleared doesn't affect bit 31. The upper 32 bits will be shuffled, but 4786 // will stay within the upper 32 bits. If there were more than 32 sign bits 4787 // before there will be at least 33 sign bits after. 
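// Illustrative case (hypothetical): for (SHFLI x, 8) on i64, bit 4 (0x10) of
// the control is clear, so the upper and lower 32-bit halves are shuffled
// independently; if x has, say, 40 sign bits, the result still has at least 33.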
4788 if (Op.getValueType() == MVT::i64 && 4789 (Op.getConstantOperandVal(1) & 0x10) == 0) { 4790 unsigned Tmp = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1); 4791 if (Tmp > 32) 4792 return 33; 4793 } 4794 break; 4795 } 4796 case RISCVISD::VMV_X_S: 4797 // The number of sign bits of the scalar result is computed by obtaining the 4798 // element type of the input vector operand, subtracting its width from the 4799 // XLEN, and then adding one (sign bit within the element type). If the 4800 // element type is wider than XLen, the least-significant XLEN bits are 4801 // taken. 4802 if (Op.getOperand(0).getScalarValueSizeInBits() > Subtarget.getXLen()) 4803 return 1; 4804 return Subtarget.getXLen() - Op.getOperand(0).getScalarValueSizeInBits() + 1; 4805 } 4806 4807 return 1; 4808 } 4809 4810 static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI, 4811 MachineBasicBlock *BB) { 4812 assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction"); 4813 4814 // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves. 4815 // Should the count have wrapped while it was being read, we need to try 4816 // again. 4817 // ... 4818 // read: 4819 // rdcycleh x3 # load high word of cycle 4820 // rdcycle x2 # load low word of cycle 4821 // rdcycleh x4 # load high word of cycle 4822 // bne x3, x4, read # check if high word reads match, otherwise try again 4823 // ... 4824 4825 MachineFunction &MF = *BB->getParent(); 4826 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 4827 MachineFunction::iterator It = ++BB->getIterator(); 4828 4829 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB); 4830 MF.insert(It, LoopMBB); 4831 4832 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB); 4833 MF.insert(It, DoneMBB); 4834 4835 // Transfer the remainder of BB and its successor edges to DoneMBB. 
4836 DoneMBB->splice(DoneMBB->begin(), BB, 4837 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 4838 DoneMBB->transferSuccessorsAndUpdatePHIs(BB); 4839 4840 BB->addSuccessor(LoopMBB); 4841 4842 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 4843 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 4844 Register LoReg = MI.getOperand(0).getReg(); 4845 Register HiReg = MI.getOperand(1).getReg(); 4846 DebugLoc DL = MI.getDebugLoc(); 4847 4848 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); 4849 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg) 4850 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) 4851 .addReg(RISCV::X0); 4852 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg) 4853 .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding) 4854 .addReg(RISCV::X0); 4855 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg) 4856 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) 4857 .addReg(RISCV::X0); 4858 4859 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE)) 4860 .addReg(HiReg) 4861 .addReg(ReadAgainReg) 4862 .addMBB(LoopMBB); 4863 4864 LoopMBB->addSuccessor(LoopMBB); 4865 LoopMBB->addSuccessor(DoneMBB); 4866 4867 MI.eraseFromParent(); 4868 4869 return DoneMBB; 4870 } 4871 4872 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI, 4873 MachineBasicBlock *BB) { 4874 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction"); 4875 4876 MachineFunction &MF = *BB->getParent(); 4877 DebugLoc DL = MI.getDebugLoc(); 4878 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 4879 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 4880 Register LoReg = MI.getOperand(0).getReg(); 4881 Register HiReg = MI.getOperand(1).getReg(); 4882 Register SrcReg = MI.getOperand(2).getReg(); 4883 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass; 4884 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); 4885 4886 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC, 4887 RI); 4888 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 4889 MachineMemOperand *MMOLo = 4890 MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8)); 4891 MachineMemOperand *MMOHi = MF.getMachineMemOperand( 4892 MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8)); 4893 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg) 4894 .addFrameIndex(FI) 4895 .addImm(0) 4896 .addMemOperand(MMOLo); 4897 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg) 4898 .addFrameIndex(FI) 4899 .addImm(4) 4900 .addMemOperand(MMOHi); 4901 MI.eraseFromParent(); // The pseudo instruction is gone now. 
4902 return BB; 4903 } 4904 4905 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI, 4906 MachineBasicBlock *BB) { 4907 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo && 4908 "Unexpected instruction"); 4909 4910 MachineFunction &MF = *BB->getParent(); 4911 DebugLoc DL = MI.getDebugLoc(); 4912 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 4913 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 4914 Register DstReg = MI.getOperand(0).getReg(); 4915 Register LoReg = MI.getOperand(1).getReg(); 4916 Register HiReg = MI.getOperand(2).getReg(); 4917 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass; 4918 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); 4919 4920 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 4921 MachineMemOperand *MMOLo = 4922 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8)); 4923 MachineMemOperand *MMOHi = MF.getMachineMemOperand( 4924 MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8)); 4925 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 4926 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill())) 4927 .addFrameIndex(FI) 4928 .addImm(0) 4929 .addMemOperand(MMOLo); 4930 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 4931 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill())) 4932 .addFrameIndex(FI) 4933 .addImm(4) 4934 .addMemOperand(MMOHi); 4935 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI); 4936 MI.eraseFromParent(); // The pseudo instruction is gone now. 4937 return BB; 4938 } 4939 4940 static bool isSelectPseudo(MachineInstr &MI) { 4941 switch (MI.getOpcode()) { 4942 default: 4943 return false; 4944 case RISCV::Select_GPR_Using_CC_GPR: 4945 case RISCV::Select_FPR16_Using_CC_GPR: 4946 case RISCV::Select_FPR32_Using_CC_GPR: 4947 case RISCV::Select_FPR64_Using_CC_GPR: 4948 return true; 4949 } 4950 } 4951 4952 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI, 4953 MachineBasicBlock *BB) { 4954 // To "insert" Select_* instructions, we actually have to insert the triangle 4955 // control-flow pattern. The incoming instructions know the destination vreg 4956 // to set, the condition code register to branch on, the true/false values to 4957 // select between, and the condcode to use to select the appropriate branch. 4958 // 4959 // We produce the following control flow: 4960 // HeadMBB 4961 // | \ 4962 // | IfFalseMBB 4963 // | / 4964 // TailMBB 4965 // 4966 // When we find a sequence of selects we attempt to optimize their emission 4967 // by sharing the control flow. Currently we only handle cases where we have 4968 // multiple selects with the exact same condition (same LHS, RHS and CC). 4969 // The selects may be interleaved with other instructions if the other 4970 // instructions meet some requirements we deem safe: 4971 // - They are debug instructions. Otherwise, 4972 // - They do not have side-effects, do not access memory and their inputs do 4973 // not depend on the results of the select pseudo-instructions. 4974 // The TrueV/FalseV operands of the selects cannot depend on the result of 4975 // previous selects in the sequence. 4976 // These conditions could be further relaxed. See the X86 target for a 4977 // related approach and more information. 
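// Hypothetical MIR-level sketch of the shared lowering (not from the original
// source): two selects with the same condition,
//   %a = Select_GPR_Using_CC_GPR %x, %y, cc, %t0, %f0
//   %b = Select_GPR_Using_CC_GPR %x, %y, cc, %t1, %f1
// become one conditional branch in HeadMBB plus two PHIs in TailMBB choosing
// between the HeadMBB (true) and IfFalseMBB (false) values.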
4978 Register LHS = MI.getOperand(1).getReg(); 4979 Register RHS = MI.getOperand(2).getReg(); 4980 auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm()); 4981 4982 SmallVector<MachineInstr *, 4> SelectDebugValues; 4983 SmallSet<Register, 4> SelectDests; 4984 SelectDests.insert(MI.getOperand(0).getReg()); 4985 4986 MachineInstr *LastSelectPseudo = &MI; 4987 4988 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI); 4989 SequenceMBBI != E; ++SequenceMBBI) { 4990 if (SequenceMBBI->isDebugInstr()) 4991 continue; 4992 else if (isSelectPseudo(*SequenceMBBI)) { 4993 if (SequenceMBBI->getOperand(1).getReg() != LHS || 4994 SequenceMBBI->getOperand(2).getReg() != RHS || 4995 SequenceMBBI->getOperand(3).getImm() != CC || 4996 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) || 4997 SelectDests.count(SequenceMBBI->getOperand(5).getReg())) 4998 break; 4999 LastSelectPseudo = &*SequenceMBBI; 5000 SequenceMBBI->collectDebugValues(SelectDebugValues); 5001 SelectDests.insert(SequenceMBBI->getOperand(0).getReg()); 5002 } else { 5003 if (SequenceMBBI->hasUnmodeledSideEffects() || 5004 SequenceMBBI->mayLoadOrStore()) 5005 break; 5006 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) { 5007 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg()); 5008 })) 5009 break; 5010 } 5011 } 5012 5013 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); 5014 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 5015 DebugLoc DL = MI.getDebugLoc(); 5016 MachineFunction::iterator I = ++BB->getIterator(); 5017 5018 MachineBasicBlock *HeadMBB = BB; 5019 MachineFunction *F = BB->getParent(); 5020 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB); 5021 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB); 5022 5023 F->insert(I, IfFalseMBB); 5024 F->insert(I, TailMBB); 5025 5026 // Transfer debug instructions associated with the selects to TailMBB. 5027 for (MachineInstr *DebugInstr : SelectDebugValues) { 5028 TailMBB->push_back(DebugInstr->removeFromParent()); 5029 } 5030 5031 // Move all instructions after the sequence to TailMBB. 5032 TailMBB->splice(TailMBB->end(), HeadMBB, 5033 std::next(LastSelectPseudo->getIterator()), HeadMBB->end()); 5034 // Update machine-CFG edges by transferring all successors of the current 5035 // block to the new block which will contain the Phi nodes for the selects. 5036 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB); 5037 // Set the successors for HeadMBB. 5038 HeadMBB->addSuccessor(IfFalseMBB); 5039 HeadMBB->addSuccessor(TailMBB); 5040 5041 // Insert appropriate branch. 5042 unsigned Opcode = getBranchOpcodeForIntCondCode(CC); 5043 5044 BuildMI(HeadMBB, DL, TII.get(Opcode)) 5045 .addReg(LHS) 5046 .addReg(RHS) 5047 .addMBB(TailMBB); 5048 5049 // IfFalseMBB just falls through to TailMBB. 5050 IfFalseMBB->addSuccessor(TailMBB); 5051 5052 // Create PHIs for all of the select pseudo-instructions. 
5053 auto SelectMBBI = MI.getIterator(); 5054 auto SelectEnd = std::next(LastSelectPseudo->getIterator()); 5055 auto InsertionPoint = TailMBB->begin(); 5056 while (SelectMBBI != SelectEnd) { 5057 auto Next = std::next(SelectMBBI); 5058 if (isSelectPseudo(*SelectMBBI)) { 5059 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ] 5060 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(), 5061 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg()) 5062 .addReg(SelectMBBI->getOperand(4).getReg()) 5063 .addMBB(HeadMBB) 5064 .addReg(SelectMBBI->getOperand(5).getReg()) 5065 .addMBB(IfFalseMBB); 5066 SelectMBBI->eraseFromParent(); 5067 } 5068 SelectMBBI = Next; 5069 } 5070 5071 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs); 5072 return TailMBB; 5073 } 5074 5075 static MachineInstr *elideCopies(MachineInstr *MI, 5076 const MachineRegisterInfo &MRI) { 5077 while (true) { 5078 if (!MI->isFullCopy()) 5079 return MI; 5080 if (!Register::isVirtualRegister(MI->getOperand(1).getReg())) 5081 return nullptr; 5082 MI = MRI.getVRegDef(MI->getOperand(1).getReg()); 5083 if (!MI) 5084 return nullptr; 5085 } 5086 } 5087 5088 static MachineBasicBlock *addVSetVL(MachineInstr &MI, MachineBasicBlock *BB, 5089 int VLIndex, unsigned SEWIndex, 5090 RISCVVLMUL VLMul, bool ForceTailAgnostic) { 5091 MachineFunction &MF = *BB->getParent(); 5092 DebugLoc DL = MI.getDebugLoc(); 5093 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 5094 5095 unsigned SEW = MI.getOperand(SEWIndex).getImm(); 5096 assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW"); 5097 RISCVVSEW ElementWidth = static_cast<RISCVVSEW>(Log2_32(SEW / 8)); 5098 5099 MachineRegisterInfo &MRI = MF.getRegInfo(); 5100 5101 auto BuildVSETVLI = [&]() { 5102 if (VLIndex >= 0) { 5103 Register DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); 5104 Register VLReg = MI.getOperand(VLIndex).getReg(); 5105 5106 // VL might be a compile time constant, but isel would have to put it 5107 // in a register. See if VL comes from an ADDI X0, imm. 5108 if (VLReg.isVirtual()) { 5109 MachineInstr *Def = MRI.getVRegDef(VLReg); 5110 if (Def && Def->getOpcode() == RISCV::ADDI && 5111 Def->getOperand(1).getReg() == RISCV::X0 && 5112 Def->getOperand(2).isImm()) { 5113 uint64_t Imm = Def->getOperand(2).getImm(); 5114 // VSETIVLI allows a 5-bit zero extended immediate. 5115 if (isUInt<5>(Imm)) 5116 return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETIVLI)) 5117 .addReg(DestReg, RegState::Define | RegState::Dead) 5118 .addImm(Imm); 5119 } 5120 } 5121 5122 return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI)) 5123 .addReg(DestReg, RegState::Define | RegState::Dead) 5124 .addReg(VLReg); 5125 } 5126 5127 // With no VL operator in the pseudo, do not modify VL (rd = X0, rs1 = X0). 5128 return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI)) 5129 .addReg(RISCV::X0, RegState::Define | RegState::Dead) 5130 .addReg(RISCV::X0, RegState::Kill); 5131 }; 5132 5133 MachineInstrBuilder MIB = BuildVSETVLI(); 5134 5135 // Default to tail agnostic unless the destination is tied to a source. In 5136 // that case the user would have some control over the tail values. The tail 5137 // policy is also ignored on instructions that only update element 0 like 5138 // vmv.s.x or reductions so use agnostic there to match the common case. 5139 // FIXME: This is conservatively correct, but we might want to detect that 5140 // the input is undefined. 
5141 bool TailAgnostic = true; 5142 unsigned UseOpIdx; 5143 if (!ForceTailAgnostic && MI.isRegTiedToUseOperand(0, &UseOpIdx)) { 5144 TailAgnostic = false; 5145 // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic. 5146 const MachineOperand &UseMO = MI.getOperand(UseOpIdx); 5147 MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg()); 5148 if (UseMI) { 5149 UseMI = elideCopies(UseMI, MRI); 5150 if (UseMI && UseMI->isImplicitDef()) 5151 TailAgnostic = true; 5152 } 5153 } 5154 5155 // For simplicity we reuse the vtype representation here. 5156 MIB.addImm(RISCVVType::encodeVTYPE(VLMul, ElementWidth, 5157 /*TailAgnostic*/ TailAgnostic, 5158 /*MaskAgnostic*/ false)); 5159 5160 // Remove (now) redundant operands from pseudo 5161 if (VLIndex >= 0) { 5162 MI.getOperand(VLIndex).setReg(RISCV::NoRegister); 5163 MI.getOperand(VLIndex).setIsKill(false); 5164 } 5165 5166 return BB; 5167 } 5168 5169 MachineBasicBlock * 5170 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, 5171 MachineBasicBlock *BB) const { 5172 uint64_t TSFlags = MI.getDesc().TSFlags; 5173 5174 if (TSFlags & RISCVII::HasSEWOpMask) { 5175 unsigned NumOperands = MI.getNumExplicitOperands(); 5176 int VLIndex = (TSFlags & RISCVII::HasVLOpMask) ? NumOperands - 2 : -1; 5177 unsigned SEWIndex = NumOperands - 1; 5178 bool ForceTailAgnostic = TSFlags & RISCVII::ForceTailAgnosticMask; 5179 5180 RISCVVLMUL VLMul = static_cast<RISCVVLMUL>((TSFlags & RISCVII::VLMulMask) >> 5181 RISCVII::VLMulShift); 5182 return addVSetVL(MI, BB, VLIndex, SEWIndex, VLMul, ForceTailAgnostic); 5183 } 5184 5185 switch (MI.getOpcode()) { 5186 default: 5187 llvm_unreachable("Unexpected instr type to insert"); 5188 case RISCV::ReadCycleWide: 5189 assert(!Subtarget.is64Bit() && 5190 "ReadCycleWrite is only to be used on riscv32"); 5191 return emitReadCycleWidePseudo(MI, BB); 5192 case RISCV::Select_GPR_Using_CC_GPR: 5193 case RISCV::Select_FPR16_Using_CC_GPR: 5194 case RISCV::Select_FPR32_Using_CC_GPR: 5195 case RISCV::Select_FPR64_Using_CC_GPR: 5196 return emitSelectPseudo(MI, BB); 5197 case RISCV::BuildPairF64Pseudo: 5198 return emitBuildPairF64Pseudo(MI, BB); 5199 case RISCV::SplitF64Pseudo: 5200 return emitSplitF64Pseudo(MI, BB); 5201 } 5202 } 5203 5204 // Calling Convention Implementation. 5205 // The expectations for frontend ABI lowering vary from target to target. 5206 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI 5207 // details, but this is a longer term goal. For now, we simply try to keep the 5208 // role of the frontend as simple and well-defined as possible. The rules can 5209 // be summarised as: 5210 // * Never split up large scalar arguments. We handle them here. 5211 // * If a hardfloat calling convention is being used, and the struct may be 5212 // passed in a pair of registers (fp+fp, int+fp), and both registers are 5213 // available, then pass as two separate arguments. If either the GPRs or FPRs 5214 // are exhausted, then pass according to the rule below. 5215 // * If a struct could never be passed in registers or directly in a stack 5216 // slot (as it is larger than 2*XLEN and the floating point rules don't 5217 // apply), then pass it using a pointer with the byval attribute. 5218 // * If a struct is less than 2*XLEN, then coerce to either a two-element 5219 // word-sized array or a 2*XLEN scalar (depending on alignment). 5220 // * The frontend can determine whether a struct is returned by reference or 5221 // not based on its size and fields. 
If it will be returned by reference, the 5222 // frontend must modify the prototype so a pointer with the sret annotation is 5223 // passed as the first argument. This is not necessary for large scalar 5224 // returns. 5225 // * Struct return values and varargs should be coerced to structs containing 5226 // register-size fields in the same situations they would be for fixed 5227 // arguments. 5228 5229 static const MCPhysReg ArgGPRs[] = { 5230 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, 5231 RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17 5232 }; 5233 static const MCPhysReg ArgFPR16s[] = { 5234 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, 5235 RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H 5236 }; 5237 static const MCPhysReg ArgFPR32s[] = { 5238 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, 5239 RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F 5240 }; 5241 static const MCPhysReg ArgFPR64s[] = { 5242 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, 5243 RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D 5244 }; 5245 // This is an interim calling convention and it may be changed in the future. 5246 static const MCPhysReg ArgVRs[] = { 5247 RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13, 5248 RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19, 5249 RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23}; 5250 static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2, 5251 RISCV::V14M2, RISCV::V16M2, RISCV::V18M2, 5252 RISCV::V20M2, RISCV::V22M2}; 5253 static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4, 5254 RISCV::V20M4}; 5255 static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8}; 5256 5257 // Pass a 2*XLEN argument that has been split into two XLEN values through 5258 // registers or the stack as necessary. 5259 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, 5260 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, 5261 MVT ValVT2, MVT LocVT2, 5262 ISD::ArgFlagsTy ArgFlags2) { 5263 unsigned XLenInBytes = XLen / 8; 5264 if (Register Reg = State.AllocateReg(ArgGPRs)) { 5265 // At least one half can be passed via register. 5266 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg, 5267 VA1.getLocVT(), CCValAssign::Full)); 5268 } else { 5269 // Both halves must be passed on the stack, with proper alignment. 5270 Align StackAlign = 5271 std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign()); 5272 State.addLoc( 5273 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(), 5274 State.AllocateStack(XLenInBytes, StackAlign), 5275 VA1.getLocVT(), CCValAssign::Full)); 5276 State.addLoc(CCValAssign::getMem( 5277 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)), 5278 LocVT2, CCValAssign::Full)); 5279 return false; 5280 } 5281 5282 if (Register Reg = State.AllocateReg(ArgGPRs)) { 5283 // The second half can also be passed via register. 5284 State.addLoc( 5285 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full)); 5286 } else { 5287 // The second half is passed via the stack, without additional alignment. 5288 State.addLoc(CCValAssign::getMem( 5289 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)), 5290 LocVT2, CCValAssign::Full)); 5291 } 5292 5293 return false; 5294 } 5295 5296 // Implements the RISC-V calling convention. Returns true upon failure. 
5297 static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, 5298 MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, 5299 ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, 5300 bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, 5301 Optional<unsigned> FirstMaskArgument) { 5302 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits(); 5303 assert(XLen == 32 || XLen == 64); 5304 MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64; 5305 5306 // Any return value split in to more than two values can't be returned 5307 // directly. Vectors are returned via the available vector registers. 5308 if (!LocVT.isVector() && IsRet && ValNo > 1) 5309 return true; 5310 5311 // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a 5312 // variadic argument, or if no F16/F32 argument registers are available. 5313 bool UseGPRForF16_F32 = true; 5314 // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a 5315 // variadic argument, or if no F64 argument registers are available. 5316 bool UseGPRForF64 = true; 5317 5318 switch (ABI) { 5319 default: 5320 llvm_unreachable("Unexpected ABI"); 5321 case RISCVABI::ABI_ILP32: 5322 case RISCVABI::ABI_LP64: 5323 break; 5324 case RISCVABI::ABI_ILP32F: 5325 case RISCVABI::ABI_LP64F: 5326 UseGPRForF16_F32 = !IsFixed; 5327 break; 5328 case RISCVABI::ABI_ILP32D: 5329 case RISCVABI::ABI_LP64D: 5330 UseGPRForF16_F32 = !IsFixed; 5331 UseGPRForF64 = !IsFixed; 5332 break; 5333 } 5334 5335 // FPR16, FPR32, and FPR64 alias each other. 5336 if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) { 5337 UseGPRForF16_F32 = true; 5338 UseGPRForF64 = true; 5339 } 5340 5341 // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and 5342 // similar local variables rather than directly checking against the target 5343 // ABI. 5344 5345 if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::f32)) { 5346 LocVT = XLenVT; 5347 LocInfo = CCValAssign::BCvt; 5348 } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) { 5349 LocVT = MVT::i64; 5350 LocInfo = CCValAssign::BCvt; 5351 } 5352 5353 // If this is a variadic argument, the RISC-V calling convention requires 5354 // that it is assigned an 'even' or 'aligned' register if it has 8-byte 5355 // alignment (RV32) or 16-byte alignment (RV64). An aligned register should 5356 // be used regardless of whether the original argument was split during 5357 // legalisation or not. The argument will not be passed by registers if the 5358 // original type is larger than 2*XLEN, so the register alignment rule does 5359 // not apply. 5360 unsigned TwoXLenInBytes = (2 * XLen) / 8; 5361 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes && 5362 DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) { 5363 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs); 5364 // Skip 'odd' register if necessary. 5365 if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1) 5366 State.AllocateReg(ArgGPRs); 5367 } 5368 5369 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs(); 5370 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags = 5371 State.getPendingArgFlags(); 5372 5373 assert(PendingLocs.size() == PendingArgFlags.size() && 5374 "PendingLocs and PendingArgFlags out of sync"); 5375 5376 // Handle passing f64 on RV32D with a soft float ABI or when floating point 5377 // registers are exhausted. 
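// Hypothetical illustration for ilp32 on RV32: an f64 argument is split into
// two i32 halves; with two argument GPRs free it is passed in a pair such as
// a0/a1, with only a7 free the low half goes in a7 and the high half on the
// stack, and with no GPRs left the whole value goes on the stack.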
5378 if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) { 5379 assert(!ArgFlags.isSplit() && PendingLocs.empty() && 5380 "Can't lower f64 if it is split"); 5381 // Depending on available argument GPRS, f64 may be passed in a pair of 5382 // GPRs, split between a GPR and the stack, or passed completely on the 5383 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these 5384 // cases. 5385 Register Reg = State.AllocateReg(ArgGPRs); 5386 LocVT = MVT::i32; 5387 if (!Reg) { 5388 unsigned StackOffset = State.AllocateStack(8, Align(8)); 5389 State.addLoc( 5390 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 5391 return false; 5392 } 5393 if (!State.AllocateReg(ArgGPRs)) 5394 State.AllocateStack(4, Align(4)); 5395 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 5396 return false; 5397 } 5398 5399 // Fixed-length vectors are located in the corresponding scalable-vector 5400 // container types. 5401 if (ValVT.isFixedLengthVector()) 5402 LocVT = TLI.getContainerForFixedLengthVector(LocVT); 5403 5404 // Split arguments might be passed indirectly, so keep track of the pending 5405 // values. Split vectors are passed via a mix of registers and indirectly, so 5406 // treat them as we would any other argument. 5407 if (!LocVT.isVector() && (ArgFlags.isSplit() || !PendingLocs.empty())) { 5408 LocVT = XLenVT; 5409 LocInfo = CCValAssign::Indirect; 5410 PendingLocs.push_back( 5411 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); 5412 PendingArgFlags.push_back(ArgFlags); 5413 if (!ArgFlags.isSplitEnd()) { 5414 return false; 5415 } 5416 } 5417 5418 // If the split argument only had two elements, it should be passed directly 5419 // in registers or on the stack. 5420 if (!LocVT.isVector() && ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) { 5421 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()"); 5422 // Apply the normal calling convention rules to the first half of the 5423 // split argument. 5424 CCValAssign VA = PendingLocs[0]; 5425 ISD::ArgFlagsTy AF = PendingArgFlags[0]; 5426 PendingLocs.clear(); 5427 PendingArgFlags.clear(); 5428 return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT, 5429 ArgFlags); 5430 } 5431 5432 // Allocate to a register if possible, or else a stack slot. 5433 Register Reg; 5434 if (ValVT == MVT::f16 && !UseGPRForF16_F32) 5435 Reg = State.AllocateReg(ArgFPR16s); 5436 else if (ValVT == MVT::f32 && !UseGPRForF16_F32) 5437 Reg = State.AllocateReg(ArgFPR32s); 5438 else if (ValVT == MVT::f64 && !UseGPRForF64) 5439 Reg = State.AllocateReg(ArgFPR64s); 5440 else if (ValVT.isVector()) { 5441 const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT); 5442 if (RC == &RISCV::VRRegClass) { 5443 // Assign the first mask argument to V0. 5444 // This is an interim calling convention and it may be changed in the 5445 // future. 5446 if (FirstMaskArgument.hasValue() && 5447 ValNo == FirstMaskArgument.getValue()) { 5448 Reg = State.AllocateReg(RISCV::V0); 5449 } else { 5450 Reg = State.AllocateReg(ArgVRs); 5451 } 5452 } else if (RC == &RISCV::VRM2RegClass) { 5453 Reg = State.AllocateReg(ArgVRM2s); 5454 } else if (RC == &RISCV::VRM4RegClass) { 5455 Reg = State.AllocateReg(ArgVRM4s); 5456 } else if (RC == &RISCV::VRM8RegClass) { 5457 Reg = State.AllocateReg(ArgVRM8s); 5458 } else { 5459 llvm_unreachable("Unhandled class register for ValueType"); 5460 } 5461 if (!Reg) { 5462 // For return values, the vector must be passed fully via registers or 5463 // via the stack. 
5464 // FIXME: The proposed vector ABI only mandates v8-v15 for return values, 5465 // but we're using all of them. 5466 if (IsRet) 5467 return true; 5468 LocInfo = CCValAssign::Indirect; 5469 // Try using a GPR to pass the address 5470 Reg = State.AllocateReg(ArgGPRs); 5471 LocVT = XLenVT; 5472 } 5473 } else 5474 Reg = State.AllocateReg(ArgGPRs); 5475 unsigned StackOffset = 5476 Reg ? 0 : State.AllocateStack(XLen / 8, Align(XLen / 8)); 5477 5478 // If we reach this point and PendingLocs is non-empty, we must be at the 5479 // end of a split argument that must be passed indirectly. 5480 if (!PendingLocs.empty()) { 5481 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()"); 5482 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()"); 5483 5484 for (auto &It : PendingLocs) { 5485 if (Reg) 5486 It.convertToReg(Reg); 5487 else 5488 It.convertToMem(StackOffset); 5489 State.addLoc(It); 5490 } 5491 PendingLocs.clear(); 5492 PendingArgFlags.clear(); 5493 return false; 5494 } 5495 5496 assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT || 5497 (TLI.getSubtarget().hasStdExtV() && ValVT.isVector())) && 5498 "Expected an XLenVT or vector types at this stage"); 5499 5500 if (Reg) { 5501 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 5502 return false; 5503 } 5504 5505 // When a floating-point value is passed on the stack, no bit-conversion is 5506 // needed. 5507 if (ValVT.isFloatingPoint()) { 5508 LocVT = ValVT; 5509 LocInfo = CCValAssign::Full; 5510 } 5511 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 5512 return false; 5513 } 5514 5515 template <typename ArgTy> 5516 static Optional<unsigned> preAssignMask(const ArgTy &Args) { 5517 for (const auto &ArgIdx : enumerate(Args)) { 5518 MVT ArgVT = ArgIdx.value().VT; 5519 if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1) 5520 return ArgIdx.index(); 5521 } 5522 return None; 5523 } 5524 5525 void RISCVTargetLowering::analyzeInputArgs( 5526 MachineFunction &MF, CCState &CCInfo, 5527 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const { 5528 unsigned NumArgs = Ins.size(); 5529 FunctionType *FType = MF.getFunction().getFunctionType(); 5530 5531 Optional<unsigned> FirstMaskArgument; 5532 if (Subtarget.hasStdExtV()) 5533 FirstMaskArgument = preAssignMask(Ins); 5534 5535 for (unsigned i = 0; i != NumArgs; ++i) { 5536 MVT ArgVT = Ins[i].VT; 5537 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; 5538 5539 Type *ArgTy = nullptr; 5540 if (IsRet) 5541 ArgTy = FType->getReturnType(); 5542 else if (Ins[i].isOrigArg()) 5543 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex()); 5544 5545 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 5546 if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, 5547 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this, 5548 FirstMaskArgument)) { 5549 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " 5550 << EVT(ArgVT).getEVTString() << '\n'); 5551 llvm_unreachable(nullptr); 5552 } 5553 } 5554 } 5555 5556 void RISCVTargetLowering::analyzeOutputArgs( 5557 MachineFunction &MF, CCState &CCInfo, 5558 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet, 5559 CallLoweringInfo *CLI) const { 5560 unsigned NumArgs = Outs.size(); 5561 5562 Optional<unsigned> FirstMaskArgument; 5563 if (Subtarget.hasStdExtV()) 5564 FirstMaskArgument = preAssignMask(Outs); 5565 5566 for (unsigned i = 0; i != NumArgs; i++) { 5567 MVT ArgVT = Outs[i].VT; 5568 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 5569 
Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr; 5570 5571 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 5572 if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, 5573 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this, 5574 FirstMaskArgument)) { 5575 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " 5576 << EVT(ArgVT).getEVTString() << "\n"); 5577 llvm_unreachable(nullptr); 5578 } 5579 } 5580 } 5581 5582 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect 5583 // values. 5584 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, 5585 const CCValAssign &VA, const SDLoc &DL, 5586 const RISCVSubtarget &Subtarget) { 5587 switch (VA.getLocInfo()) { 5588 default: 5589 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 5590 case CCValAssign::Full: 5591 if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector()) 5592 Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget); 5593 break; 5594 case CCValAssign::BCvt: 5595 if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16) 5596 Val = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, Val); 5597 else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 5598 Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val); 5599 else 5600 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); 5601 break; 5602 } 5603 return Val; 5604 } 5605 5606 // The caller is responsible for loading the full value if the argument is 5607 // passed with CCValAssign::Indirect. 5608 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, 5609 const CCValAssign &VA, const SDLoc &DL, 5610 const RISCVTargetLowering &TLI) { 5611 MachineFunction &MF = DAG.getMachineFunction(); 5612 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 5613 EVT LocVT = VA.getLocVT(); 5614 SDValue Val; 5615 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT()); 5616 Register VReg = RegInfo.createVirtualRegister(RC); 5617 RegInfo.addLiveIn(VA.getLocReg(), VReg); 5618 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); 5619 5620 if (VA.getLocInfo() == CCValAssign::Indirect) 5621 return Val; 5622 5623 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget()); 5624 } 5625 5626 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, 5627 const CCValAssign &VA, const SDLoc &DL, 5628 const RISCVSubtarget &Subtarget) { 5629 EVT LocVT = VA.getLocVT(); 5630 5631 switch (VA.getLocInfo()) { 5632 default: 5633 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 5634 case CCValAssign::Full: 5635 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector()) 5636 Val = convertToScalableVector(LocVT, Val, DAG, Subtarget); 5637 break; 5638 case CCValAssign::BCvt: 5639 if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16) 5640 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val); 5641 else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 5642 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val); 5643 else 5644 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val); 5645 break; 5646 } 5647 return Val; 5648 } 5649 5650 // The caller is responsible for loading the full value if the argument is 5651 // passed with CCValAssign::Indirect. 
5652 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, 5653 const CCValAssign &VA, const SDLoc &DL) { 5654 MachineFunction &MF = DAG.getMachineFunction(); 5655 MachineFrameInfo &MFI = MF.getFrameInfo(); 5656 EVT LocVT = VA.getLocVT(); 5657 EVT ValVT = VA.getValVT(); 5658 EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)); 5659 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8, 5660 VA.getLocMemOffset(), /*Immutable=*/true); 5661 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 5662 SDValue Val; 5663 5664 ISD::LoadExtType ExtType; 5665 switch (VA.getLocInfo()) { 5666 default: 5667 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 5668 case CCValAssign::Full: 5669 case CCValAssign::Indirect: 5670 case CCValAssign::BCvt: 5671 ExtType = ISD::NON_EXTLOAD; 5672 break; 5673 } 5674 Val = DAG.getExtLoad( 5675 ExtType, DL, LocVT, Chain, FIN, 5676 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT); 5677 return Val; 5678 } 5679 5680 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, 5681 const CCValAssign &VA, const SDLoc &DL) { 5682 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 && 5683 "Unexpected VA"); 5684 MachineFunction &MF = DAG.getMachineFunction(); 5685 MachineFrameInfo &MFI = MF.getFrameInfo(); 5686 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 5687 5688 if (VA.isMemLoc()) { 5689 // f64 is passed on the stack. 5690 int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true); 5691 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); 5692 return DAG.getLoad(MVT::f64, DL, Chain, FIN, 5693 MachinePointerInfo::getFixedStack(MF, FI)); 5694 } 5695 5696 assert(VA.isRegLoc() && "Expected register VA assignment"); 5697 5698 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 5699 RegInfo.addLiveIn(VA.getLocReg(), LoVReg); 5700 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32); 5701 SDValue Hi; 5702 if (VA.getLocReg() == RISCV::X17) { 5703 // Second half of f64 is passed on the stack. 5704 int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true); 5705 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); 5706 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN, 5707 MachinePointerInfo::getFixedStack(MF, FI)); 5708 } else { 5709 // Second half of f64 is passed in another GPR. 5710 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 5711 RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg); 5712 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32); 5713 } 5714 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi); 5715 } 5716 5717 // FastCC shows less than a 1% performance improvement on some particular 5718 // benchmarks, but theoretically it may still benefit some cases. 5719 static bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT, 5720 CCValAssign::LocInfo LocInfo, 5721 ISD::ArgFlagsTy ArgFlags, CCState &State) { 5722 5723 if (LocVT == MVT::i32 || LocVT == MVT::i64) { 5724 // X5 and X6 might be used for save-restore libcall.
5725 static const MCPhysReg GPRList[] = { 5726 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, 5727 RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28, 5728 RISCV::X29, RISCV::X30, RISCV::X31}; 5729 if (unsigned Reg = State.AllocateReg(GPRList)) { 5730 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 5731 return false; 5732 } 5733 } 5734 5735 if (LocVT == MVT::f16) { 5736 static const MCPhysReg FPR16List[] = { 5737 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H, 5738 RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H, 5739 RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H, 5740 RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H}; 5741 if (unsigned Reg = State.AllocateReg(FPR16List)) { 5742 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 5743 return false; 5744 } 5745 } 5746 5747 if (LocVT == MVT::f32) { 5748 static const MCPhysReg FPR32List[] = { 5749 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F, 5750 RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F, 5751 RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F, 5752 RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F}; 5753 if (unsigned Reg = State.AllocateReg(FPR32List)) { 5754 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 5755 return false; 5756 } 5757 } 5758 5759 if (LocVT == MVT::f64) { 5760 static const MCPhysReg FPR64List[] = { 5761 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D, 5762 RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D, 5763 RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D, 5764 RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D}; 5765 if (unsigned Reg = State.AllocateReg(FPR64List)) { 5766 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 5767 return false; 5768 } 5769 } 5770 5771 if (LocVT == MVT::i32 || LocVT == MVT::f32) { 5772 unsigned Offset4 = State.AllocateStack(4, Align(4)); 5773 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo)); 5774 return false; 5775 } 5776 5777 if (LocVT == MVT::i64 || LocVT == MVT::f64) { 5778 unsigned Offset5 = State.AllocateStack(8, Align(8)); 5779 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo)); 5780 return false; 5781 } 5782 5783 return true; // CC didn't match. 5784 } 5785 5786 static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, 5787 CCValAssign::LocInfo LocInfo, 5788 ISD::ArgFlagsTy ArgFlags, CCState &State) { 5789 5790 if (LocVT == MVT::i32 || LocVT == MVT::i64) { 5791 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim 5792 // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11 5793 static const MCPhysReg GPRList[] = { 5794 RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22, 5795 RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27}; 5796 if (unsigned Reg = State.AllocateReg(GPRList)) { 5797 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 5798 return false; 5799 } 5800 } 5801 5802 if (LocVT == MVT::f32) { 5803 // Pass in STG registers: F1, ..., F6 5804 // fs0 ... 
fs5 5805 static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
5806 RISCV::F18_F, RISCV::F19_F,
5807 RISCV::F20_F, RISCV::F21_F};
5808 if (unsigned Reg = State.AllocateReg(FPR32List)) {
5809 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5810 return false;
5811 }
5812 }
5813
5814 if (LocVT == MVT::f64) {
5815 // Pass in STG registers: D1, ..., D6
5816 // fs6 ... fs11
5817 static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
5818 RISCV::F24_D, RISCV::F25_D,
5819 RISCV::F26_D, RISCV::F27_D};
5820 if (unsigned Reg = State.AllocateReg(FPR64List)) {
5821 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5822 return false;
5823 }
5824 }
5825
5826 report_fatal_error("No registers left in GHC calling convention");
5827 return true;
5828 }
5829
5830 // Transform physical registers into virtual registers.
5831 SDValue RISCVTargetLowering::LowerFormalArguments(
5832 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
5833 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
5834 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5835
5836 MachineFunction &MF = DAG.getMachineFunction();
5837
5838 switch (CallConv) {
5839 default:
5840 report_fatal_error("Unsupported calling convention");
5841 case CallingConv::C:
5842 case CallingConv::Fast:
5843 break;
5844 case CallingConv::GHC:
5845 if (!MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtF] ||
5846 !MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtD])
5847 report_fatal_error(
5848 "GHC calling convention requires the F and D instruction set extensions");
5849 }
5850
5851 const Function &Func = MF.getFunction();
5852 if (Func.hasFnAttribute("interrupt")) {
5853 if (!Func.arg_empty())
5854 report_fatal_error(
5855 "Functions with the interrupt attribute cannot have arguments!");
5856
5857 StringRef Kind =
5858 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
5859
5860 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
5861 report_fatal_error(
5862 "Function interrupt attribute argument not supported!");
5863 }
5864
5865 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5866 MVT XLenVT = Subtarget.getXLenVT();
5867 unsigned XLenInBytes = Subtarget.getXLen() / 8;
5868 // Used with varargs to accumulate store chains.
5869 std::vector<SDValue> OutChains;
5870
5871 // Assign locations to all of the incoming arguments.
5872 SmallVector<CCValAssign, 16> ArgLocs;
5873 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5874
5875 if (CallConv == CallingConv::Fast)
5876 CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_FastCC);
5877 else if (CallConv == CallingConv::GHC)
5878 CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
5879 else
5880 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);
5881
5882 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
5883 CCValAssign &VA = ArgLocs[i];
5884 SDValue ArgValue;
5885 // Passing f64 on RV32D with a soft float ABI must be handled as a special
5886 // case.
5887 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
5888 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
5889 else if (VA.isRegLoc())
5890 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
5891 else
5892 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
5893
5894 if (VA.getLocInfo() == CCValAssign::Indirect) {
5895 // If the original argument was split and passed by reference (e.g. i128
5896 // on RV32), we need to load all parts of it here (using the same
5897 // address). Vectors may be partly split to registers and partly to the
5898 // stack, in which case the base address is partly offset and subsequent
5899 // loads are relative to that.
5900 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
5901 MachinePointerInfo()));
5902 unsigned ArgIndex = Ins[i].OrigArgIndex;
5903 unsigned ArgPartOffset = Ins[i].PartOffset;
5904 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
5905 while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
5906 CCValAssign &PartVA = ArgLocs[i + 1];
5907 unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
5908 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
5909 DAG.getIntPtrConstant(PartOffset, DL));
5910 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
5911 MachinePointerInfo()));
5912 ++i;
5913 }
5914 continue;
5915 }
5916 InVals.push_back(ArgValue);
5917 }
5918
5919 if (IsVarArg) {
5920 ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
5921 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
5922 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
5923 MachineFrameInfo &MFI = MF.getFrameInfo();
5924 MachineRegisterInfo &RegInfo = MF.getRegInfo();
5925 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
5926
5927 // Offset of the first variable argument from stack pointer, and size of
5928 // the vararg save area. For now, the varargs save area is either zero or
5929 // large enough to hold a0-a7.
5930 int VaArgOffset, VarArgsSaveSize;
5931
5932 // If all registers are allocated, then all varargs must be passed on the
5933 // stack and we don't need to save any argregs.
5934 if (ArgRegs.size() == Idx) {
5935 VaArgOffset = CCInfo.getNextStackOffset();
5936 VarArgsSaveSize = 0;
5937 } else {
5938 VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
5939 VaArgOffset = -VarArgsSaveSize;
5940 }
5941
5942 // Record the frame index of the first variable argument,
5943 // which is needed by VASTART.
5944 int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
5945 RVFI->setVarArgsFrameIndex(FI);
5946
5947 // If saving an odd number of registers then create an extra stack slot to
5948 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
5949 // offsets to even-numbered registers remain 2*XLEN-aligned.
5950 if (Idx % 2) {
5951 MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
5952 VarArgsSaveSize += XLenInBytes;
5953 }
5954
5955 // Copy the integer registers that may have been used for passing varargs
5956 // to the vararg save area.
5957 for (unsigned I = Idx; I < ArgRegs.size();
5958 ++I, VaArgOffset += XLenInBytes) {
5959 const Register Reg = RegInfo.createVirtualRegister(RC);
5960 RegInfo.addLiveIn(ArgRegs[I], Reg);
5961 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
5962 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
5963 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
5964 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
5965 MachinePointerInfo::getFixedStack(MF, FI));
5966 cast<StoreSDNode>(Store.getNode())
5967 ->getMemOperand()
5968 ->setValue((Value *)nullptr);
5969 OutChains.push_back(Store);
5970 }
5971 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
5972 }
5973
5974 // All stores are grouped in one node to allow the matching between
5975 // the size of Ins and InVals. This only happens for vararg functions.
5976 if (!OutChains.empty()) { 5977 OutChains.push_back(Chain); 5978 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains); 5979 } 5980 5981 return Chain; 5982 } 5983 5984 /// isEligibleForTailCallOptimization - Check whether the call is eligible 5985 /// for tail call optimization. 5986 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization. 5987 bool RISCVTargetLowering::isEligibleForTailCallOptimization( 5988 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, 5989 const SmallVector<CCValAssign, 16> &ArgLocs) const { 5990 5991 auto &Callee = CLI.Callee; 5992 auto CalleeCC = CLI.CallConv; 5993 auto &Outs = CLI.Outs; 5994 auto &Caller = MF.getFunction(); 5995 auto CallerCC = Caller.getCallingConv(); 5996 5997 // Exception-handling functions need a special set of instructions to 5998 // indicate a return to the hardware. Tail-calling another function would 5999 // probably break this. 6000 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This 6001 // should be expanded as new function attributes are introduced. 6002 if (Caller.hasFnAttribute("interrupt")) 6003 return false; 6004 6005 // Do not tail call opt if the stack is used to pass parameters. 6006 if (CCInfo.getNextStackOffset() != 0) 6007 return false; 6008 6009 // Do not tail call opt if any parameters need to be passed indirectly. 6010 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are 6011 // passed indirectly. So the address of the value will be passed in a 6012 // register, or if not available, then the address is put on the stack. In 6013 // order to pass indirectly, space on the stack often needs to be allocated 6014 // in order to store the value. In this case the CCInfo.getNextStackOffset() 6015 // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs 6016 // are passed CCValAssign::Indirect. 6017 for (auto &VA : ArgLocs) 6018 if (VA.getLocInfo() == CCValAssign::Indirect) 6019 return false; 6020 6021 // Do not tail call opt if either caller or callee uses struct return 6022 // semantics. 6023 auto IsCallerStructRet = Caller.hasStructRetAttr(); 6024 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); 6025 if (IsCallerStructRet || IsCalleeStructRet) 6026 return false; 6027 6028 // Externally-defined functions with weak linkage should not be 6029 // tail-called. The behaviour of branch instructions in this situation (as 6030 // used for tail calls) is implementation-defined, so we cannot rely on the 6031 // linker replacing the tail call with a return. 6032 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 6033 const GlobalValue *GV = G->getGlobal(); 6034 if (GV->hasExternalWeakLinkage()) 6035 return false; 6036 } 6037 6038 // The callee has to preserve all registers the caller needs to preserve. 6039 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); 6040 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); 6041 if (CalleeCC != CallerCC) { 6042 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); 6043 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) 6044 return false; 6045 } 6046 6047 // Byval parameters hand the function a pointer directly into the stack area 6048 // we want to reuse during a tail call. Working around this *is* possible 6049 // but less efficient and uglier in LowerCall. 
6050 for (auto &Arg : Outs) 6051 if (Arg.Flags.isByVal()) 6052 return false; 6053 6054 return true; 6055 } 6056 6057 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input 6058 // and output parameter nodes. 6059 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, 6060 SmallVectorImpl<SDValue> &InVals) const { 6061 SelectionDAG &DAG = CLI.DAG; 6062 SDLoc &DL = CLI.DL; 6063 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 6064 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 6065 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 6066 SDValue Chain = CLI.Chain; 6067 SDValue Callee = CLI.Callee; 6068 bool &IsTailCall = CLI.IsTailCall; 6069 CallingConv::ID CallConv = CLI.CallConv; 6070 bool IsVarArg = CLI.IsVarArg; 6071 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 6072 MVT XLenVT = Subtarget.getXLenVT(); 6073 6074 MachineFunction &MF = DAG.getMachineFunction(); 6075 6076 // Analyze the operands of the call, assigning locations to each operand. 6077 SmallVector<CCValAssign, 16> ArgLocs; 6078 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 6079 6080 if (CallConv == CallingConv::Fast) 6081 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC); 6082 else if (CallConv == CallingConv::GHC) 6083 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC); 6084 else 6085 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI); 6086 6087 // Check if it's really possible to do a tail call. 6088 if (IsTailCall) 6089 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs); 6090 6091 if (IsTailCall) 6092 ++NumTailCalls; 6093 else if (CLI.CB && CLI.CB->isMustTailCall()) 6094 report_fatal_error("failed to perform tail call elimination on a call " 6095 "site marked musttail"); 6096 6097 // Get a count of how many bytes are to be pushed on the stack. 6098 unsigned NumBytes = ArgCCInfo.getNextStackOffset(); 6099 6100 // Create local copies for byval args 6101 SmallVector<SDValue, 8> ByValArgs; 6102 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 6103 ISD::ArgFlagsTy Flags = Outs[i].Flags; 6104 if (!Flags.isByVal()) 6105 continue; 6106 6107 SDValue Arg = OutVals[i]; 6108 unsigned Size = Flags.getByValSize(); 6109 Align Alignment = Flags.getNonZeroByValAlign(); 6110 6111 int FI = 6112 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false); 6113 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 6114 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT); 6115 6116 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment, 6117 /*IsVolatile=*/false, 6118 /*AlwaysInline=*/false, IsTailCall, 6119 MachinePointerInfo(), MachinePointerInfo()); 6120 ByValArgs.push_back(FIPtr); 6121 } 6122 6123 if (!IsTailCall) 6124 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); 6125 6126 // Copy argument values to their designated locations. 6127 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass; 6128 SmallVector<SDValue, 8> MemOpChains; 6129 SDValue StackPtr; 6130 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) { 6131 CCValAssign &VA = ArgLocs[i]; 6132 SDValue ArgValue = OutVals[i]; 6133 ISD::ArgFlagsTy Flags = Outs[i].Flags; 6134 6135 // Handle passing f64 on RV32D with a soft float ABI as a special case. 
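// For example, with the ilp32 ABI an f64 argument assigned to registers is
// split by SplitF64: the low word goes in the assigned GPR and the high word
// in the next GPR, or onto the stack if the low word landed in a7 (X17).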
6136 bool IsF64OnRV32DSoftABI = 6137 VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64; 6138 if (IsF64OnRV32DSoftABI && VA.isRegLoc()) { 6139 SDValue SplitF64 = DAG.getNode( 6140 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue); 6141 SDValue Lo = SplitF64.getValue(0); 6142 SDValue Hi = SplitF64.getValue(1); 6143 6144 Register RegLo = VA.getLocReg(); 6145 RegsToPass.push_back(std::make_pair(RegLo, Lo)); 6146 6147 if (RegLo == RISCV::X17) { 6148 // Second half of f64 is passed on the stack. 6149 // Work out the address of the stack slot. 6150 if (!StackPtr.getNode()) 6151 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); 6152 // Emit the store. 6153 MemOpChains.push_back( 6154 DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo())); 6155 } else { 6156 // Second half of f64 is passed in another GPR. 6157 assert(RegLo < RISCV::X31 && "Invalid register pair"); 6158 Register RegHigh = RegLo + 1; 6159 RegsToPass.push_back(std::make_pair(RegHigh, Hi)); 6160 } 6161 continue; 6162 } 6163 6164 // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way 6165 // as any other MemLoc. 6166 6167 // Promote the value if needed. 6168 // For now, only handle fully promoted and indirect arguments. 6169 if (VA.getLocInfo() == CCValAssign::Indirect) { 6170 // Store the argument in a stack slot and pass its address. 6171 SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT); 6172 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); 6173 MemOpChains.push_back( 6174 DAG.getStore(Chain, DL, ArgValue, SpillSlot, 6175 MachinePointerInfo::getFixedStack(MF, FI))); 6176 // If the original argument was split (e.g. i128), we need 6177 // to store the required parts of it here (and pass just one address). 6178 // Vectors may be partly split to registers and partly to the stack, in 6179 // which case the base address is partly offset and subsequent stores are 6180 // relative to that. 6181 unsigned ArgIndex = Outs[i].OrigArgIndex; 6182 unsigned ArgPartOffset = Outs[i].PartOffset; 6183 assert(VA.getValVT().isVector() || ArgPartOffset == 0); 6184 while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) { 6185 SDValue PartValue = OutVals[i + 1]; 6186 unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset; 6187 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, 6188 DAG.getIntPtrConstant(PartOffset, DL)); 6189 MemOpChains.push_back( 6190 DAG.getStore(Chain, DL, PartValue, Address, 6191 MachinePointerInfo::getFixedStack(MF, FI))); 6192 ++i; 6193 } 6194 ArgValue = SpillSlot; 6195 } else { 6196 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget); 6197 } 6198 6199 // Use local copy if it is a byval arg. 6200 if (Flags.isByVal()) 6201 ArgValue = ByValArgs[j++]; 6202 6203 if (VA.isRegLoc()) { 6204 // Queue up the argument copies and emit them at the end. 6205 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); 6206 } else { 6207 assert(VA.isMemLoc() && "Argument not register or memory"); 6208 assert(!IsTailCall && "Tail call not allowed if stack is used " 6209 "for passing parameters"); 6210 6211 // Work out the address of the stack slot. 6212 if (!StackPtr.getNode()) 6213 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); 6214 SDValue Address = 6215 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, 6216 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL)); 6217 6218 // Emit the store. 
6219 MemOpChains.push_back( 6220 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo())); 6221 } 6222 } 6223 6224 // Join the stores, which are independent of one another. 6225 if (!MemOpChains.empty()) 6226 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); 6227 6228 SDValue Glue; 6229 6230 // Build a sequence of copy-to-reg nodes, chained and glued together. 6231 for (auto &Reg : RegsToPass) { 6232 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue); 6233 Glue = Chain.getValue(1); 6234 } 6235 6236 // Validate that none of the argument registers have been marked as 6237 // reserved, if so report an error. Do the same for the return address if this 6238 // is not a tailcall. 6239 validateCCReservedRegs(RegsToPass, MF); 6240 if (!IsTailCall && 6241 MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1)) 6242 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 6243 MF.getFunction(), 6244 "Return address register required, but has been reserved."}); 6245 6246 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a 6247 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't 6248 // split it and then direct call can be matched by PseudoCALL. 6249 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) { 6250 const GlobalValue *GV = S->getGlobal(); 6251 6252 unsigned OpFlags = RISCVII::MO_CALL; 6253 if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)) 6254 OpFlags = RISCVII::MO_PLT; 6255 6256 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags); 6257 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 6258 unsigned OpFlags = RISCVII::MO_CALL; 6259 6260 if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(), 6261 nullptr)) 6262 OpFlags = RISCVII::MO_PLT; 6263 6264 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags); 6265 } 6266 6267 // The first call operand is the chain and the second is the target address. 6268 SmallVector<SDValue, 8> Ops; 6269 Ops.push_back(Chain); 6270 Ops.push_back(Callee); 6271 6272 // Add argument registers to the end of the list so that they are 6273 // known live into the call. 6274 for (auto &Reg : RegsToPass) 6275 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType())); 6276 6277 if (!IsTailCall) { 6278 // Add a register mask operand representing the call-preserved registers. 6279 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 6280 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); 6281 assert(Mask && "Missing call preserved mask for calling convention"); 6282 Ops.push_back(DAG.getRegisterMask(Mask)); 6283 } 6284 6285 // Glue the call to the argument copies, if any. 6286 if (Glue.getNode()) 6287 Ops.push_back(Glue); 6288 6289 // Emit the call. 6290 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 6291 6292 if (IsTailCall) { 6293 MF.getFrameInfo().setHasTailCall(); 6294 return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops); 6295 } 6296 6297 Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops); 6298 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); 6299 Glue = Chain.getValue(1); 6300 6301 // Mark the end of the call, which is glued to the call itself. 6302 Chain = DAG.getCALLSEQ_END(Chain, 6303 DAG.getConstant(NumBytes, DL, PtrVT, true), 6304 DAG.getConstant(0, DL, PtrVT, true), 6305 Glue, DL); 6306 Glue = Chain.getValue(1); 6307 6308 // Assign locations to each value returned by this call. 
6309 SmallVector<CCValAssign, 16> RVLocs; 6310 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); 6311 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true); 6312 6313 // Copy all of the result registers out of their specified physreg. 6314 for (auto &VA : RVLocs) { 6315 // Copy the value out 6316 SDValue RetValue = 6317 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue); 6318 // Glue the RetValue to the end of the call sequence 6319 Chain = RetValue.getValue(1); 6320 Glue = RetValue.getValue(2); 6321 6322 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { 6323 assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment"); 6324 SDValue RetValue2 = 6325 DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue); 6326 Chain = RetValue2.getValue(1); 6327 Glue = RetValue2.getValue(2); 6328 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue, 6329 RetValue2); 6330 } 6331 6332 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget); 6333 6334 InVals.push_back(RetValue); 6335 } 6336 6337 return Chain; 6338 } 6339 6340 bool RISCVTargetLowering::CanLowerReturn( 6341 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, 6342 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { 6343 SmallVector<CCValAssign, 16> RVLocs; 6344 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); 6345 6346 Optional<unsigned> FirstMaskArgument; 6347 if (Subtarget.hasStdExtV()) 6348 FirstMaskArgument = preAssignMask(Outs); 6349 6350 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 6351 MVT VT = Outs[i].VT; 6352 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 6353 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 6354 if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full, 6355 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr, 6356 *this, FirstMaskArgument)) 6357 return false; 6358 } 6359 return true; 6360 } 6361 6362 SDValue 6363 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, 6364 bool IsVarArg, 6365 const SmallVectorImpl<ISD::OutputArg> &Outs, 6366 const SmallVectorImpl<SDValue> &OutVals, 6367 const SDLoc &DL, SelectionDAG &DAG) const { 6368 const MachineFunction &MF = DAG.getMachineFunction(); 6369 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>(); 6370 6371 // Stores the assignment of the return value to a location. 6372 SmallVector<CCValAssign, 16> RVLocs; 6373 6374 // Info about the registers and stack slot. 6375 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, 6376 *DAG.getContext()); 6377 6378 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true, 6379 nullptr); 6380 6381 if (CallConv == CallingConv::GHC && !RVLocs.empty()) 6382 report_fatal_error("GHC functions return void only"); 6383 6384 SDValue Glue; 6385 SmallVector<SDValue, 4> RetOps(1, Chain); 6386 6387 // Copy the result values into the output registers. 6388 for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) { 6389 SDValue Val = OutVals[i]; 6390 CCValAssign &VA = RVLocs[i]; 6391 assert(VA.isRegLoc() && "Can only return in registers!"); 6392 6393 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { 6394 // Handle returning f64 on RV32D with a soft float ABI. 
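// The double is split by SplitF64 and returned in two consecutive GPRs
// (a0/a1 in practice), low word first.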
6395 assert(VA.isRegLoc() && "Expected return via registers"); 6396 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL, 6397 DAG.getVTList(MVT::i32, MVT::i32), Val); 6398 SDValue Lo = SplitF64.getValue(0); 6399 SDValue Hi = SplitF64.getValue(1); 6400 Register RegLo = VA.getLocReg(); 6401 assert(RegLo < RISCV::X31 && "Invalid register pair"); 6402 Register RegHi = RegLo + 1; 6403 6404 if (STI.isRegisterReservedByUser(RegLo) || 6405 STI.isRegisterReservedByUser(RegHi)) 6406 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 6407 MF.getFunction(), 6408 "Return value register required, but has been reserved."}); 6409 6410 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue); 6411 Glue = Chain.getValue(1); 6412 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32)); 6413 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue); 6414 Glue = Chain.getValue(1); 6415 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32)); 6416 } else { 6417 // Handle a 'normal' return. 6418 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget); 6419 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue); 6420 6421 if (STI.isRegisterReservedByUser(VA.getLocReg())) 6422 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 6423 MF.getFunction(), 6424 "Return value register required, but has been reserved."}); 6425 6426 // Guarantee that all emitted copies are stuck together. 6427 Glue = Chain.getValue(1); 6428 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 6429 } 6430 } 6431 6432 RetOps[0] = Chain; // Update chain. 6433 6434 // Add the glue node if we have it. 6435 if (Glue.getNode()) { 6436 RetOps.push_back(Glue); 6437 } 6438 6439 // Interrupt service routines use different return instructions. 6440 const Function &Func = DAG.getMachineFunction().getFunction(); 6441 if (Func.hasFnAttribute("interrupt")) { 6442 if (!Func.getReturnType()->isVoidTy()) 6443 report_fatal_error( 6444 "Functions with the interrupt attribute must have void return type!"); 6445 6446 MachineFunction &MF = DAG.getMachineFunction(); 6447 StringRef Kind = 6448 MF.getFunction().getFnAttribute("interrupt").getValueAsString(); 6449 6450 unsigned RetOpc; 6451 if (Kind == "user") 6452 RetOpc = RISCVISD::URET_FLAG; 6453 else if (Kind == "supervisor") 6454 RetOpc = RISCVISD::SRET_FLAG; 6455 else 6456 RetOpc = RISCVISD::MRET_FLAG; 6457 6458 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps); 6459 } 6460 6461 return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps); 6462 } 6463 6464 void RISCVTargetLowering::validateCCReservedRegs( 6465 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs, 6466 MachineFunction &MF) const { 6467 const Function &F = MF.getFunction(); 6468 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>(); 6469 6470 if (llvm::any_of(Regs, [&STI](auto Reg) { 6471 return STI.isRegisterReservedByUser(Reg.first); 6472 })) 6473 F.getContext().diagnose(DiagnosticInfoUnsupported{ 6474 F, "Argument register required, but has been reserved."}); 6475 } 6476 6477 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { 6478 return CI->isTailCall(); 6479 } 6480 6481 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { 6482 #define NODE_NAME_CASE(NODE) \ 6483 case RISCVISD::NODE: \ 6484 return "RISCVISD::" #NODE; 6485 // clang-format off 6486 switch ((RISCVISD::NodeType)Opcode) { 6487 case RISCVISD::FIRST_NUMBER: 6488 break; 6489 NODE_NAME_CASE(RET_FLAG) 6490 NODE_NAME_CASE(URET_FLAG) 6491 NODE_NAME_CASE(SRET_FLAG) 6492 
NODE_NAME_CASE(MRET_FLAG) 6493 NODE_NAME_CASE(CALL) 6494 NODE_NAME_CASE(SELECT_CC) 6495 NODE_NAME_CASE(BR_CC) 6496 NODE_NAME_CASE(BuildPairF64) 6497 NODE_NAME_CASE(SplitF64) 6498 NODE_NAME_CASE(TAIL) 6499 NODE_NAME_CASE(SLLW) 6500 NODE_NAME_CASE(SRAW) 6501 NODE_NAME_CASE(SRLW) 6502 NODE_NAME_CASE(DIVW) 6503 NODE_NAME_CASE(DIVUW) 6504 NODE_NAME_CASE(REMUW) 6505 NODE_NAME_CASE(ROLW) 6506 NODE_NAME_CASE(RORW) 6507 NODE_NAME_CASE(FSLW) 6508 NODE_NAME_CASE(FSRW) 6509 NODE_NAME_CASE(FSL) 6510 NODE_NAME_CASE(FSR) 6511 NODE_NAME_CASE(FMV_H_X) 6512 NODE_NAME_CASE(FMV_X_ANYEXTH) 6513 NODE_NAME_CASE(FMV_W_X_RV64) 6514 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64) 6515 NODE_NAME_CASE(READ_CYCLE_WIDE) 6516 NODE_NAME_CASE(GREVI) 6517 NODE_NAME_CASE(GREVIW) 6518 NODE_NAME_CASE(GORCI) 6519 NODE_NAME_CASE(GORCIW) 6520 NODE_NAME_CASE(SHFLI) 6521 NODE_NAME_CASE(VMV_V_X_VL) 6522 NODE_NAME_CASE(VFMV_V_F_VL) 6523 NODE_NAME_CASE(VMV_X_S) 6524 NODE_NAME_CASE(VMV_S_XF_VL) 6525 NODE_NAME_CASE(SPLAT_VECTOR_I64) 6526 NODE_NAME_CASE(READ_VLENB) 6527 NODE_NAME_CASE(TRUNCATE_VECTOR_VL) 6528 NODE_NAME_CASE(VLEFF) 6529 NODE_NAME_CASE(VLEFF_MASK) 6530 NODE_NAME_CASE(VSLIDEUP_VL) 6531 NODE_NAME_CASE(VSLIDE1UP_VL) 6532 NODE_NAME_CASE(VSLIDEDOWN_VL) 6533 NODE_NAME_CASE(VID_VL) 6534 NODE_NAME_CASE(VFNCVT_ROD_VL) 6535 NODE_NAME_CASE(VECREDUCE_ADD_VL) 6536 NODE_NAME_CASE(VECREDUCE_UMAX_VL) 6537 NODE_NAME_CASE(VECREDUCE_SMAX_VL) 6538 NODE_NAME_CASE(VECREDUCE_UMIN_VL) 6539 NODE_NAME_CASE(VECREDUCE_SMIN_VL) 6540 NODE_NAME_CASE(VECREDUCE_AND_VL) 6541 NODE_NAME_CASE(VECREDUCE_OR_VL) 6542 NODE_NAME_CASE(VECREDUCE_XOR_VL) 6543 NODE_NAME_CASE(VECREDUCE_FADD_VL) 6544 NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL) 6545 NODE_NAME_CASE(ADD_VL) 6546 NODE_NAME_CASE(AND_VL) 6547 NODE_NAME_CASE(MUL_VL) 6548 NODE_NAME_CASE(OR_VL) 6549 NODE_NAME_CASE(SDIV_VL) 6550 NODE_NAME_CASE(SHL_VL) 6551 NODE_NAME_CASE(SREM_VL) 6552 NODE_NAME_CASE(SRA_VL) 6553 NODE_NAME_CASE(SRL_VL) 6554 NODE_NAME_CASE(SUB_VL) 6555 NODE_NAME_CASE(UDIV_VL) 6556 NODE_NAME_CASE(UREM_VL) 6557 NODE_NAME_CASE(XOR_VL) 6558 NODE_NAME_CASE(FADD_VL) 6559 NODE_NAME_CASE(FSUB_VL) 6560 NODE_NAME_CASE(FMUL_VL) 6561 NODE_NAME_CASE(FDIV_VL) 6562 NODE_NAME_CASE(FNEG_VL) 6563 NODE_NAME_CASE(FABS_VL) 6564 NODE_NAME_CASE(FSQRT_VL) 6565 NODE_NAME_CASE(FMA_VL) 6566 NODE_NAME_CASE(FCOPYSIGN_VL) 6567 NODE_NAME_CASE(SMIN_VL) 6568 NODE_NAME_CASE(SMAX_VL) 6569 NODE_NAME_CASE(UMIN_VL) 6570 NODE_NAME_CASE(UMAX_VL) 6571 NODE_NAME_CASE(MULHS_VL) 6572 NODE_NAME_CASE(MULHU_VL) 6573 NODE_NAME_CASE(FP_TO_SINT_VL) 6574 NODE_NAME_CASE(FP_TO_UINT_VL) 6575 NODE_NAME_CASE(SINT_TO_FP_VL) 6576 NODE_NAME_CASE(UINT_TO_FP_VL) 6577 NODE_NAME_CASE(FP_EXTEND_VL) 6578 NODE_NAME_CASE(FP_ROUND_VL) 6579 NODE_NAME_CASE(SETCC_VL) 6580 NODE_NAME_CASE(VSELECT_VL) 6581 NODE_NAME_CASE(VMAND_VL) 6582 NODE_NAME_CASE(VMOR_VL) 6583 NODE_NAME_CASE(VMXOR_VL) 6584 NODE_NAME_CASE(VMCLR_VL) 6585 NODE_NAME_CASE(VMSET_VL) 6586 NODE_NAME_CASE(VRGATHER_VX_VL) 6587 NODE_NAME_CASE(VRGATHER_VV_VL) 6588 NODE_NAME_CASE(VRGATHEREI16_VV_VL) 6589 NODE_NAME_CASE(VSEXT_VL) 6590 NODE_NAME_CASE(VZEXT_VL) 6591 NODE_NAME_CASE(VLE_VL) 6592 NODE_NAME_CASE(VSE_VL) 6593 } 6594 // clang-format on 6595 return nullptr; 6596 #undef NODE_NAME_CASE 6597 } 6598 6599 /// getConstraintType - Given a constraint letter, return the type of 6600 /// constraint it is for this target. 
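/// For example, 'f' and 'v' select register classes, 'I', 'J' and 'K' are
/// RISC-V immediate constraints, and 'A' denotes a memory operand whose
/// address is held in a general-purpose register.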
6601 RISCVTargetLowering::ConstraintType 6602 RISCVTargetLowering::getConstraintType(StringRef Constraint) const { 6603 if (Constraint.size() == 1) { 6604 switch (Constraint[0]) { 6605 default: 6606 break; 6607 case 'f': 6608 case 'v': 6609 return C_RegisterClass; 6610 case 'I': 6611 case 'J': 6612 case 'K': 6613 return C_Immediate; 6614 case 'A': 6615 return C_Memory; 6616 } 6617 } 6618 return TargetLowering::getConstraintType(Constraint); 6619 } 6620 6621 std::pair<unsigned, const TargetRegisterClass *> 6622 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, 6623 StringRef Constraint, 6624 MVT VT) const { 6625 // First, see if this is a constraint that directly corresponds to a 6626 // RISCV register class. 6627 if (Constraint.size() == 1) { 6628 switch (Constraint[0]) { 6629 case 'r': 6630 return std::make_pair(0U, &RISCV::GPRRegClass); 6631 case 'f': 6632 if (Subtarget.hasStdExtZfh() && VT == MVT::f16) 6633 return std::make_pair(0U, &RISCV::FPR16RegClass); 6634 if (Subtarget.hasStdExtF() && VT == MVT::f32) 6635 return std::make_pair(0U, &RISCV::FPR32RegClass); 6636 if (Subtarget.hasStdExtD() && VT == MVT::f64) 6637 return std::make_pair(0U, &RISCV::FPR64RegClass); 6638 break; 6639 case 'v': 6640 for (const auto *RC : 6641 {&RISCV::VMRegClass, &RISCV::VRRegClass, &RISCV::VRM2RegClass, 6642 &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) { 6643 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) 6644 return std::make_pair(0U, RC); 6645 } 6646 break; 6647 default: 6648 break; 6649 } 6650 } 6651 6652 // Clang will correctly decode the usage of register name aliases into their 6653 // official names. However, other frontends like `rustc` do not. This allows 6654 // users of these frontends to use the ABI names for registers in LLVM-style 6655 // register constraints. 6656 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower()) 6657 .Case("{zero}", RISCV::X0) 6658 .Case("{ra}", RISCV::X1) 6659 .Case("{sp}", RISCV::X2) 6660 .Case("{gp}", RISCV::X3) 6661 .Case("{tp}", RISCV::X4) 6662 .Case("{t0}", RISCV::X5) 6663 .Case("{t1}", RISCV::X6) 6664 .Case("{t2}", RISCV::X7) 6665 .Cases("{s0}", "{fp}", RISCV::X8) 6666 .Case("{s1}", RISCV::X9) 6667 .Case("{a0}", RISCV::X10) 6668 .Case("{a1}", RISCV::X11) 6669 .Case("{a2}", RISCV::X12) 6670 .Case("{a3}", RISCV::X13) 6671 .Case("{a4}", RISCV::X14) 6672 .Case("{a5}", RISCV::X15) 6673 .Case("{a6}", RISCV::X16) 6674 .Case("{a7}", RISCV::X17) 6675 .Case("{s2}", RISCV::X18) 6676 .Case("{s3}", RISCV::X19) 6677 .Case("{s4}", RISCV::X20) 6678 .Case("{s5}", RISCV::X21) 6679 .Case("{s6}", RISCV::X22) 6680 .Case("{s7}", RISCV::X23) 6681 .Case("{s8}", RISCV::X24) 6682 .Case("{s9}", RISCV::X25) 6683 .Case("{s10}", RISCV::X26) 6684 .Case("{s11}", RISCV::X27) 6685 .Case("{t3}", RISCV::X28) 6686 .Case("{t4}", RISCV::X29) 6687 .Case("{t5}", RISCV::X30) 6688 .Case("{t6}", RISCV::X31) 6689 .Default(RISCV::NoRegister); 6690 if (XRegFromAlias != RISCV::NoRegister) 6691 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass); 6692 6693 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the 6694 // TableGen record rather than the AsmName to choose registers for InlineAsm 6695 // constraints, plus we want to match those names to the widest floating point 6696 // register type available, manually select floating point registers here. 6697 // 6698 // The second case is the ABI name of the register, so that frontends can also 6699 // use the ABI names in register constraint lists. 
6700 if (Subtarget.hasStdExtF()) { 6701 unsigned FReg = StringSwitch<unsigned>(Constraint.lower()) 6702 .Cases("{f0}", "{ft0}", RISCV::F0_F) 6703 .Cases("{f1}", "{ft1}", RISCV::F1_F) 6704 .Cases("{f2}", "{ft2}", RISCV::F2_F) 6705 .Cases("{f3}", "{ft3}", RISCV::F3_F) 6706 .Cases("{f4}", "{ft4}", RISCV::F4_F) 6707 .Cases("{f5}", "{ft5}", RISCV::F5_F) 6708 .Cases("{f6}", "{ft6}", RISCV::F6_F) 6709 .Cases("{f7}", "{ft7}", RISCV::F7_F) 6710 .Cases("{f8}", "{fs0}", RISCV::F8_F) 6711 .Cases("{f9}", "{fs1}", RISCV::F9_F) 6712 .Cases("{f10}", "{fa0}", RISCV::F10_F) 6713 .Cases("{f11}", "{fa1}", RISCV::F11_F) 6714 .Cases("{f12}", "{fa2}", RISCV::F12_F) 6715 .Cases("{f13}", "{fa3}", RISCV::F13_F) 6716 .Cases("{f14}", "{fa4}", RISCV::F14_F) 6717 .Cases("{f15}", "{fa5}", RISCV::F15_F) 6718 .Cases("{f16}", "{fa6}", RISCV::F16_F) 6719 .Cases("{f17}", "{fa7}", RISCV::F17_F) 6720 .Cases("{f18}", "{fs2}", RISCV::F18_F) 6721 .Cases("{f19}", "{fs3}", RISCV::F19_F) 6722 .Cases("{f20}", "{fs4}", RISCV::F20_F) 6723 .Cases("{f21}", "{fs5}", RISCV::F21_F) 6724 .Cases("{f22}", "{fs6}", RISCV::F22_F) 6725 .Cases("{f23}", "{fs7}", RISCV::F23_F) 6726 .Cases("{f24}", "{fs8}", RISCV::F24_F) 6727 .Cases("{f25}", "{fs9}", RISCV::F25_F) 6728 .Cases("{f26}", "{fs10}", RISCV::F26_F) 6729 .Cases("{f27}", "{fs11}", RISCV::F27_F) 6730 .Cases("{f28}", "{ft8}", RISCV::F28_F) 6731 .Cases("{f29}", "{ft9}", RISCV::F29_F) 6732 .Cases("{f30}", "{ft10}", RISCV::F30_F) 6733 .Cases("{f31}", "{ft11}", RISCV::F31_F) 6734 .Default(RISCV::NoRegister); 6735 if (FReg != RISCV::NoRegister) { 6736 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg"); 6737 if (Subtarget.hasStdExtD()) { 6738 unsigned RegNo = FReg - RISCV::F0_F; 6739 unsigned DReg = RISCV::F0_D + RegNo; 6740 return std::make_pair(DReg, &RISCV::FPR64RegClass); 6741 } 6742 return std::make_pair(FReg, &RISCV::FPR32RegClass); 6743 } 6744 } 6745 6746 if (Subtarget.hasStdExtV()) { 6747 Register VReg = StringSwitch<Register>(Constraint.lower()) 6748 .Case("{v0}", RISCV::V0) 6749 .Case("{v1}", RISCV::V1) 6750 .Case("{v2}", RISCV::V2) 6751 .Case("{v3}", RISCV::V3) 6752 .Case("{v4}", RISCV::V4) 6753 .Case("{v5}", RISCV::V5) 6754 .Case("{v6}", RISCV::V6) 6755 .Case("{v7}", RISCV::V7) 6756 .Case("{v8}", RISCV::V8) 6757 .Case("{v9}", RISCV::V9) 6758 .Case("{v10}", RISCV::V10) 6759 .Case("{v11}", RISCV::V11) 6760 .Case("{v12}", RISCV::V12) 6761 .Case("{v13}", RISCV::V13) 6762 .Case("{v14}", RISCV::V14) 6763 .Case("{v15}", RISCV::V15) 6764 .Case("{v16}", RISCV::V16) 6765 .Case("{v17}", RISCV::V17) 6766 .Case("{v18}", RISCV::V18) 6767 .Case("{v19}", RISCV::V19) 6768 .Case("{v20}", RISCV::V20) 6769 .Case("{v21}", RISCV::V21) 6770 .Case("{v22}", RISCV::V22) 6771 .Case("{v23}", RISCV::V23) 6772 .Case("{v24}", RISCV::V24) 6773 .Case("{v25}", RISCV::V25) 6774 .Case("{v26}", RISCV::V26) 6775 .Case("{v27}", RISCV::V27) 6776 .Case("{v28}", RISCV::V28) 6777 .Case("{v29}", RISCV::V29) 6778 .Case("{v30}", RISCV::V30) 6779 .Case("{v31}", RISCV::V31) 6780 .Default(RISCV::NoRegister); 6781 if (VReg != RISCV::NoRegister) { 6782 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy)) 6783 return std::make_pair(VReg, &RISCV::VMRegClass); 6784 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy)) 6785 return std::make_pair(VReg, &RISCV::VRRegClass); 6786 for (const auto *RC : 6787 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) { 6788 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) { 6789 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC); 6790 return 
std::make_pair(VReg, RC); 6791 } 6792 } 6793 } 6794 } 6795 6796 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); 6797 } 6798 6799 unsigned 6800 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const { 6801 // Currently only support length 1 constraints. 6802 if (ConstraintCode.size() == 1) { 6803 switch (ConstraintCode[0]) { 6804 case 'A': 6805 return InlineAsm::Constraint_A; 6806 default: 6807 break; 6808 } 6809 } 6810 6811 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); 6812 } 6813 6814 void RISCVTargetLowering::LowerAsmOperandForConstraint( 6815 SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops, 6816 SelectionDAG &DAG) const { 6817 // Currently only support length 1 constraints. 6818 if (Constraint.length() == 1) { 6819 switch (Constraint[0]) { 6820 case 'I': 6821 // Validate & create a 12-bit signed immediate operand. 6822 if (auto *C = dyn_cast<ConstantSDNode>(Op)) { 6823 uint64_t CVal = C->getSExtValue(); 6824 if (isInt<12>(CVal)) 6825 Ops.push_back( 6826 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT())); 6827 } 6828 return; 6829 case 'J': 6830 // Validate & create an integer zero operand. 6831 if (auto *C = dyn_cast<ConstantSDNode>(Op)) 6832 if (C->getZExtValue() == 0) 6833 Ops.push_back( 6834 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT())); 6835 return; 6836 case 'K': 6837 // Validate & create a 5-bit unsigned immediate operand. 6838 if (auto *C = dyn_cast<ConstantSDNode>(Op)) { 6839 uint64_t CVal = C->getZExtValue(); 6840 if (isUInt<5>(CVal)) 6841 Ops.push_back( 6842 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT())); 6843 } 6844 return; 6845 default: 6846 break; 6847 } 6848 } 6849 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 6850 } 6851 6852 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder, 6853 Instruction *Inst, 6854 AtomicOrdering Ord) const { 6855 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent) 6856 return Builder.CreateFence(Ord); 6857 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord)) 6858 return Builder.CreateFence(AtomicOrdering::Release); 6859 return nullptr; 6860 } 6861 6862 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder, 6863 Instruction *Inst, 6864 AtomicOrdering Ord) const { 6865 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord)) 6866 return Builder.CreateFence(AtomicOrdering::Acquire); 6867 return nullptr; 6868 } 6869 6870 TargetLowering::AtomicExpansionKind 6871 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { 6872 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating 6873 // point operations can't be used in an lr/sc sequence without breaking the 6874 // forward-progress guarantee. 
6875 if (AI->isFloatingPointOperation()) 6876 return AtomicExpansionKind::CmpXChg; 6877 6878 unsigned Size = AI->getType()->getPrimitiveSizeInBits(); 6879 if (Size == 8 || Size == 16) 6880 return AtomicExpansionKind::MaskedIntrinsic; 6881 return AtomicExpansionKind::None; 6882 } 6883 6884 static Intrinsic::ID 6885 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) { 6886 if (XLen == 32) { 6887 switch (BinOp) { 6888 default: 6889 llvm_unreachable("Unexpected AtomicRMW BinOp"); 6890 case AtomicRMWInst::Xchg: 6891 return Intrinsic::riscv_masked_atomicrmw_xchg_i32; 6892 case AtomicRMWInst::Add: 6893 return Intrinsic::riscv_masked_atomicrmw_add_i32; 6894 case AtomicRMWInst::Sub: 6895 return Intrinsic::riscv_masked_atomicrmw_sub_i32; 6896 case AtomicRMWInst::Nand: 6897 return Intrinsic::riscv_masked_atomicrmw_nand_i32; 6898 case AtomicRMWInst::Max: 6899 return Intrinsic::riscv_masked_atomicrmw_max_i32; 6900 case AtomicRMWInst::Min: 6901 return Intrinsic::riscv_masked_atomicrmw_min_i32; 6902 case AtomicRMWInst::UMax: 6903 return Intrinsic::riscv_masked_atomicrmw_umax_i32; 6904 case AtomicRMWInst::UMin: 6905 return Intrinsic::riscv_masked_atomicrmw_umin_i32; 6906 } 6907 } 6908 6909 if (XLen == 64) { 6910 switch (BinOp) { 6911 default: 6912 llvm_unreachable("Unexpected AtomicRMW BinOp"); 6913 case AtomicRMWInst::Xchg: 6914 return Intrinsic::riscv_masked_atomicrmw_xchg_i64; 6915 case AtomicRMWInst::Add: 6916 return Intrinsic::riscv_masked_atomicrmw_add_i64; 6917 case AtomicRMWInst::Sub: 6918 return Intrinsic::riscv_masked_atomicrmw_sub_i64; 6919 case AtomicRMWInst::Nand: 6920 return Intrinsic::riscv_masked_atomicrmw_nand_i64; 6921 case AtomicRMWInst::Max: 6922 return Intrinsic::riscv_masked_atomicrmw_max_i64; 6923 case AtomicRMWInst::Min: 6924 return Intrinsic::riscv_masked_atomicrmw_min_i64; 6925 case AtomicRMWInst::UMax: 6926 return Intrinsic::riscv_masked_atomicrmw_umax_i64; 6927 case AtomicRMWInst::UMin: 6928 return Intrinsic::riscv_masked_atomicrmw_umin_i64; 6929 } 6930 } 6931 6932 llvm_unreachable("Unexpected XLen\n"); 6933 } 6934 6935 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic( 6936 IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, 6937 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const { 6938 unsigned XLen = Subtarget.getXLen(); 6939 Value *Ordering = 6940 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering())); 6941 Type *Tys[] = {AlignedAddr->getType()}; 6942 Function *LrwOpScwLoop = Intrinsic::getDeclaration( 6943 AI->getModule(), 6944 getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys); 6945 6946 if (XLen == 64) { 6947 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty()); 6948 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); 6949 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty()); 6950 } 6951 6952 Value *Result; 6953 6954 // Must pass the shift amount needed to sign extend the loaded value prior 6955 // to performing a signed comparison for min/max. ShiftAmt is the number of 6956 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which 6957 // is the number of bits to left+right shift the value in order to 6958 // sign-extend. 
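// For example, an i8 atomicrmw min at byte offset 1 on RV32 has ShiftAmt = 8
// and ValWidth = 8, so SextShamt = 32 - 8 - 8 = 16; shifting left and then
// arithmetic-right by 16 sign-extends the byte in place within the word.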
6959 if (AI->getOperation() == AtomicRMWInst::Min || 6960 AI->getOperation() == AtomicRMWInst::Max) { 6961 const DataLayout &DL = AI->getModule()->getDataLayout(); 6962 unsigned ValWidth = 6963 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType()); 6964 Value *SextShamt = 6965 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt); 6966 Result = Builder.CreateCall(LrwOpScwLoop, 6967 {AlignedAddr, Incr, Mask, SextShamt, Ordering}); 6968 } else { 6969 Result = 6970 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering}); 6971 } 6972 6973 if (XLen == 64) 6974 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); 6975 return Result; 6976 } 6977 6978 TargetLowering::AtomicExpansionKind 6979 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR( 6980 AtomicCmpXchgInst *CI) const { 6981 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits(); 6982 if (Size == 8 || Size == 16) 6983 return AtomicExpansionKind::MaskedIntrinsic; 6984 return AtomicExpansionKind::None; 6985 } 6986 6987 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( 6988 IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, 6989 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { 6990 unsigned XLen = Subtarget.getXLen(); 6991 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord)); 6992 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32; 6993 if (XLen == 64) { 6994 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty()); 6995 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty()); 6996 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); 6997 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64; 6998 } 6999 Type *Tys[] = {AlignedAddr->getType()}; 7000 Function *MaskedCmpXchg = 7001 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys); 7002 Value *Result = Builder.CreateCall( 7003 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering}); 7004 if (XLen == 64) 7005 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); 7006 return Result; 7007 } 7008 7009 bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const { 7010 return false; 7011 } 7012 7013 bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, 7014 EVT VT) const { 7015 VT = VT.getScalarType(); 7016 7017 if (!VT.isSimple()) 7018 return false; 7019 7020 switch (VT.getSimpleVT().SimpleTy) { 7021 case MVT::f16: 7022 return Subtarget.hasStdExtZfh(); 7023 case MVT::f32: 7024 return Subtarget.hasStdExtF(); 7025 case MVT::f64: 7026 return Subtarget.hasStdExtD(); 7027 default: 7028 break; 7029 } 7030 7031 return false; 7032 } 7033 7034 Register RISCVTargetLowering::getExceptionPointerRegister( 7035 const Constant *PersonalityFn) const { 7036 return RISCV::X10; 7037 } 7038 7039 Register RISCVTargetLowering::getExceptionSelectorRegister( 7040 const Constant *PersonalityFn) const { 7041 return RISCV::X11; 7042 } 7043 7044 bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const { 7045 // Return false to suppress the unnecessary extensions if the LibCall 7046 // arguments or return value is f32 type for LP64 ABI. 
7047 RISCVABI::ABI ABI = Subtarget.getTargetABI(); 7048 if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32)) 7049 return false; 7050 7051 return true; 7052 } 7053 7054 bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const { 7055 if (Subtarget.is64Bit() && Type == MVT::i32) 7056 return true; 7057 7058 return IsSigned; 7059 } 7060 7061 bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT, 7062 SDValue C) const { 7063 // Check integral scalar types. 7064 if (VT.isScalarInteger()) { 7065 // Omit the optimization if the sub target has the M extension and the data 7066 // size exceeds XLen. 7067 if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen()) 7068 return false; 7069 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) { 7070 // Break the MUL to a SLLI and an ADD/SUB. 7071 const APInt &Imm = ConstNode->getAPIntValue(); 7072 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() || 7073 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2()) 7074 return true; 7075 // Omit the following optimization if the sub target has the M extension 7076 // and the data size >= XLen. 7077 if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen()) 7078 return false; 7079 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs 7080 // a pair of LUI/ADDI. 7081 if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) { 7082 APInt ImmS = Imm.ashr(Imm.countTrailingZeros()); 7083 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() || 7084 (1 - ImmS).isPowerOf2()) 7085 return true; 7086 } 7087 } 7088 } 7089 7090 return false; 7091 } 7092 7093 bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const { 7094 if (!Subtarget.useRVVForFixedLengthVectors()) 7095 return false; 7096 7097 if (!VT.isFixedLengthVector()) 7098 return false; 7099 7100 // Don't use RVV for vectors we cannot scalarize if required. 7101 switch (VT.getVectorElementType().SimpleTy) { 7102 // i1 is supported but has different rules. 7103 default: 7104 return false; 7105 case MVT::i1: 7106 // Masks can only use a single register. 7107 if (VT.getVectorNumElements() > Subtarget.getMinRVVVectorSizeInBits()) 7108 return false; 7109 break; 7110 case MVT::i8: 7111 case MVT::i16: 7112 case MVT::i32: 7113 case MVT::i64: 7114 break; 7115 case MVT::f16: 7116 if (!Subtarget.hasStdExtZfh()) 7117 return false; 7118 break; 7119 case MVT::f32: 7120 if (!Subtarget.hasStdExtF()) 7121 return false; 7122 break; 7123 case MVT::f64: 7124 if (!Subtarget.hasStdExtD()) 7125 return false; 7126 break; 7127 } 7128 7129 unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT); 7130 // Don't use RVV for types that don't fit. 7131 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors()) 7132 return false; 7133 7134 // TODO: Perhaps an artificial restriction, but worth having whilst getting 7135 // the base fixed length RVV support in place. 
7136 if (!VT.isPow2VectorType()) 7137 return false; 7138 7139 return true; 7140 } 7141 7142 bool RISCVTargetLowering::allowsMisalignedMemoryAccesses( 7143 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, 7144 bool *Fast) const { 7145 if (!VT.isScalableVector()) 7146 return false; 7147 7148 EVT ElemVT = VT.getVectorElementType(); 7149 if (Alignment >= ElemVT.getStoreSize()) { 7150 if (Fast) 7151 *Fast = true; 7152 return true; 7153 } 7154 7155 return false; 7156 } 7157 7158 bool RISCVTargetLowering::splitValueIntoRegisterParts( 7159 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, 7160 unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const { 7161 EVT ValueVT = Val.getValueType(); 7162 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) { 7163 LLVMContext &Context = *DAG.getContext(); 7164 EVT ValueEltVT = ValueVT.getVectorElementType(); 7165 EVT PartEltVT = PartVT.getVectorElementType(); 7166 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinSize(); 7167 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinSize(); 7168 if (PartVTBitSize % ValueVTBitSize == 0) { 7169 // If the element types are different, bitcast to the same element type of 7170 // PartVT first. 7171 if (ValueEltVT != PartEltVT) { 7172 unsigned Count = ValueVTBitSize / PartEltVT.getSizeInBits(); 7173 assert(Count != 0 && "The number of element should not be zero."); 7174 EVT SameEltTypeVT = 7175 EVT::getVectorVT(Context, PartEltVT, Count, /*IsScalable=*/true); 7176 Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val); 7177 } 7178 Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT), 7179 Val, DAG.getConstant(0, DL, Subtarget.getXLenVT())); 7180 Parts[0] = Val; 7181 return true; 7182 } 7183 } 7184 return false; 7185 } 7186 7187 SDValue RISCVTargetLowering::joinRegisterPartsIntoValue( 7188 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, 7189 MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const { 7190 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) { 7191 LLVMContext &Context = *DAG.getContext(); 7192 SDValue Val = Parts[0]; 7193 EVT ValueEltVT = ValueVT.getVectorElementType(); 7194 EVT PartEltVT = PartVT.getVectorElementType(); 7195 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinSize(); 7196 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinSize(); 7197 if (PartVTBitSize % ValueVTBitSize == 0) { 7198 EVT SameEltTypeVT = ValueVT; 7199 // If the element types are different, convert it to the same element type 7200 // of PartVT. 
7201 if (ValueEltVT != PartEltVT) { 7202 unsigned Count = ValueVTBitSize / PartEltVT.getSizeInBits(); 7203 assert(Count != 0 && "The number of element should not be zero."); 7204 SameEltTypeVT = 7205 EVT::getVectorVT(Context, PartEltVT, Count, /*IsScalable=*/true); 7206 } 7207 Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SameEltTypeVT, Val, 7208 DAG.getConstant(0, DL, Subtarget.getXLenVT())); 7209 if (ValueEltVT != PartEltVT) 7210 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); 7211 return Val; 7212 } 7213 } 7214 return SDValue(); 7215 } 7216 7217 #define GET_REGISTER_MATCHER 7218 #include "RISCVGenAsmMatcher.inc" 7219 7220 Register 7221 RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT, 7222 const MachineFunction &MF) const { 7223 Register Reg = MatchRegisterAltName(RegName); 7224 if (Reg == RISCV::NoRegister) 7225 Reg = MatchRegisterName(RegName); 7226 if (Reg == RISCV::NoRegister) 7227 report_fatal_error( 7228 Twine("Invalid register name \"" + StringRef(RegName) + "\".")); 7229 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF); 7230 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg)) 7231 report_fatal_error(Twine("Trying to obtain non-reserved register \"" + 7232 StringRef(RegName) + "\".")); 7233 return Reg; 7234 } 7235 7236 namespace llvm { 7237 namespace RISCVVIntrinsicsTable { 7238 7239 #define GET_RISCVVIntrinsicsTable_IMPL 7240 #include "RISCVGenSearchableTables.inc" 7241 7242 } // namespace RISCVVIntrinsicsTable 7243 7244 } // namespace llvm 7245