//===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISCV uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "RISCVISelLowering.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVRegisterInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-lower"

STATISTIC(NumTailCalls, "Number of tail calls");

// Constructor: configures the target-lowering tables (register classes,
// per-node legalization actions, DAG-combine hooks) for the selected
// subtarget features and ABI.
RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                         const RISCVSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  if (Subtarget.isRV32E())
    report_fatal_error("Codegen not yet implemented for RV32E");

  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");

  // If a hard-float ABI was requested but the required FP extension is
  // missing, warn and fall back to the corresponding soft-float ABI.
  if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
      !Subtarget.hasStdExtF()) {
    errs() << "Hard-float 'f' ABI can't be used for a target that "
              "doesn't support the F instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
             !Subtarget.hasStdExtD()) {
    errs() << "Hard-float 'd' ABI can't be used for a target that "
              "doesn't support the D instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  }

  // Only the ABIs listed below are currently supported for lowering.
  switch (ABI) {
  default:
    report_fatal_error("Don't know how to lower this ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64:
  case RISCVABI::ABI_LP64F:
  case RISCVABI::ABI_LP64D:
    break;
  }

  MVT XLenVT = Subtarget.getXLenVT();

  // Set up the register classes.
  addRegisterClass(XLenVT, &RISCV::GPRRegClass);

  if (Subtarget.hasStdExtZfh())
    addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
  if (Subtarget.hasStdExtF())
    addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
  if (Subtarget.hasStdExtD())
    addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);

  // Scalable vector types grouped by element type; these drive the RVV
  // register-class and operation-action setup below.
  static const MVT::SimpleValueType BoolVecVTs[] = {
      MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
      MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
  static const MVT::SimpleValueType IntVecVTs[] = {
      MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
      MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
      MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
      MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
      MVT::nxv4i64, MVT::nxv8i64};
  static const MVT::SimpleValueType F16VecVTs[] = {
      MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
      MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
  static const MVT::SimpleValueType F32VecVTs[] = {
      MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
  static const MVT::SimpleValueType F64VecVTs[] = {
      MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};

  if (Subtarget.hasStdExtV()) {
    // Choose the vector register class (single register or a 2/4/8-register
    // group) based on the type's known-minimum size in bits.
    auto addRegClassForRVV = [this](MVT VT) {
      unsigned Size = VT.getSizeInBits().getKnownMinValue();
      assert(Size <= 512 && isPowerOf2_32(Size));
      const TargetRegisterClass *RC;
      if (Size <= 64)
        RC = &RISCV::VRRegClass;
      else if (Size == 128)
        RC = &RISCV::VRM2RegClass;
      else if (Size == 256)
        RC = &RISCV::VRM4RegClass;
      else
        RC = &RISCV::VRM8RegClass;

      addRegisterClass(VT, RC);
    };

    for (MVT VT : BoolVecVTs)
      addRegClassForRVV(VT);
    for (MVT VT : IntVecVTs)
      addRegClassForRVV(VT);

    // FP vector types are only legal when the matching scalar FP extension
    // is present.
    if (Subtarget.hasStdExtZfh())
      for (MVT VT : F16VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasStdExtF())
      for (MVT VT : F32VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasStdExtD())
      for (MVT VT : F64VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.useRVVForFixedLengthVectors()) {
      // Map each legal fixed-length vector type to the register class
      // matching the LMUL the subtarget chose for it.
      auto addRegClassForFixedVectors = [this](MVT VT) {
        unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
        const TargetRegisterClass *RC;
        if (LMul == 1 || VT.getVectorElementType() == MVT::i1)
          RC = &RISCV::VRRegClass;
        else if (LMul == 2)
          RC = &RISCV::VRM2RegClass;
        else if (LMul == 4)
          RC = &RISCV::VRM4RegClass;
        else if (LMul == 8)
          RC = &RISCV::VRM8RegClass;
        else
          llvm_unreachable("Unexpected LMul!");

        addRegisterClass(VT, RC);
      };
      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);

      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);
    }
  }

  // Compute derived properties from the register classes.
  computeRegisterProperties(STI.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(RISCV::X2);

  for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
    setLoadExtAction(N, XLenVT, MVT::i1, Promote);

  // TODO: add all necessary setOperationAction calls.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, XLenVT, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::SELECT_CC, XLenVT, Expand);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  // Without Zbb there are no sext.b/sext.h equivalents, so expand i8/i16
  // in-register sign extensions.
  if (!Subtarget.hasStdExtZbb()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  }

  // On RV64, i32 is not a legal type; custom-lower these so the *W-form
  // handling can be applied during legalization.
  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::ADD, MVT::i32, Custom);
    setOperationAction(ISD::SUB, MVT::i32, Custom);
    setOperationAction(ISD::SHL, MVT::i32, Custom);
    setOperationAction(ISD::SRA, MVT::i32, Custom);
    setOperationAction(ISD::SRL, MVT::i32, Custom);

    setOperationAction(ISD::UADDO, MVT::i32, Custom);
    setOperationAction(ISD::USUBO, MVT::i32, Custom);
    setOperationAction(ISD::UADDSAT, MVT::i32, Custom);
    setOperationAction(ISD::USUBSAT, MVT::i32, Custom);
  }

  // Without the M extension there is no hardware multiply/divide.
  if (!Subtarget.hasStdExtM()) {
    setOperationAction(ISD::MUL, XLenVT, Expand);
    setOperationAction(ISD::MULHS, XLenVT, Expand);
    setOperationAction(ISD::MULHU, XLenVT, Expand);
    setOperationAction(ISD::SDIV, XLenVT, Expand);
    setOperationAction(ISD::UDIV, XLenVT, Expand);
    setOperationAction(ISD::SREM, XLenVT, Expand);
    setOperationAction(ISD::UREM, XLenVT, Expand);
  }

  if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) {
    setOperationAction(ISD::MUL, MVT::i32, Custom);

    setOperationAction(ISD::SDIV, MVT::i8, Custom);
    setOperationAction(ISD::UDIV, MVT::i8, Custom);
    setOperationAction(ISD::UREM, MVT::i8, Custom);
    setOperationAction(ISD::SDIV, MVT::i16, Custom);
    setOperationAction(ISD::UDIV, MVT::i16, Custom);
    setOperationAction(ISD::UREM, MVT::i16, Custom);
    setOperationAction(ISD::SDIV, MVT::i32, Custom);
    setOperationAction(ISD::UDIV, MVT::i32, Custom);
    setOperationAction(ISD::UREM, MVT::i32, Custom);
  }

  setOperationAction(ISD::SDIVREM, XLenVT, Expand);
  setOperationAction(ISD::UDIVREM, XLenVT, Expand);
  setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
  setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);

  setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
  setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
  setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);

  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::ROTL, MVT::i32, Custom);
      setOperationAction(ISD::ROTR, MVT::i32, Custom);
    }
  } else {
    setOperationAction(ISD::ROTL, XLenVT, Expand);
    setOperationAction(ISD::ROTR, XLenVT, Expand);
  }

  if (Subtarget.hasStdExtZbp()) {
    // Custom lower bswap/bitreverse so we can convert them to GREVI to enable
    // more combining.
    setOperationAction(ISD::BITREVERSE, XLenVT, Custom);
    setOperationAction(ISD::BSWAP, XLenVT, Custom);

    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
      setOperationAction(ISD::BSWAP, MVT::i32, Custom);
    }
  } else {
    // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
    // pattern match it directly in isel.
    setOperationAction(ISD::BSWAP, XLenVT,
                       Subtarget.hasStdExtZbb() ? Legal : Expand);
  }

  if (Subtarget.hasStdExtZbb()) {
    setOperationAction(ISD::SMIN, XLenVT, Legal);
    setOperationAction(ISD::SMAX, XLenVT, Legal);
    setOperationAction(ISD::UMIN, XLenVT, Legal);
    setOperationAction(ISD::UMAX, XLenVT, Legal);
  } else {
    setOperationAction(ISD::CTTZ, XLenVT, Expand);
    setOperationAction(ISD::CTLZ, XLenVT, Expand);
    setOperationAction(ISD::CTPOP, XLenVT, Expand);
  }

  if (Subtarget.hasStdExtZbt()) {
    setOperationAction(ISD::FSHL, XLenVT, Custom);
    setOperationAction(ISD::FSHR, XLenVT, Custom);
    setOperationAction(ISD::SELECT, XLenVT, Legal);

    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::FSHL, MVT::i32, Custom);
      setOperationAction(ISD::FSHR, MVT::i32, Custom);
    }
  } else {
    setOperationAction(ISD::SELECT, XLenVT, Custom);
  }

  // FP condition codes and FP operations that have no direct scalar FP
  // instruction; expanded for every legal scalar FP type below.
  ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
      ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
      ISD::SETGE, ISD::SETNE, ISD::SETO, ISD::SETUO};

  ISD::NodeType FPOpToExpand[] = {
      ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FP16_TO_FP,
      ISD::FP_TO_FP16};

  if (Subtarget.hasStdExtZfh())
    setOperationAction(ISD::BITCAST, MVT::i16, Custom);

  if (Subtarget.hasStdExtZfh()) {
    setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f16, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
    setOperationAction(ISD::SELECT, MVT::f16, Custom);
    setOperationAction(ISD::BR_CC, MVT::f16, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f16, Expand);
  }

  if (Subtarget.hasStdExtF()) {
    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Custom);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  }

  if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);

  if (Subtarget.hasStdExtD()) {
    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Custom);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
  }

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
  }

  setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
  setOperationAction(ISD::BlockAddress, XLenVT, Custom);
  setOperationAction(ISD::ConstantPool, XLenVT, Custom);
  setOperationAction(ISD::JumpTable, XLenVT, Custom);

  setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);

  // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
  // Unfortunately this can't be determined just from the ISA naming string.
  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
                     Subtarget.is64Bit() ? Legal : Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);
  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  if (Subtarget.hasStdExtA()) {
    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
    setMinCmpXchgSizeInBits(32);
  } else {
    // Without the A extension, all atomic operations become libcalls.
    setMaxAtomicSizeInBitsSupported(0);
  }

  setBooleanContents(ZeroOrOneBooleanContent);

  if (Subtarget.hasStdExtV()) {
    setBooleanVectorContents(ZeroOrOneBooleanContent);

    setOperationAction(ISD::VSCALE, XLenVT, Custom);

    // RVV intrinsics may have illegal operands.
    // We also need to custom legalize vmv.x.s.
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);

    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);

    if (!Subtarget.is64Bit()) {
      // We must custom-lower certain vXi64 operations on RV32 due to the vector
      // element type being illegal.
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i64, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::i64, Custom);

      setOperationAction(ISD::VECREDUCE_ADD, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_AND, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_OR, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_XOR, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_SMAX, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_SMIN, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_UMAX, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_UMIN, MVT::i64, Custom);
    }

    for (MVT VT : BoolVecVTs) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);

      // Mask VTs are custom-expanded into a series of standard nodes
      setOperationAction(ISD::TRUNCATE, VT, Custom);
      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    }

    for (MVT VT : IntVecVTs) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
      setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);

      setOperationAction(ISD::SMIN, VT, Legal);
      setOperationAction(ISD::SMAX, VT, Legal);
      setOperationAction(ISD::UMIN, VT, Legal);
      setOperationAction(ISD::UMAX, VT, Legal);

      setOperationAction(ISD::ROTL, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      // Custom-lower extensions and truncations from/to mask types.
      setOperationAction(ISD::ANY_EXTEND, VT, Custom);
      setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
      setOperationAction(ISD::ZERO_EXTEND, VT, Custom);

      // RVV has native int->float & float->int conversions where the
      // element type sizes are within one power-of-two of each other. Any
      // wider distances between type sizes have to be lowered as sequences
      // which progressively narrow the gap in stages.
      setOperationAction(ISD::SINT_TO_FP, VT, Custom);
      setOperationAction(ISD::UINT_TO_FP, VT, Custom);
      setOperationAction(ISD::FP_TO_SINT, VT, Custom);
      setOperationAction(ISD::FP_TO_UINT, VT, Custom);

      // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
      // nodes which truncate by one power of two at a time.
      setOperationAction(ISD::TRUNCATE, VT, Custom);

      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);

      // Custom-lower reduction operations to set up the corresponding custom
      // nodes' operands.
      setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
      setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
      setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
      setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
      setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
      setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
      setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
      setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);

      setOperationAction(ISD::MLOAD, VT, Custom);
      setOperationAction(ISD::MSTORE, VT, Custom);
      setOperationAction(ISD::MGATHER, VT, Custom);
      setOperationAction(ISD::MSCATTER, VT, Custom);

      setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

      setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
    }

    // Expand various CCs to best match the RVV ISA, which natively supports UNE
    // but no other unordered comparisons, and supports all ordered comparisons
    // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
    // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
    // and we pattern-match those back to the "original", swapping operands once
    // more. This way we catch both operations and both "vf" and "fv" forms with
    // fewer patterns.
    ISD::CondCode VFPCCToExpand[] = {
        ISD::SETO, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
        ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
        ISD::SETGT, ISD::SETOGT, ISD::SETGE, ISD::SETOGE,
    };

    // Sets common operation actions on RVV floating-point vector types.
    const auto SetCommonVFPActions = [&](MVT VT) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
      // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
      // sizes are within one power-of-two of each other. Therefore conversions
      // between vXf16 and vXf64 must be lowered as sequences which convert via
      // vXf32.
      setOperationAction(ISD::FP_ROUND, VT, Custom);
      setOperationAction(ISD::FP_EXTEND, VT, Custom);
      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
      // Expand various condition codes (explained above).
      for (auto CC : VFPCCToExpand)
        setCondCodeAction(CC, VT, Expand);

      setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
      setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
      setOperationAction(ISD::FCOPYSIGN, VT, Legal);

      setOperationAction(ISD::MLOAD, VT, Custom);
      setOperationAction(ISD::MSTORE, VT, Custom);
      setOperationAction(ISD::MGATHER, VT, Custom);
      setOperationAction(ISD::MSCATTER, VT, Custom);

      setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

      setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
    };

    if (Subtarget.hasStdExtZfh())
      for (MVT VT : F16VecVTs)
        SetCommonVFPActions(VT);

    if (Subtarget.hasStdExtF())
      for (MVT VT : F32VecVTs)
        SetCommonVFPActions(VT);

    if (Subtarget.hasStdExtD())
      for (MVT VT : F64VecVTs)
        SetCommonVFPActions(VT);

    if (Subtarget.useRVVForFixedLengthVectors()) {
      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
        if (!useRVVForFixedLengthVectorVT(VT))
          continue;

        // By default everything must be expanded.
        for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
          setOperationAction(Op, VT, Expand);
        for (MVT OtherVT : MVT::fixedlen_vector_valuetypes())
          setTruncStoreAction(VT, OtherVT, Expand);

        // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
        setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
        setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

        setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
        setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);

        setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);

        setOperationAction(ISD::LOAD, VT, Custom);
        setOperationAction(ISD::STORE, VT, Custom);

        setOperationAction(ISD::SETCC, VT, Custom);

        setOperationAction(ISD::TRUNCATE, VT, Custom);

        setOperationAction(ISD::BITCAST, VT, Custom);

        // Operations below are different for between masks and other vectors.
        if (VT.getVectorElementType() == MVT::i1) {
          setOperationAction(ISD::AND, VT, Custom);
          setOperationAction(ISD::OR, VT, Custom);
          setOperationAction(ISD::XOR, VT, Custom);
          continue;
        }

        setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
        setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);

        setOperationAction(ISD::MLOAD, VT, Custom);
        setOperationAction(ISD::MSTORE, VT, Custom);
        setOperationAction(ISD::MGATHER, VT, Custom);
        setOperationAction(ISD::MSCATTER, VT, Custom);
        setOperationAction(ISD::ADD, VT, Custom);
        setOperationAction(ISD::MUL, VT, Custom);
        setOperationAction(ISD::SUB, VT, Custom);
        setOperationAction(ISD::AND, VT, Custom);
        setOperationAction(ISD::OR, VT, Custom);
        setOperationAction(ISD::XOR, VT, Custom);
        setOperationAction(ISD::SDIV, VT, Custom);
        setOperationAction(ISD::SREM, VT, Custom);
        setOperationAction(ISD::UDIV, VT, Custom);
        setOperationAction(ISD::UREM, VT, Custom);
        setOperationAction(ISD::SHL, VT, Custom);
        setOperationAction(ISD::SRA, VT, Custom);
        setOperationAction(ISD::SRL, VT, Custom);

        setOperationAction(ISD::SMIN, VT, Custom);
        setOperationAction(ISD::SMAX, VT, Custom);
        setOperationAction(ISD::UMIN, VT, Custom);
        setOperationAction(ISD::UMAX, VT, Custom);
        setOperationAction(ISD::ABS, VT, Custom);

        setOperationAction(ISD::MULHS, VT, Custom);
        setOperationAction(ISD::MULHU, VT, Custom);

        setOperationAction(ISD::SINT_TO_FP, VT, Custom);
        setOperationAction(ISD::UINT_TO_FP, VT, Custom);
        setOperationAction(ISD::FP_TO_SINT, VT, Custom);
        setOperationAction(ISD::FP_TO_UINT, VT, Custom);

        setOperationAction(ISD::VSELECT, VT, Custom);

        setOperationAction(ISD::ANY_EXTEND, VT, Custom);
        setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
        setOperationAction(ISD::ZERO_EXTEND, VT, Custom);

        // Custom-lower reduction operations to set up the corresponding custom
        // nodes' operands.
        setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
        setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
        setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
        setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
        setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
        setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
        setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
        setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
      }

      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
        if (!useRVVForFixedLengthVectorVT(VT))
          continue;

        // By default everything must be expanded.
        for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
          setOperationAction(Op, VT, Expand);
        for (MVT OtherVT : MVT::fp_fixedlen_vector_valuetypes()) {
          setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
          setTruncStoreAction(VT, OtherVT, Expand);
        }

        // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
        setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
        setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

        setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
        setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
        setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);

        setOperationAction(ISD::LOAD, VT, Custom);
        setOperationAction(ISD::STORE, VT, Custom);
        setOperationAction(ISD::MLOAD, VT, Custom);
        setOperationAction(ISD::MSTORE, VT, Custom);
        setOperationAction(ISD::MGATHER, VT, Custom);
        setOperationAction(ISD::MSCATTER, VT, Custom);
        setOperationAction(ISD::FADD, VT, Custom);
        setOperationAction(ISD::FSUB, VT, Custom);
        setOperationAction(ISD::FMUL, VT, Custom);
        setOperationAction(ISD::FDIV, VT, Custom);
        setOperationAction(ISD::FNEG, VT, Custom);
        setOperationAction(ISD::FABS, VT, Custom);
        setOperationAction(ISD::FCOPYSIGN, VT, Custom);
        setOperationAction(ISD::FSQRT, VT, Custom);
        setOperationAction(ISD::FMA, VT, Custom);

        setOperationAction(ISD::FP_ROUND, VT, Custom);
        setOperationAction(ISD::FP_EXTEND, VT, Custom);

        for (auto CC : VFPCCToExpand)
          setCondCodeAction(CC, VT, Expand);

        setOperationAction(ISD::VSELECT, VT, Custom);

        setOperationAction(ISD::BITCAST, VT, Custom);

        setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
        setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
      }
    }
  }

  // Function alignments. With the C (compressed) extension instructions may
  // be 2-byte aligned; otherwise 4-byte.
  const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
  setMinFunctionAlignment(FunctionAlignment);
  setPrefFunctionAlignment(FunctionAlignment);

  setMinimumJumpTableEntries(5);

  // Jumps are expensive, compared to logic
  setJumpIsExpensive();

  // We can use any register for comparisons
  setHasMultipleConditionRegisters();

  if (Subtarget.hasStdExtZbp()) {
    setTargetDAGCombine(ISD::OR);
  }
  if (Subtarget.hasStdExtV()) {
    setTargetDAGCombine(ISD::FCOPYSIGN);
    setTargetDAGCombine(ISD::MGATHER);
    setTargetDAGCombine(ISD::MSCATTER);
  }
}

// Return the type a SETCC should produce: an XLen-sized scalar for scalar
// comparisons, or an i1 vector with a matching element count for vector
// comparisons that RVV will handle.
EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
                                            LLVMContext &Context,
                                            EVT VT) const {
  if (!VT.isVector())
    return getPointerTy(DL);
  if (Subtarget.hasStdExtV() &&
      (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
    return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
  return VT.changeVectorElementTypeToInteger();
}

// Describe the memory accessed by the RISCV masked atomic intrinsics so a
// correct MachineMemOperand can be built: a 4-byte-aligned volatile
// load/store through the intrinsic's pointer argument.
bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                             const CallInst &I,
                                             MachineFunction &MF,
                                             unsigned Intrinsic) const {
  switch (Intrinsic) {
  default:
    return false;
  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
  case Intrinsic::riscv_masked_atomicrmw_add_i32:
  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
  case Intrinsic::riscv_masked_atomicrmw_max_i32:
  case Intrinsic::riscv_masked_atomicrmw_min_i32:
  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
  case Intrinsic::riscv_masked_cmpxchg_i32:
    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::getVT(PtrTy->getElementType());
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    return true;
  }
}

// RISC-V load/store addressing only supports a base register plus a 12-bit
// signed immediate; reject global bases and any scaled-index form.
bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                const AddrMode &AM, Type *Ty,
                                                unsigned AS,
                                                Instruction *I) const {
  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // Require a 12-bit signed offset.
  if (!isInt<12>(AM.BaseOffs))
    return false;

  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (!AM.HasBaseReg) // allow "r+i".
      break;
    return false; // disallow "r+r" or "r+r+i".
  default:
    return false;
  }

  return true;
}

// Compare instructions take a 12-bit signed immediate.
bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

// ADDI takes a 12-bit signed immediate.
bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

// On RV32, 64-bit integers are split into their high and low parts and held
// in two different registers, so the trunc is free since the low register can
// just be used.
bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
  if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
    return false;
  unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
  unsigned DestBits = DstTy->getPrimitiveSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

// EVT variant of the above: i64->i32 truncation is free on RV32 only.
bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
  if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
      !SrcVT.isInteger() || !DstVT.isInteger())
    return false;
  unsigned SrcBits = SrcVT.getSizeInBits();
  unsigned DestBits = DstVT.getSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Zexts are free if they can be combined with a load.
  // A non-extending or already zero-extending load of i8/i16 (or i32 on
  // RV64) can be folded into a zero-extending load, making the zext free.
  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
         (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  return TargetLowering::isZExtFree(Val, VT2);
}

// Only i32->i64 on RV64 prefers sign extension over zero extension.
bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}

// Speculating cttz/ctlz is only cheap when Zbb provides the instructions.
bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
  return Subtarget.hasStdExtZbb();
}

bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
  return Subtarget.hasStdExtZbb();
}

// Only +0.0 is treated as a legal FP immediate (and only when the relevant
// FP extension for the type is enabled); negative zero is explicitly
// rejected.
bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                       bool ForCodeSize) const {
  if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
    return false;
  if (VT == MVT::f32 && !Subtarget.hasStdExtF())
    return false;
  if (VT == MVT::f64 && !Subtarget.hasStdExtD())
    return false;
  if (Imm.isNegZero())
    return false;
  return Imm.isZero();
}

// FP logic ops preserve the bit pattern when the type has a matching FP
// register class.
bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
  return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) ||
         (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
         (VT == MVT::f64 && Subtarget.hasStdExtD());
}

// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly by branches
// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
// with 1/-1.
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
                                    ISD::CondCode &CC, SelectionDAG &DAG) {
  // Convert X > -1 to X >= 0.
  if (CC == ISD::SETGT && isAllOnesConstant(RHS)) {
    RHS = DAG.getConstant(0, DL, RHS.getValueType());
    CC = ISD::SETGE;
    return;
  }
  // Convert X < 1 to 0 >= X.
863 if (CC == ISD::SETLT && isOneConstant(RHS)) { 864 RHS = LHS; 865 LHS = DAG.getConstant(0, DL, RHS.getValueType()); 866 CC = ISD::SETGE; 867 return; 868 } 869 870 switch (CC) { 871 default: 872 break; 873 case ISD::SETGT: 874 case ISD::SETLE: 875 case ISD::SETUGT: 876 case ISD::SETULE: 877 CC = ISD::getSetCCSwappedOperands(CC); 878 std::swap(LHS, RHS); 879 break; 880 } 881 } 882 883 // Return the RISC-V branch opcode that matches the given DAG integer 884 // condition code. The CondCode must be one of those supported by the RISC-V 885 // ISA (see translateSetCCForBranch). 886 static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) { 887 switch (CC) { 888 default: 889 llvm_unreachable("Unsupported CondCode"); 890 case ISD::SETEQ: 891 return RISCV::BEQ; 892 case ISD::SETNE: 893 return RISCV::BNE; 894 case ISD::SETLT: 895 return RISCV::BLT; 896 case ISD::SETGE: 897 return RISCV::BGE; 898 case ISD::SETULT: 899 return RISCV::BLTU; 900 case ISD::SETUGE: 901 return RISCV::BGEU; 902 } 903 } 904 905 RISCVVLMUL RISCVTargetLowering::getLMUL(MVT VT) { 906 assert(VT.isScalableVector() && "Expecting a scalable vector type"); 907 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue(); 908 if (VT.getVectorElementType() == MVT::i1) 909 KnownSize *= 8; 910 911 switch (KnownSize) { 912 default: 913 llvm_unreachable("Invalid LMUL."); 914 case 8: 915 return RISCVVLMUL::LMUL_F8; 916 case 16: 917 return RISCVVLMUL::LMUL_F4; 918 case 32: 919 return RISCVVLMUL::LMUL_F2; 920 case 64: 921 return RISCVVLMUL::LMUL_1; 922 case 128: 923 return RISCVVLMUL::LMUL_2; 924 case 256: 925 return RISCVVLMUL::LMUL_4; 926 case 512: 927 return RISCVVLMUL::LMUL_8; 928 } 929 } 930 931 unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVVLMUL LMul) { 932 switch (LMul) { 933 default: 934 llvm_unreachable("Invalid LMUL."); 935 case RISCVVLMUL::LMUL_F8: 936 case RISCVVLMUL::LMUL_F4: 937 case RISCVVLMUL::LMUL_F2: 938 case RISCVVLMUL::LMUL_1: 939 return RISCV::VRRegClassID; 940 case 
RISCVVLMUL::LMUL_2: 941 return RISCV::VRM2RegClassID; 942 case RISCVVLMUL::LMUL_4: 943 return RISCV::VRM4RegClassID; 944 case RISCVVLMUL::LMUL_8: 945 return RISCV::VRM8RegClassID; 946 } 947 } 948 949 unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) { 950 RISCVVLMUL LMUL = getLMUL(VT); 951 if (LMUL == RISCVVLMUL::LMUL_F8 || LMUL == RISCVVLMUL::LMUL_F4 || 952 LMUL == RISCVVLMUL::LMUL_F2 || LMUL == RISCVVLMUL::LMUL_1) { 953 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7, 954 "Unexpected subreg numbering"); 955 return RISCV::sub_vrm1_0 + Index; 956 } 957 if (LMUL == RISCVVLMUL::LMUL_2) { 958 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3, 959 "Unexpected subreg numbering"); 960 return RISCV::sub_vrm2_0 + Index; 961 } 962 if (LMUL == RISCVVLMUL::LMUL_4) { 963 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1, 964 "Unexpected subreg numbering"); 965 return RISCV::sub_vrm4_0 + Index; 966 } 967 llvm_unreachable("Invalid vector type."); 968 } 969 970 unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) { 971 if (VT.getVectorElementType() == MVT::i1) 972 return RISCV::VRRegClassID; 973 return getRegClassIDForLMUL(getLMUL(VT)); 974 } 975 976 // Attempt to decompose a subvector insert/extract between VecVT and 977 // SubVecVT via subregister indices. Returns the subregister index that 978 // can perform the subvector insert/extract with the given element index, as 979 // well as the index corresponding to any leftover subvectors that must be 980 // further inserted/extracted within the register class for SubVecVT. 
std::pair<unsigned, unsigned>
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
    MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
    const RISCVRegisterInfo *TRI) {
  // The loop below relies on the register class IDs growing with LMUL.
  static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
                 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
                 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
                "Register classes not ordered");
  unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
  unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
  // Try to compose a subregister index that takes us from the incoming
  // LMUL>1 register class down to the outgoing one. At each step we half
  // the LMUL:
  //   nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
  // Note that this is not guaranteed to find a subregister index, such as
  // when we are extracting from one VR type to another.
  unsigned SubRegIdx = RISCV::NoSubRegister;
  for (const unsigned RCID :
       {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
    if (VecRegClassID > RCID && SubRegClassID <= RCID) {
      // Halve the vector and decide whether the target element index lands
      // in the high or low half; descend into that half.
      VecVT = VecVT.getHalfNumVectorElementsVT();
      bool IsHi =
          InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
      SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
                                            getSubregIndexByMVT(VecVT, IsHi));
      // Re-base the element index relative to the chosen half.
      if (IsHi)
        InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
    }
  return {SubRegIdx, InsertExtractIdx};
}

// Return the largest legal scalable vector type that matches VT's element type.
1013 MVT RISCVTargetLowering::getContainerForFixedLengthVector( 1014 const TargetLowering &TLI, MVT VT, const RISCVSubtarget &Subtarget) { 1015 assert(VT.isFixedLengthVector() && TLI.isTypeLegal(VT) && 1016 "Expected legal fixed length vector!"); 1017 1018 unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT); 1019 assert(LMul <= 8 && isPowerOf2_32(LMul) && "Unexpected LMUL!"); 1020 1021 MVT EltVT = VT.getVectorElementType(); 1022 switch (EltVT.SimpleTy) { 1023 default: 1024 llvm_unreachable("unexpected element type for RVV container"); 1025 case MVT::i1: { 1026 // Masks are calculated assuming 8-bit elements since that's when we need 1027 // the most elements. 1028 unsigned EltsPerBlock = RISCV::RVVBitsPerBlock / 8; 1029 return MVT::getScalableVectorVT(MVT::i1, LMul * EltsPerBlock); 1030 } 1031 case MVT::i8: 1032 case MVT::i16: 1033 case MVT::i32: 1034 case MVT::i64: 1035 case MVT::f16: 1036 case MVT::f32: 1037 case MVT::f64: { 1038 unsigned EltsPerBlock = RISCV::RVVBitsPerBlock / EltVT.getSizeInBits(); 1039 return MVT::getScalableVectorVT(EltVT, LMul * EltsPerBlock); 1040 } 1041 } 1042 } 1043 1044 MVT RISCVTargetLowering::getContainerForFixedLengthVector( 1045 SelectionDAG &DAG, MVT VT, const RISCVSubtarget &Subtarget) { 1046 return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT, 1047 Subtarget); 1048 } 1049 1050 MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const { 1051 return getContainerForFixedLengthVector(*this, VT, getSubtarget()); 1052 } 1053 1054 // Grow V to consume an entire RVV register. 
1055 static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG, 1056 const RISCVSubtarget &Subtarget) { 1057 assert(VT.isScalableVector() && 1058 "Expected to convert into a scalable vector!"); 1059 assert(V.getValueType().isFixedLengthVector() && 1060 "Expected a fixed length vector operand!"); 1061 SDLoc DL(V); 1062 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); 1063 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero); 1064 } 1065 1066 // Shrink V so it's just big enough to maintain a VT's worth of data. 1067 static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG, 1068 const RISCVSubtarget &Subtarget) { 1069 assert(VT.isFixedLengthVector() && 1070 "Expected to convert into a fixed length vector!"); 1071 assert(V.getValueType().isScalableVector() && 1072 "Expected a scalable vector operand!"); 1073 SDLoc DL(V); 1074 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); 1075 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero); 1076 } 1077 1078 // Gets the two common "VL" operands: an all-ones mask and the vector length. 1079 // VecVT is a vector type, either fixed-length or scalable, and ContainerVT is 1080 // the vector type that it is contained in. 1081 static std::pair<SDValue, SDValue> 1082 getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG, 1083 const RISCVSubtarget &Subtarget) { 1084 assert(ContainerVT.isScalableVector() && "Expecting scalable container type"); 1085 MVT XLenVT = Subtarget.getXLenVT(); 1086 SDValue VL = VecVT.isFixedLengthVector() 1087 ? DAG.getConstant(VecVT.getVectorNumElements(), DL, XLenVT) 1088 : DAG.getRegister(RISCV::X0, XLenVT); 1089 MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 1090 SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); 1091 return {Mask, VL}; 1092 } 1093 1094 // As above but assuming the given type is a scalable vector type. 
static std::pair<SDValue, SDValue>
getDefaultScalableVLOps(MVT VecVT, SDLoc DL, SelectionDAG &DAG,
                        const RISCVSubtarget &Subtarget) {
  assert(VecVT.isScalableVector() && "Expecting a scalable vector");
  return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
}

// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
// of either is (currently) supported. This can get us into an infinite loop
// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
// as a ..., etc.
// Until either (or both) of these can reliably lower any node, reporting that
// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
// which is not desirable.
bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
    EVT VT, unsigned DefinedValues) const {
  return false;
}

bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
  // Only splats are currently supported.
  if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
    return true;

  return false;
}

// Custom-lower a fixed-length BUILD_VECTOR. Handles mask all-zeros/all-ones,
// splats, index sequences (vid), and "dominant value" vectors; returns an
// empty SDValue to fall back to default expansion for anything else.
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  MVT VT = Op.getSimpleValueType();
  assert(VT.isFixedLengthVector() && "Unexpected vector!");

  MVT ContainerVT =
      RISCVTargetLowering::getContainerForFixedLengthVector(DAG, VT, Subtarget);

  SDLoc DL(Op);
  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  // Mask vectors: only all-zeros (vmclr) and all-ones (vmset) are handled.
  if (VT.getVectorElementType() == MVT::i1) {
    if (ISD::isBuildVectorAllZeros(Op.getNode())) {
      SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
      return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
    }

    if (ISD::isBuildVectorAllOnes(Op.getNode())) {
      SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
      return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
    }

    return SDValue();
  }

  // A splat becomes a single vmv.v.x / vfmv.v.f.
  if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
    unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
                                        : RISCVISD::VMV_V_X_VL;
    Splat = DAG.getNode(Opc, DL, ContainerVT, Splat, VL);
    return convertFromScalableVector(VT, Splat, DAG, Subtarget);
  }

  unsigned NumElts = Op.getNumOperands();

  // Try and match an index sequence, which we can lower directly to the vid
  // instruction. An all-undef vector is matched by getSplatValue, above.
  if (VT.isInteger()) {
    bool IsVID = true;
    // Each defined operand I must be the constant I; undefs match anything.
    for (unsigned I = 0; I < NumElts && IsVID; I++)
      IsVID &= Op.getOperand(I).isUndef() ||
               (isa<ConstantSDNode>(Op.getOperand(I)) &&
                Op.getConstantOperandVal(I) == I);

    if (IsVID) {
      SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
      return convertFromScalableVector(VT, VID, DAG, Subtarget);
    }
  }

  // Try and optimize BUILD_VECTORs with "dominant values" - these are values
  // which constitute a large proportion of the elements. In such cases we can
  // splat a vector with the dominant element and make up the shortfall with
  // INSERT_VECTOR_ELTs.
  // Note that this includes vectors of 2 elements by association. The
  // upper-most element is the "dominant" one, allowing us to use a splat to
  // "insert" the upper element, and an insert of the lower element at position
  // 0, which improves codegen.
  SDValue DominantValue;
  DenseMap<SDValue, unsigned> ValueCounts;
  // Use a fairly conservative threshold. A future optimization could be to use
  // multiple vmerge.vi/vmerge.vx instructions on "partially-dominant"
  // elements with more relaxed thresholds.
  unsigned NumUndefElts =
      count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
  unsigned NumDefElts = NumElts - NumUndefElts;
  unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;

  for (SDValue V : Op->op_values()) {
    if (V.isUndef())
      continue;

    ValueCounts.insert(std::make_pair(V, 0));
    unsigned &Count = ValueCounts[V];

    // Is this value dominant?
    if (++Count > DominantValueCountThreshold)
      DominantValue = V;
  }

  // Don't perform this optimization when optimizing for size, since
  // materializing elements and inserting them tends to cause code bloat.
  if (DominantValue && !DAG.shouldOptForSize()) {
    SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);

    // If every defined element equals the dominant value, the splat alone is
    // enough; otherwise patch up the non-dominant elements.
    if (ValueCounts.size() != 1) {
      MVT XLenVT = Subtarget.getXLenVT();
      for (unsigned I = 0; I < NumElts; ++I) {
        if (!Op.getOperand(I).isUndef() && Op.getOperand(I) != DominantValue)
          Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec,
                            Op.getOperand(I), DAG.getConstant(I, DL, XLenVT));
      }
    }

    return Vec;
  }

  return SDValue();
}

// Custom-lower a fixed-length VECTOR_SHUFFLE. Only splat shuffles are handled
// (lowered to vrgather.vx); anything else falls back to default expansion.
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
                                   const RISCVSubtarget &Subtarget) {
  SDValue V1 = Op.getOperand(0);
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());

  if (SVN->isSplat()) {
    int Lane = SVN->getSplatIndex();
    if (Lane >= 0) {
      MVT ContainerVT = RISCVTargetLowering::getContainerForFixedLengthVector(
          DAG, VT, Subtarget);

      V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
      assert(Lane < (int)VT.getVectorNumElements() && "Unexpected lane!");

      SDValue Mask, VL;
      std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
      MVT XLenVT = Subtarget.getXLenVT();
      SDValue Gather =
          DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, V1,
                      DAG.getConstant(Lane, DL, XLenVT), Mask, VL);
      return convertFromScalableVector(VT, Gather, DAG, Subtarget);
    }
  }

  return SDValue();
}

// Emit an FP extend or round of Op. Scalable vectors use the generic node;
// fixed-length vectors use the masked "VL" form on ContainerVT, choosing
// extend vs round by comparing container and source sizes.
static SDValue getRVVFPExtendOrRound(SDValue Op, MVT VT, MVT ContainerVT,
                                     SDLoc DL, SelectionDAG &DAG,
                                     const RISCVSubtarget &Subtarget) {
  if (VT.isScalableVector())
    return DAG.getFPExtendOrRound(Op, DL, VT);
  assert(VT.isFixedLengthVector() &&
         "Unexpected value type for RVV FP extend/round lowering");
  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
  unsigned RVVOpc = ContainerVT.bitsGT(Op.getSimpleValueType())
                        ? RISCVISD::FP_EXTEND_VL
                        : RISCVISD::FP_ROUND_VL;
  return DAG.getNode(RVVOpc, DL, ContainerVT, Op, Mask, VL);
}

// Central dispatch for all operations marked Custom in the constructor.
SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
                                            SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default:
    report_fatal_error("unimplemented operand");
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::JumpTable:
    return lowerJumpTable(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::SELECT:
    return lowerSELECT(Op, DAG);
  case ISD::BRCOND:
    return lowerBRCOND(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, true);
  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, false);
  case ISD::BITCAST: {
    SDValue Op0 = Op.getOperand(0);
    // We can handle fixed length vector bitcasts with a simple replacement
    // in isel.
    if (Op.getValueType().isFixedLengthVector()) {
      if (Op0.getValueType().isFixedLengthVector())
        return Op;
      return SDValue();
    }
    assert(((Subtarget.is64Bit() && Subtarget.hasStdExtF()) ||
            Subtarget.hasStdExtZfh()) &&
           "Unexpected custom legalisation");
    SDLoc DL(Op);
    // i16<->f16 bitcasts go through an any-extend to XLen then fmv.h.x.
    if (Op.getValueType() == MVT::f16 && Subtarget.hasStdExtZfh()) {
      if (Op0.getValueType() != MVT::i16)
        return SDValue();
      SDValue NewOp0 =
          DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Op0);
      SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
      return FPConv;
    } else if (Op.getValueType() == MVT::f32 && Subtarget.is64Bit() &&
               Subtarget.hasStdExtF()) {
      // i32->f32 on RV64: any-extend to i64 then fmv.w.x.
      if (Op0.getValueType() != MVT::i32)
        return SDValue();
      SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
      SDValue FPConv =
          DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
      return FPConv;
    }
    return SDValue();
  }
  case ISD::INTRINSIC_WO_CHAIN:
    return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return LowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::BSWAP:
  case ISD::BITREVERSE: {
    // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combinining.
    assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
    MVT VT = Op.getSimpleValueType();
    SDLoc DL(Op);
    // Start with the maximum immediate value which is the bitwidth - 1.
    unsigned Imm = VT.getSizeInBits() - 1;
    // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
    if (Op.getOpcode() == ISD::BSWAP)
      Imm &= ~0x7U;
    return DAG.getNode(RISCVISD::GREVI, DL, VT, Op.getOperand(0),
                       DAG.getTargetConstant(Imm, DL, Subtarget.getXLenVT()));
  }
  case ISD::FSHL:
  case ISD::FSHR: {
    MVT VT = Op.getSimpleValueType();
    assert(VT == Subtarget.getXLenVT() && "Unexpected custom legalization");
    SDLoc DL(Op);
    // FSL/FSR take a log2(XLen)+1 bit shift amount but XLenVT FSHL/FSHR only
    // use log(XLen) bits. Mask the shift amount accordingly.
    unsigned ShAmtWidth = Subtarget.getXLen() - 1;
    SDValue ShAmt = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(2),
                                DAG.getConstant(ShAmtWidth, DL, VT));
    unsigned Opc = Op.getOpcode() == ISD::FSHL ? RISCVISD::FSL : RISCVISD::FSR;
    return DAG.getNode(Opc, DL, VT, Op.getOperand(0), Op.getOperand(1), ShAmt);
  }
  case ISD::TRUNCATE: {
    SDLoc DL(Op);
    MVT VT = Op.getSimpleValueType();
    // Only custom-lower vector truncates
    if (!VT.isVector())
      return Op;

    // Truncates to mask types are handled differently
    if (VT.getVectorElementType() == MVT::i1)
      return lowerVectorMaskTrunc(Op, DAG);

    // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
    // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
    // truncate by one power of two at a time.
    MVT DstEltVT = VT.getVectorElementType();

    SDValue Src = Op.getOperand(0);
    MVT SrcVT = Src.getSimpleValueType();
    MVT SrcEltVT = SrcVT.getVectorElementType();

    assert(DstEltVT.bitsLT(SrcEltVT) &&
           isPowerOf2_64(DstEltVT.getSizeInBits()) &&
           isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
           "Unexpected vector truncate lowering");

    MVT ContainerVT = SrcVT;
    if (SrcVT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(SrcVT);
      Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
    }

    SDValue Result = Src;
    SDValue Mask, VL;
    std::tie(Mask, VL) =
        getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
    LLVMContext &Context = *DAG.getContext();
    const ElementCount Count = ContainerVT.getVectorElementCount();
    // Halve the element width each iteration until we reach the target.
    do {
      SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
      EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
      Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
                           Mask, VL);
    } while (SrcEltVT != DstEltVT);

    if (SrcVT.isFixedLengthVector())
      Result = convertFromScalableVector(VT, Result, DAG, Subtarget);

    return Result;
  }
  case ISD::ANY_EXTEND:
  case ISD::ZERO_EXTEND:
    if (Op.getOperand(0).getValueType().isVector() &&
        Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
      return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
    return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
  case ISD::SIGN_EXTEND:
    if (Op.getOperand(0).getValueType().isVector() &&
        Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
      return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
    return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
  case ISD::SPLAT_VECTOR_PARTS:
    return lowerSPLAT_VECTOR_PARTS(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::VSCALE: {
    MVT VT = Op.getSimpleValueType();
    SDLoc DL(Op);
    SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
    // We define our scalable vector types for lmul=1 to use a 64 bit known
    // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
    // vscale as VLENB / 8.
    SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
                                 DAG.getConstant(3, DL, VT));
    return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
  }
  case ISD::FP_EXTEND: {
    // RVV can only do fp_extend to types double the size as the source. We
    // custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going
    // via f32.
    SDLoc DL(Op);
    MVT VT = Op.getSimpleValueType();
    SDValue Src = Op.getOperand(0);
    MVT SrcVT = Src.getSimpleValueType();

    // Prepare any fixed-length vector operands.
    MVT ContainerVT = VT;
    if (SrcVT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VT);
      MVT SrcContainerVT =
          ContainerVT.changeVectorElementType(SrcVT.getVectorElementType());
      Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
    }

    if (!VT.isVector() || VT.getVectorElementType() != MVT::f64 ||
        SrcVT.getVectorElementType() != MVT::f16) {
      // For scalable vectors, we only need to close the gap between
      // vXf16->vXf64.
      if (!VT.isFixedLengthVector())
        return Op;
      // For fixed-length vectors, lower the FP_EXTEND to a custom "VL" version.
      Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
      return convertFromScalableVector(VT, Src, DAG, Subtarget);
    }

    // f16->f64: extend in two hops, f16->f32 then f32->f64.
    MVT InterVT = VT.changeVectorElementType(MVT::f32);
    MVT InterContainerVT = ContainerVT.changeVectorElementType(MVT::f32);
    SDValue IntermediateExtend = getRVVFPExtendOrRound(
        Src, InterVT, InterContainerVT, DL, DAG, Subtarget);

    SDValue Extend = getRVVFPExtendOrRound(IntermediateExtend, VT, ContainerVT,
                                           DL, DAG, Subtarget);
    if (VT.isFixedLengthVector())
      return convertFromScalableVector(VT, Extend, DAG, Subtarget);
    return Extend;
  }
  case ISD::FP_ROUND: {
    // RVV can only do fp_round to types half the size as the source. We
    // custom-lower f64->f16 rounds via RVV's round-to-odd float
    // conversion instruction.
    SDLoc DL(Op);
    MVT VT = Op.getSimpleValueType();
    SDValue Src = Op.getOperand(0);
    MVT SrcVT = Src.getSimpleValueType();

    // Prepare any fixed-length vector operands.
    MVT ContainerVT = VT;
    if (VT.isFixedLengthVector()) {
      MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
      ContainerVT =
          SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
      Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
    }

    if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 ||
        SrcVT.getVectorElementType() != MVT::f64) {
      // For scalable vectors, we only need to close the gap between
      // vXf64<->vXf16.
      if (!VT.isFixedLengthVector())
        return Op;
      // For fixed-length vectors, lower the FP_ROUND to a custom "VL" version.
      Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
      return convertFromScalableVector(VT, Src, DAG, Subtarget);
    }

    SDValue Mask, VL;
    std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

    // f64->f16: round-to-odd down to f32 first, then round to f16.
    MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
    SDValue IntermediateRound =
        DAG.getNode(RISCVISD::VFNCVT_ROD_VL, DL, InterVT, Src, Mask, VL);
    SDValue Round = getRVVFPExtendOrRound(IntermediateRound, VT, ContainerVT,
                                          DL, DAG, Subtarget);

    if (VT.isFixedLengthVector())
      return convertFromScalableVector(VT, Round, DAG, Subtarget);
    return Round;
  }
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP: {
    // RVV can only do fp<->int conversions to types half/double the size as
    // the source. We custom-lower any conversions that do two hops into
    // sequences.
    MVT VT = Op.getSimpleValueType();
    if (!VT.isVector())
      return Op;
    SDLoc DL(Op);
    SDValue Src = Op.getOperand(0);
    MVT EltVT = VT.getVectorElementType();
    MVT SrcVT = Src.getSimpleValueType();
    MVT SrcEltVT = SrcVT.getVectorElementType();
    unsigned EltSize = EltVT.getSizeInBits();
    unsigned SrcEltSize = SrcEltVT.getSizeInBits();
    assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
           "Unexpected vector element types");

    bool IsInt2FP = SrcEltVT.isInteger();
    // Widening conversions
    if (EltSize > SrcEltSize && (EltSize / SrcEltSize >= 4)) {
      if (IsInt2FP) {
        // Do a regular integer sign/zero extension then convert to float.
        MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltVT.getSizeInBits()),
                                      VT.getVectorElementCount());
        unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP
                                 ? ISD::ZERO_EXTEND
                                 : ISD::SIGN_EXTEND;
        SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
        return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
      }
      // FP2Int
      assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
      // Do one doubling fp_extend then complete the operation by converting
      // to int.
      MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
      SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
      return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
    }

    // Narrowing conversions
    if (SrcEltSize > EltSize && (SrcEltSize / EltSize >= 4)) {
      if (IsInt2FP) {
        // One narrowing int_to_fp, then an fp_round.
        assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
        MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
        SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
        return DAG.getFPExtendOrRound(Int2FP, DL, VT);
      }
      // FP2Int
      // One narrowing fp_to_int, then truncate the integer. If the float isn't
      // representable by the integer, the result is poison.
      MVT IVecVT =
          MVT::getVectorVT(MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2),
                           VT.getVectorElementCount());
      SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
      return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
    }

    // Scalable vectors can exit here. Patterns will handle equally-sized
    // conversions halving/doubling ones.
    if (!VT.isFixedLengthVector())
      return Op;

    // For fixed-length vectors we lower to a custom "VL" node.
    unsigned RVVOpc = 0;
    switch (Op.getOpcode()) {
    default:
      llvm_unreachable("Impossible opcode");
    case ISD::FP_TO_SINT:
      RVVOpc = RISCVISD::FP_TO_SINT_VL;
      break;
    case ISD::FP_TO_UINT:
      RVVOpc = RISCVISD::FP_TO_UINT_VL;
      break;
    case ISD::SINT_TO_FP:
      RVVOpc = RISCVISD::SINT_TO_FP_VL;
      break;
    case ISD::UINT_TO_FP:
      RVVOpc = RISCVISD::UINT_TO_FP_VL;
      break;
    }

    MVT ContainerVT, SrcContainerVT;
    // Derive the reference container type from the larger vector type.
    if (SrcEltSize > EltSize) {
      SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
      ContainerVT =
          SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
    } else {
      ContainerVT = getContainerForFixedLengthVector(VT);
      SrcContainerVT = ContainerVT.changeVectorElementType(SrcEltVT);
    }

    SDValue Mask, VL;
    std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

    Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
    Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
    return convertFromScalableVector(VT, Src, DAG, Subtarget);
  }
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_UMIN:
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
    return lowerVECREDUCE(Op, DAG);
  case ISD::VECREDUCE_FADD:
  case ISD::VECREDUCE_SEQ_FADD:
    return lowerFPVECREDUCE(Op, DAG);
  case ISD::INSERT_SUBVECTOR:
    return lowerINSERT_SUBVECTOR(Op, DAG);
  case ISD::EXTRACT_SUBVECTOR:
    return lowerEXTRACT_SUBVECTOR(Op, DAG);
  case ISD::VECTOR_REVERSE:
    return lowerVECTOR_REVERSE(Op, DAG);
  case ISD::BUILD_VECTOR:
    return lowerBUILD_VECTOR(Op, DAG, Subtarget);
  case ISD::VECTOR_SHUFFLE:
    return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
  case ISD::CONCAT_VECTORS: {
    // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
    // better than going through the stack, as the default expansion does.
    SDLoc DL(Op);
    MVT VT = Op.getSimpleValueType();
    unsigned NumOpElts =
        Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
    SDValue Vec = DAG.getUNDEF(VT);
    for (const auto &OpIdx : enumerate(Op->ops()))
      Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, OpIdx.value(),
                        DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL));
    return Vec;
  }
  case ISD::LOAD:
    return lowerFixedLengthVectorLoadToRVV(Op, DAG);
  case ISD::STORE:
    return lowerFixedLengthVectorStoreToRVV(Op, DAG);
  case ISD::MLOAD:
    return lowerMLOAD(Op, DAG);
  case ISD::MSTORE:
    return lowerMSTORE(Op, DAG);
  case ISD::SETCC:
    return lowerFixedLengthVectorSetccToRVV(Op, DAG);
  case ISD::ADD:
    return lowerToScalableOp(Op, DAG, RISCVISD::ADD_VL);
  case ISD::SUB:
    return lowerToScalableOp(Op, DAG, RISCVISD::SUB_VL);
  case ISD::MUL:
    return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL);
  case ISD::MULHS:
    return lowerToScalableOp(Op, DAG, RISCVISD::MULHS_VL);
  case ISD::MULHU:
    return lowerToScalableOp(Op, DAG, RISCVISD::MULHU_VL);
  case ISD::AND:
    return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMAND_VL,
                                              RISCVISD::AND_VL);
  case ISD::OR:
    return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMOR_VL,
                                              RISCVISD::OR_VL);
  case ISD::XOR:
    return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMXOR_VL,
                                              RISCVISD::XOR_VL);
  case ISD::SDIV:
    return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL);
  case ISD::SREM:
    return lowerToScalableOp(Op, DAG, RISCVISD::SREM_VL);
  case ISD::UDIV:
    return lowerToScalableOp(Op, DAG, RISCVISD::UDIV_VL);
  case ISD::UREM:
    return lowerToScalableOp(Op, DAG, RISCVISD::UREM_VL);
  case ISD::SHL:
    return lowerToScalableOp(Op, DAG, RISCVISD::SHL_VL);
  case ISD::SRA:
    return lowerToScalableOp(Op, DAG, RISCVISD::SRA_VL);
  case ISD::SRL:
    return lowerToScalableOp(Op, DAG, RISCVISD::SRL_VL);
  case ISD::FADD:
    return lowerToScalableOp(Op, DAG, RISCVISD::FADD_VL);
  case ISD::FSUB:
    return lowerToScalableOp(Op, DAG, RISCVISD::FSUB_VL);
  case ISD::FMUL:
    return lowerToScalableOp(Op, DAG, RISCVISD::FMUL_VL);
  case ISD::FDIV:
    return lowerToScalableOp(Op, DAG, RISCVISD::FDIV_VL);
  case ISD::FNEG:
    return lowerToScalableOp(Op, DAG, RISCVISD::FNEG_VL);
  case ISD::FABS:
    return lowerToScalableOp(Op, DAG, RISCVISD::FABS_VL);
  case ISD::FSQRT:
    return lowerToScalableOp(Op, DAG, RISCVISD::FSQRT_VL);
  case ISD::FMA:
    return lowerToScalableOp(Op, DAG, RISCVISD::FMA_VL);
  case ISD::SMIN:
    return lowerToScalableOp(Op, DAG, RISCVISD::SMIN_VL);
  case ISD::SMAX:
    return lowerToScalableOp(Op, DAG, RISCVISD::SMAX_VL);
  case ISD::UMIN:
    return lowerToScalableOp(Op, DAG, RISCVISD::UMIN_VL);
  case ISD::UMAX:
    return lowerToScalableOp(Op, DAG, RISCVISD::UMAX_VL);
  case ISD::ABS:
    return lowerABS(Op, DAG);
  case ISD::VSELECT:
    return lowerFixedLengthVectorSelectToRVV(Op, DAG);
  case ISD::FCOPYSIGN:
    return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
  case ISD::MGATHER:
    return lowerMGATHER(Op, DAG);
  case ISD::MSCATTER:
    return lowerMSCATTER(Op, DAG);
  }
}

// Per-node-kind helpers used by getAddr to build the matching target node.
static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
}

static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
                                   Flags);
}

static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
                                   N->getOffset(), Flags);
}

static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
}

template <class NodeTy>
SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                     bool IsLocal) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());

  if (isPositionIndependent()) {
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    if (IsLocal)
      // Use PC-relative addressing to access the symbol. This generates the
      // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
      // %pcrel_lo(auipc)).
      return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);

    // Use PC-relative addressing to access the GOT for this symbol, then load
    // the address from the GOT. This generates the pattern (PseudoLA sym),
    // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
    return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
  }

  switch (getTargetMachine().getCodeModel()) {
  default:
    report_fatal_error("Unsupported code model for lowering");
  case CodeModel::Small: {
    // Generate a sequence for accessing addresses within the first 2 GiB of
    // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
1785 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI); 1786 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO); 1787 SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0); 1788 return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0); 1789 } 1790 case CodeModel::Medium: { 1791 // Generate a sequence for accessing addresses within any 2GiB range within 1792 // the address space. This generates the pattern (PseudoLLA sym), which 1793 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)). 1794 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); 1795 return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0); 1796 } 1797 } 1798 } 1799 1800 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op, 1801 SelectionDAG &DAG) const { 1802 SDLoc DL(Op); 1803 EVT Ty = Op.getValueType(); 1804 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 1805 int64_t Offset = N->getOffset(); 1806 MVT XLenVT = Subtarget.getXLenVT(); 1807 1808 const GlobalValue *GV = N->getGlobal(); 1809 bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV); 1810 SDValue Addr = getAddr(N, DAG, IsLocal); 1811 1812 // In order to maximise the opportunity for common subexpression elimination, 1813 // emit a separate ADD node for the global address offset instead of folding 1814 // it in the global address node. Later peephole optimisations may choose to 1815 // fold it back in when profitable. 
1816 if (Offset != 0) 1817 return DAG.getNode(ISD::ADD, DL, Ty, Addr, 1818 DAG.getConstant(Offset, DL, XLenVT)); 1819 return Addr; 1820 } 1821 1822 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op, 1823 SelectionDAG &DAG) const { 1824 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op); 1825 1826 return getAddr(N, DAG); 1827 } 1828 1829 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op, 1830 SelectionDAG &DAG) const { 1831 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); 1832 1833 return getAddr(N, DAG); 1834 } 1835 1836 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op, 1837 SelectionDAG &DAG) const { 1838 JumpTableSDNode *N = cast<JumpTableSDNode>(Op); 1839 1840 return getAddr(N, DAG); 1841 } 1842 1843 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, 1844 SelectionDAG &DAG, 1845 bool UseGOT) const { 1846 SDLoc DL(N); 1847 EVT Ty = getPointerTy(DAG.getDataLayout()); 1848 const GlobalValue *GV = N->getGlobal(); 1849 MVT XLenVT = Subtarget.getXLenVT(); 1850 1851 if (UseGOT) { 1852 // Use PC-relative addressing to access the GOT for this TLS symbol, then 1853 // load the address from the GOT and add the thread pointer. This generates 1854 // the pattern (PseudoLA_TLS_IE sym), which expands to 1855 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)). 1856 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); 1857 SDValue Load = 1858 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0); 1859 1860 // Add the thread pointer. 1861 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); 1862 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg); 1863 } 1864 1865 // Generate a sequence for accessing the address relative to the thread 1866 // pointer, with the appropriate adjustment for the thread pointer offset. 
1867 // This generates the pattern 1868 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym)) 1869 SDValue AddrHi = 1870 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI); 1871 SDValue AddrAdd = 1872 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD); 1873 SDValue AddrLo = 1874 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO); 1875 1876 SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0); 1877 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); 1878 SDValue MNAdd = SDValue( 1879 DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd), 1880 0); 1881 return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0); 1882 } 1883 1884 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, 1885 SelectionDAG &DAG) const { 1886 SDLoc DL(N); 1887 EVT Ty = getPointerTy(DAG.getDataLayout()); 1888 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits()); 1889 const GlobalValue *GV = N->getGlobal(); 1890 1891 // Use a PC-relative addressing mode to access the global dynamic GOT address. 1892 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to 1893 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)). 1894 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); 1895 SDValue Load = 1896 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0); 1897 1898 // Prepare argument list to generate call. 1899 ArgListTy Args; 1900 ArgListEntry Entry; 1901 Entry.Node = Load; 1902 Entry.Ty = CallTy; 1903 Args.push_back(Entry); 1904 1905 // Setup call to __tls_get_addr. 
1906 TargetLowering::CallLoweringInfo CLI(DAG); 1907 CLI.setDebugLoc(DL) 1908 .setChain(DAG.getEntryNode()) 1909 .setLibCallee(CallingConv::C, CallTy, 1910 DAG.getExternalSymbol("__tls_get_addr", Ty), 1911 std::move(Args)); 1912 1913 return LowerCallTo(CLI).first; 1914 } 1915 1916 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op, 1917 SelectionDAG &DAG) const { 1918 SDLoc DL(Op); 1919 EVT Ty = Op.getValueType(); 1920 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 1921 int64_t Offset = N->getOffset(); 1922 MVT XLenVT = Subtarget.getXLenVT(); 1923 1924 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal()); 1925 1926 if (DAG.getMachineFunction().getFunction().getCallingConv() == 1927 CallingConv::GHC) 1928 report_fatal_error("In GHC calling convention TLS is not supported"); 1929 1930 SDValue Addr; 1931 switch (Model) { 1932 case TLSModel::LocalExec: 1933 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false); 1934 break; 1935 case TLSModel::InitialExec: 1936 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true); 1937 break; 1938 case TLSModel::LocalDynamic: 1939 case TLSModel::GeneralDynamic: 1940 Addr = getDynamicTLSAddr(N, DAG); 1941 break; 1942 } 1943 1944 // In order to maximise the opportunity for common subexpression elimination, 1945 // emit a separate ADD node for the global address offset instead of folding 1946 // it in the global address node. Later peephole optimisations may choose to 1947 // fold it back in when profitable. 
1948 if (Offset != 0) 1949 return DAG.getNode(ISD::ADD, DL, Ty, Addr, 1950 DAG.getConstant(Offset, DL, XLenVT)); 1951 return Addr; 1952 } 1953 1954 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { 1955 SDValue CondV = Op.getOperand(0); 1956 SDValue TrueV = Op.getOperand(1); 1957 SDValue FalseV = Op.getOperand(2); 1958 SDLoc DL(Op); 1959 MVT XLenVT = Subtarget.getXLenVT(); 1960 1961 // If the result type is XLenVT and CondV is the output of a SETCC node 1962 // which also operated on XLenVT inputs, then merge the SETCC node into the 1963 // lowered RISCVISD::SELECT_CC to take advantage of the integer 1964 // compare+branch instructions. i.e.: 1965 // (select (setcc lhs, rhs, cc), truev, falsev) 1966 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev) 1967 if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC && 1968 CondV.getOperand(0).getSimpleValueType() == XLenVT) { 1969 SDValue LHS = CondV.getOperand(0); 1970 SDValue RHS = CondV.getOperand(1); 1971 auto CC = cast<CondCodeSDNode>(CondV.getOperand(2)); 1972 ISD::CondCode CCVal = CC->get(); 1973 1974 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG); 1975 1976 SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT); 1977 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV}; 1978 return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops); 1979 } 1980 1981 // Otherwise: 1982 // (select condv, truev, falsev) 1983 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev) 1984 SDValue Zero = DAG.getConstant(0, DL, XLenVT); 1985 SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT); 1986 1987 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV}; 1988 1989 return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops); 1990 } 1991 1992 SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const { 1993 SDValue CondV = Op.getOperand(1); 1994 SDLoc DL(Op); 1995 MVT XLenVT = Subtarget.getXLenVT(); 1996 1997 if (CondV.getOpcode() == 
ISD::SETCC && 1998 CondV.getOperand(0).getValueType() == XLenVT) { 1999 SDValue LHS = CondV.getOperand(0); 2000 SDValue RHS = CondV.getOperand(1); 2001 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get(); 2002 2003 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG); 2004 2005 SDValue TargetCC = DAG.getCondCode(CCVal); 2006 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0), 2007 LHS, RHS, TargetCC, Op.getOperand(2)); 2008 } 2009 2010 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0), 2011 CondV, DAG.getConstant(0, DL, XLenVT), 2012 DAG.getCondCode(ISD::SETNE), Op.getOperand(2)); 2013 } 2014 2015 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { 2016 MachineFunction &MF = DAG.getMachineFunction(); 2017 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>(); 2018 2019 SDLoc DL(Op); 2020 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), 2021 getPointerTy(MF.getDataLayout())); 2022 2023 // vastart just stores the address of the VarArgsFrameIndex slot into the 2024 // memory location argument. 
2025 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 2026 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1), 2027 MachinePointerInfo(SV)); 2028 } 2029 2030 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op, 2031 SelectionDAG &DAG) const { 2032 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 2033 MachineFunction &MF = DAG.getMachineFunction(); 2034 MachineFrameInfo &MFI = MF.getFrameInfo(); 2035 MFI.setFrameAddressIsTaken(true); 2036 Register FrameReg = RI.getFrameRegister(MF); 2037 int XLenInBytes = Subtarget.getXLen() / 8; 2038 2039 EVT VT = Op.getValueType(); 2040 SDLoc DL(Op); 2041 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT); 2042 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 2043 while (Depth--) { 2044 int Offset = -(XLenInBytes * 2); 2045 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr, 2046 DAG.getIntPtrConstant(Offset, DL)); 2047 FrameAddr = 2048 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo()); 2049 } 2050 return FrameAddr; 2051 } 2052 2053 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op, 2054 SelectionDAG &DAG) const { 2055 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 2056 MachineFunction &MF = DAG.getMachineFunction(); 2057 MachineFrameInfo &MFI = MF.getFrameInfo(); 2058 MFI.setReturnAddressIsTaken(true); 2059 MVT XLenVT = Subtarget.getXLenVT(); 2060 int XLenInBytes = Subtarget.getXLen() / 8; 2061 2062 if (verifyReturnAddressArgumentIsConstant(Op, DAG)) 2063 return SDValue(); 2064 2065 EVT VT = Op.getValueType(); 2066 SDLoc DL(Op); 2067 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 2068 if (Depth) { 2069 int Off = -XLenInBytes; 2070 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG); 2071 SDValue Offset = DAG.getConstant(Off, DL, VT); 2072 return DAG.getLoad(VT, DL, DAG.getEntryNode(), 2073 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), 2074 MachinePointerInfo()); 2075 } 2076 
2077 // Return the value of the return address register, marking it an implicit 2078 // live-in. 2079 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT)); 2080 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT); 2081 } 2082 2083 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op, 2084 SelectionDAG &DAG) const { 2085 SDLoc DL(Op); 2086 SDValue Lo = Op.getOperand(0); 2087 SDValue Hi = Op.getOperand(1); 2088 SDValue Shamt = Op.getOperand(2); 2089 EVT VT = Lo.getValueType(); 2090 2091 // if Shamt-XLEN < 0: // Shamt < XLEN 2092 // Lo = Lo << Shamt 2093 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt)) 2094 // else: 2095 // Lo = 0 2096 // Hi = Lo << (Shamt-XLEN) 2097 2098 SDValue Zero = DAG.getConstant(0, DL, VT); 2099 SDValue One = DAG.getConstant(1, DL, VT); 2100 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); 2101 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); 2102 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); 2103 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); 2104 2105 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); 2106 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One); 2107 SDValue ShiftRightLo = 2108 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt); 2109 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); 2110 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); 2111 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen); 2112 2113 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); 2114 2115 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero); 2116 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 2117 2118 SDValue Parts[2] = {Lo, Hi}; 2119 return DAG.getMergeValues(Parts, DL); 2120 } 2121 2122 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, 2123 bool IsSRA) const { 
2124 SDLoc DL(Op); 2125 SDValue Lo = Op.getOperand(0); 2126 SDValue Hi = Op.getOperand(1); 2127 SDValue Shamt = Op.getOperand(2); 2128 EVT VT = Lo.getValueType(); 2129 2130 // SRA expansion: 2131 // if Shamt-XLEN < 0: // Shamt < XLEN 2132 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt)) 2133 // Hi = Hi >>s Shamt 2134 // else: 2135 // Lo = Hi >>s (Shamt-XLEN); 2136 // Hi = Hi >>s (XLEN-1) 2137 // 2138 // SRL expansion: 2139 // if Shamt-XLEN < 0: // Shamt < XLEN 2140 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt)) 2141 // Hi = Hi >>u Shamt 2142 // else: 2143 // Lo = Hi >>u (Shamt-XLEN); 2144 // Hi = 0; 2145 2146 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL; 2147 2148 SDValue Zero = DAG.getConstant(0, DL, VT); 2149 SDValue One = DAG.getConstant(1, DL, VT); 2150 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); 2151 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); 2152 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); 2153 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); 2154 2155 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt); 2156 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One); 2157 SDValue ShiftLeftHi = 2158 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt); 2159 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi); 2160 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt); 2161 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen); 2162 SDValue HiFalse = 2163 IsSRA ? 
DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero; 2164 2165 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); 2166 2167 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse); 2168 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 2169 2170 SDValue Parts[2] = {Lo, Hi}; 2171 return DAG.getMergeValues(Parts, DL); 2172 } 2173 2174 // Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is 2175 // illegal (currently only vXi64 RV32). 2176 // FIXME: We could also catch non-constant sign-extended i32 values and lower 2177 // them to SPLAT_VECTOR_I64 2178 SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op, 2179 SelectionDAG &DAG) const { 2180 SDLoc DL(Op); 2181 EVT VecVT = Op.getValueType(); 2182 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 && 2183 "Unexpected SPLAT_VECTOR_PARTS lowering"); 2184 2185 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!"); 2186 SDValue Lo = Op.getOperand(0); 2187 SDValue Hi = Op.getOperand(1); 2188 2189 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) { 2190 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue(); 2191 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue(); 2192 // If Hi constant is all the same sign bit as Lo, lower this as a custom 2193 // node in order to try and match RVV vector/scalar instructions. 
2194 if ((LoC >> 31) == HiC) 2195 return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo); 2196 } 2197 2198 // Else, on RV32 we lower an i64-element SPLAT_VECTOR thus, being careful not 2199 // to accidentally sign-extend the 32-bit halves to the e64 SEW: 2200 // vmv.v.x vX, hi 2201 // vsll.vx vX, vX, /*32*/ 2202 // vmv.v.x vY, lo 2203 // vsll.vx vY, vY, /*32*/ 2204 // vsrl.vx vY, vY, /*32*/ 2205 // vor.vv vX, vX, vY 2206 SDValue ThirtyTwoV = DAG.getConstant(32, DL, VecVT); 2207 2208 Lo = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo); 2209 Lo = DAG.getNode(ISD::SHL, DL, VecVT, Lo, ThirtyTwoV); 2210 Lo = DAG.getNode(ISD::SRL, DL, VecVT, Lo, ThirtyTwoV); 2211 2212 if (isNullConstant(Hi)) 2213 return Lo; 2214 2215 Hi = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Hi); 2216 Hi = DAG.getNode(ISD::SHL, DL, VecVT, Hi, ThirtyTwoV); 2217 2218 return DAG.getNode(ISD::OR, DL, VecVT, Lo, Hi); 2219 } 2220 2221 // Custom-lower extensions from mask vectors by using a vselect either with 1 2222 // for zero/any-extension or -1 for sign-extension: 2223 // (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0) 2224 // Note that any-extension is lowered identically to zero-extension. 
// Custom-lower a (s|z)ext of a mask (i1) vector to a wider integer vector by
// selecting between a splat of ExtTrueVal (-1 for sign-extension, 1 for
// zero/any-extension) and a splat of zero, with the mask as the condition.
SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
                                                int64_t ExtTrueVal) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  SDValue Src = Op.getOperand(0);
  // Only custom-lower extensions from mask types.
  assert(Src.getValueType().isVector() &&
         Src.getValueType().getVectorElementType() == MVT::i1);

  MVT XLenVT = Subtarget.getXLenVT();
  SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
  SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);

  if (VecVT.isScalableVector()) {
    // Be careful not to introduce illegal scalar types at this stage, and be
    // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is
    // illegal and must be expanded. Since we know that the constants are
    // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly.
    bool IsRV32E64 =
        !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64;

    if (!IsRV32E64) {
      SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero);
      SplatTrueVal = DAG.getSplatVector(VecVT, DL, SplatTrueVal);
    } else {
      SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero);
      SplatTrueVal =
          DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatTrueVal);
    }

    return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
  }

  // Fixed-length vectors: operate on the canonical scalable container type
  // with VL-predicated nodes and convert the result back at the end.
  MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
  MVT I1ContainerVT =
      MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());

  SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero, VL);
  SplatTrueVal =
      DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatTrueVal, VL);
  SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC,
                               SplatTrueVal, SplatZero, VL);

  return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
}

// Custom-lower a fixed-length vector extension (sext/zext) by performing the
// equivalent VL-predicated extend node (ExtendOpc) on scalable container
// types. Returns Op unchanged for non-fixed-length inputs.
SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
    SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
  MVT ExtVT = Op.getSimpleValueType();
  // Only custom-lower extensions from fixed-length vector types.
  if (!ExtVT.isFixedLengthVector())
    return Op;
  MVT VT = Op.getOperand(0).getSimpleValueType();
  // Grab the canonical container type for the extended type. Infer the smaller
  // type from that to ensure the same number of vector elements, as we know
  // the LMUL will be sufficient to hold the smaller type.
  MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
  // Get the extended container type manually to ensure the same number of
  // vector elements between source and dest.
  MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
                                     ContainerExtVT.getVectorElementCount());

  SDValue Op1 =
      convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);

  SDLoc DL(Op);
  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);

  return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
}

// Custom-lower truncations from vectors to mask vectors by using a mask and a
// setcc operation:
// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT MaskVT = Op.getValueType();
  // Only expect to custom-lower truncations to mask types.
  assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
         "Unexpected type for vector mask lowering");
  SDValue Src = Op.getOperand(0);
  MVT VecVT = Src.getSimpleValueType();

  // If this is a fixed vector, we need to convert it to a scalable vector.
  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
  }

  SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
  SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());

  // NOTE(review): unlike other VMV_V_X_VL nodes in this file, these splats
  // are built without an explicit VL operand — confirm this matches the
  // node's expected operand list.
  SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatOne);
  SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero);

  if (VecVT.isScalableVector()) {
    // Scalable path: plain AND/SETCC nodes suffice; no VL predication needed.
    SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne);
    return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE);
  }

  // Fixed-length path: use VL-predicated AND_VL/SETCC_VL on the container
  // type, then convert back to the original fixed-length mask type.
  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
  SDValue Trunc =
      DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne, Mask, VL);
  Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, Trunc, SplatZero,
                      DAG.getCondCode(ISD::SETNE), Mask, VL);
  return convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
}

// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
// first position of a vector, and that vector is slid up to the insert index.
// By limiting the active vector length to index+1 and merging with the
// original vector (with an undisturbed tail policy for elements >= VL), we
// achieve the desired result of leaving all elements untouched except the one
// at VL-1, which is replaced with the desired value.
SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  SDValue Vec = Op.getOperand(0);
  SDValue Val = Op.getOperand(1);
  SDValue Idx = Op.getOperand(2);

  MVT ContainerVT = VecVT;
  // If the operand is a fixed-length vector, convert to a scalable one.
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  MVT XLenVT = Subtarget.getXLenVT();

  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
  // An i64 scalar is not a legal type on RV32, so it would otherwise need
  // scalar legalization; any other element type can be inserted directly.
  bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
  // Even i64-element vectors on RV32 can be lowered without scalar
  // legalization if the most-significant 32 bits of the value are not affected
  // by the sign-extension of the lower 32 bits.
  // TODO: We could also catch sign extensions of a 32-bit value.
  if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
    const auto *CVal = cast<ConstantSDNode>(Val);
    if (isInt<32>(CVal->getSExtValue())) {
      IsLegalInsert = true;
      Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
    }
  }

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  SDValue ValInVec;

  if (IsLegalInsert) {
    // Inserting at index 0 needs no slide; move the scalar straight into the
    // first element of the source vector and return.
    if (isNullConstant(Idx)) {
      Vec = DAG.getNode(RISCVISD::VMV_S_XF_VL, DL, ContainerVT, Vec, Val, VL);
      if (!VecVT.isFixedLengthVector())
        return Vec;
      return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
    }
    // Otherwise place the scalar at element 0 of a fresh vector; it is slid
    // into position below.
    ValInVec = DAG.getNode(RISCVISD::VMV_S_XF_VL, DL, ContainerVT,
                           DAG.getUNDEF(ContainerVT), Val, VL);
  } else {
    // On RV32, i64-element vectors must be specially handled to place the
    // value at element 0, by using two vslide1up instructions in sequence on
    // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
    // this.
    SDValue One = DAG.getConstant(1, DL, XLenVT);
    SDValue ValLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, Zero);
    SDValue ValHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, One);
    MVT I32ContainerVT =
        MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
    SDValue I32Mask =
        getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
    // Limit the active VL to two (the two i32 halves of the i64 value).
    SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
    // Note: We can't pass a UNDEF to the first VSLIDE1UP_VL since an untied
    // undef doesn't obey the earlyclobber constraint. Just splat a zero value.
    ValInVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, I32ContainerVT, Zero,
                           InsertI64VL);
    // First slide in the hi value, then the lo in underneath it.
    ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec,
                           ValHi, I32Mask, InsertI64VL);
    ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec,
                           ValLo, I32Mask, InsertI64VL);
    // Bitcast back to the right container type.
    ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
  }

  // Now that the value is in a vector, slide it into position. The VL is
  // limited to Idx+1 so only the destination element can be written, leaving
  // the tail of Vec undisturbed.
  SDValue InsertVL =
      DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
  SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec,
                                ValInVec, Idx, Mask, InsertVL);
  if (!VecVT.isFixedLengthVector())
    return Slideup;
  return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
}

// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
// types this is done using VMV_X_S to allow us to glean information about the
// sign bits of the result.
2438 SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, 2439 SelectionDAG &DAG) const { 2440 SDLoc DL(Op); 2441 SDValue Idx = Op.getOperand(1); 2442 SDValue Vec = Op.getOperand(0); 2443 EVT EltVT = Op.getValueType(); 2444 MVT VecVT = Vec.getSimpleValueType(); 2445 MVT XLenVT = Subtarget.getXLenVT(); 2446 2447 if (VecVT.getVectorElementType() == MVT::i1) { 2448 // FIXME: For now we just promote to an i8 vector and extract from that, 2449 // but this is probably not optimal. 2450 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount()); 2451 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec); 2452 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx); 2453 } 2454 2455 // If this is a fixed vector, we need to convert it to a scalable vector. 2456 MVT ContainerVT = VecVT; 2457 if (VecVT.isFixedLengthVector()) { 2458 ContainerVT = getContainerForFixedLengthVector(VecVT); 2459 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 2460 } 2461 2462 // If the index is 0, the vector is already in the right position. 2463 if (!isNullConstant(Idx)) { 2464 // Use a VL of 1 to avoid processing more elements than we need. 2465 SDValue VL = DAG.getConstant(1, DL, XLenVT); 2466 MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 2467 SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); 2468 Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT, 2469 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL); 2470 } 2471 2472 if (!EltVT.isInteger()) { 2473 // Floating-point extracts are handled in TableGen. 2474 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, 2475 DAG.getConstant(0, DL, XLenVT)); 2476 } 2477 2478 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec); 2479 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0); 2480 } 2481 2482 // Called by type legalization to handle splat of i64 on RV32. 
// FIXME: We can optimize this when the type has sign or zero bits in one
// of the halves.
// Builds a vXi64 splat of a 64-bit scalar on RV32 by splatting each 32-bit
// half, shifting the halves into place, and OR-ing them together. All nodes
// are VL-predicated with the given VL.
static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Scalar,
                                   SDValue VL, SelectionDAG &DAG) {
  SDValue ThirtyTwoV = DAG.getConstant(32, DL, VT);
  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
                           DAG.getConstant(0, DL, MVT::i32));
  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
                           DAG.getConstant(1, DL, MVT::i32));

  // vmv.v.x vX, hi
  // vsll.vx vX, vX, /*32*/
  // vmv.v.x vY, lo
  // vsll.vx vY, vY, /*32*/
  // vsrl.vx vY, vY, /*32*/
  // vor.vv vX, vX, vY
  MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
  SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
  // Lo half: splat, then shift left and logically shift right by 32 to clear
  // the (sign-extended) upper 32 bits of each element.
  Lo = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Lo, VL);
  Lo = DAG.getNode(RISCVISD::SHL_VL, DL, VT, Lo, ThirtyTwoV, Mask, VL);
  Lo = DAG.getNode(RISCVISD::SRL_VL, DL, VT, Lo, ThirtyTwoV, Mask, VL);

  // Hi half: splat and shift left by 32 into the upper bits.
  Hi = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Hi, VL);
  Hi = DAG.getNode(RISCVISD::SHL_VL, DL, VT, Hi, ThirtyTwoV, Mask, VL);

  return DAG.getNode(RISCVISD::OR_VL, DL, VT, Lo, Hi, Mask, VL);
}

// Some RVV intrinsics may claim that they want an integer operand to be
// promoted or expanded.
// Returns SDValue() when no adjustment of the splat (scalar) operand is
// needed and default lowering should proceed.
static SDValue lowerVectorIntrinsicSplats(SDValue Op, SelectionDAG &DAG,
                                          const RISCVSubtarget &Subtarget) {
  assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
         "Unexpected opcode");

  if (!Subtarget.hasStdExtV())
    return SDValue();

  // With a chain, operand 0 is the chain and operand 1 the intrinsic ID.
  bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
  unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
  SDLoc DL(Op);

  const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
      RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
  if (!II || !II->SplatOperand)
    return SDValue();

  unsigned SplatOp = II->SplatOperand + HasChain;
  assert(SplatOp < Op.getNumOperands());

  SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
  SDValue &ScalarOp = Operands[SplatOp];
  MVT OpVT = ScalarOp.getSimpleValueType();
  MVT VT = Op.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  // If this isn't a scalar, or its type is XLenVT we're done.
  if (!OpVT.isScalarInteger() || OpVT == XLenVT)
    return SDValue();

  // Simplest case is that the operand needs to be promoted to XLenVT.
  if (OpVT.bitsLT(XLenVT)) {
    // If the operand is a constant, sign extend to increase our chances
    // of being able to use a .vi instruction. ANY_EXTEND would become a
    // zero extend and the simm5 check in isel would fail.
    // FIXME: Should we ignore the upper bits in isel instead?
    unsigned ExtOpc =
        isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
    ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
    return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
  }

  // The more complex case is when the scalar is larger than XLenVT.
  assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
         VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");

  // If this is a sign-extended 32-bit constant, we can truncate it and rely
  // on the instruction to sign-extend since SEW>XLEN.
  if (auto *CVal = dyn_cast<ConstantSDNode>(ScalarOp)) {
    if (isInt<32>(CVal->getSExtValue())) {
      ScalarOp = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
      return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
    }
  }

  // We need to convert the scalar to a splat vector.
  // FIXME: Can we implicitly truncate the scalar if it is known to
  // be sign extended?
  // VL should be the last operand.
  SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
  assert(VL.getValueType() == XLenVT);
  ScalarOp = splatSplitI64WithVL(DL, VT, ScalarOp, VL, DAG);
  return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
}

SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                     SelectionDAG &DAG) const {
  unsigned IntNo = Op.getConstantOperandVal(0);
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();

  switch (IntNo) {
  default:
    break; // Don't custom lower most intrinsics.
  case Intrinsic::thread_pointer: {
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    // tp is X4 in the RISC-V calling convention.
    return DAG.getRegister(RISCV::X4, PtrVT);
  }
  case Intrinsic::riscv_vmv_x_s:
    assert(Op.getValueType() == XLenVT && "Unexpected VT!");
    return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
                       Op.getOperand(1));
  case Intrinsic::riscv_vmv_v_x: {
    SDValue Scalar = Op.getOperand(1);
    if (Scalar.getValueType().bitsLE(XLenVT)) {
      // Sign extend constants so the simm5 check for .vi forms can succeed.
      unsigned ExtOpc =
          isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
      Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
      return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, Op.getValueType(), Scalar,
                         Op.getOperand(2));
    }

    assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");

    // If this is a sign-extended 32-bit constant, we can truncate it and rely
    // on the instruction to sign-extend since SEW>XLEN.
    if (auto *CVal = dyn_cast<ConstantSDNode>(Scalar)) {
      if (isInt<32>(CVal->getSExtValue()))
        return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, Op.getValueType(),
                           DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32),
                           Op.getOperand(2));
    }

    // Otherwise use the more complicated splatting algorithm.
    return splatSplitI64WithVL(DL, Op.getSimpleValueType(), Scalar,
                               Op.getOperand(2), DAG);
  }
  case Intrinsic::riscv_vfmv_v_f:
    return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
  case Intrinsic::riscv_vmv_s_x: {
    SDValue Scalar = Op.getOperand(2);

    if (Scalar.getValueType().bitsLE(XLenVT)) {
      Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
      return DAG.getNode(RISCVISD::VMV_S_XF_VL, DL, Op.getValueType(),
                         Op.getOperand(1), Scalar, Op.getOperand(3));
    }

    assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");

    // This is an i64 value that lives in two scalar registers. We have to
    // insert this in a convoluted way. First we build a vXi64 splat containing
    // the two values that we assemble using some bit math. Next we'll use
    // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
    // to merge element 0 from our splat into the source vector.
    // FIXME: This is probably not the best way to do this, but it is
    // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
    // point.
    // vmv.v.x vX, hi
    // vsll.vx vX, vX, /*32*/
    // vmv.v.x vY, lo
    // vsll.vx vY, vY, /*32*/
    // vsrl.vx vY, vY, /*32*/
    // vor.vv vX, vX, vY
    //
    // vid.v vVid
    // vmseq.vx mMask, vVid, 0
    // vmerge.vvm vDest, vSrc, vVal, mMask
    MVT VT = Op.getSimpleValueType();
    SDValue Vec = Op.getOperand(1);
    SDValue VL = Op.getOperand(3);

    SDValue SplattedVal = splatSplitI64WithVL(DL, VT, Scalar, VL, DAG);
    SDValue SplattedIdx = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT,
                                      DAG.getConstant(0, DL, MVT::i32), VL);

    MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
    SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
    SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
    SDValue SelectCond =
        DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, VID, SplattedIdx,
                    DAG.getCondCode(ISD::SETEQ), Mask, VL);
    return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal,
                       Vec, VL);
  }
  }

  return lowerVectorIntrinsicSplats(Op, DAG, Subtarget);
}

SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
                                                    SelectionDAG &DAG) const {
  return lowerVectorIntrinsicSplats(Op, DAG, Subtarget);
}

// Returns the scalable vector type with the same element type as VT whose
// total size is one vector register (RVVBitsPerBlock), i.e. the LMUL=1 type.
static MVT getLMUL1VT(MVT VT) {
  assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
         "Unexpected vector MVT");
  return MVT::getScalableVectorVT(
      VT.getVectorElementType(),
      RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
}

// Maps a generic ISD::VECREDUCE_* opcode to the matching VL-predicated
// RISCVISD reduction opcode.
static unsigned getRVVReductionOp(unsigned ISDOpcode) {
  switch (ISDOpcode) {
  default:
    llvm_unreachable("Unhandled reduction");
  case ISD::VECREDUCE_ADD:
    return RISCVISD::VECREDUCE_ADD_VL;
  case ISD::VECREDUCE_UMAX:
    return RISCVISD::VECREDUCE_UMAX_VL;
  case ISD::VECREDUCE_SMAX:
    return RISCVISD::VECREDUCE_SMAX_VL;
  case ISD::VECREDUCE_UMIN:
    return RISCVISD::VECREDUCE_UMIN_VL;
  case ISD::VECREDUCE_SMIN:
    return RISCVISD::VECREDUCE_SMIN_VL;
  case ISD::VECREDUCE_AND:
    return RISCVISD::VECREDUCE_AND_VL;
  case ISD::VECREDUCE_OR:
    return RISCVISD::VECREDUCE_OR_VL;
  case ISD::VECREDUCE_XOR:
    return RISCVISD::VECREDUCE_XOR_VL;
  }
}

// Lower an integer VECREDUCE_* to an RVV reduction: reduce into an LMUL=1
// result vector seeded with the operation's neutral element, then extract
// element 0 and sign-extend/truncate it to the requested result type.
SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vec = Op.getOperand(0);
  EVT VecEVT = Vec.getValueType();

  unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());

  // Due to ordering in legalize types we may have a vector type that needs to
  // be split. Do that manually so we can get down to a legal type.
  while (getTypeAction(*DAG.getContext(), VecEVT) ==
         TargetLowering::TypeSplitVector) {
    SDValue Lo, Hi;
    std::tie(Lo, Hi) = DAG.SplitVector(Vec, DL);
    VecEVT = Lo.getValueType();
    Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
  }

  // TODO: The type may need to be widened rather than split. Or widened before
  // it can be split.
  if (!isTypeLegal(VecEVT))
    return SDValue();

  MVT VecVT = VecEVT.getSimpleVT();
  MVT VecEltVT = VecVT.getVectorElementType();
  unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());

  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  MVT M1VT = getLMUL1VT(ContainerVT);

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  // FIXME: This is a VLMAX splat which might be too large and can prevent
  // vsetvli removal.
  SDValue NeutralElem =
      DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
  SDValue IdentitySplat = DAG.getSplatVector(M1VT, DL, NeutralElem);
  SDValue Reduction =
      DAG.getNode(RVVOpcode, DL, M1VT, Vec, IdentitySplat, Mask, VL);
  SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
                             DAG.getConstant(0, DL, Subtarget.getXLenVT()));
  return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType());
}

// Given a reduction op, this function returns the matching reduction opcode,
// the vector SDValue and the scalar SDValue required to lower this to a
// RISCVISD node.
static std::tuple<unsigned, SDValue, SDValue>
getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) {
  SDLoc DL(Op);
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("Unhandled reduction");
  case ISD::VECREDUCE_FADD:
    // Unordered FADD: seed the accumulator with +0.0.
    return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0),
                           DAG.getConstantFP(0.0, DL, EltVT));
  case ISD::VECREDUCE_SEQ_FADD:
    // Ordered FADD: operand 0 is the start value, operand 1 the vector.
    return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
                           Op.getOperand(0));
  }
}

// Lower a floating-point VECREDUCE_* to an RVV reduction: splat the scalar
// start value into an LMUL=1 vector, reduce, then extract element 0.
SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecEltVT = Op.getSimpleValueType();

  unsigned RVVOpcode;
  SDValue VectorVal, ScalarVal;
  std::tie(RVVOpcode, VectorVal, ScalarVal) =
      getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT);
  MVT VecVT = VectorVal.getSimpleValueType();

  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
  }

  MVT M1VT = getLMUL1VT(VectorVal.getSimpleValueType());

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  // FIXME: This is a VLMAX splat which might be too large and can prevent
  // vsetvli removal.
  SDValue ScalarSplat = DAG.getSplatVector(M1VT, DL, ScalarVal);
  SDValue Reduction =
      DAG.getNode(RVVOpcode, DL, M1VT, VectorVal, ScalarSplat, Mask, VL);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
                     DAG.getConstant(0, DL, Subtarget.getXLenVT()));
}

SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDValue Vec = Op.getOperand(0);
  SDValue SubVec = Op.getOperand(1);
  MVT VecVT = Vec.getSimpleValueType();
  MVT SubVecVT = SubVec.getSimpleValueType();

  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned OrigIdx = Op.getConstantOperandVal(2);
  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();

  // We don't have the ability to slide mask vectors up indexed by their i1
  // elements; the smallest we can do is i8. Often we are able to bitcast to
  // equivalent i8 vectors. Note that when inserting a fixed-length vector
  // into a scalable one, we might not necessarily have enough scalable
  // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
  if (SubVecVT.getVectorElementType() == MVT::i1 &&
      (OrigIdx != 0 || !Vec.isUndef())) {
    if (VecVT.getVectorMinNumElements() >= 8 &&
        SubVecVT.getVectorMinNumElements() >= 8) {
      assert(OrigIdx % 8 == 0 && "Invalid index");
      assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
             SubVecVT.getVectorMinNumElements() % 8 == 0 &&
             "Unexpected mask vector lowering");
      // Reinterpret both vectors as i8 with 1/8th the elements; the insert
      // index scales down by the same factor.
      OrigIdx /= 8;
      SubVecVT =
          MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
                           SubVecVT.isScalableVector());
      VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
                               VecVT.isScalableVector());
      Vec = DAG.getBitcast(VecVT, Vec);
      SubVec = DAG.getBitcast(SubVecVT, SubVec);
    } else {
      // We can't slide this mask vector up indexed by its i1 elements.
      // This poses a problem when we wish to insert a scalable vector which
      // can't be re-expressed as a larger type. Just choose the slow path and
      // extend to a larger type, then truncate back down.
      MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
      MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
      Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
      SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
      Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
                        Op.getOperand(2));
      SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
      // Truncate back to i1 by comparing against zero.
      return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
    }
  }

  // If the subvector vector is a fixed-length type, we cannot use subregister
  // manipulation to simplify the codegen; we don't know which register of a
  // LMUL group contains the specific subvector as we only know the minimum
  // register size. Therefore we must slide the vector group up the full
  // amount.
  if (SubVecVT.isFixedLengthVector()) {
    if (OrigIdx == 0 && Vec.isUndef())
      return Op;
    MVT ContainerVT = VecVT;
    if (VecVT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VecVT);
      Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
    }
    SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
                         DAG.getUNDEF(ContainerVT), SubVec,
                         DAG.getConstant(0, DL, XLenVT));
    SDValue Mask =
        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
    // Set the vector length to only the number of elements we care about. Note
    // that for slideup this includes the offset.
    SDValue VL =
        DAG.getConstant(OrigIdx + SubVecVT.getVectorNumElements(), DL, XLenVT);
    SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
    SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec,
                                  SubVec, SlideupAmt, Mask, VL);
    if (VecVT.isFixedLengthVector())
      Slideup = convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
    return DAG.getBitcast(Op.getValueType(), Slideup);
  }

  unsigned SubRegIdx, RemIdx;
  std::tie(SubRegIdx, RemIdx) =
      RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
          VecVT, SubVecVT, OrigIdx, TRI);

  RISCVVLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
  bool IsSubVecPartReg = SubVecLMUL == RISCVVLMUL::LMUL_F2 ||
                         SubVecLMUL == RISCVVLMUL::LMUL_F4 ||
                         SubVecLMUL == RISCVVLMUL::LMUL_F8;

  // 1. If the Idx has been completely eliminated and this subvector's size is
  // a vector register or a multiple thereof, or the surrounding elements are
  // undef, then this is a subvector insert which naturally aligns to a vector
  // register. These can easily be handled using subregister manipulation.
  // 2. If the subvector is smaller than a vector register, then the insertion
  // must preserve the undisturbed elements of the register. We do this by
  // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
  // (which resolves to a subregister copy), performing a VSLIDEUP to place the
  // subvector within the vector register, and an INSERT_SUBVECTOR of that
  // LMUL=1 type back into the larger vector (resolving to another subregister
  // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
  // to avoid allocating a large register group to hold our subvector.
  if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
    return Op;

  // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
  // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
  // (in our case undisturbed). This means we can set up a subvector insertion
  // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
  // size of the subvector.
  MVT InterSubVT = VecVT;
  SDValue AlignedExtract = Vec;
  unsigned AlignedIdx = OrigIdx - RemIdx;
  if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
    InterSubVT = getLMUL1VT(VecVT);
    // Extract a subvector equal to the nearest full vector register type. This
    // should resolve to a EXTRACT_SUBREG instruction.
    AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
                                 DAG.getConstant(AlignedIdx, DL, XLenVT));
  }

  SDValue SlideupAmt = DAG.getConstant(RemIdx, DL, XLenVT);
  // For scalable vectors this must be further multiplied by vscale.
  SlideupAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlideupAmt);

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);

  // Construct the vector length corresponding to RemIdx + length(SubVecVT).
  VL = DAG.getConstant(SubVecVT.getVectorMinNumElements(), DL, XLenVT);
  VL = DAG.getNode(ISD::VSCALE, DL, XLenVT, VL);
  VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);

  SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
                       DAG.getUNDEF(InterSubVT), SubVec,
                       DAG.getConstant(0, DL, XLenVT));

  SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, InterSubVT,
                                AlignedExtract, SubVec, SlideupAmt, Mask, VL);

  // If required, insert this subvector back into the correct vector register.
  // This should resolve to an INSERT_SUBREG instruction.
  if (VecVT.bitsGT(InterSubVT))
    Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, Slideup,
                          DAG.getConstant(AlignedIdx, DL, XLenVT));

  // We might have bitcast from a mask type: cast back to the original type if
  // required.
  return DAG.getBitcast(Op.getSimpleValueType(), Slideup);
}

SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDValue Vec = Op.getOperand(0);
  MVT SubVecVT = Op.getSimpleValueType();
  MVT VecVT = Vec.getSimpleValueType();

  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned OrigIdx = Op.getConstantOperandVal(1);
  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();

  // We don't have the ability to slide mask vectors down indexed by their i1
  // elements; the smallest we can do is i8. Often we are able to bitcast to
  // equivalent i8 vectors. Note that when extracting a fixed-length vector
  // from a scalable one, we might not necessarily have enough scalable
  // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
  if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
    if (VecVT.getVectorMinNumElements() >= 8 &&
        SubVecVT.getVectorMinNumElements() >= 8) {
      assert(OrigIdx % 8 == 0 && "Invalid index");
      assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
             SubVecVT.getVectorMinNumElements() % 8 == 0 &&
             "Unexpected mask vector lowering");
      // Reinterpret both vectors as i8 with 1/8th the elements; the extract
      // index scales down by the same factor.
      OrigIdx /= 8;
      SubVecVT =
          MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
                           SubVecVT.isScalableVector());
      VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
                               VecVT.isScalableVector());
      Vec = DAG.getBitcast(VecVT, Vec);
    } else {
      // We can't slide this mask vector down, indexed by its i1 elements.
      // This poses a problem when we wish to extract a scalable vector which
      // can't be re-expressed as a larger type. Just choose the slow path and
      // extend to a larger type, then truncate back down.
      // TODO: We could probably improve this when extracting certain fixed
      // from fixed, where we can extract as i8 and shift the correct element
      // right to reach the desired subvector?
      MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
      MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
      Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
                        Op.getOperand(1));
      SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
      // Truncate back to i1 by comparing against zero.
      return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
    }
  }

  // If the subvector vector is a fixed-length type, we cannot use subregister
  // manipulation to simplify the codegen; we don't know which register of a
  // LMUL group contains the specific subvector as we only know the minimum
  // register size. Therefore we must slide the vector group down the full
  // amount.
  if (SubVecVT.isFixedLengthVector()) {
    // With an index of 0 this is a cast-like subvector, which can be performed
    // with subregister operations.
    if (OrigIdx == 0)
      return Op;
    MVT ContainerVT = VecVT;
    if (VecVT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VecVT);
      Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
    }
    SDValue Mask =
        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
    // Set the vector length to only the number of elements we care about. This
    // avoids sliding down elements we're going to discard straight away.
    SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
    SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
    SDValue Slidedown =
        DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
                    DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
    // Now we can use a cast-like subvector extract to get the result.
    Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
                            DAG.getConstant(0, DL, XLenVT));
    return DAG.getBitcast(Op.getValueType(), Slidedown);
  }

  unsigned SubRegIdx, RemIdx;
  std::tie(SubRegIdx, RemIdx) =
      RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
          VecVT, SubVecVT, OrigIdx, TRI);

  // If the Idx has been completely eliminated then this is a subvector extract
  // which naturally aligns to a vector register. These can easily be handled
  // using subregister manipulation.
  if (RemIdx == 0)
    return Op;

  // Else we must shift our vector register directly to extract the subvector.
  // Do this using VSLIDEDOWN.

  // If the vector type is an LMUL-group type, extract a subvector equal to the
  // nearest full vector register type. This should resolve to a EXTRACT_SUBREG
  // instruction.
  MVT InterSubVT = VecVT;
  if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
    InterSubVT = getLMUL1VT(VecVT);
    Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
                      DAG.getConstant(OrigIdx - RemIdx, DL, XLenVT));
  }

  // Slide this vector register down by the desired number of elements in order
  // to place the desired subvector starting at element 0.
  SDValue SlidedownAmt = DAG.getConstant(RemIdx, DL, XLenVT);
  // For scalable vectors this must be further multiplied by vscale.
  SlidedownAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlidedownAmt);

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
  SDValue Slidedown =
      DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, InterSubVT,
                  DAG.getUNDEF(InterSubVT), Vec, SlidedownAmt, Mask, VL);

  // Now the vector is in the right position, extract our final subvector. This
  // should resolve to a COPY.
  Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
                          DAG.getConstant(0, DL, XLenVT));

  // We might have bitcast from a mask type: cast back to the original type if
  // required.
  return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
}

// Implement vector_reverse using vrgather.vv with indices determined by
// subtracting the id of each element from (VLMAX-1). This will convert
// the indices like so:
// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
3089 SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op, 3090 SelectionDAG &DAG) const { 3091 SDLoc DL(Op); 3092 MVT VecVT = Op.getSimpleValueType(); 3093 unsigned EltSize = VecVT.getScalarSizeInBits(); 3094 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue(); 3095 3096 unsigned MaxVLMAX = 0; 3097 unsigned VectorBitsMax = Subtarget.getMaxRVVVectorSizeInBits(); 3098 if (VectorBitsMax != 0) 3099 MaxVLMAX = ((VectorBitsMax / EltSize) * MinSize) / RISCV::RVVBitsPerBlock; 3100 3101 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL; 3102 MVT IntVT = VecVT.changeVectorElementTypeToInteger(); 3103 3104 // If this is SEW=8 and VLMAX is unknown or more than 256, we need 3105 // to use vrgatherei16.vv. 3106 // TODO: It's also possible to use vrgatherei16.vv for other types to 3107 // decrease register width for the index calculation. 3108 if ((MaxVLMAX == 0 || MaxVLMAX > 256) && EltSize == 8) { 3109 // If this is LMUL=8, we have to split before can use vrgatherei16.vv. 3110 // Reverse each half, then reassemble them in reverse order. 3111 // NOTE: It's also possible that after splitting that VLMAX no longer 3112 // requires vrgatherei16.vv. 3113 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) { 3114 SDValue Lo, Hi; 3115 std::tie(Lo, Hi) = DAG.SplitVectorOperand(Op.getNode(), 0); 3116 EVT LoVT, HiVT; 3117 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT); 3118 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo); 3119 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi); 3120 // Reassemble the low and high pieces reversed. 3121 // FIXME: This is a CONCAT_VECTORS. 3122 SDValue Res = 3123 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi, 3124 DAG.getIntPtrConstant(0, DL)); 3125 return DAG.getNode( 3126 ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo, 3127 DAG.getIntPtrConstant(LoVT.getVectorMinNumElements(), DL)); 3128 } 3129 3130 // Just promote the int type to i16 which will double the LMUL. 
3131 IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount()); 3132 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL; 3133 } 3134 3135 MVT XLenVT = Subtarget.getXLenVT(); 3136 SDValue Mask, VL; 3137 std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget); 3138 3139 // Calculate VLMAX-1 for the desired SEW. 3140 unsigned MinElts = VecVT.getVectorMinNumElements(); 3141 SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT, 3142 DAG.getConstant(MinElts, DL, XLenVT)); 3143 SDValue VLMinus1 = 3144 DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DAG.getConstant(1, DL, XLenVT)); 3145 3146 // Splat VLMAX-1 taking care to handle SEW==64 on RV32. 3147 bool IsRV32E64 = 3148 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64; 3149 SDValue SplatVL; 3150 if (!IsRV32E64) 3151 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1); 3152 else 3153 SplatVL = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, IntVT, VLMinus1); 3154 3155 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL); 3156 SDValue Indices = 3157 DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID, Mask, VL); 3158 3159 return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices, Mask, VL); 3160 } 3161 3162 SDValue 3163 RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op, 3164 SelectionDAG &DAG) const { 3165 auto *Load = cast<LoadSDNode>(Op); 3166 3167 SDLoc DL(Op); 3168 MVT VT = Op.getSimpleValueType(); 3169 MVT ContainerVT = getContainerForFixedLengthVector(VT); 3170 3171 SDValue VL = 3172 DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT()); 3173 3174 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 3175 SDValue NewLoad = DAG.getMemIntrinsicNode( 3176 RISCVISD::VLE_VL, DL, VTs, {Load->getChain(), Load->getBasePtr(), VL}, 3177 Load->getMemoryVT(), Load->getMemOperand()); 3178 3179 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget); 3180 return DAG.getMergeValues({Result, Load->getChain()}, DL); 3181 } 3182 3183 SDValue 3184 
RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
                                                      SelectionDAG &DAG) const {
  auto *Store = cast<StoreSDNode>(Op);

  SDLoc DL(Op);
  MVT VT = Store->getValue().getSimpleValueType();

  // FIXME: We probably need to zero any extra bits in a byte for mask stores.
  // This is tricky to do.

  MVT ContainerVT = getContainerForFixedLengthVector(VT);

  // The VL operand is the exact number of elements of the fixed-length type.
  SDValue VL =
      DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());

  // Widen the value to the scalable container type and emit a VSE_VL store.
  SDValue NewValue =
      convertToScalableVector(ContainerVT, Store->getValue(), DAG, Subtarget);
  return DAG.getMemIntrinsicNode(
      RISCVISD::VSE_VL, DL, DAG.getVTList(MVT::Other),
      {Store->getChain(), NewValue, Store->getBasePtr(), VL},
      Store->getMemoryVT(), Store->getMemOperand());
}

// Lower a masked load to the riscv_vle_mask intrinsic. Fixed-length operands
// are first converted to their scalable container types.
SDValue RISCVTargetLowering::lowerMLOAD(SDValue Op, SelectionDAG &DAG) const {
  auto *Load = cast<MaskedLoadSDNode>(Op);

  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  SDValue Mask = Load->getMask();
  SDValue PassThru = Load->getPassThru();
  SDValue VL;

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VT);
    MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());

    Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
    VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
  } else
    // X0 as the VL operand requests VLMAX for scalable vectors.
    VL = DAG.getRegister(RISCV::X0, XLenVT);

  SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
  SDValue IntID = DAG.getTargetConstant(Intrinsic::riscv_vle_mask, DL, XLenVT);
  SDValue Ops[] = {Load->getChain(), IntID,          PassThru,
                   Load->getBasePtr(), Mask, VL};
  SDValue Result =
      DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
                              Load->getMemoryVT(), Load->getMemOperand());
  SDValue Chain = Result.getValue(1);

  if (VT.isFixedLengthVector())
    Result = convertFromScalableVector(VT, Result, DAG, Subtarget);

  return DAG.getMergeValues({Result, Chain}, DL);
}

// Lower a masked store to the riscv_vse_mask intrinsic. Fixed-length operands
// are first converted to their scalable container types.
SDValue RISCVTargetLowering::lowerMSTORE(SDValue Op, SelectionDAG &DAG) const {
  auto *Store = cast<MaskedStoreSDNode>(Op);

  SDLoc DL(Op);
  SDValue Val = Store->getValue();
  SDValue Mask = Store->getMask();
  MVT VT = Val.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();
  SDValue VL;

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VT);
    MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());

    Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
    Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
  } else
    // X0 as the VL operand requests VLMAX for scalable vectors.
    VL = DAG.getRegister(RISCV::X0, XLenVT);

  SDValue IntID = DAG.getTargetConstant(Intrinsic::riscv_vse_mask, DL, XLenVT);
  return DAG.getMemIntrinsicNode(
      ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
      {Store->getChain(), IntID, Val, Store->getBasePtr(), Mask, VL},
      Store->getMemoryVT(), Store->getMemOperand());
}

// Lower a fixed-length vector SETCC to a mask-producing SETCC_VL on the
// scalable container type. Unordered/ordered FP predicates with no direct RVV
// equivalent are rewritten below in terms of supported predicates.
SDValue
RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
                                                      SelectionDAG &DAG) const {
  MVT InVT = Op.getOperand(0).getSimpleValueType();
  MVT ContainerVT = getContainerForFixedLengthVector(InVT);

  MVT VT = Op.getSimpleValueType();

  SDValue Op1 =
      convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
  SDValue Op2 =
      convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);

  SDLoc DL(Op);
  SDValue VL =
      DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());

  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();

  bool Invert = false;
  Optional<unsigned> LogicOpc;
  if (ContainerVT.isFloatingPoint()) {
    bool Swap = false;
    switch (CC) {
    default:
      break;
    case ISD::SETULE:
    case ISD::SETULT:
      Swap = true;
      LLVM_FALLTHROUGH;
    case ISD::SETUGE:
    case ISD::SETUGT:
      // Unordered comparisons: emit the inverse ordered compare and invert
      // the resulting mask at the end.
      CC = getSetCCInverse(CC, ContainerVT);
      Invert = true;
      break;
    case ISD::SETOGE:
    case ISD::SETOGT:
    case ISD::SETGE:
    case ISD::SETGT:
      // RVV only has "less than" forms; swap the operands.
      Swap = true;
      break;
    case ISD::SETUEQ:
      // Use !((OLT Op1, Op2) || (OLT Op2, Op1))
      Invert = true;
      LogicOpc = RISCVISD::VMOR_VL;
      CC = ISD::SETOLT;
      break;
    case ISD::SETONE:
      // Use ((OLT Op1, Op2) || (OLT Op2, Op1))
      LogicOpc = RISCVISD::VMOR_VL;
      CC = ISD::SETOLT;
      break;
    case ISD::SETO:
      // Use (OEQ Op1, Op1) && (OEQ Op2, Op2)
      LogicOpc = RISCVISD::VMAND_VL;
      CC = ISD::SETOEQ;
      break;
    case ISD::SETUO:
      // Use (UNE Op1, Op1) || (UNE Op2, Op2)
      LogicOpc = RISCVISD::VMOR_VL;
      CC = ISD::SETUNE;
      break;
    }

    if (Swap) {
      CC = getSetCCSwappedOperands(CC);
      std::swap(Op1, Op2);
    }
  }

  MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
  SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);

  // There are 3 cases we need to emit.
  // 1. For (OEQ Op1, Op1) && (OEQ Op2, Op2) or (UNE Op1, Op1) || (UNE Op2, Op2)
  //    we need to compare each operand with itself.
  // 2. For (OLT Op1, Op2) || (OLT Op2, Op1) we need to compare Op1 and Op2 in
  //    both orders.
  // 3. For any other case we just need one compare with Op1 and Op2.
  SDValue Cmp;
  if (LogicOpc && (CC == ISD::SETOEQ || CC == ISD::SETUNE)) {
    Cmp = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op1, Op1,
                      DAG.getCondCode(CC), Mask, VL);
    SDValue Cmp2 = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op2, Op2,
                               DAG.getCondCode(CC), Mask, VL);
    Cmp = DAG.getNode(*LogicOpc, DL, MaskVT, Cmp, Cmp2, VL);
  } else {
    Cmp = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op1, Op2,
                      DAG.getCondCode(CC), Mask, VL);
    if (LogicOpc) {
      SDValue Cmp2 = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op2, Op1,
                                 DAG.getCondCode(CC), Mask, VL);
      Cmp = DAG.getNode(*LogicOpc, DL, MaskVT, Cmp, Cmp2, VL);
    }
  }

  if (Invert) {
    // Invert the mask by XORing with all-ones (vmxor with vmset).
    SDValue AllOnes = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
    Cmp = DAG.getNode(RISCVISD::VMXOR_VL, DL, MaskVT, Cmp, AllOnes, VL);
  }

  return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
}

// Lower a fixed-length vector logic op. i1 vectors (masks) use the dedicated
// mask opcode (which carries no mask operand); other element types use the
// masked vector opcode.
SDValue RISCVTargetLowering::lowerFixedLengthVectorLogicOpToRVV(
    SDValue Op, SelectionDAG &DAG, unsigned MaskOpc, unsigned VecOpc) const {
  MVT VT = Op.getSimpleValueType();

  if (VT.getVectorElementType() == MVT::i1)
    return lowerToScalableOp(Op, DAG, MaskOpc, /*HasMask*/ false);

  return lowerToScalableOp(Op, DAG, VecOpc, /*HasMask*/ true);
}

// Lower vector ABS to smax(X, sub(0, X)).
SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  SDValue X = Op.getOperand(0);

  assert(VT.isFixedLengthVector() && "Unexpected type");

  MVT ContainerVT = getContainerForFixedLengthVector(VT);
  X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  // NegX = 0 - X; result = smax(X, NegX).
  SDValue SplatZero =
      DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                  DAG.getConstant(0, DL, Subtarget.getXLenVT()));
  SDValue NegX =
      DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X, Mask, VL);
  SDValue Max =
      DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX, Mask, VL);

  return convertFromScalableVector(VT, Max, DAG, Subtarget);
}

// Lower a fixed-length vector FCOPYSIGN to the masked FCOPYSIGN_VL node on
// the scalable container type.
SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
    SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  SDValue Mag = Op.getOperand(0);
  SDValue Sign = Op.getOperand(1);
  assert(Mag.getValueType() == Sign.getValueType() &&
         "Can only handle COPYSIGN with matching types.");

  MVT ContainerVT = getContainerForFixedLengthVector(VT);
  Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
  Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  SDValue CopySign =
      DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag, Sign, Mask, VL);

  return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
}

// Lower a fixed-length VSELECT to VSELECT_VL. The i1 condition vector and
// both data operands are converted to their scalable container types first.
SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
    SDValue Op, SelectionDAG &DAG) const {
  MVT VT = Op.getSimpleValueType();
  MVT ContainerVT = getContainerForFixedLengthVector(VT);

  MVT I1ContainerVT =
      MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());

  SDValue CC =
      convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
  SDValue Op1 =
      convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
  SDValue Op2 =
      convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);

  SDLoc DL(Op);
  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  SDValue Select =
      DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, Op1, Op2, VL);

  return convertFromScalableVector(VT, Select, DAG, Subtarget);
}

// Generic routine for lowering a fixed-length vector operation to its
// VL-predicated scalable equivalent: vector operands are widened to the
// container type and a mask (if HasMask) plus VL operand are appended.
SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, SelectionDAG &DAG,
                                               unsigned NewOpc,
                                               bool HasMask) const {
  MVT VT = Op.getSimpleValueType();
  assert(useRVVForFixedLengthVectorVT(VT) &&
         "Only expected to lower fixed length vector operation!");
  MVT ContainerVT = getContainerForFixedLengthVector(VT);

  // Create list of operands by converting existing ones to scalable types.
  SmallVector<SDValue, 6> Ops;
  for (const SDValue &V : Op->op_values()) {
    assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");

    // Pass through non-vector operands.
    if (!V.getValueType().isVector()) {
      Ops.push_back(V);
      continue;
    }

    // "cast" fixed length vector to a scalable vector.
    assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
           "Only fixed length vectors are supported!");
    Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
  }

  SDLoc DL(Op);
  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
  if (HasMask)
    Ops.push_back(Mask);
  Ops.push_back(VL);

  SDValue ScalableRes = DAG.getNode(NewOpc, DL, ContainerVT, Ops);
  return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
}

// Custom lower MGATHER to a legalized form for RVV. It will then be matched to
// a RVV indexed load. The RVV indexed load instructions only support the
// "unsigned unscaled" addressing mode; indices are implicitly zero-extended or
// truncated to XLEN and are treated as byte offsets. Any signed or scaled
// indexing is extended to the XLEN value type and scaled accordingly.
SDValue RISCVTargetLowering::lowerMGATHER(SDValue Op, SelectionDAG &DAG) const {
  auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
  SDLoc DL(Op);

  SDValue Index = MGN->getIndex();
  SDValue Mask = MGN->getMask();
  SDValue PassThru = MGN->getPassThru();

  MVT VT = Op.getSimpleValueType();
  MVT IndexVT = Index.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
         "Unexpected VTs!");
  assert(MGN->getBasePtr().getSimpleValueType() == XLenVT &&
         "Unexpected pointer type");
  // Targets have to explicitly opt-in for extending vector loads.
  assert(MGN->getExtensionType() == ISD::NON_EXTLOAD &&
         "Unexpected extending MGATHER");

  // If the mask is known to be all ones, optimize to an unmasked intrinsic;
  // the selection of the masked intrinsics doesn't do this for us.
3521 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 3522 3523 SDValue VL; 3524 MVT ContainerVT = VT; 3525 if (VT.isFixedLengthVector()) { 3526 // We need to use the larger of the result and index type to determine the 3527 // scalable type to use so we don't increase LMUL for any operand/result. 3528 if (VT.bitsGE(IndexVT)) { 3529 ContainerVT = getContainerForFixedLengthVector(VT); 3530 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), 3531 ContainerVT.getVectorElementCount()); 3532 } else { 3533 IndexVT = getContainerForFixedLengthVector(IndexVT); 3534 ContainerVT = MVT::getVectorVT(ContainerVT.getVectorElementType(), 3535 IndexVT.getVectorElementCount()); 3536 } 3537 3538 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget); 3539 3540 if (!IsUnmasked) { 3541 MVT MaskVT = 3542 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 3543 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 3544 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget); 3545 } 3546 3547 VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT); 3548 } else 3549 VL = DAG.getRegister(RISCV::X0, XLenVT); 3550 3551 unsigned IntID = 3552 IsUnmasked ? 
Intrinsic::riscv_vloxei : Intrinsic::riscv_vloxei_mask; 3553 SmallVector<SDValue, 8> Ops{MGN->getChain(), 3554 DAG.getTargetConstant(IntID, DL, XLenVT)}; 3555 if (!IsUnmasked) 3556 Ops.push_back(PassThru); 3557 Ops.push_back(MGN->getBasePtr()); 3558 Ops.push_back(Index); 3559 if (!IsUnmasked) 3560 Ops.push_back(Mask); 3561 Ops.push_back(VL); 3562 3563 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 3564 SDValue Result = 3565 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, 3566 MGN->getMemoryVT(), MGN->getMemOperand()); 3567 SDValue Chain = Result.getValue(1); 3568 3569 if (VT.isFixedLengthVector()) 3570 Result = convertFromScalableVector(VT, Result, DAG, Subtarget); 3571 3572 return DAG.getMergeValues({Result, Chain}, DL); 3573 } 3574 3575 // Custom lower MSCATTER to a legalized form for RVV. It will then be matched to 3576 // a RVV indexed store. The RVV indexed store instructions only support the 3577 // "unsigned unscaled" addressing mode; indices are implicitly zero-extended or 3578 // truncated to XLEN and are treated as byte offsets. Any signed or scaled 3579 // indexing is extended to the XLEN value type and scaled accordingly. 3580 SDValue RISCVTargetLowering::lowerMSCATTER(SDValue Op, 3581 SelectionDAG &DAG) const { 3582 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode()); 3583 SDLoc DL(Op); 3584 SDValue Index = MSN->getIndex(); 3585 SDValue Mask = MSN->getMask(); 3586 SDValue Val = MSN->getValue(); 3587 3588 MVT VT = Val.getSimpleValueType(); 3589 MVT IndexVT = Index.getSimpleValueType(); 3590 MVT XLenVT = Subtarget.getXLenVT(); 3591 3592 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 3593 "Unexpected VTs!"); 3594 assert(MSN->getBasePtr().getSimpleValueType() == XLenVT && 3595 "Unexpected pointer type"); 3596 // Targets have to explicitly opt-in for extending vector loads and 3597 // truncating vector stores. 
3598 assert(!MSN->isTruncatingStore() && "Unexpected extending MSCATTER"); 3599 3600 // If the mask is known to be all ones, optimize to an unmasked intrinsic; 3601 // the selection of the masked intrinsics doesn't do this for us. 3602 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 3603 3604 SDValue VL; 3605 if (VT.isFixedLengthVector()) { 3606 // We need to use the larger of the value and index type to determine the 3607 // scalable type to use so we don't increase LMUL for any operand/result. 3608 if (VT.bitsGE(IndexVT)) { 3609 VT = getContainerForFixedLengthVector(VT); 3610 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), 3611 VT.getVectorElementCount()); 3612 } else { 3613 IndexVT = getContainerForFixedLengthVector(IndexVT); 3614 VT = MVT::getVectorVT(VT.getVectorElementType(), 3615 IndexVT.getVectorElementCount()); 3616 } 3617 3618 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget); 3619 Val = convertToScalableVector(VT, Val, DAG, Subtarget); 3620 3621 if (!IsUnmasked) { 3622 MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount()); 3623 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 3624 } 3625 3626 VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT); 3627 } else 3628 VL = DAG.getRegister(RISCV::X0, XLenVT); 3629 3630 unsigned IntID = 3631 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask; 3632 SmallVector<SDValue, 8> Ops{MSN->getChain(), 3633 DAG.getTargetConstant(IntID, DL, XLenVT)}; 3634 Ops.push_back(Val); 3635 Ops.push_back(MSN->getBasePtr()); 3636 Ops.push_back(Index); 3637 if (!IsUnmasked) 3638 Ops.push_back(Mask); 3639 Ops.push_back(VL); 3640 3641 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, MSN->getVTList(), Ops, 3642 MSN->getMemoryVT(), MSN->getMemOperand()); 3643 } 3644 3645 // Returns the opcode of the target-specific SDNode that implements the 32-bit 3646 // form of the given Opcode. 
static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode");
  case ISD::SHL:
    return RISCVISD::SLLW;
  case ISD::SRA:
    return RISCVISD::SRAW;
  case ISD::SRL:
    return RISCVISD::SRLW;
  case ISD::SDIV:
    return RISCVISD::DIVW;
  case ISD::UDIV:
    return RISCVISD::DIVUW;
  case ISD::UREM:
    return RISCVISD::REMUW;
  case ISD::ROTL:
    return RISCVISD::ROLW;
  case ISD::ROTR:
    return RISCVISD::RORW;
  case RISCVISD::GREVI:
    return RISCVISD::GREVIW;
  case RISCVISD::GORCI:
    return RISCVISD::GORCIW;
  }
}

// Converts the given 32-bit operation to a target-specific SelectionDAG node.
// Because i32 isn't a legal type for RV64, these operations would otherwise
// be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
// later one because the fact the operation was originally of type i32 is
// lost.
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
                                   unsigned ExtOpc = ISD::ANY_EXTEND) {
  SDLoc DL(N);
  RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
  // Extend both operands to i64 (ExtOpc lets div/rem callers request
  // sign/zero extension for sub-i32 types), emit the W-form node, then
  // truncate back to the original type.
  SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
  SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
  SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
  // ReplaceNodeResults requires we maintain the same type for the return value.
  return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
}

// Converts the given 32-bit operation to a i64 operation with signed extension
// semantic to reduce the signed extension instructions.
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
  SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
  SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
  // Perform the i64 op, then sign-extend from bit 31 so the result looks like
  // the corresponding *W instruction's output.
  SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
                               DAG.getValueType(MVT::i32));
  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
}

// Custom type-legalization hook: replaces the results of nodes whose result
// type is illegal with equivalent nodes on legal types.
void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
                                             SmallVectorImpl<SDValue> &Results,
                                             SelectionDAG &DAG) const {
  SDLoc DL(N);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Don't know how to custom type legalize this operation!");
  case ISD::STRICT_FP_TO_SINT:
  case ISD::STRICT_FP_TO_UINT:
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT: {
    bool IsStrict = N->isStrictFPOpcode();
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
    // If the FP type needs to be softened, emit a library call using the 'si'
    // version. If we left it to default legalization we'd end up with 'di'. If
    // the FP type doesn't need to be softened just let generic type
    // legalization promote the result type.
    if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
        TargetLowering::TypeSoftenFloat)
      return;
    RTLIB::Libcall LC;
    if (N->getOpcode() == ISD::FP_TO_SINT ||
        N->getOpcode() == ISD::STRICT_FP_TO_SINT)
      LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
    else
      LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
    MakeLibCallOptions CallOptions;
    EVT OpVT = Op0.getValueType();
    CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
    SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
    SDValue Result;
    std::tie(Result, Chain) =
        makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
    Results.push_back(Result);
    if (IsStrict)
      Results.push_back(Chain);
    break;
  }
  case ISD::READCYCLECOUNTER: {
    assert(!Subtarget.is64Bit() &&
           "READCYCLECOUNTER only has custom type legalization on riscv32");

    // READ_CYCLE_WIDE returns the 64-bit counter as two i32 halves plus a
    // chain; combine the halves with BUILD_PAIR.
    SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
    SDValue RCW =
        DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0));

    Results.push_back(
        DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
    Results.push_back(RCW.getValue(2));
    break;
  }
  case ISD::ADD:
  case ISD::SUB:
  case ISD::MUL:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    // Constant RHS is left to the generic legalizer/isel patterns.
    if (N->getOperand(1).getOpcode() == ISD::Constant)
      return;
    Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
    break;
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    if (N->getOperand(1).getOpcode() == ISD::Constant)
      return;
    Results.push_back(customLegalizeToWOp(N, DAG));
    break;
  case ISD::ROTL:
  case ISD::ROTR:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    Results.push_back(customLegalizeToWOp(N, DAG));
    break;
  case ISD::SDIV:
  case ISD::UDIV:
  case ISD::UREM: {
    MVT VT = N->getSimpleValueType(0);
    assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
           Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
           "Unexpected custom legalisation");
    if (N->getOperand(0).getOpcode() == ISD::Constant ||
        N->getOperand(1).getOpcode() == ISD::Constant)
      return;

    // If the input is i32, use ANY_EXTEND since the W instructions don't read
    // the upper 32 bits. For other types we need to sign or zero extend
    // based on the opcode.
    unsigned ExtOpc = ISD::ANY_EXTEND;
    if (VT != MVT::i32)
      ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
                                           : ISD::ZERO_EXTEND;

    Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
    break;
  }
  case ISD::UADDO:
  case ISD::USUBO: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    bool IsAdd = N->getOpcode() == ISD::UADDO;
    SDLoc DL(N);
    // Create an ADDW or SUBW.
    SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
    SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
    SDValue Res =
        DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
    Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
                      DAG.getValueType(MVT::i32));

    // Sign extend the LHS and perform an unsigned compare with the ADDW result.
    // Since the inputs are sign extended from i32, this is equivalent to
    // comparing the lower 32 bits.
    LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
    SDValue Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
                                    IsAdd ? ISD::SETULT : ISD::SETUGT);

    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
    Results.push_back(Overflow);
    return;
  }
  case ISD::UADDSAT:
  case ISD::USUBSAT: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    SDLoc DL(N);
    if (Subtarget.hasStdExtZbb()) {
      // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
      // sign extend allows overflow of the lower 32 bits to be detected on
      // the promoted size.
      SDValue LHS =
          DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
      SDValue RHS =
          DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
      SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
      return;
    }

    // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
    // promotion for UADDO/USUBO.
    Results.push_back(expandAddSubSat(N, DAG));
    return;
  }
  case ISD::BITCAST: {
    assert(((N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
             Subtarget.hasStdExtF()) ||
            (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh())) &&
           "Unexpected custom legalisation");
    SDValue Op0 = N->getOperand(0);
    if (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh()) {
      if (Op0.getValueType() != MVT::f16)
        return;
      // f16 -> i16 via an XLen-wide FMV, then truncate.
      SDValue FPConv =
          DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Op0);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
    } else if (N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
               Subtarget.hasStdExtF()) {
      if (Op0.getValueType() != MVT::f32)
        return;
      // f32 -> i32 on RV64 via fmv.x.w (result anyext'ed to i64).
      SDValue FPConv =
          DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
    }
    break;
  }
  case RISCVISD::GREVI:
  case RISCVISD::GORCI: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    // This is similar to customLegalizeToWOp, except that we pass the second
    // operand (a TargetConstant) straight through: it is already of type
    // XLenVT.
    SDLoc DL(N);
    RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
    SDValue NewOp0 =
        DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
    SDValue NewRes =
        DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, N->getOperand(1));
    // ReplaceNodeResults requires we maintain the same type for the return
    // value.
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
    break;
  }
  case RISCVISD::SHFLI: {
    // There is no SHFLIW instruction, but we can just promote the operation.
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    SDLoc DL(N);
    SDValue NewOp0 =
        DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
    SDValue NewRes =
        DAG.getNode(RISCVISD::SHFLI, DL, MVT::i64, NewOp0, N->getOperand(1));
    // ReplaceNodeResults requires we maintain the same type for the return
    // value.
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
    break;
  }
  case ISD::BSWAP:
  case ISD::BITREVERSE: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
    SDValue NewOp0 =
        DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
    // On a 32-bit value, grevi 24 is bswap and grevi 31 is bitreverse.
    unsigned Imm = N->getOpcode() == ISD::BITREVERSE ? 31 : 24;
    SDValue GREVIW =
        DAG.getNode(RISCVISD::GREVIW, DL, MVT::i64, NewOp0,
                    DAG.getTargetConstant(Imm, DL, Subtarget.getXLenVT()));
    // ReplaceNodeResults requires we maintain the same type for the return
    // value.
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, GREVIW));
    break;
  }
  case ISD::FSHL:
  case ISD::FSHR: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           Subtarget.hasStdExtZbt() && "Unexpected custom legalisation");
    SDValue NewOp0 =
        DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
    SDValue NewOp1 =
        DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
    SDValue NewOp2 =
        DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
    // FSLW/FSRW take a 6 bit shift amount but i32 FSHL/FSHR only use 5 bits.
    // Mask the shift amount to 5 bits.
    NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2,
                         DAG.getConstant(0x1f, DL, MVT::i64));
    unsigned Opc =
        N->getOpcode() == ISD::FSHL ? RISCVISD::FSLW : RISCVISD::FSRW;
    SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewOp2);
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp));
    break;
  }
  case ISD::EXTRACT_VECTOR_ELT: {
    // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
    // type is illegal (currently only vXi64 RV32).
    // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
    // transferred to the destination register. We issue two of these from the
    // upper- and lower- halves of the SEW-bit vector element, slid down to the
    // first element.
    SDLoc DL(N);
    SDValue Vec = N->getOperand(0);
    SDValue Idx = N->getOperand(1);

    // The vector type hasn't been legalized yet so we can't issue target
    // specific nodes if it needs legalization.
    // FIXME: We would manually legalize if it's important.
    if (!isTypeLegal(Vec.getValueType()))
      return;

    MVT VecVT = Vec.getSimpleValueType();

    assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
           VecVT.getVectorElementType() == MVT::i64 &&
           "Unexpected EXTRACT_VECTOR_ELT legalization");

    // If this is a fixed vector, we need to convert it to a scalable vector.
    MVT ContainerVT = VecVT;
    if (VecVT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VecVT);
      Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
    }

    MVT XLenVT = Subtarget.getXLenVT();

    // Use a VL of 1 to avoid processing more elements than we need.
    MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount());
    SDValue VL = DAG.getConstant(1, DL, XLenVT);
    SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);

    // Unless the index is known to be 0, we must slide the vector down to get
    // the desired element into index 0.
    if (!isNullConstant(Idx)) {
      Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
                        DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
    }

    // Extract the lower XLEN bits of the correct vector element.
    SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);

    // To extract the upper XLEN bits of the vector element, shift the first
    // element right by 32 bits and re-extract the lower XLEN bits.
    SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                                     DAG.getConstant(32, DL, XLenVT), VL);
    SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec,
                                 ThirtyTwoV, Mask, VL);

    SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);

    Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    switch (IntNo) {
    default:
      llvm_unreachable(
          "Don't know how to custom type legalize this intrinsic!");
    case Intrinsic::riscv_vmv_x_s: {
      EVT VT = N->getValueType(0);
      MVT XLenVT = Subtarget.getXLenVT();
      if (VT.bitsLT(XLenVT)) {
        // Simple case just extract using vmv.x.s and truncate.
        SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
                                      Subtarget.getXLenVT(), N->getOperand(1));
        Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
        return;
      }

      assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
             "Unexpected custom legalization");

      // We need to do the move in two steps.
      SDValue Vec = N->getOperand(1);
      MVT VecVT = Vec.getSimpleValueType();

      // First extract the lower XLEN bits of the element.
      SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);

      // To extract the upper XLEN bits of the vector element, shift the first
      // element right by 32 bits and re-extract the lower XLEN bits.
      SDValue VL = DAG.getConstant(1, DL, XLenVT);
      MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount());
      SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
      SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT,
                                       DAG.getConstant(32, DL, XLenVT), VL);
      SDValue LShr32 =
          DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV, Mask, VL);
      SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);

      Results.push_back(
          DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
      break;
    }
    }
    break;
  }
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_UMIN:
    if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
      Results.push_back(V);
    break;
  }
}

// A structure to hold one of the bit-manipulation patterns below. Together, a
// SHL and non-SHL pattern may form a bit-manipulation pair on a single source:
//   (or (and (shl x, 1), 0xAAAAAAAA),
//       (and (srl x, 1), 0x55555555))
struct RISCVBitmanipPat {
  SDValue Op;        // The common source value being shifted/masked.
  unsigned ShAmt;    // The (power-of-2) shift amount.
  bool IsSHL;        // True if this half used SHL, false for SRL.

  bool formsPairWith(const RISCVBitmanipPat &Other) const {
    return Op == Other.Op && ShAmt == Other.ShAmt && IsSHL != Other.IsSHL;
  }
};

// Matches patterns of the form
//   (and (shl x, C2), (C1 << C2))
//   (and (srl x, C2), C1)
//   (shl (and x, C1), C2)
//   (srl (and x, (C1 << C2)), C2)
// Where C2 is a power of 2 and C1 has at least that many leading zeroes.
// The expected masks for each shift amount are specified in BitmanipMasks where
// BitmanipMasks[log2(C2)] specifies the expected C1 value.
4078 // The max allowed shift amount is either XLen/2 or XLen/4 determined by whether 4079 // BitmanipMasks contains 6 or 5 entries assuming that the maximum possible 4080 // XLen is 64. 4081 static Optional<RISCVBitmanipPat> 4082 matchRISCVBitmanipPat(SDValue Op, ArrayRef<uint64_t> BitmanipMasks) { 4083 assert((BitmanipMasks.size() == 5 || BitmanipMasks.size() == 6) && 4084 "Unexpected number of masks"); 4085 Optional<uint64_t> Mask; 4086 // Optionally consume a mask around the shift operation. 4087 if (Op.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op.getOperand(1))) { 4088 Mask = Op.getConstantOperandVal(1); 4089 Op = Op.getOperand(0); 4090 } 4091 if (Op.getOpcode() != ISD::SHL && Op.getOpcode() != ISD::SRL) 4092 return None; 4093 bool IsSHL = Op.getOpcode() == ISD::SHL; 4094 4095 if (!isa<ConstantSDNode>(Op.getOperand(1))) 4096 return None; 4097 uint64_t ShAmt = Op.getConstantOperandVal(1); 4098 4099 unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32; 4100 if (ShAmt >= Width && !isPowerOf2_64(ShAmt)) 4101 return None; 4102 // If we don't have enough masks for 64 bit, then we must be trying to 4103 // match SHFL so we're only allowed to shift 1/4 of the width. 4104 if (BitmanipMasks.size() == 5 && ShAmt >= (Width / 2)) 4105 return None; 4106 4107 SDValue Src = Op.getOperand(0); 4108 4109 // The expected mask is shifted left when the AND is found around SHL 4110 // patterns. 4111 // ((x >> 1) & 0x55555555) 4112 // ((x << 1) & 0xAAAAAAAA) 4113 bool SHLExpMask = IsSHL; 4114 4115 if (!Mask) { 4116 // Sometimes LLVM keeps the mask as an operand of the shift, typically when 4117 // the mask is all ones: consume that now. 4118 if (Src.getOpcode() == ISD::AND && isa<ConstantSDNode>(Src.getOperand(1))) { 4119 Mask = Src.getConstantOperandVal(1); 4120 Src = Src.getOperand(0); 4121 // The expected mask is now in fact shifted left for SRL, so reverse the 4122 // decision. 
4123 // ((x & 0xAAAAAAAA) >> 1) 4124 // ((x & 0x55555555) << 1) 4125 SHLExpMask = !SHLExpMask; 4126 } else { 4127 // Use a default shifted mask of all-ones if there's no AND, truncated 4128 // down to the expected width. This simplifies the logic later on. 4129 Mask = maskTrailingOnes<uint64_t>(Width); 4130 *Mask &= (IsSHL ? *Mask << ShAmt : *Mask >> ShAmt); 4131 } 4132 } 4133 4134 unsigned MaskIdx = Log2_32(ShAmt); 4135 uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width); 4136 4137 if (SHLExpMask) 4138 ExpMask <<= ShAmt; 4139 4140 if (Mask != ExpMask) 4141 return None; 4142 4143 return RISCVBitmanipPat{Src, (unsigned)ShAmt, IsSHL}; 4144 } 4145 4146 // Matches any of the following bit-manipulation patterns: 4147 // (and (shl x, 1), (0x55555555 << 1)) 4148 // (and (srl x, 1), 0x55555555) 4149 // (shl (and x, 0x55555555), 1) 4150 // (srl (and x, (0x55555555 << 1)), 1) 4151 // where the shift amount and mask may vary thus: 4152 // [1] = 0x55555555 / 0xAAAAAAAA 4153 // [2] = 0x33333333 / 0xCCCCCCCC 4154 // [4] = 0x0F0F0F0F / 0xF0F0F0F0 4155 // [8] = 0x00FF00FF / 0xFF00FF00 4156 // [16] = 0x0000FFFF / 0xFFFFFFFF 4157 // [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64) 4158 static Optional<RISCVBitmanipPat> matchGREVIPat(SDValue Op) { 4159 // These are the unshifted masks which we use to match bit-manipulation 4160 // patterns. They may be shifted left in certain circumstances. 
4161 static const uint64_t BitmanipMasks[] = { 4162 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL, 4163 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL}; 4164 4165 return matchRISCVBitmanipPat(Op, BitmanipMasks); 4166 } 4167 4168 // Match the following pattern as a GREVI(W) operation 4169 // (or (BITMANIP_SHL x), (BITMANIP_SRL x)) 4170 static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG, 4171 const RISCVSubtarget &Subtarget) { 4172 assert(Subtarget.hasStdExtZbp() && "Expected Zbp extenson"); 4173 EVT VT = Op.getValueType(); 4174 4175 if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) { 4176 auto LHS = matchGREVIPat(Op.getOperand(0)); 4177 auto RHS = matchGREVIPat(Op.getOperand(1)); 4178 if (LHS && RHS && LHS->formsPairWith(*RHS)) { 4179 SDLoc DL(Op); 4180 return DAG.getNode( 4181 RISCVISD::GREVI, DL, VT, LHS->Op, 4182 DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT())); 4183 } 4184 } 4185 return SDValue(); 4186 } 4187 4188 // Matches any the following pattern as a GORCI(W) operation 4189 // 1. (or (GREVI x, shamt), x) if shamt is a power of 2 4190 // 2. (or x, (GREVI x, shamt)) if shamt is a power of 2 4191 // 3. (or (or (BITMANIP_SHL x), x), (BITMANIP_SRL x)) 4192 // Note that with the variant of 3., 4193 // (or (or (BITMANIP_SHL x), (BITMANIP_SRL x)), x) 4194 // the inner pattern will first be matched as GREVI and then the outer 4195 // pattern will be matched to GORC via the first rule above. 4196 // 4. 
// 4. (or (rotl/rotr x, bitwidth/2), x)
static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG,
                               const RISCVSubtarget &Subtarget) {
  assert(Subtarget.hasStdExtZbp() && "Expected Zbp extenson");
  EVT VT = Op.getValueType();

  if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
    SDLoc DL(Op);
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    // Handles rules 1/2/4 above: Reverse is either (GREVI x, pow2-shamt) or a
    // rotate by exactly half the bit width, and X is the un-permuted source.
    auto MatchOROfReverse = [&](SDValue Reverse, SDValue X) {
      if (Reverse.getOpcode() == RISCVISD::GREVI && Reverse.getOperand(0) == X &&
          isPowerOf2_32(Reverse.getConstantOperandVal(1)))
        return DAG.getNode(RISCVISD::GORCI, DL, VT, X, Reverse.getOperand(1));
      // We can also form GORCI from ROTL/ROTR by half the bitwidth.
      if ((Reverse.getOpcode() == ISD::ROTL ||
           Reverse.getOpcode() == ISD::ROTR) &&
          Reverse.getOperand(0) == X &&
          isa<ConstantSDNode>(Reverse.getOperand(1))) {
        uint64_t RotAmt = Reverse.getConstantOperandVal(1);
        if (RotAmt == (VT.getSizeInBits() / 2))
          return DAG.getNode(
              RISCVISD::GORCI, DL, VT, X,
              DAG.getTargetConstant(RotAmt, DL, Subtarget.getXLenVT()));
      }
      return SDValue();
    };

    // Check for either commutable permutation of (or (GREVI x, shamt), x)
    if (SDValue V = MatchOROfReverse(Op0, Op1))
      return V;
    if (SDValue V = MatchOROfReverse(Op1, Op0))
      return V;

    // OR is commutable so canonicalize its OR operand to the left
    if (Op0.getOpcode() != ISD::OR && Op1.getOpcode() == ISD::OR)
      std::swap(Op0, Op1);
    if (Op0.getOpcode() != ISD::OR)
      return SDValue();
    SDValue OrOp0 = Op0.getOperand(0);
    SDValue OrOp1 = Op0.getOperand(1);
    auto LHS = matchGREVIPat(OrOp0);
    // OR is commutable so swap the operands and try again: x might have been
    // on the left
    if (!LHS) {
      std::swap(OrOp0, OrOp1);
      LHS = matchGREVIPat(OrOp0);
    }
    auto RHS = matchGREVIPat(Op1);
    // Rule 3: the matched shifts must pair up, and the remaining inner OR
    // operand must be the same source value x.
    if (LHS && RHS && LHS->formsPairWith(*RHS) && LHS->Op == OrOp1) {
      return DAG.getNode(
          RISCVISD::GORCI, DL, VT, LHS->Op,
          DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT()));
    }
  }
  return SDValue();
}

// Matches any of the following bit-manipulation patterns:
//   (and (shl x, 1), (0x22222222 << 1))
//   (and (srl x, 1), 0x22222222)
//   (shl (and x, 0x22222222), 1)
//   (srl (and x, (0x22222222 << 1)), 1)
// where the shift amount and mask may vary thus:
//   [1]  = 0x22222222 / 0x44444444
//   [2]  = 0x0C0C0C0C / 0x3C3C3C3C
//   [4]  = 0x00F000F0 / 0x0F000F00
//   [8]  = 0x0000FF00 / 0x00FF0000
//   [16] = 0x00000000FFFF0000 / 0x0000FFFF00000000 (for RV64)
static Optional<RISCVBitmanipPat> matchSHFLPat(SDValue Op) {
  // These are the unshifted masks which we use to match bit-manipulation
  // patterns. They may be shifted left in certain circumstances.
  static const uint64_t BitmanipMasks[] = {
      0x2222222222222222ULL, 0x0C0C0C0C0C0C0C0CULL, 0x00F000F000F000F0ULL,
      0x0000FF000000FF00ULL, 0x00000000FFFF0000ULL};

  return matchRISCVBitmanipPat(Op, BitmanipMasks);
}

// Match (or (or (SHFL_SHL x), (SHFL_SHR x)), (SHFL_AND x)
static SDValue combineORToSHFL(SDValue Op, SelectionDAG &DAG,
                               const RISCVSubtarget &Subtarget) {
  assert(Subtarget.hasStdExtZbp() && "Expected Zbp extenson");
  EVT VT = Op.getValueType();

  if (VT != MVT::i32 && VT != Subtarget.getXLenVT())
    return SDValue();

  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);

  // Or is commutable so canonicalize the second OR to the LHS.
  if (Op0.getOpcode() != ISD::OR)
    std::swap(Op0, Op1);
  if (Op0.getOpcode() != ISD::OR)
    return SDValue();

  // We found an inner OR, so our operands are the operands of the inner OR
  // and the other operand of the outer OR.
  SDValue A = Op0.getOperand(0);
  SDValue B = Op0.getOperand(1);
  SDValue C = Op1;

  auto Match1 = matchSHFLPat(A);
  auto Match2 = matchSHFLPat(B);

  // If neither matched, we failed.
  if (!Match1 && !Match2)
    return SDValue();

  // We had at least one match. if one failed, try the remaining C operand.
  if (!Match1) {
    std::swap(A, C);
    Match1 = matchSHFLPat(A);
    if (!Match1)
      return SDValue();
  } else if (!Match2) {
    std::swap(B, C);
    Match2 = matchSHFLPat(B);
    if (!Match2)
      return SDValue();
  }
  assert(Match1 && Match2);

  // Make sure our matches pair up.
  if (!Match1->formsPairWith(*Match2))
    return SDValue();

  // All that remains is to make sure C is an AND with the same input, that
  // masks out the bits that are being shuffled.
  if (C.getOpcode() != ISD::AND || !isa<ConstantSDNode>(C.getOperand(1)) ||
      C.getOperand(0) != Match1->Op)
    return SDValue();

  uint64_t Mask = C.getConstantOperandVal(1);

  // These masks keep the bits that are NOT moved by a SHFL stage; the AND on C
  // must match the entry for the paired shift amount.
  static const uint64_t BitmanipMasks[] = {
      0x9999999999999999ULL, 0xC3C3C3C3C3C3C3C3ULL, 0xF00FF00FF00FF00FULL,
      0xFF0000FFFF0000FFULL, 0xFFFF00000000FFFFULL,
  };

  unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
  unsigned MaskIdx = Log2_32(Match1->ShAmt);
  uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);

  if (Mask != ExpMask)
    return SDValue();

  SDLoc DL(Op);
  return DAG.getNode(
      RISCVISD::SHFLI, DL, VT, Match1->Op,
      DAG.getTargetConstant(Match1->ShAmt, DL, Subtarget.getXLenVT()));
}

// Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is
// non-zero, and to x when it is. Any repeated GREVI stage undoes itself.
// Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). Repeated stage does
// not undo itself, but they are redundant.
static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) {
  // N is a GREVI/GORCI (or W-variant) node; fold a chain of two identical
  // opcodes into a single stage by combining their control words.
  unsigned ShAmt1 = N->getConstantOperandVal(1);
  SDValue Src = N->getOperand(0);

  if (Src.getOpcode() != N->getOpcode())
    return SDValue();

  unsigned ShAmt2 = Src.getConstantOperandVal(1);
  Src = Src.getOperand(0);

  unsigned CombinedShAmt;
  if (N->getOpcode() == RISCVISD::GORCI || N->getOpcode() == RISCVISD::GORCIW)
    // GORC stages accumulate: OR the control words together.
    CombinedShAmt = ShAmt1 | ShAmt2;
  else
    // GREV stages cancel pairwise: XOR the control words.
    CombinedShAmt = ShAmt1 ^ ShAmt2;

  // Everything cancelled: the combined operation is the identity.
  if (CombinedShAmt == 0)
    return Src;

  SDLoc DL(N);
  return DAG.getNode(N->getOpcode(), DL, N->getValueType(0), Src,
                     DAG.getTargetConstant(CombinedShAmt, DL,
                                           N->getOperand(1).getValueType()));
}

SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  default:
    break;
  case RISCVISD::SplitF64: {
    SDValue Op0 = N->getOperand(0);
    // If the input to SplitF64 is just BuildPairF64 then the operation is
    // redundant. Instead, use BuildPairF64's operands directly.
    if (Op0->getOpcode() == RISCVISD::BuildPairF64)
      return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));

    SDLoc DL(N);

    // It's cheaper to materialise two 32-bit integers than to load a double
    // from the constant pool and transfer it to integer registers through the
    // stack.
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
      APInt V = C->getValueAPF().bitcastToAPInt();
      SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
      SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
      return DCI.CombineTo(N, Lo, Hi);
    }

    // This is a target-specific version of a DAGCombine performed in
    // DAGCombiner::visitBITCAST. It performs the equivalent of:
    // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
    // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
    if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
        !Op0.getNode()->hasOneUse())
      break;
    SDValue NewSplitF64 =
        DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
                    Op0.getOperand(0));
    SDValue Lo = NewSplitF64.getValue(0);
    SDValue Hi = NewSplitF64.getValue(1);
    APInt SignBit = APInt::getSignMask(32);
    if (Op0.getOpcode() == ISD::FNEG) {
      SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
                                  DAG.getConstant(SignBit, DL, MVT::i32));
      return DCI.CombineTo(N, Lo, NewHi);
    }
    assert(Op0.getOpcode() == ISD::FABS);
    SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
                                DAG.getConstant(~SignBit, DL, MVT::i32));
    return DCI.CombineTo(N, Lo, NewHi);
  }
  case RISCVISD::SLLW:
  case RISCVISD::SRAW:
  case RISCVISD::SRLW:
  case RISCVISD::ROLW:
  case RISCVISD::RORW: {
    // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);
    APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
    APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
    if (SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI) ||
        SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)) {
      // A simplification happened; revisit N (unless it was deleted).
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }
    break;
  }
  case RISCVISD::FSL:
  case RISCVISD::FSR: {
    // Only the lower log2(Bitwidth)+1 bits of the shift amount are read.
    SDValue ShAmt = N->getOperand(2);
    unsigned BitWidth = ShAmt.getValueSizeInBits();
    assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
    // (BitWidth * 2) - 1 has exactly log2(BitWidth)+1 low bits set.
    APInt ShAmtMask(BitWidth, (BitWidth * 2) - 1);
    if (SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }
    break;
  }
  case RISCVISD::FSLW:
  case RISCVISD::FSRW: {
    // Only the lower 32 bits of Values and lower 6 bits of shift amount are
    // read.
    SDValue Op0 = N->getOperand(0);
    SDValue Op1 = N->getOperand(1);
    SDValue ShAmt = N->getOperand(2);
    APInt OpMask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
    APInt ShAmtMask = APInt::getLowBitsSet(ShAmt.getValueSizeInBits(), 6);
    if (SimplifyDemandedBits(Op0, OpMask, DCI) ||
        SimplifyDemandedBits(Op1, OpMask, DCI) ||
        SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }
    break;
  }
  case RISCVISD::GREVIW:
  case RISCVISD::GORCIW: {
    // Only the lower 32 bits of the first operand are read
    SDValue Op0 = N->getOperand(0);
    APInt Mask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
    if (SimplifyDemandedBits(Op0, Mask, DCI)) {
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }

    // Also try to fold a chain of two identical stages (see
    // combineGREVI_GORCI above).
    return combineGREVI_GORCI(N, DCI.DAG);
  }
  case RISCVISD::FMV_X_ANYEXTW_RV64: {
    SDLoc DL(N);
    SDValue Op0 = N->getOperand(0);
    // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
    // conversion is unnecessary and can be replaced with an ANY_EXTEND
    // of the FMV_W_X_RV64 operand.
    if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) {
      assert(Op0.getOperand(0).getValueType() == MVT::i64 &&
             "Unexpected value type!");
      return Op0.getOperand(0);
    }

    // This is a target-specific version of a DAGCombine performed in
    // DAGCombiner::visitBITCAST. It performs the equivalent of:
    // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
    // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
    if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
        !Op0.getNode()->hasOneUse())
      break;
    SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64,
                                 Op0.getOperand(0));
    APInt SignBit = APInt::getSignMask(32).sext(64);
    if (Op0.getOpcode() == ISD::FNEG)
      return DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV,
                         DAG.getConstant(SignBit, DL, MVT::i64));

    assert(Op0.getOpcode() == ISD::FABS);
    return DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV,
                       DAG.getConstant(~SignBit, DL, MVT::i64));
  }
  case RISCVISD::GREVI:
  case RISCVISD::GORCI:
    return combineGREVI_GORCI(N, DCI.DAG);
  case ISD::OR:
    // Try to fold OR trees into the Zbp GREV/GORC/SHFL permutation nodes.
    if (auto GREV = combineORToGREV(SDValue(N, 0), DCI.DAG, Subtarget))
      return GREV;
    if (auto GORC = combineORToGORC(SDValue(N, 0), DCI.DAG, Subtarget))
      return GORC;
    if (auto SHFL = combineORToSHFL(SDValue(N, 0), DCI.DAG, Subtarget))
      return SHFL;
    break;
  case RISCVISD::SELECT_CC: {
    // Transform select_cc condition operands; only integer equality
    // comparisons are handled below.
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);
    auto CCVal = static_cast<ISD::CondCode>(N->getConstantOperandVal(2));
    if (!ISD::isIntEqualitySetCC(CCVal))
      break;

    // Fold (select_cc (setlt X, Y), 0, ne, trueV, falseV) ->
    //      (select_cc X, Y, lt, trueV, falseV)
    // Sometimes the setcc is introduced after select_cc has been formed.
    if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
        LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
      // If we're looking for eq 0 instead of ne 0, we need to invert the
      // condition.
      bool Invert = CCVal == ISD::SETEQ;
      CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
      if (Invert)
        CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());

      SDLoc DL(N);
      RHS = LHS.getOperand(1);
      LHS = LHS.getOperand(0);
      translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);

      SDValue TargetCC = DAG.getConstant(CCVal, DL, Subtarget.getXLenVT());
      return DAG.getNode(
          RISCVISD::SELECT_CC, DL, N->getValueType(0),
          {LHS, RHS, TargetCC, N->getOperand(3), N->getOperand(4)});
    }

    // Fold (select_cc (xor X, Y), 0, eq/ne, trueV, falseV) ->
    //      (select_cc X, Y, eq/ne, trueV, falseV)
    if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS))
      return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), N->getValueType(0),
                         {LHS.getOperand(0), LHS.getOperand(1),
                          N->getOperand(2), N->getOperand(3),
                          N->getOperand(4)});

    // (select_cc X, 1, setne, trueV, falseV) ->
    // (select_cc X, 0, seteq, trueV, falseV) if we can prove X is 0/1.
    // This can occur when legalizing some floating point comparisons.
    APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
    if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
      SDLoc DL(N);
      CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
      SDValue TargetCC = DAG.getConstant(CCVal, DL, Subtarget.getXLenVT());
      RHS = DAG.getConstant(0, DL, LHS.getValueType());
      return DAG.getNode(
          RISCVISD::SELECT_CC, DL, N->getValueType(0),
          {LHS, RHS, TargetCC, N->getOperand(3), N->getOperand(4)});
    }

    break;
  }
  case RISCVISD::BR_CC: {
    // Same folds as for SELECT_CC above, applied to the branch node
    // (operands: chain, LHS, RHS, cond, dest).
    SDValue LHS = N->getOperand(1);
    SDValue RHS = N->getOperand(2);
    ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(3))->get();
    if (!ISD::isIntEqualitySetCC(CCVal))
      break;

    // Fold (br_cc (setlt X, Y), 0, ne, dest) ->
    //      (br_cc X, Y, lt, dest)
    // Sometimes the setcc is introduced after br_cc has been formed.
    if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
        LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
      // If we're looking for eq 0 instead of ne 0, we need to invert the
      // condition.
      bool Invert = CCVal == ISD::SETEQ;
      CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
      if (Invert)
        CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());

      SDLoc DL(N);
      RHS = LHS.getOperand(1);
      LHS = LHS.getOperand(0);
      translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);

      return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
                         N->getOperand(0), LHS, RHS, DAG.getCondCode(CCVal),
                         N->getOperand(4));
    }

    // Fold (br_cc (xor X, Y), 0, eq/ne, dest) ->
    //      (br_cc X, Y, eq/ne, trueV, falseV)
    if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS))
      return DAG.getNode(RISCVISD::BR_CC, SDLoc(N), N->getValueType(0),
                         N->getOperand(0), LHS.getOperand(0), LHS.getOperand(1),
                         N->getOperand(3), N->getOperand(4));

    // (br_cc X, 1, setne, br_cc) ->
    // (br_cc X, 0, seteq, br_cc) if we can prove X is 0/1.
    // This can occur when legalizing some floating point comparisons.
    APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
    if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
      SDLoc DL(N);
      CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
      SDValue TargetCC = DAG.getCondCode(CCVal);
      RHS = DAG.getConstant(0, DL, LHS.getValueType());
      return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
                         N->getOperand(0), LHS, RHS, TargetCC,
                         N->getOperand(4));
    }
    break;
  }
  case ISD::FCOPYSIGN: {
    EVT VT = N->getValueType(0);
    if (!VT.isVector())
      break;
    // There is a form of VFSGNJ which injects the negated sign of its second
    // operand. Try and bubble any FNEG up after the extend/round to produce
    // this optimized pattern. Avoid modifying cases where FP_ROUND and
    // TRUNC=1.
    SDValue In2 = N->getOperand(1);
    // Avoid cases where the extend/round has multiple uses, as duplicating
    // those is typically more expensive than removing a fneg.
    if (!In2.hasOneUse())
      break;
    if (In2.getOpcode() != ISD::FP_EXTEND &&
        (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
      break;
    In2 = In2.getOperand(0);
    if (In2.getOpcode() != ISD::FNEG)
      break;
    SDLoc DL(N);
    SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
    return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
                       DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
  }
  case ISD::MGATHER:
  case ISD::MSCATTER: {
    if (!DCI.isBeforeLegalize())
      break;
    MaskedGatherScatterSDNode *MGSN = cast<MaskedGatherScatterSDNode>(N);
    SDValue Index = MGSN->getIndex();
    EVT IndexVT = Index.getValueType();
    MVT XLenVT = Subtarget.getXLenVT();
    // RISCV indexed loads only support the "unsigned unscaled" addressing
    // mode, so anything else must be manually legalized.
    bool NeedsIdxLegalization = MGSN->isIndexScaled() ||
                                (MGSN->isIndexSigned() &&
                                 IndexVT.getVectorElementType().bitsLT(XLenVT));
    if (!NeedsIdxLegalization)
      break;

    SDLoc DL(N);

    // Any index legalization should first promote to XLenVT, so we don't lose
    // bits when scaling. This may create an illegal index type so we let
    // LLVM's legalization take care of the splitting.
    if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
      IndexVT = IndexVT.changeVectorElementType(XLenVT);
      Index = DAG.getNode(MGSN->isIndexSigned() ? ISD::SIGN_EXTEND
                                                : ISD::ZERO_EXTEND,
                          DL, IndexVT, Index);
    }

    unsigned Scale = N->getConstantOperandVal(5);
    if (MGSN->isIndexScaled() && Scale != 1) {
      // Manually scale the indices by the element size.
      // TODO: Sanitize the scale operand here?
      assert(isPowerOf2_32(Scale) && "Expecting power-of-two types");
      SDValue SplatScale = DAG.getConstant(Log2_32(Scale), DL, IndexVT);
      Index = DAG.getNode(ISD::SHL, DL, IndexVT, Index, SplatScale);
    }

    // Rebuild the node with the legalized index and "unsigned unscaled"
    // index semantics.
    ISD::MemIndexType NewIndexTy = ISD::UNSIGNED_UNSCALED;
    if (const auto *MGN = dyn_cast<MaskedGatherSDNode>(N)) {
      return DAG.getMaskedGather(
          N->getVTList(), MGSN->getMemoryVT(), DL,
          {MGSN->getChain(), MGN->getPassThru(), MGSN->getMask(),
           MGSN->getBasePtr(), Index, MGN->getScale()},
          MGN->getMemOperand(), NewIndexTy, MGN->getExtensionType());
    }
    const auto *MSN = cast<MaskedScatterSDNode>(N);
    return DAG.getMaskedScatter(
        N->getVTList(), MGSN->getMemoryVT(), DL,
        {MGSN->getChain(), MSN->getValue(), MGSN->getMask(), MGSN->getBasePtr(),
         Index, MGSN->getScale()},
        MGSN->getMemOperand(), NewIndexTy, MSN->isTruncatingStore());
  }
  }

  return SDValue();
}

bool RISCVTargetLowering::isDesirableToCommuteWithShift(
    const SDNode *N, CombineLevel Level) const {
  // The following folds are only desirable if `(OP _, c1 << c2)` can be
  // materialised in fewer instructions than `(OP _, c1)`:
  //
  //   (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
  //   (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
  SDValue N0 = N->getOperand(0);
  EVT Ty = N0.getValueType();
  if (Ty.isScalarInteger() &&
      (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
    auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
    auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (C1 && C2) {
      const APInt &C1Int = C1->getAPIntValue();
      APInt ShiftedC1Int = C1Int << C2->getAPIntValue();

      // We can materialise `c1 << c2` into an add immediate, so it's "free",
      // and the combine should happen, to potentially allow further combines
      // later.
      if (ShiftedC1Int.getMinSignedBits() <= 64 &&
          isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
        return true;

      // We can materialise `c1` in an add immediate, so it's "free", and the
      // combine should be prevented.
      if (C1Int.getMinSignedBits() <= 64 &&
          isLegalAddImmediate(C1Int.getSExtValue()))
        return false;

      // Neither constant will fit into an immediate, so find materialisation
      // costs.
      int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
                                              Subtarget.is64Bit());
      int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
          ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit());

      // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
      // combine should be prevented.
      if (C1Cost < ShiftedC1Cost)
        return false;
    }
  }
  return true;
}

// Try to replace the constant of an (and X, C) with a constant that is cheaper
// to materialise on RISC-V, by setting bits that are not demanded.
bool RISCVTargetLowering::targetShrinkDemandedConstant(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    TargetLoweringOpt &TLO) const {
  // Delay this optimization as late as possible.
  if (!TLO.LegalOps)
    return false;

  EVT VT = Op.getValueType();
  if (VT.isVector())
    return false;

  // Only handle AND for now.
  if (Op.getOpcode() != ISD::AND)
    return false;

  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  if (!C)
    return false;

  const APInt &Mask = C->getAPIntValue();

  // Clear all non-demanded bits initially.
  APInt ShrunkMask = Mask & DemandedBits;

  // If the shrunk mask fits in sign extended 12 bits, let the target
  // independent code apply it.
  if (ShrunkMask.isSignedIntN(12))
    return false;

  // Try to make a smaller immediate by setting undemanded bits.

  // We need to be able to make a negative number through a combination of mask
  // and undemanded bits.
  APInt ExpandedMask = Mask | ~DemandedBits;
  if (!ExpandedMask.isNegative())
    return false;

  // What is the fewest number of bits we need to represent the negative number.
  unsigned MinSignedBits = ExpandedMask.getMinSignedBits();

  // Try to make a 12 bit negative immediate. If that fails try to make a 32
  // bit negative immediate unless the shrunk immediate already fits in 32 bits.
  APInt NewMask = ShrunkMask;
  if (MinSignedBits <= 12)
    NewMask.setBitsFrom(11);
  else if (MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
    NewMask.setBitsFrom(31);
  else
    return false;

  // Sanity check that our new mask is a subset of the demanded mask.
  assert(NewMask.isSubsetOf(ExpandedMask));

  // If we aren't changing the mask, just return true to keep it and prevent
  // the caller from optimizing.
  if (NewMask == Mask)
    return true;

  // Replace the constant with the new mask.
  SDLoc DL(Op);
  SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
  SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
  return TLO.CombineTo(Op, NewOp);
}

void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                        KnownBits &Known,
                                                        const APInt &DemandedElts,
                                                        const SelectionDAG &DAG,
                                                        unsigned Depth) const {
  unsigned BitWidth = Known.getBitWidth();
  unsigned Opc = Op.getOpcode();
  assert((Opc >= ISD::BUILTIN_OP_END ||
          Opc == ISD::INTRINSIC_WO_CHAIN ||
          Opc == ISD::INTRINSIC_W_CHAIN ||
          Opc == ISD::INTRINSIC_VOID) &&
         "Should use MaskedValueIsZero if you don't know whether Op"
         " is a target node!");

  Known.resetAll();
  switch (Opc) {
  default: break;
  case RISCVISD::SELECT_CC: {
    // The result is one of the two select values (operands 3 and 4), so only
    // bits known in both are known in the result.
    Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
    // If we don't know any bits, early out.
4850 if (Known.isUnknown()) 4851 break; 4852 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1); 4853 4854 // Only known if known in both the LHS and RHS. 4855 Known = KnownBits::commonBits(Known, Known2); 4856 break; 4857 } 4858 case RISCVISD::REMUW: { 4859 KnownBits Known2; 4860 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4861 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 4862 // We only care about the lower 32 bits. 4863 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32)); 4864 // Restore the original width by sign extending. 4865 Known = Known.sext(BitWidth); 4866 break; 4867 } 4868 case RISCVISD::DIVUW: { 4869 KnownBits Known2; 4870 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4871 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 4872 // We only care about the lower 32 bits. 4873 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32)); 4874 // Restore the original width by sign extending. 4875 Known = Known.sext(BitWidth); 4876 break; 4877 } 4878 case RISCVISD::READ_VLENB: 4879 // We assume VLENB is at least 8 bytes. 4880 // FIXME: The 1.0 draft spec defines minimum VLEN as 128 bits. 4881 Known.Zero.setLowBits(3); 4882 break; 4883 } 4884 } 4885 4886 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( 4887 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, 4888 unsigned Depth) const { 4889 switch (Op.getOpcode()) { 4890 default: 4891 break; 4892 case RISCVISD::SLLW: 4893 case RISCVISD::SRAW: 4894 case RISCVISD::SRLW: 4895 case RISCVISD::DIVW: 4896 case RISCVISD::DIVUW: 4897 case RISCVISD::REMUW: 4898 case RISCVISD::ROLW: 4899 case RISCVISD::RORW: 4900 case RISCVISD::GREVIW: 4901 case RISCVISD::GORCIW: 4902 case RISCVISD::FSLW: 4903 case RISCVISD::FSRW: 4904 // TODO: As the result is sign-extended, this is conservatively correct. 
    // A more precise answer could be calculated for SRAW depending on known
    // bits in the shift amount.
    return 33;
  case RISCVISD::SHFLI: {
    // There is no SHFLIW, but a i64 SHFLI with bit 4 of the control word
    // cleared doesn't affect bit 31. The upper 32 bits will be shuffled, but
    // will stay within the upper 32 bits. If there were more than 32 sign bits
    // before there will be at least 33 sign bits after.
    if (Op.getValueType() == MVT::i64 &&
        (Op.getConstantOperandVal(1) & 0x10) == 0) {
      unsigned Tmp = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
      if (Tmp > 32)
        return 33;
    }
    break;
  }
  case RISCVISD::VMV_X_S:
    // The number of sign bits of the scalar result is computed by obtaining the
    // element type of the input vector operand, subtracting its width from the
    // XLEN, and then adding one (sign bit within the element type). If the
    // element type is wider than XLen, the least-significant XLEN bits are
    // taken.
    if (Op.getOperand(0).getScalarValueSizeInBits() > Subtarget.getXLen())
      return 1;
    return Subtarget.getXLen() - Op.getOperand(0).getScalarValueSizeInBits() + 1;
  }

  return 1;
}

/// Expand the ReadCycleWide pseudo into the canonical RV32 retry loop that
/// reads a consistent {cycleh, cycle} pair of the 64-bit cycle CSR.
/// Returns the block that follows the expansion (DoneMBB).
static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
                                                  MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");

  // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
  // Should the count have wrapped while it was being read, we need to try
  // again.
  // ...
  // read:
  // rdcycleh x3 # load high word of cycle
  // rdcycle x2 # load low word of cycle
  // rdcycleh x4 # load high word of cycle
  // bne x3, x4, read # check if high word reads match, otherwise try again
  // ...

  MachineFunction &MF = *BB->getParent();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = ++BB->getIterator();

  MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MF.insert(It, LoopMBB);

  MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MF.insert(It, DoneMBB);

  // Transfer the remainder of BB and its successor edges to DoneMBB.
  DoneMBB->splice(DoneMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  DoneMBB->transferSuccessorsAndUpdatePHIs(BB);

  BB->addSuccessor(LoopMBB);

  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
  Register LoReg = MI.getOperand(0).getReg();
  Register HiReg = MI.getOperand(1).getReg();
  DebugLoc DL = MI.getDebugLoc();

  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  // CSRRS with rs1 = x0 is a plain CSR read (no bits set).
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
      .addReg(RISCV::X0);
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding)
      .addReg(RISCV::X0);
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
      .addReg(RISCV::X0);

  // Retry if the high half changed between the two reads (counter wrapped).
  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
      .addReg(HiReg)
      .addReg(ReadAgainReg)
      .addMBB(LoopMBB);

  LoopMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(DoneMBB);

  MI.eraseFromParent();

  return DoneMBB;
}

/// Expand SplitF64Pseudo: spill the FPR64 source to a shared stack slot and
/// reload it as two 32-bit GPR halves (lo at offset 0, hi at offset 4).
static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
                                             MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");

  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  const
      TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
  Register LoReg = MI.getOperand(0).getReg();
  Register HiReg = MI.getOperand(1).getReg();
  Register SrcReg = MI.getOperand(2).getReg();
  const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
  // Per-function slot shared by all f64<->i32-pair conversions.
  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);

  TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
                          RI);
  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMOLo =
      MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
  MachineMemOperand *MMOHi = MF.getMachineMemOperand(
      MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
  // Low half from offset 0, high half from offset 4 (little-endian layout).
  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMOLo);
  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
      .addFrameIndex(FI)
      .addImm(4)
      .addMemOperand(MMOHi);
  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

/// Expand BuildPairF64Pseudo: store the two GPR halves to a shared stack slot
/// and reload them as a single FPR64 value (inverse of emitSplitF64Pseudo).
static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
                                                 MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
         "Unexpected instruction");

  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
  Register DstReg = MI.getOperand(0).getReg();
  Register LoReg = MI.getOperand(1).getReg();
  Register HiReg = MI.getOperand(2).getReg();
  const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);

  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMOLo =
      MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
  MachineMemOperand *MMOHi = MF.getMachineMemOperand(
      MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
      .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMOLo);
  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
      .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
      .addFrameIndex(FI)
      .addImm(4)
      .addMemOperand(MMOHi);
  TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI);
  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

/// Returns true if MI is one of the Select_*_Using_CC_GPR pseudos that
/// emitSelectPseudo below expands.
static bool isSelectPseudo(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case RISCV::Select_GPR_Using_CC_GPR:
  case RISCV::Select_FPR16_Using_CC_GPR:
  case RISCV::Select_FPR32_Using_CC_GPR:
  case RISCV::Select_FPR64_Using_CC_GPR:
    return true;
  }
}

static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
                                           MachineBasicBlock *BB) {
  // To "insert" Select_* instructions, we actually have to insert the triangle
  // control-flow pattern. The incoming instructions know the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and the condcode to use to select the appropriate branch.
  //
  // We produce the following control flow:
  //     HeadMBB
  //     |  \
  //     |  IfFalseMBB
  //     | /
  //    TailMBB
  //
  // When we find a sequence of selects we attempt to optimize their emission
  // by sharing the control flow. Currently we only handle cases where we have
  // multiple selects with the exact same condition (same LHS, RHS and CC).
  // The selects may be interleaved with other instructions if the other
  // instructions meet some requirements we deem safe:
  // - They are debug instructions. Otherwise,
  // - They do not have side-effects, do not access memory and their inputs do
  //   not depend on the results of the select pseudo-instructions.
  // The TrueV/FalseV operands of the selects cannot depend on the result of
  // previous selects in the sequence.
  // These conditions could be further relaxed. See the X86 target for a
  // related approach and more information.
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());

  SmallVector<MachineInstr *, 4> SelectDebugValues;
  SmallSet<Register, 4> SelectDests;
  SelectDests.insert(MI.getOperand(0).getReg());

  MachineInstr *LastSelectPseudo = &MI;

  // Scan forward from MI, extending the sequence while it stays safe to share
  // a single branch.
  for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
       SequenceMBBI != E; ++SequenceMBBI) {
    if (SequenceMBBI->isDebugInstr())
      continue;
    else if (isSelectPseudo(*SequenceMBBI)) {
      // Stop if the select condition differs, or if a TrueV/FalseV operand
      // depends on an earlier select's result.
      if (SequenceMBBI->getOperand(1).getReg() != LHS ||
          SequenceMBBI->getOperand(2).getReg() != RHS ||
          SequenceMBBI->getOperand(3).getImm() != CC ||
          SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
          SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
        break;
      LastSelectPseudo = &*SequenceMBBI;
      SequenceMBBI->collectDebugValues(SelectDebugValues);
      SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
    } else {
      // A non-select instruction ends the sequence unless it is side-effect
      // free, memory free and independent of the selects' results.
      if (SequenceMBBI->hasUnmodeledSideEffects() ||
          SequenceMBBI->mayLoadOrStore())
        break;
      if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
            return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
          }))
        break;
    }
  }

  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction::iterator I = ++BB->getIterator();

  MachineBasicBlock *HeadMBB = BB;
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);

  F->insert(I, IfFalseMBB);
  F->insert(I, TailMBB);

  // Transfer debug instructions associated with the selects to TailMBB.
  for (MachineInstr *DebugInstr : SelectDebugValues) {
    TailMBB->push_back(DebugInstr->removeFromParent());
  }

  // Move all instructions after the sequence to TailMBB.
  TailMBB->splice(TailMBB->end(), HeadMBB,
                  std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi nodes for the selects.
  TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
  // Set the successors for HeadMBB.
  HeadMBB->addSuccessor(IfFalseMBB);
  HeadMBB->addSuccessor(TailMBB);

  // Insert appropriate branch.
  unsigned Opcode = getBranchOpcodeForIntCondCode(CC);

  BuildMI(HeadMBB, DL, TII.get(Opcode))
    .addReg(LHS)
    .addReg(RHS)
    .addMBB(TailMBB);

  // IfFalseMBB just falls through to TailMBB.
  IfFalseMBB->addSuccessor(TailMBB);

  // Create PHIs for all of the select pseudo-instructions.
  auto SelectMBBI = MI.getIterator();
  auto SelectEnd = std::next(LastSelectPseudo->getIterator());
  auto InsertionPoint = TailMBB->begin();
  while (SelectMBBI != SelectEnd) {
    auto Next = std::next(SelectMBBI);
    if (isSelectPseudo(*SelectMBBI)) {
      // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
      BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
              TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
          .addReg(SelectMBBI->getOperand(4).getReg())
          .addMBB(HeadMBB)
          .addReg(SelectMBBI->getOperand(5).getReg())
          .addMBB(IfFalseMBB);
      SelectMBBI->eraseFromParent();
    }
    SelectMBBI = Next;
  }

  // We introduced PHIs, so the function no longer satisfies NoPHIs.
  F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
  return TailMBB;
}

/// Follow a chain of full COPY instructions back to the real defining
/// instruction. Returns nullptr if the chain reaches a physical register or
/// an undefined vreg.
static MachineInstr *elideCopies(MachineInstr *MI,
                                 const MachineRegisterInfo &MRI) {
  while (true) {
    if (!MI->isFullCopy())
      return MI;
    if
    (!Register::isVirtualRegister(MI->getOperand(1).getReg()))
      return nullptr;
    MI = MRI.getVRegDef(MI->getOperand(1).getReg());
    if (!MI)
      return nullptr;
  }
}

/// Insert a PseudoVSETVLI/PseudoVSETIVLI before an RVV pseudo MI, encoding
/// its SEW/LMUL/tail policy into a vtype immediate. VLIndex < 0 means the
/// pseudo has no VL operand and VL must be left unmodified.
static MachineBasicBlock *addVSetVL(MachineInstr &MI, MachineBasicBlock *BB,
                                    int VLIndex, unsigned SEWIndex,
                                    RISCVVLMUL VLMul, bool ForceTailAgnostic) {
  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();

  unsigned SEW = MI.getOperand(SEWIndex).getImm();
  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
  RISCVVSEW ElementWidth = static_cast<RISCVVSEW>(Log2_32(SEW / 8));

  MachineRegisterInfo &MRI = MF.getRegInfo();

  auto BuildVSETVLI = [&]() {
    if (VLIndex >= 0) {
      Register DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
      Register VLReg = MI.getOperand(VLIndex).getReg();

      // VL might be a compile time constant, but isel would have to put it
      // in a register. See if VL comes from an ADDI X0, imm.
      if (VLReg.isVirtual()) {
        MachineInstr *Def = MRI.getVRegDef(VLReg);
        if (Def && Def->getOpcode() == RISCV::ADDI &&
            Def->getOperand(1).getReg() == RISCV::X0 &&
            Def->getOperand(2).isImm()) {
          uint64_t Imm = Def->getOperand(2).getImm();
          // VSETIVLI allows a 5-bit zero extended immediate.
          if (isUInt<5>(Imm))
            return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETIVLI))
                .addReg(DestReg, RegState::Define | RegState::Dead)
                .addImm(Imm);
        }
      }

      return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI))
          .addReg(DestReg, RegState::Define | RegState::Dead)
          .addReg(VLReg);
    }

    // With no VL operator in the pseudo, do not modify VL (rd = X0, rs1 = X0).
    return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addReg(RISCV::X0, RegState::Kill);
  };

  MachineInstrBuilder MIB = BuildVSETVLI();

  // Default to tail agnostic unless the destination is tied to a source. In
  // that case the user would have some control over the tail values. The tail
  // policy is also ignored on instructions that only update element 0 like
  // vmv.s.x or reductions so use agnostic there to match the common case.
  // FIXME: This is conservatively correct, but we might want to detect that
  // the input is undefined.
  bool TailAgnostic = true;
  unsigned UseOpIdx;
  if (!ForceTailAgnostic && MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
    TailAgnostic = false;
    // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic.
    const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
    MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg());
    if (UseMI) {
      UseMI = elideCopies(UseMI, MRI);
      if (UseMI && UseMI->isImplicitDef())
        TailAgnostic = true;
    }
  }

  // For simplicity we reuse the vtype representation here.
  MIB.addImm(RISCVVType::encodeVTYPE(VLMul, ElementWidth,
                                     /*TailAgnostic*/ TailAgnostic,
                                     /*MaskAgnostic*/ false));

  // Remove (now) redundant operands from pseudo
  if (VLIndex >= 0) {
    MI.getOperand(VLIndex).setReg(RISCV::NoRegister);
    MI.getOperand(VLIndex).setIsKill(false);
  }

  return BB;
}

MachineBasicBlock *
RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                 MachineBasicBlock *BB) const {
  uint64_t TSFlags = MI.getDesc().TSFlags;

  // RVV pseudos flag their SEW/VL operand positions in TSFlags; expand them
  // by inserting the matching vsetvli.
  if (TSFlags & RISCVII::HasSEWOpMask) {
    unsigned NumOperands = MI.getNumExplicitOperands();
    int VLIndex = (TSFlags & RISCVII::HasVLOpMask) ?
        NumOperands - 2 : -1;
    unsigned SEWIndex = NumOperands - 1;
    bool ForceTailAgnostic = TSFlags & RISCVII::ForceTailAgnosticMask;

    RISCVVLMUL VLMul = static_cast<RISCVVLMUL>((TSFlags & RISCVII::VLMulMask) >>
                                               RISCVII::VLMulShift);
    return addVSetVL(MI, BB, VLIndex, SEWIndex, VLMul, ForceTailAgnostic);
  }

  // Dispatch the remaining custom-inserted pseudos to their expanders.
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected instr type to insert");
  case RISCV::ReadCycleWide:
    assert(!Subtarget.is64Bit() &&
           "ReadCycleWrite is only to be used on riscv32");
    return emitReadCycleWidePseudo(MI, BB);
  case RISCV::Select_GPR_Using_CC_GPR:
  case RISCV::Select_FPR16_Using_CC_GPR:
  case RISCV::Select_FPR32_Using_CC_GPR:
  case RISCV::Select_FPR64_Using_CC_GPR:
    return emitSelectPseudo(MI, BB);
  case RISCV::BuildPairF64Pseudo:
    return emitBuildPairF64Pseudo(MI, BB);
  case RISCV::SplitF64Pseudo:
    return emitSplitF64Pseudo(MI, BB);
  }
}

// Calling Convention Implementation.
// The expectations for frontend ABI lowering vary from target to target.
// Ideally, an LLVM frontend would be able to avoid worrying about many ABI
// details, but this is a longer term goal. For now, we simply try to keep the
// role of the frontend as simple and well-defined as possible. The rules can
// be summarised as:
// * Never split up large scalar arguments. We handle them here.
// * If a hardfloat calling convention is being used, and the struct may be
//   passed in a pair of registers (fp+fp, int+fp), and both registers are
//   available, then pass as two separate arguments. If either the GPRs or FPRs
//   are exhausted, then pass according to the rule below.
// * If a struct could never be passed in registers or directly in a stack
//   slot (as it is larger than 2*XLEN and the floating point rules don't
//   apply), then pass it using a pointer with the byval attribute.
// * If a struct is less than 2*XLEN, then coerce to either a two-element
//   word-sized array or a 2*XLEN scalar (depending on alignment).
// * The frontend can determine whether a struct is returned by reference or
//   not based on its size and fields. If it will be returned by reference, the
//   frontend must modify the prototype so a pointer with the sret annotation is
//   passed as the first argument. This is not necessary for large scalar
//   returns.
// * Struct return values and varargs should be coerced to structs containing
//   register-size fields in the same situations they would be for fixed
//   arguments.

// Integer argument registers a0-a7.
static const MCPhysReg ArgGPRs[] = {
  RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
  RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
};
// Floating-point argument registers fa0-fa7 in each width (these alias).
static const MCPhysReg ArgFPR16s[] = {
  RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
  RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
};
static const MCPhysReg ArgFPR32s[] = {
  RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
  RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
};
static const MCPhysReg ArgFPR64s[] = {
  RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
  RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
};
// This is an interim calling convention and it may be changed in the future.
// Vector argument registers, grouped by register-group size (LMUL).
static const MCPhysReg ArgVRs[] = {
    RISCV::V8,  RISCV::V9,  RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
    RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
    RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2,
                                     RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
                                     RISCV::V20M2, RISCV::V22M2};
static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
                                     RISCV::V20M4};
static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};

// Pass a 2*XLEN argument that has been split into two XLEN values through
// registers or the stack as necessary. Returns false (success) in all paths.
static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
                                ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
                                MVT ValVT2, MVT LocVT2,
                                ISD::ArgFlagsTy ArgFlags2) {
  unsigned XLenInBytes = XLen / 8;
  if (Register Reg = State.AllocateReg(ArgGPRs)) {
    // At least one half can be passed via register.
    State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
                                     VA1.getLocVT(), CCValAssign::Full));
  } else {
    // Both halves must be passed on the stack, with proper alignment.
    Align StackAlign =
        std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign());
    State.addLoc(
        CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
                            State.AllocateStack(XLenInBytes, StackAlign),
                            VA1.getLocVT(), CCValAssign::Full));
    State.addLoc(CCValAssign::getMem(
        ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
        LocVT2, CCValAssign::Full));
    return false;
  }

  if (Register Reg = State.AllocateReg(ArgGPRs)) {
    // The second half can also be passed via register.
    State.addLoc(
        CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
  } else {
    // The second half is passed via the stack, without additional alignment.
    State.addLoc(CCValAssign::getMem(
        ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
        LocVT2, CCValAssign::Full));
  }

  return false;
}

// Implements the RISC-V calling convention. Returns true upon failure.
static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
                     MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
                     ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
                     bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
                     Optional<unsigned> FirstMaskArgument) {
  unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
  assert(XLen == 32 || XLen == 64);
  MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;

  // Any return value split in to more than two values can't be returned
  // directly. Vectors are returned via the available vector registers.
  if (!LocVT.isVector() && IsRet && ValNo > 1)
    return true;

  // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a
  // variadic argument, or if no F16/F32 argument registers are available.
  bool UseGPRForF16_F32 = true;
  // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a
  // variadic argument, or if no F64 argument registers are available.
  bool UseGPRForF64 = true;

  switch (ABI) {
  default:
    llvm_unreachable("Unexpected ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_LP64:
    break;
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_LP64F:
    UseGPRForF16_F32 = !IsFixed;
    break;
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64D:
    UseGPRForF16_F32 = !IsFixed;
    UseGPRForF64 = !IsFixed;
    break;
  }

  // FPR16, FPR32, and FPR64 alias each other.
  // Checking one width is enough to know all FP argument registers are gone.
  if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) {
    UseGPRForF16_F32 = true;
    UseGPRForF64 = true;
  }

  // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
  // similar local variables rather than directly checking against the target
  // ABI.

  if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::f32)) {
    LocVT = XLenVT;
    LocInfo = CCValAssign::BCvt;
  } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
    LocVT = MVT::i64;
    LocInfo = CCValAssign::BCvt;
  }

  // If this is a variadic argument, the RISC-V calling convention requires
  // that it is assigned an 'even' or 'aligned' register if it has 8-byte
  // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
  // be used regardless of whether the original argument was split during
  // legalisation or not. The argument will not be passed by registers if the
  // original type is larger than 2*XLEN, so the register alignment rule does
  // not apply.
  unsigned TwoXLenInBytes = (2 * XLen) / 8;
  if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
      DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
    unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
    // Skip 'odd' register if necessary.
    if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
      State.AllocateReg(ArgGPRs);
  }

  SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
  SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
      State.getPendingArgFlags();

  assert(PendingLocs.size() == PendingArgFlags.size() &&
         "PendingLocs and PendingArgFlags out of sync");

  // Handle passing f64 on RV32D with a soft float ABI or when floating point
  // registers are exhausted.
  if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
    assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
           "Can't lower f64 if it is split");
    // Depending on available argument GPRS, f64 may be passed in a pair of
    // GPRs, split between a GPR and the stack, or passed completely on the
    // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
    // cases.
    Register Reg = State.AllocateReg(ArgGPRs);
    LocVT = MVT::i32;
    if (!Reg) {
      // No GPRs left: the whole f64 goes on the stack.
      unsigned StackOffset = State.AllocateStack(8, Align(8));
      State.addLoc(
          CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
      return false;
    }
    // First half in Reg; reserve a second GPR (or stack slot) for the other
    // half. Only one CCValAssign is recorded; the lowering code knows the
    // second half follows.
    if (!State.AllocateReg(ArgGPRs))
      State.AllocateStack(4, Align(4));
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // Fixed-length vectors are located in the corresponding scalable-vector
  // container types.
  if (ValVT.isFixedLengthVector())
    LocVT = TLI.getContainerForFixedLengthVector(LocVT);

  // Split arguments might be passed indirectly, so keep track of the pending
  // values. Split vectors are passed via a mix of registers and indirectly, so
  // treat them as we would any other argument.
  if (!LocVT.isVector() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
    LocVT = XLenVT;
    LocInfo = CCValAssign::Indirect;
    PendingLocs.push_back(
        CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
    PendingArgFlags.push_back(ArgFlags);
    if (!ArgFlags.isSplitEnd()) {
      return false;
    }
  }

  // If the split argument only had two elements, it should be passed directly
  // in registers or on the stack.
  if (!LocVT.isVector() && ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) {
    assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
    // Apply the normal calling convention rules to the first half of the
    // split argument.
    CCValAssign VA = PendingLocs[0];
    ISD::ArgFlagsTy AF = PendingArgFlags[0];
    PendingLocs.clear();
    PendingArgFlags.clear();
    return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
                               ArgFlags);
  }

  // Allocate to a register if possible, or else a stack slot.
  Register Reg;
  if (ValVT == MVT::f16 && !UseGPRForF16_F32)
    Reg = State.AllocateReg(ArgFPR16s);
  else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
    Reg = State.AllocateReg(ArgFPR32s);
  else if (ValVT == MVT::f64 && !UseGPRForF64)
    Reg = State.AllocateReg(ArgFPR64s);
  else if (ValVT.isVector()) {
    const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT);
    if (RC == &RISCV::VRRegClass) {
      // Assign the first mask argument to V0.
      // This is an interim calling convention and it may be changed in the
      // future.
      if (FirstMaskArgument.hasValue() &&
          ValNo == FirstMaskArgument.getValue()) {
        Reg = State.AllocateReg(RISCV::V0);
      } else {
        Reg = State.AllocateReg(ArgVRs);
      }
    } else if (RC == &RISCV::VRM2RegClass) {
      Reg = State.AllocateReg(ArgVRM2s);
    } else if (RC == &RISCV::VRM4RegClass) {
      Reg = State.AllocateReg(ArgVRM4s);
    } else if (RC == &RISCV::VRM8RegClass) {
      Reg = State.AllocateReg(ArgVRM8s);
    } else {
      llvm_unreachable("Unhandled class register for ValueType");
    }
    if (!Reg) {
      // For return values, the vector must be passed fully via registers or
      // via the stack.
      // FIXME: The proposed vector ABI only mandates v8-v15 for return values,
      // but we're using all of them.
      if (IsRet)
        return true;
      LocInfo = CCValAssign::Indirect;
      // Try using a GPR to pass the address
      Reg = State.AllocateReg(ArgGPRs);
      LocVT = XLenVT;
    }
  } else
    Reg = State.AllocateReg(ArgGPRs);
  unsigned StackOffset =
      Reg ? 0 : State.AllocateStack(XLen / 8, Align(XLen / 8));

  // If we reach this point and PendingLocs is non-empty, we must be at the
  // end of a split argument that must be passed indirectly.
  if (!PendingLocs.empty()) {
    assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
    assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");

    // All parts share the single register/stack slot holding the pointer.
    for (auto &It : PendingLocs) {
      if (Reg)
        It.convertToReg(Reg);
      else
        It.convertToMem(StackOffset);
      State.addLoc(It);
    }
    PendingLocs.clear();
    PendingArgFlags.clear();
    return false;
  }

  assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
          (TLI.getSubtarget().hasStdExtV() && ValVT.isVector())) &&
         "Expected an XLenVT or vector types at this stage");

  if (Reg) {
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // When a floating-point value is passed on the stack, no bit-conversion is
  // needed.
  if (ValVT.isFloatingPoint()) {
    LocVT = ValVT;
    LocInfo = CCValAssign::Full;
  }
  State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
  return false;
}

/// Return the index of the first i1-element vector argument (the mask
/// argument, which is pre-assigned to V0), or None if there is none.
template <typename ArgTy>
static Optional<unsigned> preAssignMask(const ArgTy &Args) {
  for (const auto &ArgIdx : enumerate(Args)) {
    MVT ArgVT = ArgIdx.value().VT;
    if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1)
      return ArgIdx.index();
  }
  return None;
}

/// Run CC_RISCV over the incoming arguments (or return values when IsRet),
/// recording the resulting locations in CCInfo. Aborts on unhandled types.
void RISCVTargetLowering::analyzeInputArgs(
    MachineFunction &MF, CCState &CCInfo,
    const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const {
  unsigned NumArgs = Ins.size();
  FunctionType *FType = MF.getFunction().getFunctionType();

  Optional<unsigned> FirstMaskArgument;
  if (Subtarget.hasStdExtV())
    FirstMaskArgument = preAssignMask(Ins);

  for (unsigned i = 0; i != NumArgs; ++i) {
    MVT ArgVT = Ins[i].VT;
    ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;

    // The original IR type is only recoverable for the return type or for
    // arguments that weren't introduced by legalisation.
    Type *ArgTy = nullptr;
    if (IsRet)
      ArgTy = FType->getReturnType();
    else if (Ins[i].isOrigArg())
      ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());

    RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
    if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
                 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
                 FirstMaskArgument)) {
      LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
                        << EVT(ArgVT).getEVTString() << '\n');
      llvm_unreachable(nullptr);
    }
  }
}

/// Run CC_RISCV over the outgoing arguments (or return values when IsRet),
/// recording the resulting locations in CCInfo. CLI (when non-null) supplies
/// the original IR types of call arguments.
void RISCVTargetLowering::analyzeOutputArgs(
    MachineFunction &MF, CCState &CCInfo,
    const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
    CallLoweringInfo *CLI) const {
  unsigned NumArgs = Outs.size();

  Optional<unsigned> FirstMaskArgument;
  if (Subtarget.hasStdExtV())
    FirstMaskArgument = preAssignMask(Outs);

  for (unsigned i = 0; i != NumArgs; i++) {
    MVT ArgVT = Outs[i].VT;
    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
    Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;

    RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
    if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
                 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
                 FirstMaskArgument)) {
      LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
                        << EVT(ArgVT).getEVTString() << "\n");
      llvm_unreachable(nullptr);
    }
  }
}

// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
// values.
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
                                   const CCValAssign &VA, const SDLoc &DL,
                                   const RISCVSubtarget &Subtarget) {
  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
    // Fixed-length vectors travel in their scalable container; extract the
    // fixed-length value back out.
    if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
      Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
    break;
  case CCValAssign::BCvt:
    // f16/f32 travel in GPRs via the FMV nodes; anything else is a bitcast.
    if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
      Val = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, Val);
    else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
      Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
    else
      Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
    break;
  }
  return Val;
}

// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
5733 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, 5734 const CCValAssign &VA, const SDLoc &DL, 5735 const RISCVTargetLowering &TLI) { 5736 MachineFunction &MF = DAG.getMachineFunction(); 5737 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 5738 EVT LocVT = VA.getLocVT(); 5739 SDValue Val; 5740 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT()); 5741 Register VReg = RegInfo.createVirtualRegister(RC); 5742 RegInfo.addLiveIn(VA.getLocReg(), VReg); 5743 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); 5744 5745 if (VA.getLocInfo() == CCValAssign::Indirect) 5746 return Val; 5747 5748 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget()); 5749 } 5750 5751 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, 5752 const CCValAssign &VA, const SDLoc &DL, 5753 const RISCVSubtarget &Subtarget) { 5754 EVT LocVT = VA.getLocVT(); 5755 5756 switch (VA.getLocInfo()) { 5757 default: 5758 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 5759 case CCValAssign::Full: 5760 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector()) 5761 Val = convertToScalableVector(LocVT, Val, DAG, Subtarget); 5762 break; 5763 case CCValAssign::BCvt: 5764 if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16) 5765 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val); 5766 else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 5767 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val); 5768 else 5769 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val); 5770 break; 5771 } 5772 return Val; 5773 } 5774 5775 // The caller is responsible for loading the full value if the argument is 5776 // passed with CCValAssign::Indirect. 
// Unpack an incoming argument that was assigned a stack slot: materialize a
// fixed frame object at the incoming offset and load the value from it.
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
                                const CCValAssign &VA, const SDLoc &DL) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  EVT LocVT = VA.getLocVT();
  EVT ValVT = VA.getValVT();
  EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
  int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
                                 VA.getLocMemOffset(), /*Immutable=*/true);
  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
  SDValue Val;

  // Every LocInfo kind that can reach memory here is loaded without
  // extension; the switch exists to catch unexpected kinds.
  ISD::LoadExtType ExtType;
  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
  case CCValAssign::Indirect:
  case CCValAssign::BCvt:
    ExtType = ISD::NON_EXTLOAD;
    break;
  }
  Val = DAG.getExtLoad(
      ExtType, DL, LocVT, Chain, FIN,
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
  return Val;
}

// Unpack an incoming f64 argument on RV32 under an ABI where f64 is located
// in a pair of i32 locations (the assert checks LocVT == i32 against
// ValVT == f64). The value arrives either entirely on the stack, split
// between a GPR and the stack, or in two adjacent GPRs, and is reassembled
// with BuildPairF64 in the register cases.
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
                                       const CCValAssign &VA, const SDLoc &DL) {
  assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
         "Unexpected VA");
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  if (VA.isMemLoc()) {
    // f64 is passed entirely on the stack: load it in one piece.
    int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
    return DAG.getLoad(MVT::f64, DL, Chain, FIN,
                       MachinePointerInfo::getFixedStack(MF, FI));
  }

  assert(VA.isRegLoc() && "Expected register VA assignment");

  Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
  RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
  SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
  SDValue Hi;
  if (VA.getLocReg() == RISCV::X17) {
    // The low half landed in the last GPR argument register (a7), so the
    // second half of the f64 is passed on the stack, at offset 0 of the
    // incoming argument area.
    int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
    Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
                     MachinePointerInfo::getFixedStack(MF, FI));
  } else {
    // Second half of f64 is passed in the next GPR.
    Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
    RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
    Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
  }
  return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
}

// FastCC has less than 1% performance improvement for some particular
// benchmark. But theoretically, it may have benefit for some cases.
static bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
                            CCValAssign::LocInfo LocInfo,
                            ISD::ArgFlagsTy ArgFlags, CCState &State) {

  if (LocVT == MVT::i32 || LocVT == MVT::i64) {
    // Normal argument registers first (X10-X17), then temporaries.
    // X5 and X6 might be used for save-restore libcall.
    static const MCPhysReg GPRList[] = {
        RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
        RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7,  RISCV::X28,
        RISCV::X29, RISCV::X30, RISCV::X31};
    if (unsigned Reg = State.AllocateReg(GPRList)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f16) {
    // FP argument registers F10-F17 first, then the remaining FPRs listed.
    static const MCPhysReg FPR16List[] = {
        RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
        RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H,  RISCV::F1_H,
        RISCV::F2_H,  RISCV::F3_H,  RISCV::F4_H,  RISCV::F5_H,  RISCV::F6_H,
        RISCV::F7_H,  RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
    if (unsigned Reg = State.AllocateReg(FPR16List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f32) {
    static const MCPhysReg FPR32List[] = {
        RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
        RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F,  RISCV::F1_F,
        RISCV::F2_F,  RISCV::F3_F,  RISCV::F4_F,  RISCV::F5_F,  RISCV::F6_F,
        RISCV::F7_F,  RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
    if (unsigned Reg = State.AllocateReg(FPR32List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f64) {
    static const MCPhysReg FPR64List[] = {
        RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
        RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D,  RISCV::F1_D,
        RISCV::F2_D,  RISCV::F3_D,  RISCV::F4_D,  RISCV::F5_D,  RISCV::F6_D,
        RISCV::F7_D,  RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
    if (unsigned Reg = State.AllocateReg(FPR64List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  // No register was available: fall back to the stack, using a 4-byte slot
  // for 32-bit values and an 8-byte slot for 64-bit values.
  if (LocVT == MVT::i32 || LocVT == MVT::f32) {
    unsigned Offset4 = State.AllocateStack(4, Align(4));
    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
    return false;
  }

  if (LocVT == MVT::i64 || LocVT == MVT::f64) {
    unsigned Offset5 = State.AllocateStack(8, Align(8));
    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
    return false;
  }

  return true; // CC didn't match.
}

// Calling convention used for calls adhering to the Glasgow Haskell Compiler
// (GHC) convention: values are only ever placed in the fixed STG registers
// below; there is no stack fallback (running out of registers is fatal).
static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
                         CCValAssign::LocInfo LocInfo,
                         ISD::ArgFlagsTy ArgFlags, CCState &State) {

  if (LocVT == MVT::i32 || LocVT == MVT::i64) {
    // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
    //                        s1    s2  s3  s4  s5  s6  s7  s8  s9  s10 s11
    static const MCPhysReg GPRList[] = {
        RISCV::X9,  RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
        RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
    if (unsigned Reg = State.AllocateReg(GPRList)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f32) {
    // Pass in STG registers: F1, ..., F6
    //                        fs0 ... fs5
    static const MCPhysReg FPR32List[] = {RISCV::F8_F,  RISCV::F9_F,
                                          RISCV::F18_F, RISCV::F19_F,
                                          RISCV::F20_F, RISCV::F21_F};
    if (unsigned Reg = State.AllocateReg(FPR32List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f64) {
    // Pass in STG registers: D1, ..., D6
    //                        fs6 ... fs11
    static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
                                          RISCV::F24_D, RISCV::F25_D,
                                          RISCV::F26_D, RISCV::F27_D};
    if (unsigned Reg = State.AllocateReg(FPR64List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  // GHC has no stack fallback: exhausting the register set is fatal.
  report_fatal_error("No registers left in GHC calling convention");
  return true;
}

// Transform physical registers into virtual registers.
SDValue RISCVTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  MachineFunction &MF = DAG.getMachineFunction();

  switch (CallConv) {
  default:
    report_fatal_error("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
    break;
  case CallingConv::GHC:
    // The GHC convention places values in F/D registers unconditionally, so
    // it can only be used when both extensions are present.
    if (!MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtF] ||
        !MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtD])
      report_fatal_error(
          "GHC calling convention requires the F and D instruction set extensions");
  }

  const Function &Func = MF.getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    // Interrupt handlers take no arguments and only support the "user",
    // "supervisor" and "machine" attribute values.
    if (!Func.arg_empty())
      report_fatal_error(
          "Functions with the interrupt attribute cannot have arguments!");

    StringRef Kind =
        MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
      report_fatal_error(
          "Function interrupt attribute argument not supported!");
  }

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned XLenInBytes = Subtarget.getXLen() / 8;
  // Used with varargs to accumulate store chains.
  std::vector<SDValue> OutChains;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::Fast)
    CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_FastCC);
  else if (CallConv == CallingConv::GHC)
    CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
  else
    analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue;
    // Passing f64 on RV32D with a soft float ABI must be handled as a special
    // case.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
      ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
    else if (VA.isRegLoc())
      ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
    else
      ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);

    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // If the original argument was split and passed by reference (e.g. i128
      // on RV32), we need to load all parts of it here (using the same
      // address). Vectors may be partly split to registers and partly to the
      // stack, in which case the base address is partly offset and subsequent
      // stores are relative to that.
      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
                                   MachinePointerInfo()));
      unsigned ArgIndex = Ins[i].OrigArgIndex;
      unsigned ArgPartOffset = Ins[i].PartOffset;
      assert(VA.getValVT().isVector() || ArgPartOffset == 0);
      // Consume all further parts of the same original argument, loading
      // each at its offset from the shared base address.
      while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[i + 1];
        unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
                                      DAG.getIntPtrConstant(PartOffset, DL));
        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
                                     MachinePointerInfo()));
        ++i;
      }
      continue;
    }
    InVals.push_back(ArgValue);
  }

  if (IsVarArg) {
    ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
    const TargetRegisterClass *RC = &RISCV::GPRRegClass;
    MachineFrameInfo &MFI = MF.getFrameInfo();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

    // Offset of the first variable argument from stack pointer, and size of
    // the vararg save area. For now, the varargs save area is either zero or
    // large enough to hold a0-a7.
    int VaArgOffset, VarArgsSaveSize;

    // If all registers are allocated, then all varargs must be passed on the
    // stack and we don't need to save any argregs.
    if (ArgRegs.size() == Idx) {
      VaArgOffset = CCInfo.getNextStackOffset();
      VarArgsSaveSize = 0;
    } else {
      VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
      VaArgOffset = -VarArgsSaveSize;
    }

    // Record the frame index of the first variable argument
    // which is a value necessary to VASTART.
    int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
    RVFI->setVarArgsFrameIndex(FI);

    // If saving an odd number of registers then create an extra stack slot to
    // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
    // offsets to even-numbered registers remain 2*XLEN-aligned.
    if (Idx % 2) {
      MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
      VarArgsSaveSize += XLenInBytes;
    }

    // Copy the integer registers that may have been used for passing varargs
    // to the vararg save area.
    for (unsigned I = Idx; I < ArgRegs.size();
         ++I, VaArgOffset += XLenInBytes) {
      const Register Reg = RegInfo.createVirtualRegister(RC);
      RegInfo.addLiveIn(ArgRegs[I], Reg);
      SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
      FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
      SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
                                   MachinePointerInfo::getFixedStack(MF, FI));
      cast<StoreSDNode>(Store.getNode())
          ->getMemOperand()
          ->setValue((Value *)nullptr);
      OutChains.push_back(Store);
    }
    RVFI->setVarArgsSaveSize(VarArgsSaveSize);
  }

  // All stores are grouped in one node to allow the matching between
  // the size of Ins and InVals. This only happens for vararg functions.
  if (!OutChains.empty()) {
    OutChains.push_back(Chain);
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  }

  return Chain;
}

/// isEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization.
/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
bool RISCVTargetLowering::isEligibleForTailCallOptimization(
    CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
    const SmallVector<CCValAssign, 16> &ArgLocs) const {

  auto &Callee = CLI.Callee;
  auto CalleeCC = CLI.CallConv;
  auto &Outs = CLI.Outs;
  auto &Caller = MF.getFunction();
  auto CallerCC = Caller.getCallingConv();

  // Exception-handling functions need a special set of instructions to
  // indicate a return to the hardware. Tail-calling another function would
  // probably break this.
  // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
  // should be expanded as new function attributes are introduced.
  if (Caller.hasFnAttribute("interrupt"))
    return false;

  // Do not tail call opt if the stack is used to pass parameters.
  if (CCInfo.getNextStackOffset() != 0)
    return false;

  // Do not tail call opt if any parameters need to be passed indirectly.
  // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
  // passed indirectly. So the address of the value will be passed in a
  // register, or if not available, then the address is put on the stack. In
  // order to pass indirectly, space on the stack often needs to be allocated
  // in order to store the value. In this case the CCInfo.getNextStackOffset()
  // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs
  // are passed CCValAssign::Indirect.
  for (auto &VA : ArgLocs)
    if (VA.getLocInfo() == CCValAssign::Indirect)
      return false;

  // Do not tail call opt if either caller or callee uses struct return
  // semantics.
  auto IsCallerStructRet = Caller.hasStructRetAttr();
  auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
  if (IsCallerStructRet || IsCalleeStructRet)
    return false;

  // Externally-defined functions with weak linkage should not be
  // tail-called. The behaviour of branch instructions in this situation (as
  // used for tail calls) is implementation-defined, so we cannot rely on the
  // linker replacing the tail call with a return.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = G->getGlobal();
    if (GV->hasExternalWeakLinkage())
      return false;
  }

  // The callee has to preserve all registers the caller needs to preserve.
  // When the conventions differ, compare the two preserved-register masks.
  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  if (CalleeCC != CallerCC) {
    const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
      return false;
  }

  // Byval parameters hand the function a pointer directly into the stack area
  // we want to reuse during a tail call. Working around this *is* possible
  // but less efficient and uglier in LowerCall.
  for (auto &Arg : Outs)
    if (Arg.Flags.isByVal())
      return false;

  return true;
}

// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
// and output parameter nodes.
SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                       SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();

  MachineFunction &MF = DAG.getMachineFunction();

  // Analyze the operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::Fast)
    ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC);
  else if (CallConv == CallingConv::GHC)
    ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
  else
    analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);

  // Check if it's really possible to do a tail call.
  if (IsTailCall)
    IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);

  if (IsTailCall)
    ++NumTailCalls;
  else if (CLI.CB && CLI.CB->isMustTailCall())
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getNextStackOffset();

  // Create local copies for byval args.
  SmallVector<SDValue, 8> ByValArgs;
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    if (!Flags.isByVal())
      continue;

    SDValue Arg = OutVals[i];
    unsigned Size = Flags.getByValSize();
    Align Alignment = Flags.getNonZeroByValAlign();

    int FI =
        MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
    SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
    SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);

    Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
                          /*IsVolatile=*/false,
                          /*AlwaysInline=*/false, IsTailCall,
                          MachinePointerInfo(), MachinePointerInfo());
    ByValArgs.push_back(FIPtr);
  }

  if (!IsTailCall)
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);

  // Copy argument values to their designated locations.
  SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    // Handle passing f64 on RV32D with a soft float ABI as a special case.
    bool IsF64OnRV32DSoftABI =
        VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
    if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
      // Split the f64 into two i32 halves with SplitF64; the low half goes
      // in the assigned register, the high half either in the next register
      // or (when the low half is in X17/a7) on the stack.
      SDValue SplitF64 = DAG.getNode(
          RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);

      Register RegLo = VA.getLocReg();
      RegsToPass.push_back(std::make_pair(RegLo, Lo));

      if (RegLo == RISCV::X17) {
        // Second half of f64 is passed on the stack.
        // Work out the address of the stack slot.
        if (!StackPtr.getNode())
          StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
        // Emit the store.
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
      } else {
        // Second half of f64 is passed in another GPR.
        assert(RegLo < RISCV::X31 && "Invalid register pair");
        Register RegHigh = RegLo + 1;
        RegsToPass.push_back(std::make_pair(RegHigh, Hi));
      }
      continue;
    }

    // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
    // as any other MemLoc.

    // Promote the value if needed.
    // For now, only handle fully promoted and indirect arguments.
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
      SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT);
      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
                       MachinePointerInfo::getFixedStack(MF, FI)));
      // If the original argument was split (e.g. i128), we need
      // to store the required parts of it here (and pass just one address).
      // Vectors may be partly split to registers and partly to the stack, in
      // which case the base address is partly offset and subsequent stores are
      // relative to that.
      unsigned ArgIndex = Outs[i].OrigArgIndex;
      unsigned ArgPartOffset = Outs[i].PartOffset;
      assert(VA.getValVT().isVector() || ArgPartOffset == 0);
      while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
        SDValue PartValue = OutVals[i + 1];
        unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
                                      DAG.getIntPtrConstant(PartOffset, DL));
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, PartValue, Address,
                         MachinePointerInfo::getFixedStack(MF, FI)));
        ++i;
      }
      ArgValue = SpillSlot;
    } else {
      ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
    }

    // Use local copy if it is a byval arg.
    if (Flags.isByVal())
      ArgValue = ByValArgs[j++];

    if (VA.isRegLoc()) {
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");
      assert(!IsTailCall && "Tail call not allowed if stack is used "
                            "for passing parameters");

      // Work out the address of the stack slot.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
      SDValue Address =
          DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                      DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));

      // Emit the store.
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
    }
  }

  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  SDValue Glue;

  // Build a sequence of copy-to-reg nodes, chained and glued together.
  for (auto &Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
    Glue = Chain.getValue(1);
  }

  // Validate that none of the argument registers have been marked as
  // reserved, if so report an error. Do the same for the return address if this
  // is not a tailcall.
  validateCCReservedRegs(RegsToPass, MF);
  if (!IsTailCall &&
      MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
    MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
        MF.getFunction(),
        "Return address register required, but has been reserved."});

  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
  // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
  // split it and then direct call can be matched by PseudoCALL.
  if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = S->getGlobal();

    unsigned OpFlags = RISCVII::MO_CALL;
    if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
      OpFlags = RISCVII::MO_PLT;

    Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    unsigned OpFlags = RISCVII::MO_CALL;

    if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
                                                 nullptr))
      OpFlags = RISCVII::MO_PLT;

    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
  }

  // The first call operand is the chain and the second is the target address.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (auto &Reg : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));

  if (!IsTailCall) {
    // Add a register mask operand representing the call-preserved registers.
    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
    const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
    assert(Mask && "Missing call preserved mask for calling convention");
    Ops.push_back(DAG.getRegisterMask(Mask));
  }

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);

  if (IsTailCall) {
    // A tail call is emitted as a TAIL node and terminates lowering here;
    // there is no callseq_end and no result copies.
    MF.getFrameInfo().setHasTailCall();
    return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
  }

  Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
  Glue = Chain.getValue(1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumBytes, DL, PtrVT, true),
                             DAG.getConstant(0, DL, PtrVT, true),
                             Glue, DL);
  Glue = Chain.getValue(1);

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
  analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true);

  // Copy all of the result registers out of their specified physreg.
6439 for (auto &VA : RVLocs) { 6440 // Copy the value out 6441 SDValue RetValue = 6442 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue); 6443 // Glue the RetValue to the end of the call sequence 6444 Chain = RetValue.getValue(1); 6445 Glue = RetValue.getValue(2); 6446 6447 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { 6448 assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment"); 6449 SDValue RetValue2 = 6450 DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue); 6451 Chain = RetValue2.getValue(1); 6452 Glue = RetValue2.getValue(2); 6453 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue, 6454 RetValue2); 6455 } 6456 6457 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget); 6458 6459 InVals.push_back(RetValue); 6460 } 6461 6462 return Chain; 6463 } 6464 6465 bool RISCVTargetLowering::CanLowerReturn( 6466 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, 6467 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { 6468 SmallVector<CCValAssign, 16> RVLocs; 6469 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); 6470 6471 Optional<unsigned> FirstMaskArgument; 6472 if (Subtarget.hasStdExtV()) 6473 FirstMaskArgument = preAssignMask(Outs); 6474 6475 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 6476 MVT VT = Outs[i].VT; 6477 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 6478 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 6479 if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full, 6480 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr, 6481 *this, FirstMaskArgument)) 6482 return false; 6483 } 6484 return true; 6485 } 6486 6487 SDValue 6488 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, 6489 bool IsVarArg, 6490 const SmallVectorImpl<ISD::OutputArg> &Outs, 6491 const SmallVectorImpl<SDValue> &OutVals, 6492 const SDLoc &DL, SelectionDAG &DAG) const { 6493 const MachineFunction &MF = 
DAG.getMachineFunction(); 6494 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>(); 6495 6496 // Stores the assignment of the return value to a location. 6497 SmallVector<CCValAssign, 16> RVLocs; 6498 6499 // Info about the registers and stack slot. 6500 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, 6501 *DAG.getContext()); 6502 6503 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true, 6504 nullptr); 6505 6506 if (CallConv == CallingConv::GHC && !RVLocs.empty()) 6507 report_fatal_error("GHC functions return void only"); 6508 6509 SDValue Glue; 6510 SmallVector<SDValue, 4> RetOps(1, Chain); 6511 6512 // Copy the result values into the output registers. 6513 for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) { 6514 SDValue Val = OutVals[i]; 6515 CCValAssign &VA = RVLocs[i]; 6516 assert(VA.isRegLoc() && "Can only return in registers!"); 6517 6518 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { 6519 // Handle returning f64 on RV32D with a soft float ABI. 
6520 assert(VA.isRegLoc() && "Expected return via registers"); 6521 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL, 6522 DAG.getVTList(MVT::i32, MVT::i32), Val); 6523 SDValue Lo = SplitF64.getValue(0); 6524 SDValue Hi = SplitF64.getValue(1); 6525 Register RegLo = VA.getLocReg(); 6526 assert(RegLo < RISCV::X31 && "Invalid register pair"); 6527 Register RegHi = RegLo + 1; 6528 6529 if (STI.isRegisterReservedByUser(RegLo) || 6530 STI.isRegisterReservedByUser(RegHi)) 6531 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 6532 MF.getFunction(), 6533 "Return value register required, but has been reserved."}); 6534 6535 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue); 6536 Glue = Chain.getValue(1); 6537 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32)); 6538 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue); 6539 Glue = Chain.getValue(1); 6540 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32)); 6541 } else { 6542 // Handle a 'normal' return. 6543 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget); 6544 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue); 6545 6546 if (STI.isRegisterReservedByUser(VA.getLocReg())) 6547 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 6548 MF.getFunction(), 6549 "Return value register required, but has been reserved."}); 6550 6551 // Guarantee that all emitted copies are stuck together. 6552 Glue = Chain.getValue(1); 6553 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 6554 } 6555 } 6556 6557 RetOps[0] = Chain; // Update chain. 6558 6559 // Add the glue node if we have it. 6560 if (Glue.getNode()) { 6561 RetOps.push_back(Glue); 6562 } 6563 6564 // Interrupt service routines use different return instructions. 
  const Function &Func = DAG.getMachineFunction().getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.getReturnType()->isVoidTy())
      report_fatal_error(
          "Functions with the interrupt attribute must have void return type!");

    MachineFunction &MF = DAG.getMachineFunction();
    StringRef Kind =
      MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    // Select uret/sret/mret based on the privilege level named by the
    // attribute value; anything other than "user"/"supervisor" defaults to
    // machine mode.
    unsigned RetOpc;
    if (Kind == "user")
      RetOpc = RISCVISD::URET_FLAG;
    else if (Kind == "supervisor")
      RetOpc = RISCVISD::SRET_FLAG;
    else
      RetOpc = RISCVISD::MRET_FLAG;

    return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
  }

  return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
}

/// Diagnose any argument-passing register in \p Regs that the user has
/// reserved (e.g. via -ffixed-<reg>); the calling convention needs it, so
/// this is an unsupported combination.
void RISCVTargetLowering::validateCCReservedRegs(
    const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
    MachineFunction &MF) const {
  const Function &F = MF.getFunction();
  const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();

  if (llvm::any_of(Regs, [&STI](auto Reg) {
        return STI.isRegisterReservedByUser(Reg.first);
      }))
    F.getContext().diagnose(DiagnosticInfoUnsupported{
        F, "Argument register required, but has been reserved."});
}

/// A call marked as a tail call may be emitted as one; no additional
/// target-specific restrictions are imposed here.
bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  return CI->isTailCall();
}

/// Return the textual name of a RISCVISD node opcode, for debug output.
const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
#define NODE_NAME_CASE(NODE)                                                   \
  case RISCVISD::NODE:                                                         \
    return "RISCVISD::" #NODE;
  // clang-format off
  switch ((RISCVISD::NodeType)Opcode) {
  case RISCVISD::FIRST_NUMBER:
    break;
  NODE_NAME_CASE(RET_FLAG)
  NODE_NAME_CASE(URET_FLAG)
  NODE_NAME_CASE(SRET_FLAG)
  NODE_NAME_CASE(MRET_FLAG)
  NODE_NAME_CASE(CALL)
  NODE_NAME_CASE(SELECT_CC)
  NODE_NAME_CASE(BR_CC)
  NODE_NAME_CASE(BuildPairF64)
  NODE_NAME_CASE(SplitF64)
  NODE_NAME_CASE(TAIL)
  NODE_NAME_CASE(SLLW)
  NODE_NAME_CASE(SRAW)
  NODE_NAME_CASE(SRLW)
  NODE_NAME_CASE(DIVW)
  NODE_NAME_CASE(DIVUW)
  NODE_NAME_CASE(REMUW)
  NODE_NAME_CASE(ROLW)
  NODE_NAME_CASE(RORW)
  NODE_NAME_CASE(FSLW)
  NODE_NAME_CASE(FSRW)
  NODE_NAME_CASE(FSL)
  NODE_NAME_CASE(FSR)
  NODE_NAME_CASE(FMV_H_X)
  NODE_NAME_CASE(FMV_X_ANYEXTH)
  NODE_NAME_CASE(FMV_W_X_RV64)
  NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
  NODE_NAME_CASE(READ_CYCLE_WIDE)
  NODE_NAME_CASE(GREVI)
  NODE_NAME_CASE(GREVIW)
  NODE_NAME_CASE(GORCI)
  NODE_NAME_CASE(GORCIW)
  NODE_NAME_CASE(SHFLI)
  NODE_NAME_CASE(VMV_V_X_VL)
  NODE_NAME_CASE(VFMV_V_F_VL)
  NODE_NAME_CASE(VMV_X_S)
  NODE_NAME_CASE(VMV_S_XF_VL)
  NODE_NAME_CASE(SPLAT_VECTOR_I64)
  NODE_NAME_CASE(READ_VLENB)
  NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
  NODE_NAME_CASE(VLEFF)
  NODE_NAME_CASE(VLEFF_MASK)
  NODE_NAME_CASE(VSLIDEUP_VL)
  NODE_NAME_CASE(VSLIDE1UP_VL)
  NODE_NAME_CASE(VSLIDEDOWN_VL)
  NODE_NAME_CASE(VID_VL)
  NODE_NAME_CASE(VFNCVT_ROD_VL)
  NODE_NAME_CASE(VECREDUCE_ADD_VL)
  NODE_NAME_CASE(VECREDUCE_UMAX_VL)
  NODE_NAME_CASE(VECREDUCE_SMAX_VL)
  NODE_NAME_CASE(VECREDUCE_UMIN_VL)
  NODE_NAME_CASE(VECREDUCE_SMIN_VL)
  NODE_NAME_CASE(VECREDUCE_AND_VL)
  NODE_NAME_CASE(VECREDUCE_OR_VL)
  NODE_NAME_CASE(VECREDUCE_XOR_VL)
  NODE_NAME_CASE(VECREDUCE_FADD_VL)
  NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
  NODE_NAME_CASE(ADD_VL)
  NODE_NAME_CASE(AND_VL)
  NODE_NAME_CASE(MUL_VL)
  NODE_NAME_CASE(OR_VL)
  NODE_NAME_CASE(SDIV_VL)
  NODE_NAME_CASE(SHL_VL)
  NODE_NAME_CASE(SREM_VL)
  NODE_NAME_CASE(SRA_VL)
  NODE_NAME_CASE(SRL_VL)
  NODE_NAME_CASE(SUB_VL)
  NODE_NAME_CASE(UDIV_VL)
  NODE_NAME_CASE(UREM_VL)
  NODE_NAME_CASE(XOR_VL)
  NODE_NAME_CASE(FADD_VL)
  NODE_NAME_CASE(FSUB_VL)
  NODE_NAME_CASE(FMUL_VL)
  NODE_NAME_CASE(FDIV_VL)
  NODE_NAME_CASE(FNEG_VL)
  NODE_NAME_CASE(FABS_VL)
  NODE_NAME_CASE(FSQRT_VL)
  NODE_NAME_CASE(FMA_VL)
  NODE_NAME_CASE(FCOPYSIGN_VL)
  NODE_NAME_CASE(SMIN_VL)
  NODE_NAME_CASE(SMAX_VL)
  NODE_NAME_CASE(UMIN_VL)
  NODE_NAME_CASE(UMAX_VL)
  NODE_NAME_CASE(MULHS_VL)
  NODE_NAME_CASE(MULHU_VL)
  NODE_NAME_CASE(FP_TO_SINT_VL)
  NODE_NAME_CASE(FP_TO_UINT_VL)
  NODE_NAME_CASE(SINT_TO_FP_VL)
  NODE_NAME_CASE(UINT_TO_FP_VL)
  NODE_NAME_CASE(FP_EXTEND_VL)
  NODE_NAME_CASE(FP_ROUND_VL)
  NODE_NAME_CASE(SETCC_VL)
  NODE_NAME_CASE(VSELECT_VL)
  NODE_NAME_CASE(VMAND_VL)
  NODE_NAME_CASE(VMOR_VL)
  NODE_NAME_CASE(VMXOR_VL)
  NODE_NAME_CASE(VMCLR_VL)
  NODE_NAME_CASE(VMSET_VL)
  NODE_NAME_CASE(VRGATHER_VX_VL)
  NODE_NAME_CASE(VRGATHER_VV_VL)
  NODE_NAME_CASE(VRGATHEREI16_VV_VL)
  NODE_NAME_CASE(VSEXT_VL)
  NODE_NAME_CASE(VZEXT_VL)
  NODE_NAME_CASE(VLE_VL)
  NODE_NAME_CASE(VSE_VL)
  }
  // clang-format on
  // Unknown opcodes (e.g. generic ISD nodes) get no name.
  return nullptr;
#undef NODE_NAME_CASE
}

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
RISCVTargetLowering::ConstraintType
RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:
      break;
    case 'f': // FP register operand.
    case 'v': // Vector register operand.
      return C_RegisterClass;
    case 'I': // 12-bit signed immediate.
    case 'J': // Integer zero.
    case 'K': // 5-bit unsigned immediate.
      return C_Immediate;
    case 'A': // Address held in a general purpose register.
      return C_Memory;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

std::pair<unsigned, const TargetRegisterClass *>
RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                  StringRef Constraint,
                                                  MVT VT) const {
  // First, see if this is a constraint that directly corresponds to a
  // RISCV register class.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      return std::make_pair(0U, &RISCV::GPRRegClass);
    case 'f':
      // Pick the FP register class matching the value type, gated on the
      // corresponding extension being available.
      if (Subtarget.hasStdExtZfh() && VT == MVT::f16)
        return std::make_pair(0U, &RISCV::FPR16RegClass);
      if (Subtarget.hasStdExtF() && VT == MVT::f32)
        return std::make_pair(0U, &RISCV::FPR32RegClass);
      if (Subtarget.hasStdExtD() && VT == MVT::f64)
        return std::make_pair(0U, &RISCV::FPR64RegClass);
      break;
    case 'v':
      // Try register classes of increasing LMUL until one can legally hold
      // the requested type.
      for (const auto *RC :
           {&RISCV::VMRegClass, &RISCV::VRRegClass, &RISCV::VRM2RegClass,
            &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
        if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
          return std::make_pair(0U, RC);
      }
      break;
    default:
      break;
    }
  }

  // Clang will correctly decode the usage of register name aliases into their
  // official names. However, other frontends like `rustc` do not. This allows
  // users of these frontends to use the ABI names for registers in LLVM-style
  // register constraints.
  unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
                               .Case("{zero}", RISCV::X0)
                               .Case("{ra}", RISCV::X1)
                               .Case("{sp}", RISCV::X2)
                               .Case("{gp}", RISCV::X3)
                               .Case("{tp}", RISCV::X4)
                               .Case("{t0}", RISCV::X5)
                               .Case("{t1}", RISCV::X6)
                               .Case("{t2}", RISCV::X7)
                               .Cases("{s0}", "{fp}", RISCV::X8)
                               .Case("{s1}", RISCV::X9)
                               .Case("{a0}", RISCV::X10)
                               .Case("{a1}", RISCV::X11)
                               .Case("{a2}", RISCV::X12)
                               .Case("{a3}", RISCV::X13)
                               .Case("{a4}", RISCV::X14)
                               .Case("{a5}", RISCV::X15)
                               .Case("{a6}", RISCV::X16)
                               .Case("{a7}", RISCV::X17)
                               .Case("{s2}", RISCV::X18)
                               .Case("{s3}", RISCV::X19)
                               .Case("{s4}", RISCV::X20)
                               .Case("{s5}", RISCV::X21)
                               .Case("{s6}", RISCV::X22)
                               .Case("{s7}", RISCV::X23)
                               .Case("{s8}", RISCV::X24)
                               .Case("{s9}", RISCV::X25)
                               .Case("{s10}", RISCV::X26)
                               .Case("{s11}", RISCV::X27)
                               .Case("{t3}", RISCV::X28)
                               .Case("{t4}", RISCV::X29)
                               .Case("{t5}", RISCV::X30)
                               .Case("{t6}", RISCV::X31)
                               .Default(RISCV::NoRegister);
  if (XRegFromAlias != RISCV::NoRegister)
    return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);

  // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
  // TableGen record rather than the AsmName to choose registers for InlineAsm
  // constraints, plus we want to match those names to the widest floating point
  // register type available, manually select floating point registers here.
  //
  // The second case is the ABI name of the register, so that frontends can also
  // use the ABI names in register constraint lists.
  if (Subtarget.hasStdExtF()) {
    unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
                        .Cases("{f0}", "{ft0}", RISCV::F0_F)
                        .Cases("{f1}", "{ft1}", RISCV::F1_F)
                        .Cases("{f2}", "{ft2}", RISCV::F2_F)
                        .Cases("{f3}", "{ft3}", RISCV::F3_F)
                        .Cases("{f4}", "{ft4}", RISCV::F4_F)
                        .Cases("{f5}", "{ft5}", RISCV::F5_F)
                        .Cases("{f6}", "{ft6}", RISCV::F6_F)
                        .Cases("{f7}", "{ft7}", RISCV::F7_F)
                        .Cases("{f8}", "{fs0}", RISCV::F8_F)
                        .Cases("{f9}", "{fs1}", RISCV::F9_F)
                        .Cases("{f10}", "{fa0}", RISCV::F10_F)
                        .Cases("{f11}", "{fa1}", RISCV::F11_F)
                        .Cases("{f12}", "{fa2}", RISCV::F12_F)
                        .Cases("{f13}", "{fa3}", RISCV::F13_F)
                        .Cases("{f14}", "{fa4}", RISCV::F14_F)
                        .Cases("{f15}", "{fa5}", RISCV::F15_F)
                        .Cases("{f16}", "{fa6}", RISCV::F16_F)
                        .Cases("{f17}", "{fa7}", RISCV::F17_F)
                        .Cases("{f18}", "{fs2}", RISCV::F18_F)
                        .Cases("{f19}", "{fs3}", RISCV::F19_F)
                        .Cases("{f20}", "{fs4}", RISCV::F20_F)
                        .Cases("{f21}", "{fs5}", RISCV::F21_F)
                        .Cases("{f22}", "{fs6}", RISCV::F22_F)
                        .Cases("{f23}", "{fs7}", RISCV::F23_F)
                        .Cases("{f24}", "{fs8}", RISCV::F24_F)
                        .Cases("{f25}", "{fs9}", RISCV::F25_F)
                        .Cases("{f26}", "{fs10}", RISCV::F26_F)
                        .Cases("{f27}", "{fs11}", RISCV::F27_F)
                        .Cases("{f28}", "{ft8}", RISCV::F28_F)
                        .Cases("{f29}", "{ft9}", RISCV::F29_F)
                        .Cases("{f30}", "{ft10}", RISCV::F30_F)
                        .Cases("{f31}", "{ft11}", RISCV::F31_F)
                        .Default(RISCV::NoRegister);
    if (FReg != RISCV::NoRegister) {
      assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
      // With D, report the 64-bit view of the same architectural register.
      if (Subtarget.hasStdExtD()) {
        unsigned RegNo = FReg - RISCV::F0_F;
        unsigned DReg = RISCV::F0_D + RegNo;
        return std::make_pair(DReg, &RISCV::FPR64RegClass);
      }
      return std::make_pair(FReg, &RISCV::FPR32RegClass);
    }
  }

  if (Subtarget.hasStdExtV()) {
    Register VReg = StringSwitch<Register>(Constraint.lower())
                        .Case("{v0}", RISCV::V0)
                        .Case("{v1}", RISCV::V1)
                        .Case("{v2}", RISCV::V2)
                        .Case("{v3}", RISCV::V3)
                        .Case("{v4}", RISCV::V4)
                        .Case("{v5}", RISCV::V5)
                        .Case("{v6}", RISCV::V6)
                        .Case("{v7}", RISCV::V7)
                        .Case("{v8}", RISCV::V8)
                        .Case("{v9}", RISCV::V9)
                        .Case("{v10}", RISCV::V10)
                        .Case("{v11}", RISCV::V11)
                        .Case("{v12}", RISCV::V12)
                        .Case("{v13}", RISCV::V13)
                        .Case("{v14}", RISCV::V14)
                        .Case("{v15}", RISCV::V15)
                        .Case("{v16}", RISCV::V16)
                        .Case("{v17}", RISCV::V17)
                        .Case("{v18}", RISCV::V18)
                        .Case("{v19}", RISCV::V19)
                        .Case("{v20}", RISCV::V20)
                        .Case("{v21}", RISCV::V21)
                        .Case("{v22}", RISCV::V22)
                        .Case("{v23}", RISCV::V23)
                        .Case("{v24}", RISCV::V24)
                        .Case("{v25}", RISCV::V25)
                        .Case("{v26}", RISCV::V26)
                        .Case("{v27}", RISCV::V27)
                        .Case("{v28}", RISCV::V28)
                        .Case("{v29}", RISCV::V29)
                        .Case("{v30}", RISCV::V30)
                        .Case("{v31}", RISCV::V31)
                        .Default(RISCV::NoRegister);
    if (VReg != RISCV::NoRegister) {
      // Prefer the smallest class that can legally hold the type; for grouped
      // (LMUL>1) classes, map the single register to its containing group.
      if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
        return std::make_pair(VReg, &RISCV::VMRegClass);
      if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
        return std::make_pair(VReg, &RISCV::VRRegClass);
      for (const auto *RC :
           {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
        if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
          VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
          return std::make_pair(VReg, RC);
        }
      }
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

unsigned
RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
  // Currently only support length 1 constraints.
  if (ConstraintCode.size() == 1) {
    switch (ConstraintCode[0]) {
    case 'A': // Memory operand: address in a GPR.
      return InlineAsm::Constraint_A;
    default:
      break;
    }
  }

  return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}

/// Lower an inline-asm operand for an immediate constraint ('I', 'J', 'K').
/// If the operand does not satisfy the constraint, no operand is appended and
/// the caller reports the failure.
void RISCVTargetLowering::LowerAsmOperandForConstraint(
    SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
    SelectionDAG &DAG) const {
  // Currently only support length 1 constraints.
  if (Constraint.length() == 1) {
    switch (Constraint[0]) {
    case 'I':
      // Validate & create a 12-bit signed immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getSExtValue();
        if (isInt<12>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
      }
      return;
    case 'J':
      // Validate & create an integer zero operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (C->getZExtValue() == 0)
          Ops.push_back(
              DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
      return;
    case 'K':
      // Validate & create a 5-bit unsigned immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getZExtValue();
        if (isUInt<5>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
      }
      return;
    default:
      break;
    }
  }
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

/// Emit a fence before an atomic instruction when the AtomicExpand pass
/// lowers it with explicit fences: seq_cst loads and release-or-stronger
/// stores need a leading fence.
Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
                                                   Instruction *Inst,
                                                   AtomicOrdering Ord) const {
  if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
    return Builder.CreateFence(Ord);
  if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Release);
  return nullptr;
}

/// Emit the matching trailing fence: acquire-or-stronger loads need an
/// acquire fence after the access.
Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
                                                    Instruction *Inst,
                                                    AtomicOrdering Ord) const {
  if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Acquire);
  return nullptr;
}

TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
  // point operations can't be used in an lr/sc sequence without breaking the
  // forward-progress guarantee.
  if (AI->isFloatingPointOperation())
    return AtomicExpansionKind::CmpXChg;

  // Sub-word (8/16-bit) atomics are expanded to the masked intrinsic form,
  // which operates on an aligned XLen-sized word.
  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

/// Map an atomicrmw binary op to the XLen-specific masked-atomic intrinsic
/// used when expanding sub-word atomic operations.
static Intrinsic::ID
getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
  if (XLen == 32) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i32;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i32;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i32;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i32;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i32;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i32;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i32;
    }
  }

  if (XLen == 64) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i64;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i64;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i64;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i64;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i64;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i64;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i64;
    }
  }

  llvm_unreachable("Unexpected XLen\n");
}

Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  Value *Ordering =
      Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
  Type *Tys[] = {AlignedAddr->getType()};
  Function *LrwOpScwLoop = Intrinsic::getDeclaration(
      AI->getModule(),
      getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);

  // The i64 intrinsics take i64 arguments; widen the i32 inputs on RV64.
  if (XLen == 64) {
    Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
  }

  Value *Result;

  // Must pass the shift amount needed to sign extend the loaded value prior
  // to performing a signed comparison for min/max. ShiftAmt is the number of
  // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
  // is the number of bits to left+right shift the value in order to
  // sign-extend.
  if (AI->getOperation() == AtomicRMWInst::Min ||
      AI->getOperation() == AtomicRMWInst::Max) {
    const DataLayout &DL = AI->getModule()->getDataLayout();
    unsigned ValWidth =
        DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
    Value *SextShamt =
        Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
    Result = Builder.CreateCall(LrwOpScwLoop,
                                {AlignedAddr, Incr, Mask, SextShamt, Ordering});
  } else {
    Result =
        Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
  }

  // The intrinsic returns XLen bits; truncate back to the i32 the expansion
  // works with on RV64.
  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}

TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
    AtomicCmpXchgInst *CI) const {
  // Sub-word (8/16-bit) cmpxchg is expanded to the masked intrinsic form.
  unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
    IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
  Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
  // On RV64 the intrinsic operates on i64; widen inputs and pick the i64
  // variant.
  if (XLen == 64) {
    CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
    NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
  }
  Type *Tys[] = {AlignedAddr->getType()};
  Function *MaskedCmpXchg =
      Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
  Value *Result = Builder.CreateCall(
      MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}

bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const {
  return false;
}

/// FMA is profitable whenever the corresponding FP extension (which provides
/// the fused multiply-add instructions) is available for the scalar type.
bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                                     EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f16:
    return Subtarget.hasStdExtZfh();
  case MVT::f32:
    return Subtarget.hasStdExtF();
  case MVT::f64:
    return Subtarget.hasStdExtD();
  default:
    break;
  }

  return false;
}

// Exception pointer and selector live in a0/a1, matching the argument
// registers used by the EH runtime.
Register RISCVTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X10;
}

Register RISCVTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X11;
}

bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
  // Return false to suppress the unnecessary extensions if the LibCall
  // arguments or return value is f32 type for LP64 ABI.
  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32))
    return false;

  return true;
}

// i32 values are kept sign-extended in 64-bit registers on RV64, regardless
// of their signedness.
bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
  if (Subtarget.is64Bit() && Type == MVT::i32)
    return true;

  return IsSigned;
}

bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
                                                 SDValue C) const {
  // Check integral scalar types.
  if (VT.isScalarInteger()) {
    // Omit the optimization if the sub target has the M extension and the data
    // size exceeds XLen.
    if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen())
      return false;
    if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
      // Break the MUL to a SLLI and an ADD/SUB.
      const APInt &Imm = ConstNode->getAPIntValue();
      if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
          (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
        return true;
      // Omit the following optimization if the sub target has the M extension
      // and the data size >= XLen.
      if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen())
        return false;
      // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
      // a pair of LUI/ADDI.
      if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) {
        APInt ImmS = Imm.ashr(Imm.countTrailingZeros());
        if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
            (1 - ImmS).isPowerOf2())
          return true;
      }
    }
  }

  return false;
}

/// Decide whether a fixed-length vector type should be lowered via RVV.
bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
  if (!Subtarget.useRVVForFixedLengthVectors())
    return false;

  if (!VT.isFixedLengthVector())
    return false;

  // Don't use RVV for vectors we cannot scalarize if required.
  switch (VT.getVectorElementType().SimpleTy) {
  // i1 is supported but has different rules.
  default:
    return false;
  case MVT::i1:
    // Masks can only use a single register.
    if (VT.getVectorNumElements() > Subtarget.getMinRVVVectorSizeInBits())
      return false;
    break;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
    break;
  case MVT::f16:
    if (!Subtarget.hasStdExtZfh())
      return false;
    break;
  case MVT::f32:
    if (!Subtarget.hasStdExtF())
      return false;
    break;
  case MVT::f64:
    if (!Subtarget.hasStdExtD())
      return false;
    break;
  }

  unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
  // Don't use RVV for types that don't fit.
  if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
    return false;

  // TODO: Perhaps an artificial restriction, but worth having whilst getting
  // the base fixed length RVV support in place.
  if (!VT.isPow2VectorType())
    return false;

  return true;
}

/// Scalable-vector accesses are treated as "fast" misaligned accesses as long
/// as they are element-aligned; everything else is rejected.
bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
    bool *Fast) const {
  if (!VT.isScalableVector())
    return false;

  EVT ElemVT = VT.getVectorElementType();
  if (Alignment >= ElemVT.getStoreSize()) {
    if (Fast)
      *Fast = true;
    return true;
  }

  return false;
}

bool RISCVTargetLowering::splitValueIntoRegisterParts(
    SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
    unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
  EVT ValueVT = Val.getValueType();
  // A smaller scalable vector can be passed in a larger scalable register by
  // inserting it at offset 0 (possibly after an element-type bitcast).
  if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
    LLVMContext &Context = *DAG.getContext();
    EVT ValueEltVT = ValueVT.getVectorElementType();
    EVT PartEltVT = PartVT.getVectorElementType();
    unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinSize();
    unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinSize();
    if (PartVTBitSize % ValueVTBitSize == 0) {
      // If the element types are different,
      // bitcast to the same element type of
      // PartVT first.
      if (ValueEltVT != PartEltVT) {
        unsigned Count = ValueVTBitSize / PartEltVT.getSizeInBits();
        assert(Count != 0 && "The number of element should not be zero.");
        EVT SameEltTypeVT =
            EVT::getVectorVT(Context, PartEltVT, Count, /*IsScalable=*/true);
        Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
      }
      Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
                        Val, DAG.getConstant(0, DL, Subtarget.getXLenVT()));
      Parts[0] = Val;
      return true;
    }
  }
  return false;
}

/// Inverse of splitValueIntoRegisterParts for scalable vectors: extract the
/// value-sized subvector at offset 0 from the part register, bitcasting back
/// to the original element type if needed.
SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
    SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
    MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const {
  if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
    LLVMContext &Context = *DAG.getContext();
    SDValue Val = Parts[0];
    EVT ValueEltVT = ValueVT.getVectorElementType();
    EVT PartEltVT = PartVT.getVectorElementType();
    unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinSize();
    unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinSize();
    if (PartVTBitSize % ValueVTBitSize == 0) {
      EVT SameEltTypeVT = ValueVT;
      // If the element types are different, convert it to the same element type
      // of PartVT.
      if (ValueEltVT != PartEltVT) {
        unsigned Count = ValueVTBitSize / PartEltVT.getSizeInBits();
        assert(Count != 0 && "The number of element should not be zero.");
        SameEltTypeVT =
            EVT::getVectorVT(Context, PartEltVT, Count, /*IsScalable=*/true);
      }
      Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SameEltTypeVT, Val,
                        DAG.getConstant(0, DL, Subtarget.getXLenVT()));
      if (ValueEltVT != PartEltVT)
        Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
      return Val;
    }
  }
  return SDValue();
}

// Pull in the TableGen-generated register name matchers
// (MatchRegisterName/MatchRegisterAltName) used below.
#define GET_REGISTER_MATCHER
#include "RISCVGenAsmMatcher.inc"

/// Resolve a named register (architectural or ABI alias) for
/// read_register/write_register style intrinsics. Only registers that are
/// reserved (by the target or by the user) may be obtained this way.
Register
RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                       const MachineFunction &MF) const {
  Register Reg = MatchRegisterAltName(RegName);
  if (Reg == RISCV::NoRegister)
    Reg = MatchRegisterName(RegName);
  if (Reg == RISCV::NoRegister)
    report_fatal_error(
        Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
  BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
  if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
    report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
                             StringRef(RegName) + "\"."));
  return Reg;
}

namespace llvm {
namespace RISCVVIntrinsicsTable {

// TableGen-generated searchable table mapping RVV intrinsics to lowering
// information.
#define GET_RISCVVIntrinsicsTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // namespace RISCVVIntrinsicsTable

} // namespace llvm