//===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISCV uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "RISCVISelLowering.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVRegisterInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-lower"

STATISTIC(NumTailCalls, "Number of tail calls");

// Constructor: validates the requested target ABI (falling back to the
// corresponding soft-float ABI if the required F/D extensions are missing),
// registers the legal register classes for scalar, floating-point and RVV
// vector types, and configures operation legality/actions for each value
// type.
RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                         const RISCVSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  if (Subtarget.isRV32E())
    report_fatal_error("Codegen not yet implemented for RV32E");

  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");

  // A hard-float ABI without the matching FP extension is inconsistent;
  // warn and downgrade to the plain integer ABI of the right XLEN.
  if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
      !Subtarget.hasStdExtF()) {
    errs() << "Hard-float 'f' ABI can't be used for a target that "
              "doesn't support the F instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
             !Subtarget.hasStdExtD()) {
    errs() << "Hard-float 'd' ABI can't be used for a target that "
              "doesn't support the D instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  }

  switch (ABI) {
  default:
    report_fatal_error("Don't know how to lower this ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64:
  case RISCVABI::ABI_LP64F:
  case RISCVABI::ABI_LP64D:
    break;
  }

  MVT XLenVT = Subtarget.getXLenVT();

  // Set up the register classes.
  addRegisterClass(XLenVT, &RISCV::GPRRegClass);

  if (Subtarget.hasStdExtZfh())
    addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
  if (Subtarget.hasStdExtF())
    addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
  if (Subtarget.hasStdExtD())
    addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);

  // Scalable-vector types grouped by element kind; used both for register
  // class registration and for the per-type action loops below.
  static const MVT::SimpleValueType BoolVecVTs[] = {
      MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
      MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
  static const MVT::SimpleValueType IntVecVTs[] = {
      MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
      MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
      MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
      MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
      MVT::nxv4i64, MVT::nxv8i64};
  static const MVT::SimpleValueType F16VecVTs[] = {
      MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
      MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
  static const MVT::SimpleValueType F32VecVTs[] = {
      MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
  static const MVT::SimpleValueType F64VecVTs[] = {
      MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};

  if (Subtarget.hasStdExtV()) {
    // Pick a register class by the type's minimum size: <=64 bits fits one
    // vector register; larger sizes need register groups (M2/M4/M8).
    auto addRegClassForRVV = [this](MVT VT) {
      unsigned Size = VT.getSizeInBits().getKnownMinValue();
      assert(Size <= 512 && isPowerOf2_32(Size));
      const TargetRegisterClass *RC;
      if (Size <= 64)
        RC = &RISCV::VRRegClass;
      else if (Size == 128)
        RC = &RISCV::VRM2RegClass;
      else if (Size == 256)
        RC = &RISCV::VRM4RegClass;
      else
        RC = &RISCV::VRM8RegClass;

      addRegisterClass(VT, RC);
    };

    for (MVT VT : BoolVecVTs)
      addRegClassForRVV(VT);
    for (MVT VT : IntVecVTs)
      addRegClassForRVV(VT);

    if (Subtarget.hasStdExtZfh())
      for (MVT VT : F16VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasStdExtF())
      for (MVT VT : F32VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasStdExtD())
      for (MVT VT : F64VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.useRVVForFixedLengthVectors()) {
      // Fixed-length vectors are mapped onto RVV register groups according
      // to the subtarget's chosen LMUL for that type.
      auto addRegClassForFixedVectors = [this](MVT VT) {
        unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
        const TargetRegisterClass *RC;
        if (LMul == 1)
          RC = &RISCV::VRRegClass;
        else if (LMul == 2)
          RC = &RISCV::VRM2RegClass;
        else if (LMul == 4)
          RC = &RISCV::VRM4RegClass;
        else if (LMul == 8)
          RC = &RISCV::VRM8RegClass;
        else
          llvm_unreachable("Unexpected LMul!");

        addRegisterClass(VT, RC);
      };
      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);

      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);
    }
  }

  // Compute derived properties from the register classes.
  computeRegisterProperties(STI.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(RISCV::X2);

  for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
    setLoadExtAction(N, XLenVT, MVT::i1, Promote);

  // TODO: add all necessary setOperationAction calls.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, XLenVT, Expand);
  setOperationAction(ISD::SELECT_CC, XLenVT, Expand);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  if (!Subtarget.hasStdExtZbb()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  }

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::ADD, MVT::i32, Custom);
    setOperationAction(ISD::SUB, MVT::i32, Custom);
    setOperationAction(ISD::SHL, MVT::i32, Custom);
    setOperationAction(ISD::SRA, MVT::i32, Custom);
    setOperationAction(ISD::SRL, MVT::i32, Custom);
  }

  if (!Subtarget.hasStdExtM()) {
    setOperationAction(ISD::MUL, XLenVT, Expand);
    setOperationAction(ISD::MULHS, XLenVT, Expand);
    setOperationAction(ISD::MULHU, XLenVT, Expand);
    setOperationAction(ISD::SDIV, XLenVT, Expand);
    setOperationAction(ISD::UDIV, XLenVT, Expand);
    setOperationAction(ISD::SREM, XLenVT, Expand);
    setOperationAction(ISD::UREM, XLenVT, Expand);
  }

  if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) {
    setOperationAction(ISD::MUL, MVT::i32, Custom);

    setOperationAction(ISD::SDIV, MVT::i8, Custom);
    setOperationAction(ISD::UDIV, MVT::i8, Custom);
    setOperationAction(ISD::UREM, MVT::i8, Custom);
    setOperationAction(ISD::SDIV, MVT::i16, Custom);
    setOperationAction(ISD::UDIV, MVT::i16, Custom);
    setOperationAction(ISD::UREM, MVT::i16, Custom);
    setOperationAction(ISD::SDIV, MVT::i32, Custom);
    setOperationAction(ISD::UDIV, MVT::i32, Custom);
    setOperationAction(ISD::UREM, MVT::i32, Custom);
  }

  setOperationAction(ISD::SDIVREM, XLenVT, Expand);
  setOperationAction(ISD::UDIVREM, XLenVT, Expand);
  setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
  setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);

  setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
  setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
  setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);

  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::ROTL, MVT::i32, Custom);
      setOperationAction(ISD::ROTR, MVT::i32, Custom);
    }
  } else {
    setOperationAction(ISD::ROTL, XLenVT, Expand);
    setOperationAction(ISD::ROTR, XLenVT, Expand);
  }

  if (Subtarget.hasStdExtZbp()) {
    // Custom lower bswap/bitreverse so we can convert them to GREVI to enable
    // more combining.
    setOperationAction(ISD::BITREVERSE, XLenVT, Custom);
    setOperationAction(ISD::BSWAP, XLenVT, Custom);

    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
      setOperationAction(ISD::BSWAP, MVT::i32, Custom);
    }
  } else {
    // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
    // pattern match it directly in isel.
    setOperationAction(ISD::BSWAP, XLenVT,
                       Subtarget.hasStdExtZbb() ? Legal : Expand);
  }

  if (Subtarget.hasStdExtZbb()) {
    setOperationAction(ISD::SMIN, XLenVT, Legal);
    setOperationAction(ISD::SMAX, XLenVT, Legal);
    setOperationAction(ISD::UMIN, XLenVT, Legal);
    setOperationAction(ISD::UMAX, XLenVT, Legal);
  } else {
    setOperationAction(ISD::CTTZ, XLenVT, Expand);
    setOperationAction(ISD::CTLZ, XLenVT, Expand);
    setOperationAction(ISD::CTPOP, XLenVT, Expand);
  }

  if (Subtarget.hasStdExtZbt()) {
    setOperationAction(ISD::FSHL, XLenVT, Custom);
    setOperationAction(ISD::FSHR, XLenVT, Custom);
    setOperationAction(ISD::SELECT, XLenVT, Legal);

    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::FSHL, MVT::i32, Custom);
      setOperationAction(ISD::FSHR, MVT::i32, Custom);
    }
  } else {
    setOperationAction(ISD::SELECT, XLenVT, Custom);
  }

  // Scalar FP condition codes and libcall-style FP operations that have no
  // direct hardware support; applied per FP type below.
  ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
      ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
      ISD::SETGE, ISD::SETNE, ISD::SETO, ISD::SETUO};

  ISD::NodeType FPOpToExpand[] = {
      ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FP16_TO_FP,
      ISD::FP_TO_FP16};

  if (Subtarget.hasStdExtZfh())
    setOperationAction(ISD::BITCAST, MVT::i16, Custom);

  if (Subtarget.hasStdExtZfh()) {
    setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f16, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
    setOperationAction(ISD::SELECT, MVT::f16, Custom);
    setOperationAction(ISD::BR_CC, MVT::f16, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f16, Expand);
  }

  if (Subtarget.hasStdExtF()) {
    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Custom);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  }

  if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);

  if (Subtarget.hasStdExtD()) {
    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Custom);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
  }

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
  }

  setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
  setOperationAction(ISD::BlockAddress, XLenVT, Custom);
  setOperationAction(ISD::ConstantPool, XLenVT, Custom);
  setOperationAction(ISD::JumpTable, XLenVT, Custom);

  setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);

  // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
  // Unfortunately this can't be determined just from the ISA naming string.
  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
                     Subtarget.is64Bit() ? Legal : Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);
  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  if (Subtarget.hasStdExtA()) {
    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
    setMinCmpXchgSizeInBits(32);
  } else {
    setMaxAtomicSizeInBitsSupported(0);
  }

  setBooleanContents(ZeroOrOneBooleanContent);

  if (Subtarget.hasStdExtV()) {
    setBooleanVectorContents(ZeroOrOneBooleanContent);

    setOperationAction(ISD::VSCALE, XLenVT, Custom);

    // RVV intrinsics may have illegal operands.
    // We also need to custom legalize vmv.x.s.
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);

    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);

    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
      setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
    } else {
      // We must custom-lower certain vXi64 operations on RV32 due to the
      // vector element type being illegal.
      setOperationAction(ISD::SPLAT_VECTOR, MVT::i64, Custom);
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i64, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::i64, Custom);

      setOperationAction(ISD::VECREDUCE_ADD, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_AND, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_OR, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_XOR, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_SMAX, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_SMIN, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_UMAX, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_UMIN, MVT::i64, Custom);
    }

    for (MVT VT : BoolVecVTs) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);

      // Mask VTs are custom-expanded into a series of standard nodes
      setOperationAction(ISD::TRUNCATE, VT, Custom);
      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
    }

    for (MVT VT : IntVecVTs) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);

      setOperationAction(ISD::SMIN, VT, Legal);
      setOperationAction(ISD::SMAX, VT, Legal);
      setOperationAction(ISD::UMIN, VT, Legal);
      setOperationAction(ISD::UMAX, VT, Legal);

      setOperationAction(ISD::ROTL, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      // Custom-lower extensions and truncations from/to mask types.
      setOperationAction(ISD::ANY_EXTEND, VT, Custom);
      setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
      setOperationAction(ISD::ZERO_EXTEND, VT, Custom);

      // RVV has native int->float & float->int conversions where the
      // element type sizes are within one power-of-two of each other. Any
      // wider distances between type sizes have to be lowered as sequences
      // which progressively narrow the gap in stages.
      setOperationAction(ISD::SINT_TO_FP, VT, Custom);
      setOperationAction(ISD::UINT_TO_FP, VT, Custom);
      setOperationAction(ISD::FP_TO_SINT, VT, Custom);
      setOperationAction(ISD::FP_TO_UINT, VT, Custom);

      // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
      // nodes which truncate by one power of two at a time.
      setOperationAction(ISD::TRUNCATE, VT, Custom);

      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);

      // Custom-lower reduction operations to set up the corresponding custom
      // nodes' operands.
      setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
      setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
      setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
      setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
      setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
      setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
      setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
      setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);

      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
    }

    // Expand various CCs to best match the RVV ISA, which natively supports
    // UNE but no other unordered comparisons, and supports all ordered
    // comparisons except ONE. Additionally, we expand GT,OGT,GE,OGE for
    // optimization purposes; they are expanded to their swapped-operand CCs
    // (LT,OLT,LE,OLE), and we pattern-match those back to the "original",
    // swapping operands once more. This way we catch both operations and both
    // "vf" and "fv" forms with fewer patterns.
    ISD::CondCode VFPCCToExpand[] = {
        ISD::SETO, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
        ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
        ISD::SETGT, ISD::SETOGT, ISD::SETGE, ISD::SETOGE,
    };

    // Sets common operation actions on RVV floating-point vector types.
    const auto SetCommonVFPActions = [&](MVT VT) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
      // RVV has native FP_ROUND & FP_EXTEND conversions where the element
      // type sizes are within one power-of-two of each other. Therefore
      // conversions between vXf16 and vXf64 must be lowered as sequences
      // which convert via vXf32.
      setOperationAction(ISD::FP_ROUND, VT, Custom);
      setOperationAction(ISD::FP_EXTEND, VT, Custom);
      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
      // Expand various condition codes (explained above).
      for (auto CC : VFPCCToExpand)
        setCondCodeAction(CC, VT, Expand);

      setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
      setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
      setOperationAction(ISD::FCOPYSIGN, VT, Legal);

      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
    };

    if (Subtarget.hasStdExtZfh())
      for (MVT VT : F16VecVTs)
        SetCommonVFPActions(VT);

    if (Subtarget.hasStdExtF())
      for (MVT VT : F32VecVTs)
        SetCommonVFPActions(VT);

    if (Subtarget.hasStdExtD())
      for (MVT VT : F64VecVTs)
        SetCommonVFPActions(VT);

    if (Subtarget.useRVVForFixedLengthVectors()) {
      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
        if (!useRVVForFixedLengthVectorVT(VT))
          continue;

        // By default everything must be expanded.
        for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
          setOperationAction(Op, VT, Expand);
        for (MVT OtherVT : MVT::fixedlen_vector_valuetypes())
          setTruncStoreAction(VT, OtherVT, Expand);

        // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
        setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
        setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

        setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
        setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);

        setOperationAction(ISD::LOAD, VT, Custom);
        setOperationAction(ISD::STORE, VT, Custom);

        setOperationAction(ISD::SETCC, VT, Custom);

        setOperationAction(ISD::TRUNCATE, VT, Custom);

        // Operations below differ between mask vectors and other vectors.
        if (VT.getVectorElementType() == MVT::i1) {
          setOperationAction(ISD::AND, VT, Custom);
          setOperationAction(ISD::OR, VT, Custom);
          setOperationAction(ISD::XOR, VT, Custom);
          continue;
        }

        setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
        setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);

        setOperationAction(ISD::ADD, VT, Custom);
        setOperationAction(ISD::MUL, VT, Custom);
        setOperationAction(ISD::SUB, VT, Custom);
        setOperationAction(ISD::AND, VT, Custom);
        setOperationAction(ISD::OR, VT, Custom);
        setOperationAction(ISD::XOR, VT, Custom);
        setOperationAction(ISD::SDIV, VT, Custom);
        setOperationAction(ISD::SREM, VT, Custom);
        setOperationAction(ISD::UDIV, VT, Custom);
        setOperationAction(ISD::UREM, VT, Custom);
        setOperationAction(ISD::SHL, VT, Custom);
        setOperationAction(ISD::SRA, VT, Custom);
        setOperationAction(ISD::SRL, VT, Custom);

        setOperationAction(ISD::SMIN, VT, Custom);
        setOperationAction(ISD::SMAX, VT, Custom);
        setOperationAction(ISD::UMIN, VT, Custom);
        setOperationAction(ISD::UMAX, VT, Custom);

        setOperationAction(ISD::MULHS, VT, Custom);
        setOperationAction(ISD::MULHU, VT, Custom);

        setOperationAction(ISD::SINT_TO_FP, VT, Custom);
        setOperationAction(ISD::UINT_TO_FP, VT, Custom);
        setOperationAction(ISD::FP_TO_SINT, VT, Custom);
        setOperationAction(ISD::FP_TO_UINT, VT, Custom);

        setOperationAction(ISD::VSELECT, VT, Custom);

        setOperationAction(ISD::ANY_EXTEND, VT, Custom);
        setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
        setOperationAction(ISD::ZERO_EXTEND, VT, Custom);

        setOperationAction(ISD::BITCAST, VT, Custom);
      }

      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
        if (!useRVVForFixedLengthVectorVT(VT))
          continue;

        // By default everything must be expanded.
        for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
          setOperationAction(Op, VT, Expand);
        for (MVT OtherVT : MVT::fp_fixedlen_vector_valuetypes()) {
          setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
          setTruncStoreAction(VT, OtherVT, Expand);
        }

        // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
        setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
        setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

        setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
        setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
        setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);

        setOperationAction(ISD::LOAD, VT, Custom);
        setOperationAction(ISD::STORE, VT, Custom);
        setOperationAction(ISD::FADD, VT, Custom);
        setOperationAction(ISD::FSUB, VT, Custom);
        setOperationAction(ISD::FMUL, VT, Custom);
        setOperationAction(ISD::FDIV, VT, Custom);
        setOperationAction(ISD::FNEG, VT, Custom);
        setOperationAction(ISD::FABS, VT, Custom);
        setOperationAction(ISD::FSQRT, VT, Custom);
        setOperationAction(ISD::FMA, VT, Custom);

        setOperationAction(ISD::FP_ROUND, VT, Custom);
        setOperationAction(ISD::FP_EXTEND, VT, Custom);

        for (auto CC : VFPCCToExpand)
          setCondCodeAction(CC, VT, Expand);

        setOperationAction(ISD::VSELECT, VT, Custom);

        setOperationAction(ISD::BITCAST, VT, Custom);
      }
    }
  }

  // Function alignments.
  const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
  setMinFunctionAlignment(FunctionAlignment);
  setPrefFunctionAlignment(FunctionAlignment);

  setMinimumJumpTableEntries(5);

  // Jumps are expensive, compared to logic
  setJumpIsExpensive();

  // We can use any register for comparisons
  setHasMultipleConditionRegisters();

  setTargetDAGCombine(ISD::SETCC);
  if (Subtarget.hasStdExtZbp()) {
    setTargetDAGCombine(ISD::OR);
  }
  if (Subtarget.hasStdExtV())
    setTargetDAGCombine(ISD::FCOPYSIGN);
}

// Return the value type to use for the result of a SETCC: the pointer-sized
// integer for scalars, an i1 vector (mask) for vectors handled by RVV, and
// an integer-element vector of the same element count otherwise.
EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
                                            LLVMContext &Context,
                                            EVT VT) const {
  if (!VT.isVector())
    return getPointerTy(DL);
  if (Subtarget.hasStdExtV() &&
      (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
    return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
  return VT.changeVectorElementTypeToInteger();
}

// Describe the memory access performed by target memory intrinsics so the
// optimizer can reason about them. The masked atomic RMW/cmpxchg intrinsics
// all read and write a 32-bit-aligned memory word through their first
// argument.
bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                             const CallInst &I,
                                             MachineFunction &MF,
                                             unsigned Intrinsic) const {
  switch (Intrinsic) {
  default:
    return false;
  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
  case Intrinsic::riscv_masked_atomicrmw_add_i32:
  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
  case Intrinsic::riscv_masked_atomicrmw_max_i32:
  case Intrinsic::riscv_masked_atomicrmw_min_i32:
  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
  case Intrinsic::riscv_masked_cmpxchg_i32:
    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::getVT(PtrTy->getElementType());
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    return true;
  }
}

bool
// Returns true if the addressing mode AM is legal for loads/stores of type
// Ty in address space AS. RISC-V load/store instructions support only a
// single base-register + signed-12-bit-immediate addressing mode, so every
// other combination is rejected.
RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                           const AddrMode &AM, Type *Ty,
                                           unsigned AS,
                                           Instruction *I) const {
  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // Require a 12-bit signed offset.
  if (!isInt<12>(AM.BaseOffs))
    return false;

  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (!AM.HasBaseReg) // allow "r+i".
      break;
    return false; // disallow "r+r" or "r+r+i".
  default:
    return false;
  }

  return true;
}

// Compare-with-immediate is only profitable when the immediate fits the
// 12-bit signed range.
bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

// Add-immediate takes a 12-bit signed operand.
bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

// On RV32, 64-bit integers are split into their high and low parts and held
// in two different registers, so the trunc is free since the low register can
// just be used.
bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
  if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
    return false;
  unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
  unsigned DestBits = DstTy->getPrimitiveSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

// EVT overload of the above: same RV32-only i64 -> i32 rule, restricted to
// scalar integer types.
bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
  if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
      !SrcVT.isInteger() || !DstVT.isInteger())
    return false;
  unsigned SrcBits = SrcVT.getSizeInBits();
  unsigned DestBits = DstVT.getSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Zexts are free if they can be combined with a load.
  // This is true for i8/i16 loads (and i32 loads on RV64) that are either
  // non-extending or already zero-extending.
  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
         (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  return TargetLowering::isZExtFree(Val, VT2);
}

// On RV64, prefer sign-extension when promoting i32 to i64 (the RV64 ISA's
// *W instructions produce sign-extended 32-bit results).
bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}

// Count-trailing/leading-zeros are cheap enough to speculate only when the
// Zbb extension provides them as single instructions.
bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
  return Subtarget.hasStdExtZbb();
}

bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
  return Subtarget.hasStdExtZbb();
}

// Only +0.0 is treated as a legal (free) FP immediate; -0.0 and every other
// constant are rejected, and the relevant FP extension must be present for
// the type in question.
bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                       bool ForCodeSize) const {
  if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
    return false;
  if (VT == MVT::f32 && !Subtarget.hasStdExtF())
    return false;
  if (VT == MVT::f64 && !Subtarget.hasStdExtD())
    return false;
  if (Imm.isNegZero())
    return false;
  return Imm.isZero();
}

// Bitwise logic on FP values preserves the bit pattern for every FP type the
// subtarget supports in hardware.
bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
  return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) ||
         (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
         (VT == MVT::f64 && Subtarget.hasStdExtD());
}

// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly in the RISC-V
// ISA.
static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
  switch (CC) {
  default:
    break;
  case ISD::SETGT:
  case ISD::SETLE:
  case ISD::SETUGT:
  case ISD::SETULE:
    CC = ISD::getSetCCSwappedOperands(CC);
    std::swap(LHS, RHS);
    break;
  }
}

// Return the RISC-V branch opcode that matches the given DAG integer
// condition code. The CondCode must be one of those supported by the RISC-V
// ISA (see normaliseSetCC).
static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported CondCode");
  case ISD::SETEQ:
    return RISCV::BEQ;
  case ISD::SETNE:
    return RISCV::BNE;
  case ISD::SETLT:
    return RISCV::BLT;
  case ISD::SETGE:
    return RISCV::BGE;
  case ISD::SETULT:
    return RISCV::BLTU;
  case ISD::SETUGE:
    return RISCV::BGEU;
  }
}

// Map a scalable vector type to the RVV register-group multiplier (LMUL)
// that holds it, keyed off the type's known-minimum size in bits.
RISCVVLMUL RISCVTargetLowering::getLMUL(MVT VT) {
  assert(VT.isScalableVector() && "Expecting a scalable vector type");
  unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
  // Treat i1 (mask) vectors as if they had 8-bit elements for the purposes
  // of the size-to-LMUL mapping below.
  if (VT.getVectorElementType() == MVT::i1)
    KnownSize *= 8;

  switch (KnownSize) {
  default:
    llvm_unreachable("Invalid LMUL.");
  case 8:
    return RISCVVLMUL::LMUL_F8;
  case 16:
    return RISCVVLMUL::LMUL_F4;
  case 32:
    return RISCVVLMUL::LMUL_F2;
  case 64:
    return RISCVVLMUL::LMUL_1;
  case 128:
    return RISCVVLMUL::LMUL_2;
  case 256:
    return RISCVVLMUL::LMUL_4;
  case 512:
    return RISCVVLMUL::LMUL_8;
  }
}

// Return the register class ID for a register group of the given LMUL.
// Fractional LMULs (F8/F4/F2) and LMUL_1 all fit in a single vector
// register, hence map to VR.
unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVVLMUL LMul) {
  switch (LMul) {
  default:
    llvm_unreachable("Invalid LMUL.");
  case RISCVVLMUL::LMUL_F8:
  case RISCVVLMUL::LMUL_F4:
  case RISCVVLMUL::LMUL_F2:
  case RISCVVLMUL::LMUL_1:
    return RISCV::VRRegClassID;
  case RISCVVLMUL::LMUL_2:
    return RISCV::VRM2RegClassID;
  case RISCVVLMUL::LMUL_4:
    return RISCV::VRM4RegClassID;
  case RISCVVLMUL::LMUL_8:
    return RISCV::VRM8RegClassID;
  }
}

// Return the subregister index addressing the Index'th piece of a register
// group holding a vector of type VT. Relies on the TableGen-generated
// subregister indices being consecutive (checked via static_assert).
unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
  RISCVVLMUL LMUL = getLMUL(VT);
  if (LMUL == RISCVVLMUL::LMUL_F8 || LMUL == RISCVVLMUL::LMUL_F4 ||
      LMUL == RISCVVLMUL::LMUL_F2 || LMUL == RISCVVLMUL::LMUL_1) {
    static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm1_0 + Index;
  }
  if (LMUL == RISCVVLMUL::LMUL_2) {
    static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm2_0 + Index;
  }
  if (LMUL == RISCVVLMUL::LMUL_4) {
    static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm4_0 + Index;
  }
  llvm_unreachable("Invalid vector type.");
}

// Mask (i1-element) vectors always live in a single VR; otherwise choose the
// register class from the type's LMUL.
unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
  if (VT.getVectorElementType() == MVT::i1)
    return RISCV::VRRegClassID;
  return getRegClassIDForLMUL(getLMUL(VT));
}

// Attempt to decompose a subvector insert/extract between VecVT and
// SubVecVT via subregister indices. Returns the subregister index that
// can perform the subvector insert/extract with the given element index, as
// well as the index corresponding to any leftover subvectors that must be
// further inserted/extracted within the register class for SubVecVT.
std::pair<unsigned, unsigned>
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
    MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
    const RISCVRegisterInfo *TRI) {
  static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
                 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
                 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
                "Register classes not ordered");
  unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
  unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
  // Try to compose a subregister index that takes us from the incoming
  // LMUL>1 register class down to the outgoing one. At each step we half
  // the LMUL:
  //   nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
  // Note that this is not guaranteed to find a subregister index, such as
  // when we are extracting from one VR type to another.
  unsigned SubRegIdx = RISCV::NoSubRegister;
  for (const unsigned RCID :
       {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
    if (VecRegClassID > RCID && SubRegClassID <= RCID) {
      // Halve the type; the requested index selects either the low or the
      // high half, and IsHi (0/1) doubles as the subregister piece number.
      VecVT = VecVT.getHalfNumVectorElementsVT();
      bool IsHi =
          InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
      SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
                                            getSubregIndexByMVT(VecVT, IsHi));
      if (IsHi)
        InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
    }
  return {SubRegIdx, InsertExtractIdx};
}

// Return the largest legal scalable vector type that matches VT's element
// type.
MVT RISCVTargetLowering::getContainerForFixedLengthVector(
    SelectionDAG &DAG, MVT VT, const RISCVSubtarget &Subtarget) {
  assert(VT.isFixedLengthVector() &&
         DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
         "Expected legal fixed length vector!");

  unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
  assert(LMul <= 8 && isPowerOf2_32(LMul) && "Unexpected LMUL!");

  MVT EltVT = VT.getVectorElementType();
  switch (EltVT.SimpleTy) {
  default:
    llvm_unreachable("unexpected element type for RVV container");
  case MVT::i1: {
    // Masks are calculated assuming 8-bit elements since that's when we need
    // the most elements.
    unsigned EltsPerBlock = RISCV::RVVBitsPerBlock / 8;
    return MVT::getScalableVectorVT(MVT::i1, LMul * EltsPerBlock);
  }
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
  case MVT::f16:
  case MVT::f32:
  case MVT::f64: {
    unsigned EltsPerBlock = RISCV::RVVBitsPerBlock / EltVT.getSizeInBits();
    return MVT::getScalableVectorVT(EltVT, LMul * EltsPerBlock);
  }
  }
}

// Grow V to consume an entire RVV register.
static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
                                       const RISCVSubtarget &Subtarget) {
  assert(VT.isScalableVector() &&
         "Expected to convert into a scalable vector!");
  assert(V.getValueType().isFixedLengthVector() &&
         "Expected a fixed length vector operand!");
  SDLoc DL(V);
  // Insert the fixed-length value at element 0 of an otherwise-undef
  // scalable container.
  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
}

// Shrink V so it's just big enough to maintain a VT's worth of data.
static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
                                         const RISCVSubtarget &Subtarget) {
  assert(VT.isFixedLengthVector() &&
         "Expected to convert into a fixed length vector!");
  assert(V.getValueType().isScalableVector() &&
         "Expected a scalable vector operand!");
  SDLoc DL(V);
  // Extract the fixed-length prefix starting at element 0 of the container.
  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
}

// Gets the two common "VL" operands: an all-ones mask and the vector length.
// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
// the vector type that it is contained in.
static std::pair<SDValue, SDValue>
getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
                const RISCVSubtarget &Subtarget) {
  assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
  MVT XLenVT = Subtarget.getXLenVT();
  // Fixed-length vectors use their exact element count as VL; scalable
  // vectors use X0, which the V extension defines as VLMAX.
  SDValue VL = VecVT.isFixedLengthVector()
                   ? DAG.getConstant(VecVT.getVectorNumElements(), DL, XLenVT)
                   : DAG.getRegister(RISCV::X0, XLenVT);
  MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
  SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
  return {Mask, VL};
}

// As above but assuming the given type is a scalable vector type.
1020 static std::pair<SDValue, SDValue> 1021 getDefaultScalableVLOps(MVT VecVT, SDLoc DL, SelectionDAG &DAG, 1022 const RISCVSubtarget &Subtarget) { 1023 assert(VecVT.isScalableVector() && "Expecting a scalable vector"); 1024 return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget); 1025 } 1026 1027 // The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few 1028 // of either is (currently) supported. This can get us into an infinite loop 1029 // where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR 1030 // as a ..., etc. 1031 // Until either (or both) of these can reliably lower any node, reporting that 1032 // we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks 1033 // the infinite loop. Note that this lowers BUILD_VECTOR through the stack, 1034 // which is not desirable. 1035 bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles( 1036 EVT VT, unsigned DefinedValues) const { 1037 return false; 1038 } 1039 1040 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, 1041 const RISCVSubtarget &Subtarget) { 1042 MVT VT = Op.getSimpleValueType(); 1043 assert(VT.isFixedLengthVector() && "Unexpected vector!"); 1044 1045 MVT ContainerVT = 1046 RISCVTargetLowering::getContainerForFixedLengthVector(DAG, VT, Subtarget); 1047 1048 SDLoc DL(Op); 1049 SDValue Mask, VL; 1050 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 1051 1052 if (VT.getVectorElementType() == MVT::i1) { 1053 if (ISD::isBuildVectorAllZeros(Op.getNode())) { 1054 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL); 1055 return convertFromScalableVector(VT, VMClr, DAG, Subtarget); 1056 } 1057 1058 if (ISD::isBuildVectorAllOnes(Op.getNode())) { 1059 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL); 1060 return convertFromScalableVector(VT, VMSet, DAG, Subtarget); 1061 } 1062 1063 return SDValue(); 1064 } 1065 1066 if (SDValue Splat = 
cast<BuildVectorSDNode>(Op)->getSplatValue()) { 1067 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL 1068 : RISCVISD::VMV_V_X_VL; 1069 Splat = DAG.getNode(Opc, DL, ContainerVT, Splat, VL); 1070 return convertFromScalableVector(VT, Splat, DAG, Subtarget); 1071 } 1072 1073 // Try and match an index sequence, which we can lower directly to the vid 1074 // instruction. An all-undef vector is matched by getSplatValue, above. 1075 if (VT.isInteger()) { 1076 bool IsVID = true; 1077 for (unsigned i = 0, e = Op.getNumOperands(); i < e && IsVID; i++) 1078 IsVID &= Op.getOperand(i).isUndef() || 1079 (isa<ConstantSDNode>(Op.getOperand(i)) && 1080 Op.getConstantOperandVal(i) == i); 1081 1082 if (IsVID) { 1083 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL); 1084 return convertFromScalableVector(VT, VID, DAG, Subtarget); 1085 } 1086 } 1087 1088 return SDValue(); 1089 } 1090 1091 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, 1092 const RISCVSubtarget &Subtarget) { 1093 SDValue V1 = Op.getOperand(0); 1094 SDLoc DL(Op); 1095 MVT VT = Op.getSimpleValueType(); 1096 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode()); 1097 1098 if (SVN->isSplat()) { 1099 int Lane = SVN->getSplatIndex(); 1100 if (Lane >= 0) { 1101 MVT ContainerVT = RISCVTargetLowering::getContainerForFixedLengthVector( 1102 DAG, VT, Subtarget); 1103 1104 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget); 1105 assert(Lane < (int)VT.getVectorNumElements() && "Unexpected lane!"); 1106 1107 SDValue Mask, VL; 1108 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 1109 MVT XLenVT = Subtarget.getXLenVT(); 1110 SDValue Gather = 1111 DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, V1, 1112 DAG.getConstant(Lane, DL, XLenVT), Mask, VL); 1113 return convertFromScalableVector(VT, Gather, DAG, Subtarget); 1114 } 1115 } 1116 1117 return SDValue(); 1118 } 1119 1120 static SDValue getRVVFPExtendOrRound(SDValue Op, MVT VT, 
MVT ContainerVT, 1121 SDLoc DL, SelectionDAG &DAG, 1122 const RISCVSubtarget &Subtarget) { 1123 if (VT.isScalableVector()) 1124 return DAG.getFPExtendOrRound(Op, DL, VT); 1125 assert(VT.isFixedLengthVector() && 1126 "Unexpected value type for RVV FP extend/round lowering"); 1127 SDValue Mask, VL; 1128 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 1129 unsigned RVVOpc = ContainerVT.bitsGT(Op.getSimpleValueType()) 1130 ? RISCVISD::FP_EXTEND_VL 1131 : RISCVISD::FP_ROUND_VL; 1132 return DAG.getNode(RVVOpc, DL, ContainerVT, Op, Mask, VL); 1133 } 1134 1135 SDValue RISCVTargetLowering::LowerOperation(SDValue Op, 1136 SelectionDAG &DAG) const { 1137 switch (Op.getOpcode()) { 1138 default: 1139 report_fatal_error("unimplemented operand"); 1140 case ISD::GlobalAddress: 1141 return lowerGlobalAddress(Op, DAG); 1142 case ISD::BlockAddress: 1143 return lowerBlockAddress(Op, DAG); 1144 case ISD::ConstantPool: 1145 return lowerConstantPool(Op, DAG); 1146 case ISD::JumpTable: 1147 return lowerJumpTable(Op, DAG); 1148 case ISD::GlobalTLSAddress: 1149 return lowerGlobalTLSAddress(Op, DAG); 1150 case ISD::SELECT: 1151 return lowerSELECT(Op, DAG); 1152 case ISD::VASTART: 1153 return lowerVASTART(Op, DAG); 1154 case ISD::FRAMEADDR: 1155 return lowerFRAMEADDR(Op, DAG); 1156 case ISD::RETURNADDR: 1157 return lowerRETURNADDR(Op, DAG); 1158 case ISD::SHL_PARTS: 1159 return lowerShiftLeftParts(Op, DAG); 1160 case ISD::SRA_PARTS: 1161 return lowerShiftRightParts(Op, DAG, true); 1162 case ISD::SRL_PARTS: 1163 return lowerShiftRightParts(Op, DAG, false); 1164 case ISD::BITCAST: { 1165 SDValue Op0 = Op.getOperand(0); 1166 // We can handle fixed length vector bitcasts with a simple replacement 1167 // in isel. 
1168 if (Op.getValueType().isFixedLengthVector()) { 1169 if (Op0.getValueType().isFixedLengthVector()) 1170 return Op; 1171 return SDValue(); 1172 } 1173 assert(((Subtarget.is64Bit() && Subtarget.hasStdExtF()) || 1174 Subtarget.hasStdExtZfh()) && 1175 "Unexpected custom legalisation"); 1176 SDLoc DL(Op); 1177 if (Op.getValueType() == MVT::f16 && Subtarget.hasStdExtZfh()) { 1178 if (Op0.getValueType() != MVT::i16) 1179 return SDValue(); 1180 SDValue NewOp0 = 1181 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Op0); 1182 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0); 1183 return FPConv; 1184 } else if (Op.getValueType() == MVT::f32 && Subtarget.is64Bit() && 1185 Subtarget.hasStdExtF()) { 1186 if (Op0.getValueType() != MVT::i32) 1187 return SDValue(); 1188 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); 1189 SDValue FPConv = 1190 DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0); 1191 return FPConv; 1192 } 1193 return SDValue(); 1194 } 1195 case ISD::INTRINSIC_WO_CHAIN: 1196 return LowerINTRINSIC_WO_CHAIN(Op, DAG); 1197 case ISD::INTRINSIC_W_CHAIN: 1198 return LowerINTRINSIC_W_CHAIN(Op, DAG); 1199 case ISD::BSWAP: 1200 case ISD::BITREVERSE: { 1201 // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combinining. 1202 assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation"); 1203 MVT VT = Op.getSimpleValueType(); 1204 SDLoc DL(Op); 1205 // Start with the maximum immediate value which is the bitwidth - 1. 1206 unsigned Imm = VT.getSizeInBits() - 1; 1207 // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits. 
1208 if (Op.getOpcode() == ISD::BSWAP) 1209 Imm &= ~0x7U; 1210 return DAG.getNode(RISCVISD::GREVI, DL, VT, Op.getOperand(0), 1211 DAG.getTargetConstant(Imm, DL, Subtarget.getXLenVT())); 1212 } 1213 case ISD::FSHL: 1214 case ISD::FSHR: { 1215 MVT VT = Op.getSimpleValueType(); 1216 assert(VT == Subtarget.getXLenVT() && "Unexpected custom legalization"); 1217 SDLoc DL(Op); 1218 // FSL/FSR take a log2(XLen)+1 bit shift amount but XLenVT FSHL/FSHR only 1219 // use log(XLen) bits. Mask the shift amount accordingly. 1220 unsigned ShAmtWidth = Subtarget.getXLen() - 1; 1221 SDValue ShAmt = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(2), 1222 DAG.getConstant(ShAmtWidth, DL, VT)); 1223 unsigned Opc = Op.getOpcode() == ISD::FSHL ? RISCVISD::FSL : RISCVISD::FSR; 1224 return DAG.getNode(Opc, DL, VT, Op.getOperand(0), Op.getOperand(1), ShAmt); 1225 } 1226 case ISD::TRUNCATE: { 1227 SDLoc DL(Op); 1228 MVT VT = Op.getSimpleValueType(); 1229 // Only custom-lower vector truncates 1230 if (!VT.isVector()) 1231 return Op; 1232 1233 // Truncates to mask types are handled differently 1234 if (VT.getVectorElementType() == MVT::i1) 1235 return lowerVectorMaskTrunc(Op, DAG); 1236 1237 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary 1238 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which 1239 // truncate by one power of two at a time. 
1240 MVT DstEltVT = VT.getVectorElementType(); 1241 1242 SDValue Src = Op.getOperand(0); 1243 MVT SrcVT = Src.getSimpleValueType(); 1244 MVT SrcEltVT = SrcVT.getVectorElementType(); 1245 1246 assert(DstEltVT.bitsLT(SrcEltVT) && 1247 isPowerOf2_64(DstEltVT.getSizeInBits()) && 1248 isPowerOf2_64(SrcEltVT.getSizeInBits()) && 1249 "Unexpected vector truncate lowering"); 1250 1251 MVT ContainerVT = SrcVT; 1252 if (SrcVT.isFixedLengthVector()) { 1253 ContainerVT = RISCVTargetLowering::getContainerForFixedLengthVector( 1254 DAG, SrcVT, Subtarget); 1255 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); 1256 } 1257 1258 SDValue Result = Src; 1259 SDValue Mask, VL; 1260 std::tie(Mask, VL) = 1261 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget); 1262 LLVMContext &Context = *DAG.getContext(); 1263 const ElementCount Count = ContainerVT.getVectorElementCount(); 1264 do { 1265 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2); 1266 EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count); 1267 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result, 1268 Mask, VL); 1269 } while (SrcEltVT != DstEltVT); 1270 1271 if (SrcVT.isFixedLengthVector()) 1272 Result = convertFromScalableVector(VT, Result, DAG, Subtarget); 1273 1274 return Result; 1275 } 1276 case ISD::ANY_EXTEND: 1277 case ISD::ZERO_EXTEND: 1278 if (Op.getOperand(0).getValueType().isVector() && 1279 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1) 1280 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1); 1281 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL); 1282 case ISD::SIGN_EXTEND: 1283 if (Op.getOperand(0).getValueType().isVector() && 1284 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1) 1285 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1); 1286 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL); 1287 case ISD::SPLAT_VECTOR: 1288 return lowerSPLATVECTOR(Op, DAG); 1289 case ISD::INSERT_VECTOR_ELT: 
1290 return lowerINSERT_VECTOR_ELT(Op, DAG); 1291 case ISD::EXTRACT_VECTOR_ELT: 1292 return lowerEXTRACT_VECTOR_ELT(Op, DAG); 1293 case ISD::VSCALE: { 1294 MVT VT = Op.getSimpleValueType(); 1295 SDLoc DL(Op); 1296 SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT); 1297 // We define our scalable vector types for lmul=1 to use a 64 bit known 1298 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate 1299 // vscale as VLENB / 8. 1300 SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB, 1301 DAG.getConstant(3, DL, VT)); 1302 return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0)); 1303 } 1304 case ISD::FP_EXTEND: { 1305 // RVV can only do fp_extend to types double the size as the source. We 1306 // custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going 1307 // via f32. 1308 SDLoc DL(Op); 1309 MVT VT = Op.getSimpleValueType(); 1310 SDValue Src = Op.getOperand(0); 1311 MVT SrcVT = Src.getSimpleValueType(); 1312 1313 // Prepare any fixed-length vector operands. 1314 MVT ContainerVT = VT; 1315 if (SrcVT.isFixedLengthVector()) { 1316 ContainerVT = RISCVTargetLowering::getContainerForFixedLengthVector( 1317 DAG, VT, Subtarget); 1318 MVT SrcContainerVT = 1319 ContainerVT.changeVectorElementType(SrcVT.getVectorElementType()); 1320 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); 1321 } 1322 1323 if (!VT.isVector() || VT.getVectorElementType() != MVT::f64 || 1324 SrcVT.getVectorElementType() != MVT::f16) { 1325 // For scalable vectors, we only need to close the gap between 1326 // vXf16->vXf64. 1327 if (!VT.isFixedLengthVector()) 1328 return Op; 1329 // For fixed-length vectors, lower the FP_EXTEND to a custom "VL" version. 
1330 Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget); 1331 return convertFromScalableVector(VT, Src, DAG, Subtarget); 1332 } 1333 1334 MVT InterVT = VT.changeVectorElementType(MVT::f32); 1335 MVT InterContainerVT = ContainerVT.changeVectorElementType(MVT::f32); 1336 SDValue IntermediateExtend = getRVVFPExtendOrRound( 1337 Src, InterVT, InterContainerVT, DL, DAG, Subtarget); 1338 1339 SDValue Extend = getRVVFPExtendOrRound(IntermediateExtend, VT, ContainerVT, 1340 DL, DAG, Subtarget); 1341 if (VT.isFixedLengthVector()) 1342 return convertFromScalableVector(VT, Extend, DAG, Subtarget); 1343 return Extend; 1344 } 1345 case ISD::FP_ROUND: { 1346 // RVV can only do fp_round to types half the size as the source. We 1347 // custom-lower f64->f16 rounds via RVV's round-to-odd float 1348 // conversion instruction. 1349 SDLoc DL(Op); 1350 MVT VT = Op.getSimpleValueType(); 1351 SDValue Src = Op.getOperand(0); 1352 MVT SrcVT = Src.getSimpleValueType(); 1353 1354 // Prepare any fixed-length vector operands. 1355 MVT ContainerVT = VT; 1356 if (VT.isFixedLengthVector()) { 1357 MVT SrcContainerVT = 1358 RISCVTargetLowering::getContainerForFixedLengthVector(DAG, SrcVT, 1359 Subtarget); 1360 ContainerVT = 1361 SrcContainerVT.changeVectorElementType(VT.getVectorElementType()); 1362 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); 1363 } 1364 1365 if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 || 1366 SrcVT.getVectorElementType() != MVT::f64) { 1367 // For scalable vectors, we only need to close the gap between 1368 // vXf64<->vXf16. 1369 if (!VT.isFixedLengthVector()) 1370 return Op; 1371 // For fixed-length vectors, lower the FP_ROUND to a custom "VL" version. 
1372 Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget); 1373 return convertFromScalableVector(VT, Src, DAG, Subtarget); 1374 } 1375 1376 SDValue Mask, VL; 1377 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 1378 1379 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32); 1380 SDValue IntermediateRound = 1381 DAG.getNode(RISCVISD::VFNCVT_ROD_VL, DL, InterVT, Src, Mask, VL); 1382 SDValue Round = getRVVFPExtendOrRound(IntermediateRound, VT, ContainerVT, 1383 DL, DAG, Subtarget); 1384 1385 if (VT.isFixedLengthVector()) 1386 return convertFromScalableVector(VT, Round, DAG, Subtarget); 1387 return Round; 1388 } 1389 case ISD::FP_TO_SINT: 1390 case ISD::FP_TO_UINT: 1391 case ISD::SINT_TO_FP: 1392 case ISD::UINT_TO_FP: { 1393 // RVV can only do fp<->int conversions to types half/double the size as 1394 // the source. We custom-lower any conversions that do two hops into 1395 // sequences. 1396 MVT VT = Op.getSimpleValueType(); 1397 if (!VT.isVector()) 1398 return Op; 1399 SDLoc DL(Op); 1400 SDValue Src = Op.getOperand(0); 1401 MVT EltVT = VT.getVectorElementType(); 1402 MVT SrcVT = Src.getSimpleValueType(); 1403 MVT SrcEltVT = SrcVT.getVectorElementType(); 1404 unsigned EltSize = EltVT.getSizeInBits(); 1405 unsigned SrcEltSize = SrcEltVT.getSizeInBits(); 1406 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) && 1407 "Unexpected vector element types"); 1408 1409 bool IsInt2FP = SrcEltVT.isInteger(); 1410 // Widening conversions 1411 if (EltSize > SrcEltSize && (EltSize / SrcEltSize >= 4)) { 1412 if (IsInt2FP) { 1413 // Do a regular integer sign/zero extension then convert to float. 1414 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltVT.getSizeInBits()), 1415 VT.getVectorElementCount()); 1416 unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP 1417 ? 
ISD::ZERO_EXTEND 1418 : ISD::SIGN_EXTEND; 1419 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src); 1420 return DAG.getNode(Op.getOpcode(), DL, VT, Ext); 1421 } 1422 // FP2Int 1423 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering"); 1424 // Do one doubling fp_extend then complete the operation by converting 1425 // to int. 1426 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 1427 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT); 1428 return DAG.getNode(Op.getOpcode(), DL, VT, FExt); 1429 } 1430 1431 // Narrowing conversions 1432 if (SrcEltSize > EltSize && (SrcEltSize / EltSize >= 4)) { 1433 if (IsInt2FP) { 1434 // One narrowing int_to_fp, then an fp_round. 1435 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering"); 1436 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 1437 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src); 1438 return DAG.getFPExtendOrRound(Int2FP, DL, VT); 1439 } 1440 // FP2Int 1441 // One narrowing fp_to_int, then truncate the integer. If the float isn't 1442 // representable by the integer, the result is poison. 1443 MVT IVecVT = 1444 MVT::getVectorVT(MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2), 1445 VT.getVectorElementCount()); 1446 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src); 1447 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int); 1448 } 1449 1450 // Scalable vectors can exit here. Patterns will handle equally-sized 1451 // conversions halving/doubling ones. 1452 if (!VT.isFixedLengthVector()) 1453 return Op; 1454 1455 // For fixed-length vectors we lower to a custom "VL" node. 
1456 unsigned RVVOpc = 0; 1457 switch (Op.getOpcode()) { 1458 default: 1459 llvm_unreachable("Impossible opcode"); 1460 case ISD::FP_TO_SINT: 1461 RVVOpc = RISCVISD::FP_TO_SINT_VL; 1462 break; 1463 case ISD::FP_TO_UINT: 1464 RVVOpc = RISCVISD::FP_TO_UINT_VL; 1465 break; 1466 case ISD::SINT_TO_FP: 1467 RVVOpc = RISCVISD::SINT_TO_FP_VL; 1468 break; 1469 case ISD::UINT_TO_FP: 1470 RVVOpc = RISCVISD::UINT_TO_FP_VL; 1471 break; 1472 } 1473 1474 MVT ContainerVT, SrcContainerVT; 1475 // Derive the reference container type from the larger vector type. 1476 if (SrcEltSize > EltSize) { 1477 SrcContainerVT = RISCVTargetLowering::getContainerForFixedLengthVector( 1478 DAG, SrcVT, Subtarget); 1479 ContainerVT = 1480 SrcContainerVT.changeVectorElementType(VT.getVectorElementType()); 1481 } else { 1482 ContainerVT = RISCVTargetLowering::getContainerForFixedLengthVector( 1483 DAG, VT, Subtarget); 1484 SrcContainerVT = ContainerVT.changeVectorElementType(SrcEltVT); 1485 } 1486 1487 SDValue Mask, VL; 1488 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 1489 1490 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); 1491 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL); 1492 return convertFromScalableVector(VT, Src, DAG, Subtarget); 1493 } 1494 case ISD::VECREDUCE_ADD: 1495 case ISD::VECREDUCE_UMAX: 1496 case ISD::VECREDUCE_SMAX: 1497 case ISD::VECREDUCE_UMIN: 1498 case ISD::VECREDUCE_SMIN: 1499 case ISD::VECREDUCE_AND: 1500 case ISD::VECREDUCE_OR: 1501 case ISD::VECREDUCE_XOR: 1502 return lowerVECREDUCE(Op, DAG); 1503 case ISD::VECREDUCE_FADD: 1504 case ISD::VECREDUCE_SEQ_FADD: 1505 return lowerFPVECREDUCE(Op, DAG); 1506 case ISD::INSERT_SUBVECTOR: 1507 return lowerINSERT_SUBVECTOR(Op, DAG); 1508 case ISD::EXTRACT_SUBVECTOR: 1509 return lowerEXTRACT_SUBVECTOR(Op, DAG); 1510 case ISD::BUILD_VECTOR: 1511 return lowerBUILD_VECTOR(Op, DAG, Subtarget); 1512 case ISD::VECTOR_SHUFFLE: 1513 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget); 
1514 case ISD::CONCAT_VECTORS: { 1515 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is 1516 // better than going through the stack, as the default expansion does. 1517 SDLoc DL(Op); 1518 MVT VT = Op.getSimpleValueType(); 1519 assert(VT.isFixedLengthVector() && "Unexpected CONCAT_VECTORS lowering"); 1520 unsigned NumOpElts = 1521 Op.getOperand(0).getSimpleValueType().getVectorNumElements(); 1522 SDValue Vec = DAG.getUNDEF(VT); 1523 for (const auto &OpIdx : enumerate(Op->ops())) 1524 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, OpIdx.value(), 1525 DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL)); 1526 return Vec; 1527 } 1528 case ISD::LOAD: 1529 return lowerFixedLengthVectorLoadToRVV(Op, DAG); 1530 case ISD::STORE: 1531 return lowerFixedLengthVectorStoreToRVV(Op, DAG); 1532 case ISD::SETCC: 1533 return lowerFixedLengthVectorSetccToRVV(Op, DAG); 1534 case ISD::ADD: 1535 return lowerToScalableOp(Op, DAG, RISCVISD::ADD_VL); 1536 case ISD::SUB: 1537 return lowerToScalableOp(Op, DAG, RISCVISD::SUB_VL); 1538 case ISD::MUL: 1539 return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL); 1540 case ISD::MULHS: 1541 return lowerToScalableOp(Op, DAG, RISCVISD::MULHS_VL); 1542 case ISD::MULHU: 1543 return lowerToScalableOp(Op, DAG, RISCVISD::MULHU_VL); 1544 case ISD::AND: 1545 return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMAND_VL, 1546 RISCVISD::AND_VL); 1547 case ISD::OR: 1548 return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMOR_VL, 1549 RISCVISD::OR_VL); 1550 case ISD::XOR: 1551 return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMXOR_VL, 1552 RISCVISD::XOR_VL); 1553 case ISD::SDIV: 1554 return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL); 1555 case ISD::SREM: 1556 return lowerToScalableOp(Op, DAG, RISCVISD::SREM_VL); 1557 case ISD::UDIV: 1558 return lowerToScalableOp(Op, DAG, RISCVISD::UDIV_VL); 1559 case ISD::UREM: 1560 return lowerToScalableOp(Op, DAG, RISCVISD::UREM_VL); 1561 case ISD::SHL: 1562 return 
lowerToScalableOp(Op, DAG, RISCVISD::SHL_VL); 1563 case ISD::SRA: 1564 return lowerToScalableOp(Op, DAG, RISCVISD::SRA_VL); 1565 case ISD::SRL: 1566 return lowerToScalableOp(Op, DAG, RISCVISD::SRL_VL); 1567 case ISD::FADD: 1568 return lowerToScalableOp(Op, DAG, RISCVISD::FADD_VL); 1569 case ISD::FSUB: 1570 return lowerToScalableOp(Op, DAG, RISCVISD::FSUB_VL); 1571 case ISD::FMUL: 1572 return lowerToScalableOp(Op, DAG, RISCVISD::FMUL_VL); 1573 case ISD::FDIV: 1574 return lowerToScalableOp(Op, DAG, RISCVISD::FDIV_VL); 1575 case ISD::FNEG: 1576 return lowerToScalableOp(Op, DAG, RISCVISD::FNEG_VL); 1577 case ISD::FABS: 1578 return lowerToScalableOp(Op, DAG, RISCVISD::FABS_VL); 1579 case ISD::FSQRT: 1580 return lowerToScalableOp(Op, DAG, RISCVISD::FSQRT_VL); 1581 case ISD::FMA: 1582 return lowerToScalableOp(Op, DAG, RISCVISD::FMA_VL); 1583 case ISD::SMIN: 1584 return lowerToScalableOp(Op, DAG, RISCVISD::SMIN_VL); 1585 case ISD::SMAX: 1586 return lowerToScalableOp(Op, DAG, RISCVISD::SMAX_VL); 1587 case ISD::UMIN: 1588 return lowerToScalableOp(Op, DAG, RISCVISD::UMIN_VL); 1589 case ISD::UMAX: 1590 return lowerToScalableOp(Op, DAG, RISCVISD::UMAX_VL); 1591 case ISD::VSELECT: 1592 return lowerFixedLengthVectorSelectToRVV(Op, DAG); 1593 } 1594 } 1595 1596 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty, 1597 SelectionDAG &DAG, unsigned Flags) { 1598 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags); 1599 } 1600 1601 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty, 1602 SelectionDAG &DAG, unsigned Flags) { 1603 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(), 1604 Flags); 1605 } 1606 1607 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty, 1608 SelectionDAG &DAG, unsigned Flags) { 1609 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(), 1610 N->getOffset(), Flags); 1611 } 1612 1613 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty, 1614 
SelectionDAG &DAG, unsigned Flags) { 1615 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags); 1616 } 1617 1618 template <class NodeTy> 1619 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, 1620 bool IsLocal) const { 1621 SDLoc DL(N); 1622 EVT Ty = getPointerTy(DAG.getDataLayout()); 1623 1624 if (isPositionIndependent()) { 1625 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); 1626 if (IsLocal) 1627 // Use PC-relative addressing to access the symbol. This generates the 1628 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym)) 1629 // %pcrel_lo(auipc)). 1630 return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0); 1631 1632 // Use PC-relative addressing to access the GOT for this symbol, then load 1633 // the address from the GOT. This generates the pattern (PseudoLA sym), 1634 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))). 1635 return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0); 1636 } 1637 1638 switch (getTargetMachine().getCodeModel()) { 1639 default: 1640 report_fatal_error("Unsupported code model for lowering"); 1641 case CodeModel::Small: { 1642 // Generate a sequence for accessing addresses within the first 2 GiB of 1643 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)). 1644 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI); 1645 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO); 1646 SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0); 1647 return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0); 1648 } 1649 case CodeModel::Medium: { 1650 // Generate a sequence for accessing addresses within any 2GiB range within 1651 // the address space. This generates the pattern (PseudoLLA sym), which 1652 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)). 
1653 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); 1654 return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0); 1655 } 1656 } 1657 } 1658 1659 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op, 1660 SelectionDAG &DAG) const { 1661 SDLoc DL(Op); 1662 EVT Ty = Op.getValueType(); 1663 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 1664 int64_t Offset = N->getOffset(); 1665 MVT XLenVT = Subtarget.getXLenVT(); 1666 1667 const GlobalValue *GV = N->getGlobal(); 1668 bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV); 1669 SDValue Addr = getAddr(N, DAG, IsLocal); 1670 1671 // In order to maximise the opportunity for common subexpression elimination, 1672 // emit a separate ADD node for the global address offset instead of folding 1673 // it in the global address node. Later peephole optimisations may choose to 1674 // fold it back in when profitable. 1675 if (Offset != 0) 1676 return DAG.getNode(ISD::ADD, DL, Ty, Addr, 1677 DAG.getConstant(Offset, DL, XLenVT)); 1678 return Addr; 1679 } 1680 1681 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op, 1682 SelectionDAG &DAG) const { 1683 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op); 1684 1685 return getAddr(N, DAG); 1686 } 1687 1688 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op, 1689 SelectionDAG &DAG) const { 1690 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); 1691 1692 return getAddr(N, DAG); 1693 } 1694 1695 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op, 1696 SelectionDAG &DAG) const { 1697 JumpTableSDNode *N = cast<JumpTableSDNode>(Op); 1698 1699 return getAddr(N, DAG); 1700 } 1701 1702 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, 1703 SelectionDAG &DAG, 1704 bool UseGOT) const { 1705 SDLoc DL(N); 1706 EVT Ty = getPointerTy(DAG.getDataLayout()); 1707 const GlobalValue *GV = N->getGlobal(); 1708 MVT XLenVT = Subtarget.getXLenVT(); 1709 1710 if (UseGOT) { 1711 // Use PC-relative addressing to 
    // access the GOT for this TLS symbol, then
    // load the address from the GOT and add the thread pointer. This generates
    // the pattern (PseudoLA_TLS_IE sym), which expands to
    // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
    SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
    SDValue Load =
        SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);

    // Add the thread pointer (register x4/tp).
    SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
    return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
  }

  // Generate a sequence for accessing the address relative to the thread
  // pointer, with the appropriate adjustment for the thread pointer offset.
  // This generates the pattern
  // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
  SDValue AddrHi =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
  SDValue AddrAdd =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
  SDValue AddrLo =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);

  SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
  SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
  SDValue MNAdd = SDValue(
      DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd),
      0);
  return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0);
}

// Lower a TLS global address using the general/local-dynamic model: compute
// the GD GOT address and call __tls_get_addr to resolve it at runtime.
SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
                                               SelectionDAG &DAG) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  // The libcall works on an integer of pointer width.
  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
  const GlobalValue *GV = N->getGlobal();

  // Use a PC-relative addressing mode to access the global dynamic GOT address.
  // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
  // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
  SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
  SDValue Load =
      SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);

  // Prepare argument list to generate call.
  ArgListTy Args;
  ArgListEntry Entry;
  Entry.Node = Load;
  Entry.Ty = CallTy;
  Args.push_back(Entry);

  // Setup call to __tls_get_addr.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CallingConv::C, CallTy,
                    DAG.getExternalSymbol("__tls_get_addr", Ty),
                    std::move(Args));

  return LowerCallTo(CLI).first;
}

// Lower an ISD::GlobalTLSAddress by dispatching on the TLS model chosen by
// the target machine. A non-zero offset is added separately afterwards.
SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT Ty = Op.getValueType();
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  int64_t Offset = N->getOffset();
  MVT XLenVT = Subtarget.getXLenVT();

  TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());

  // The GHC calling convention reserves registers in a way that is
  // incompatible with TLS access sequences here, so reject it outright.
  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
      CallingConv::GHC)
    report_fatal_error("In GHC calling convention TLS is not supported");

  SDValue Addr;
  switch (Model) {
  case TLSModel::LocalExec:
    Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
    break;
  case TLSModel::InitialExec:
    Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
    break;
  case TLSModel::LocalDynamic:
  case TLSModel::GeneralDynamic:
    Addr = getDynamicTLSAddr(N, DAG);
    break;
  }

  // In order to maximise the opportunity for common subexpression elimination,
  // emit a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
  if (Offset != 0)
    return DAG.getNode(ISD::ADD, DL, Ty, Addr,
                       DAG.getConstant(Offset, DL, XLenVT));
  return Addr;
}

// Lower ISD::SELECT to the custom RISCVISD::SELECT_CC node, which carries the
// comparison operands alongside the selected values so it can be matched to
// the integer compare+branch instructions.
SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  SDValue CondV = Op.getOperand(0);
  SDValue TrueV = Op.getOperand(1);
  SDValue FalseV = Op.getOperand(2);
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();

  // If the result type is XLenVT and CondV is the output of a SETCC node
  // which also operated on XLenVT inputs, then merge the SETCC node into the
  // lowered RISCVISD::SELECT_CC to take advantage of the integer
  // compare+branch instructions. i.e.:
  // (select (setcc lhs, rhs, cc), truev, falsev)
  // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
  if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC &&
      CondV.getOperand(0).getSimpleValueType() == XLenVT) {
    SDValue LHS = CondV.getOperand(0);
    SDValue RHS = CondV.getOperand(1);
    auto CC = cast<CondCodeSDNode>(CondV.getOperand(2));
    ISD::CondCode CCVal = CC->get();

    // Canonicalise the condition code into a form the branch patterns expect.
    normaliseSetCC(LHS, RHS, CCVal);

    SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT);
    SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
    return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
  }

  // Otherwise:
  // (select condv, truev, falsev)
  // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
  SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT);

  SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};

  return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
}

// Lower ISD::VASTART: store the address of the varargs save area into the
// pointer operand of the va_start call.
SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();

  SDLoc DL(Op);
  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                 getPointerTy(MF.getDataLayout()));

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
                      MachinePointerInfo(SV));
}

// Lower ISD::FRAMEADDR. For Depth > 0 this walks the frame chain by loading
// at FrameAddr - 2*XLEN each step (presumably where this backend's frame
// records keep the previous frame pointer — confirm against frame lowering).
SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
                                            SelectionDAG &DAG) const {
  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setFrameAddressIsTaken(true);
  Register FrameReg = RI.getFrameRegister(MF);
  int XLenInBytes = Subtarget.getXLen() / 8;

  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  while (Depth--) {
    int Offset = -(XLenInBytes * 2);
    SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
                              DAG.getIntPtrConstant(Offset, DL));
    FrameAddr =
        DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
  }
  return FrameAddr;
}

// Lower ISD::RETURNADDR. Depth 0 reads the return-address register directly;
// deeper frames load from FrameAddr - XLEN of the corresponding frame.
SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
                                             SelectionDAG &DAG) const {
  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);
  MVT XLenVT = Subtarget.getXLenVT();
  int XLenInBytes = Subtarget.getXLen() / 8;

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  if (Depth) {
    int Off = -XLenInBytes;
    SDValue
        FrameAddr = lowerFRAMEADDR(Op, DAG);
    SDValue Offset = DAG.getConstant(Off, DL, VT);
    return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
                       MachinePointerInfo());
  }

  // Return the value of the return address register, marking it an implicit
  // live-in.
  Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
}

// Expand a 2*XLEN-wide funnel left shift (SHL_PARTS) into XLEN-wide ops,
// selecting between the Shamt < XLEN and Shamt >= XLEN cases at runtime.
SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shamt = Op.getOperand(2);
  EVT VT = Lo.getValueType();

  // if Shamt-XLEN < 0: // Shamt < XLEN
  //   Lo = Lo << Shamt
  //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
  // else:
  //   Lo = 0
  //   Hi = Lo << (Shamt-XLEN)
  // Note the two-step right shift of Lo ((Lo >>u 1) >>u (XLEN-1 - Shamt)):
  // a single shift by (XLEN - Shamt) would be poison when Shamt == 0.

  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
  SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
  SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);

  SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
  SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
  SDValue ShiftRightLo =
      DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
  SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
  SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
  SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);

  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}

// Expand a 2*XLEN-wide right shift (SRA_PARTS/SRL_PARTS) into XLEN-wide ops;
// IsSRA selects arithmetic vs. logical semantics for the high half.
SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
                                                  bool IsSRA) const {
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shamt = Op.getOperand(2);
  EVT VT = Lo.getValueType();

  // SRA expansion:
  //   if Shamt-XLEN < 0: // Shamt < XLEN
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
  //     Hi = Hi >>s Shamt
  //   else:
  //     Lo = Hi >>s (Shamt-XLEN);
  //     Hi = Hi >>s (XLEN-1)
  //
  // SRL expansion:
  //   if Shamt-XLEN < 0: // Shamt < XLEN
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
  //     Hi = Hi >>u Shamt
  //   else:
  //     Lo = Hi >>u (Shamt-XLEN);
  //     Hi = 0;

  unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;

  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
  SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
  SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);

  SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
  // Two-step left shift of Hi avoids a poison shift-by-XLEN when Shamt == 0.
  SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
  SDValue ShiftLeftHi =
      DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
  SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
  SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
  SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
  SDValue HiFalse =
      IsSRA ?
              DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;

  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}

// Custom-lower a SPLAT_VECTOR where XLEN<SEW, as the SEW element type is
// illegal (currently only vXi64 RV32).
// FIXME: We could also catch non-constant sign-extended i32 values and lower
// them to SPLAT_VECTOR_I64
SDValue RISCVTargetLowering::lowerSPLATVECTOR(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VecVT = Op.getValueType();
  assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
         "Unexpected SPLAT_VECTOR lowering");
  SDValue SplatVal = Op.getOperand(0);

  // If we can prove that the value is a sign-extended 32-bit value, lower this
  // as a custom node in order to try and match RVV vector/scalar instructions.
  if (auto *CVal = dyn_cast<ConstantSDNode>(SplatVal)) {
    if (isInt<32>(CVal->getSExtValue()))
      return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT,
                         DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32));
  }

  // A value that is explicitly sign-extended from i32 also qualifies.
  if (SplatVal.getOpcode() == ISD::SIGN_EXTEND &&
      SplatVal.getOperand(0).getValueType() == MVT::i32) {
    return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT,
                       SplatVal.getOperand(0));
  }

  // Else, on RV32 we lower an i64-element SPLAT_VECTOR thus, being careful not
  // to accidentally sign-extend the 32-bit halves to the e64 SEW:
  // vmv.v.x vX, hi
  // vsll.vx vX, vX, /*32*/
  // vmv.v.x vY, lo
  // vsll.vx vY, vY, /*32*/
  // vsrl.vx vY, vY, /*32*/
  // vor.vv vX, vX, vY
  SDValue One = DAG.getConstant(1, DL, MVT::i32);
  SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
  SDValue ThirtyTwoV = DAG.getConstant(32, DL, VecVT);
  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, SplatVal, Zero);
  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, SplatVal, One);

  // The shl/srl pair zero-extends the low half within the e64 element.
  Lo = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
  Lo = DAG.getNode(ISD::SHL, DL, VecVT, Lo, ThirtyTwoV);
  Lo = DAG.getNode(ISD::SRL, DL, VecVT, Lo, ThirtyTwoV);

  // If the high half is zero, the zero-extended low half is the full splat.
  if (isNullConstant(Hi))
    return Lo;

  Hi = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Hi);
  Hi = DAG.getNode(ISD::SHL, DL, VecVT, Hi, ThirtyTwoV);

  return DAG.getNode(ISD::OR, DL, VecVT, Lo, Hi);
}

// Custom-lower extensions from mask vectors by using a vselect either with 1
// for zero/any-extension or -1 for sign-extension:
// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
// Note that any-extension is lowered identically to zero-extension.
SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
                                                int64_t ExtTrueVal) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  SDValue Src = Op.getOperand(0);
  // Only custom-lower extensions from mask types
  assert(Src.getValueType().isVector() &&
         Src.getValueType().getVectorElementType() == MVT::i1);

  MVT XLenVT = Subtarget.getXLenVT();
  // Scalar values selected for true/false lanes; splatted below.
  SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
  SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);

  if (VecVT.isScalableVector()) {
    // Be careful not to introduce illegal scalar types at this stage, and be
    // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is
    // illegal and must be expanded. Since we know that the constants are
    // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly.
    bool IsRV32E64 =
        !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64;

    if (!IsRV32E64) {
      SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero);
      SplatTrueVal = DAG.getSplatVector(VecVT, DL, SplatTrueVal);
    } else {
      SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero);
      SplatTrueVal =
          DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatTrueVal);
    }

    return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
  }

  // Fixed-length vectors are handled via their scalable container type,
  // using the VL-carrying RISCVISD nodes.
  MVT ContainerVT = RISCVTargetLowering::getContainerForFixedLengthVector(
      DAG, VecVT, Subtarget);
  MVT I1ContainerVT =
      MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());

  SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero, VL);
  SplatTrueVal =
      DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatTrueVal, VL);
  SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC,
                               SplatTrueVal, SplatZero, VL);

  return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
}

// Custom-lower an extension of a fixed-length vector by performing the
// extension on the equivalent scalable container type.
SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
    SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
  MVT ExtVT = Op.getSimpleValueType();
  // Only custom-lower extensions from fixed-length vector types.
  if (!ExtVT.isFixedLengthVector())
    return Op;
  MVT VT = Op.getOperand(0).getSimpleValueType();
  // Grab the canonical container type for the extended type. Infer the smaller
  // type from that to ensure the same number of vector elements, as we know
  // the LMUL will be sufficient to hold the smaller type.
  MVT ContainerExtVT = RISCVTargetLowering::getContainerForFixedLengthVector(
      DAG, ExtVT, Subtarget);
  // Get the extended container type manually to ensure the same number of
  // vector elements between source and dest.
  MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
                                     ContainerExtVT.getVectorElementCount());

  SDValue Op1 =
      convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);

  SDLoc DL(Op);
  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);

  return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
}

// Custom-lower truncations from vectors to mask vectors by using a mask and a
// setcc operation:
// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT MaskVT = Op.getValueType();
  // Only expect to custom-lower truncations to mask types
  assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
         "Unexpected type for vector mask lowering");
  SDValue Src = Op.getOperand(0);
  MVT VecVT = Src.getSimpleValueType();

  // If this is a fixed vector, we need to convert it to a scalable vector.
  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
    Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
  }

  SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
  SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());

  SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatOne);
  SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero);

  // Scalable vectors can use generic AND/SETCC nodes directly.
  if (VecVT.isScalableVector()) {
    SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne);
    return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE);
  }

  // Fixed-length vectors go through the VL-predicated RISCVISD nodes on the
  // container type.
  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
  SDValue Trunc =
      DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne, Mask, VL);
  Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, Trunc, SplatZero,
                      DAG.getCondCode(ISD::SETNE), Mask, VL);
  return convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
}

// Custom-lower INSERT_VECTOR_ELT. The strategy differs depending on whether
// the scalar element fits in a GPR (XLEN>=SEW) or not (vXi64 on RV32).
SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  SDValue Vec = Op.getOperand(0);
  SDValue Val = Op.getOperand(1);
  SDValue Idx = Op.getOperand(2);

  MVT ContainerVT = VecVT;
  // If the operand is a fixed-length vector, convert to a scalable one.
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  // Custom-legalize INSERT_VECTOR_ELT where XLEN>=SEW, so that the vector is
  // first slid down into position, the value is inserted into the first
  // position, and the vector is slid back up. We do this to simplify patterns.
  //   (slideup vec, (insertelt (slidedown impdef, vec, idx), val, 0), idx),
  if (Subtarget.is64Bit() || Val.getValueType() != MVT::i64) {
    // Index 0 needs no sliding at all.
    if (isNullConstant(Idx))
      return DAG.getNode(RISCVISD::VMV_S_XF_VL, DL, ContainerVT, Vec, Val, VL);
    SDValue Slidedown =
        DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
                    DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
    SDValue InsertElt0 =
        DAG.getNode(RISCVISD::VMV_S_XF_VL, DL, ContainerVT, Slidedown, Val, VL);
    return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec, InsertElt0,
                       Idx, Mask, VL);
  }

  // Custom-legalize INSERT_VECTOR_ELT where XLEN<SEW, as the SEW element type
  // is illegal (currently only vXi64 RV32).
  // Since there is no easy way of getting a single element into a vector when
  // XLEN<SEW, we lower the operation to the following sequence:
  //   splat      vVal, rVal
  //   vid.v      vVid
  //   vmseq.vx   mMask, vVid, rIdx
  //   vmerge.vvm vDest, vSrc, vVal, mMask
  // This essentially merges the original vector with the inserted element by
  // using a mask whose only set bit is that corresponding to the insert
  // index.
  SDValue SplattedVal = DAG.getSplatVector(ContainerVT, DL, Val);
  SDValue SplattedIdx =
      DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, Idx, VL);

  SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
  auto SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ContainerVT);
  SDValue SelectCond =
      DAG.getNode(RISCVISD::SETCC_VL, DL, SetCCVT, VID, SplattedIdx,
                  DAG.getCondCode(ISD::SETEQ), Mask, VL);
  SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT,
                               SelectCond, SplattedVal, Vec, VL);
  if (!VecVT.isFixedLengthVector())
    return Select;
  return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
}

// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
// types this is done using VMV_X_S to allow us to glean information about the
// sign bits of the result.
SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Idx = Op.getOperand(1);
  SDValue Vec = Op.getOperand(0);
  EVT EltVT = Op.getValueType();
  MVT VecVT = Vec.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  // If this is a fixed vector, we need to convert it to a scalable vector.
  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  // If the index is 0, the vector is already in the right position.
  if (!isNullConstant(Idx)) {
    // Use a VL of 1 to avoid processing more elements than we need.
    SDValue VL = DAG.getConstant(1, DL, XLenVT);
    MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
    SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
    Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
                      DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
  }

  if (!EltVT.isInteger()) {
    // Floating-point extracts are handled in TableGen.
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
                       DAG.getConstant(0, DL, XLenVT));
  }

  // Read element 0 into a GPR, then truncate down to the requested type.
  SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
  return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
}

// Custom lowering for chainless intrinsics. For RVV intrinsics this widens
// narrow scalar operands to XLEN before falling through to a per-intrinsic
// switch.
SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                     SelectionDAG &DAG) const {
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  SDLoc DL(Op);

  if (Subtarget.hasStdExtV()) {
    // Some RVV intrinsics may claim that they want an integer operand to be
    // extended.
    if (const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
            RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo)) {
      if (II->ExtendedOperand) {
        assert(II->ExtendedOperand < Op.getNumOperands());
        SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
        SDValue &ScalarOp = Operands[II->ExtendedOperand];
        EVT OpVT = ScalarOp.getValueType();
        if (OpVT == MVT::i8 || OpVT == MVT::i16 ||
            (OpVT == MVT::i32 && Subtarget.is64Bit())) {
          // If the operand is a constant, sign extend to increase our chances
          // of being able to use a .vi instruction. ANY_EXTEND would become a
          // a zero extend and the simm5 check in isel would fail.
          // FIXME: Should we ignore the upper bits in isel instead?
          unsigned ExtOpc = isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND
                                                          : ISD::ANY_EXTEND;
          ScalarOp = DAG.getNode(ExtOpc, DL, Subtarget.getXLenVT(), ScalarOp);
          // Re-emit the intrinsic with the widened operand; it will come back
          // through here but skip the width check next time.
          return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
                             Operands);
        }
      }
    }
  }

  switch (IntNo) {
  default:
    return SDValue(); // Don't custom lower most intrinsics.
  case Intrinsic::thread_pointer: {
    // The thread pointer lives in x4 (tp).
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    return DAG.getRegister(RISCV::X4, PtrVT);
  }
  case Intrinsic::riscv_vmv_x_s:
    assert(Op.getValueType() == Subtarget.getXLenVT() && "Unexpected VT!");
    return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
                       Op.getOperand(1));
  case Intrinsic::riscv_vmv_v_x: {
    SDValue Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(),
                                 Op.getOperand(1));
    return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, Op.getValueType(),
                       Scalar, Op.getOperand(2));
  }
  case Intrinsic::riscv_vfmv_v_f:
    return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
  }
}

// Custom lowering for chained intrinsics: same scalar-operand widening as
// LowerINTRINSIC_WO_CHAIN, but operand 0 is the chain so indices shift by one.
SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
                                                    SelectionDAG &DAG) const {
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
  SDLoc DL(Op);

  if (Subtarget.hasStdExtV()) {
    // Some RVV intrinsics may claim that they want an integer operand to be
    // extended.
    if (const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
            RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo)) {
      if (II->ExtendedOperand) {
        // The operands start from the second argument in INTRINSIC_W_CHAIN.
        unsigned ExtendOp = II->ExtendedOperand + 1;
        assert(ExtendOp < Op.getNumOperands());
        SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
        SDValue &ScalarOp = Operands[ExtendOp];
        EVT OpVT = ScalarOp.getValueType();
        if (OpVT == MVT::i8 || OpVT == MVT::i16 ||
            (OpVT == MVT::i32 && Subtarget.is64Bit())) {
          // If the operand is a constant, sign extend to increase our chances
          // of being able to use a .vi instruction. ANY_EXTEND would become a
          // a zero extend and the simm5 check in isel would fail.
          // FIXME: Should we ignore the upper bits in isel instead?
          unsigned ExtOpc = isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND
                                                          : ISD::ANY_EXTEND;
          ScalarOp = DAG.getNode(ExtOpc, DL, Subtarget.getXLenVT(), ScalarOp);
          return DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, Op->getVTList(),
                             Operands);
        }
      }
    }
  }

  return SDValue(); // Don't custom lower most intrinsics.
}

// Return the scalable vector type with the same element type as VT that
// exactly fills one vector register (LMUL=1), i.e. RVVBitsPerBlock bits.
static MVT getLMUL1VT(MVT VT) {
  assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
         "Unexpected vector MVT");
  return MVT::getScalableVectorVT(
      VT.getVectorElementType(),
      RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
}

// Map a standard ISD integer reduction opcode to the corresponding RISCVISD
// reduction opcode together with the identity value used to seed the
// accumulator (e.g. 0 for ADD/OR/XOR, all-ones for AND, INT_MIN for SMAX).
static std::pair<unsigned, uint64_t>
getRVVReductionOpAndIdentityVal(unsigned ISDOpcode, unsigned EltSizeBits) {
  switch (ISDOpcode) {
  default:
    llvm_unreachable("Unhandled reduction");
  case ISD::VECREDUCE_ADD:
    return {RISCVISD::VECREDUCE_ADD, 0};
  case ISD::VECREDUCE_UMAX:
    return {RISCVISD::VECREDUCE_UMAX, 0};
  case ISD::VECREDUCE_SMAX:
    return {RISCVISD::VECREDUCE_SMAX, minIntN(EltSizeBits)};
  case ISD::VECREDUCE_UMIN:
    return {RISCVISD::VECREDUCE_UMIN, maxUIntN(EltSizeBits)};
  case ISD::VECREDUCE_SMIN:
    return {RISCVISD::VECREDUCE_SMIN, maxIntN(EltSizeBits)};
  case ISD::VECREDUCE_AND:
    return {RISCVISD::VECREDUCE_AND, -1};
  case ISD::VECREDUCE_OR:
    return {RISCVISD::VECREDUCE_OR, 0};
  case ISD::VECREDUCE_XOR:
    return {RISCVISD::VECREDUCE_XOR, 0};
  }
}

// Take a (supported) standard ISD reduction opcode and transform it to a RISCV
// reduction opcode. Note that this returns a vector type, which must be
// further processed to access the scalar result in element 0.
SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc DL(Op);
  assert(Op.getValueType().isSimple() &&
         Op.getOperand(0).getValueType().isSimple() &&
         "Unexpected vector-reduce lowering");
  MVT VecVT = Op.getOperand(0).getSimpleValueType();
  MVT VecEltVT = VecVT.getVectorElementType();
  unsigned RVVOpcode;
  uint64_t IdentityVal;
  std::tie(RVVOpcode, IdentityVal) =
      getRVVReductionOpAndIdentityVal(Op.getOpcode(), VecEltVT.getSizeInBits());
  // The RVV reduction takes its scalar accumulator in an LMUL=1 vector.
  MVT M1VT = getLMUL1VT(VecVT);
  SDValue IdentitySplat =
      DAG.getSplatVector(M1VT, DL, DAG.getConstant(IdentityVal, DL, VecEltVT));
  SDValue Reduction =
      DAG.getNode(RVVOpcode, DL, M1VT, Op.getOperand(0), IdentitySplat);
  SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
                             DAG.getConstant(0, DL, Subtarget.getXLenVT()));
  return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType());
}

// Given a reduction op, this function returns the matching reduction opcode,
// the vector SDValue and the scalar SDValue required to lower this to a
// RISCVISD node.
2442 static std::tuple<unsigned, SDValue, SDValue> 2443 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) { 2444 SDLoc DL(Op); 2445 switch (Op.getOpcode()) { 2446 default: 2447 llvm_unreachable("Unhandled reduction"); 2448 case ISD::VECREDUCE_FADD: 2449 return std::make_tuple(RISCVISD::VECREDUCE_FADD, Op.getOperand(0), 2450 DAG.getConstantFP(0.0, DL, EltVT)); 2451 case ISD::VECREDUCE_SEQ_FADD: 2452 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD, Op.getOperand(1), 2453 Op.getOperand(0)); 2454 } 2455 } 2456 2457 SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op, 2458 SelectionDAG &DAG) const { 2459 SDLoc DL(Op); 2460 MVT VecEltVT = Op.getSimpleValueType(); 2461 2462 unsigned RVVOpcode; 2463 SDValue VectorVal, ScalarVal; 2464 std::tie(RVVOpcode, VectorVal, ScalarVal) = 2465 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT); 2466 2467 MVT M1VT = getLMUL1VT(VectorVal.getSimpleValueType()); 2468 SDValue ScalarSplat = DAG.getSplatVector(M1VT, DL, ScalarVal); 2469 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, VectorVal, ScalarSplat); 2470 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction, 2471 DAG.getConstant(0, DL, Subtarget.getXLenVT())); 2472 } 2473 2474 SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op, 2475 SelectionDAG &DAG) const { 2476 SDValue Vec = Op.getOperand(0); 2477 SDValue SubVec = Op.getOperand(1); 2478 MVT VecVT = Vec.getSimpleValueType(); 2479 MVT SubVecVT = SubVec.getSimpleValueType(); 2480 2481 SDLoc DL(Op); 2482 MVT XLenVT = Subtarget.getXLenVT(); 2483 unsigned OrigIdx = Op.getConstantOperandVal(2); 2484 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); 2485 2486 // We don't have the ability to slide mask vectors up indexed by their i1 2487 // elements; the smallest we can do is i8. Often we are able to bitcast to 2488 // equivalent i8 vectors. 
  // Note that when inserting a fixed-length vector
  // into a scalable one, we might not necessarily have enough scalable
  // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
  if (SubVecVT.getVectorElementType() == MVT::i1 &&
      (OrigIdx != 0 || !Vec.isUndef())) {
    if (VecVT.getVectorMinNumElements() >= 8 &&
        SubVecVT.getVectorMinNumElements() >= 8) {
      assert(OrigIdx % 8 == 0 && "Invalid index");
      assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
             SubVecVT.getVectorMinNumElements() % 8 == 0 &&
             "Unexpected mask vector lowering");
      // Re-express the i1 vectors as i8 vectors with 1/8th the elements and
      // scale the index down to match.
      OrigIdx /= 8;
      SubVecVT =
          MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
                           SubVecVT.isScalableVector());
      VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
                               VecVT.isScalableVector());
      Vec = DAG.getBitcast(VecVT, Vec);
      SubVec = DAG.getBitcast(SubVecVT, SubVec);
    } else {
      // We can't slide this mask vector up indexed by its i1 elements.
      // This poses a problem when we wish to insert a scalable vector which
      // can't be re-expressed as a larger type. Just choose the slow path and
      // extend to a larger type, then truncate back down.
      MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
      MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
      Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
      SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
      Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
                        Op.getOperand(2));
      // Truncate back to an i1 vector by comparing the widened result
      // against zero.
      SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
      return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
    }
  }

  // If the subvector vector is a fixed-length type, we cannot use subregister
  // manipulation to simplify the codegen; we don't know which register of a
  // LMUL group contains the specific subvector as we only know the minimum
  // register size. Therefore we must slide the vector group up the full
  // amount.
  if (SubVecVT.isFixedLengthVector()) {
    // Inserting at index 0 into an undef vector is a no-op insert.
    if (OrigIdx == 0 && Vec.isUndef())
      return Op;
    MVT ContainerVT = VecVT;
    if (VecVT.isFixedLengthVector()) {
      ContainerVT = RISCVTargetLowering::getContainerForFixedLengthVector(
          DAG, VecVT, Subtarget);
      Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
    }
    SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
                         DAG.getUNDEF(ContainerVT), SubVec,
                         DAG.getConstant(0, DL, XLenVT));
    SDValue Mask =
        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
    // Set the vector length to only the number of elements we care about. Note
    // that for slideup this includes the offset.
    SDValue VL =
        DAG.getConstant(OrigIdx + SubVecVT.getVectorNumElements(), DL, XLenVT);
    SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
    SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec,
                                  SubVec, SlideupAmt, Mask, VL);
    if (!VecVT.isFixedLengthVector())
      return Slideup;
    return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
  }

  unsigned SubRegIdx, RemIdx;
  std::tie(SubRegIdx, RemIdx) =
      RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
          VecVT, SubVecVT, OrigIdx, TRI);

  // A "partial register" subvector occupies less than one full vector
  // register (fractional LMUL).
  RISCVVLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
  bool IsSubVecPartReg = SubVecLMUL == RISCVVLMUL::LMUL_F2 ||
                         SubVecLMUL == RISCVVLMUL::LMUL_F4 ||
                         SubVecLMUL == RISCVVLMUL::LMUL_F8;

  // 1. If the Idx has been completely eliminated and this subvector's size is
  // a vector register or a multiple thereof, or the surrounding elements are
  // undef, then this is a subvector insert which naturally aligns to a vector
  // register. These can easily be handled using subregister manipulation.
  // 2. If the subvector is smaller than a vector register, then the insertion
  // must preserve the undisturbed elements of the register. We do this by
  // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
  // (which resolves to a subregister copy), performing a VSLIDEUP to place the
  // subvector within the vector register, and an INSERT_SUBVECTOR of that
  // LMUL=1 type back into the larger vector (resolving to another subregister
  // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
  // to avoid allocating a large register group to hold our subvector.
  if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
    return Op;

  // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
  // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
  // (in our case undisturbed). This means we can set up a subvector insertion
  // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
  // size of the subvector.
  MVT InterSubVT = VecVT;
  SDValue AlignedExtract = Vec;
  unsigned AlignedIdx = OrigIdx - RemIdx;
  if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
    InterSubVT = getLMUL1VT(VecVT);
    // Extract a subvector equal to the nearest full vector register type. This
    // should resolve to a EXTRACT_SUBREG instruction.
    AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
                                 DAG.getConstant(AlignedIdx, DL, XLenVT));
  }

  SDValue SlideupAmt = DAG.getConstant(RemIdx, DL, XLenVT);
  // For scalable vectors this must be further multiplied by vscale.
  SlideupAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlideupAmt);

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);

  // Construct the vector length corresponding to RemIdx + length(SubVecVT).
  VL = DAG.getConstant(SubVecVT.getVectorMinNumElements(), DL, XLenVT);
  VL = DAG.getNode(ISD::VSCALE, DL, XLenVT, VL);
  VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);

  SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
                       DAG.getUNDEF(InterSubVT), SubVec,
                       DAG.getConstant(0, DL, XLenVT));

  SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, InterSubVT,
                                AlignedExtract, SubVec, SlideupAmt, Mask, VL);

  // If required, insert this subvector back into the correct vector register.
  // This should resolve to an INSERT_SUBREG instruction.
  if (VecVT.bitsGT(InterSubVT))
    Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, Slideup,
                          DAG.getConstant(AlignedIdx, DL, XLenVT));

  // We might have bitcast from a mask type: cast back to the original type if
  // required.
  return DAG.getBitcast(Op.getSimpleValueType(), Slideup);
}

// Lower EXTRACT_SUBVECTOR via subregister manipulation where the extraction
// aligns to a vector register, otherwise via a VSLIDEDOWN.
SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDValue Vec = Op.getOperand(0);
  MVT SubVecVT = Op.getSimpleValueType();
  MVT VecVT = Vec.getSimpleValueType();

  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned OrigIdx = Op.getConstantOperandVal(1);
  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();

  // We don't have the ability to slide mask vectors down indexed by their i1
  // elements; the smallest we can do is i8. Often we are able to bitcast to
  // equivalent i8 vectors. Note that when extracting a fixed-length vector
  // from a scalable one, we might not necessarily have enough scalable
  // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
  if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
    if (VecVT.getVectorMinNumElements() >= 8 &&
        SubVecVT.getVectorMinNumElements() >= 8) {
      assert(OrigIdx % 8 == 0 && "Invalid index");
      assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
             SubVecVT.getVectorMinNumElements() % 8 == 0 &&
             "Unexpected mask vector lowering");
      // Re-express the i1 vectors as i8 vectors with 1/8th the elements and
      // scale the index down to match.
      OrigIdx /= 8;
      SubVecVT =
          MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
                           SubVecVT.isScalableVector());
      VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
                               VecVT.isScalableVector());
      Vec = DAG.getBitcast(VecVT, Vec);
    } else {
      // We can't slide this mask vector down, indexed by its i1 elements.
      // This poses a problem when we wish to extract a scalable vector which
      // can't be re-expressed as a larger type. Just choose the slow path and
      // extend to a larger type, then truncate back down.
      // TODO: We could probably improve this when extracting certain fixed
      // from fixed, where we can extract as i8 and shift the correct element
      // right to reach the desired subvector?
      MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
      MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
      Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
                        Op.getOperand(1));
      // Truncate back to an i1 vector by comparing the widened result
      // against zero.
      SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
      return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
    }
  }

  // If the subvector vector is a fixed-length type, we cannot use subregister
  // manipulation to simplify the codegen; we don't know which register of a
  // LMUL group contains the specific subvector as we only know the minimum
  // register size. Therefore we must slide the vector group down the full
  // amount.
  return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
}

// Lower a fixed-length vector load to the VL-carrying RVV load node (VLE_VL),
// operating on the scalable "container" type with VL set to the fixed element
// count.
SDValue
RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
                                                     SelectionDAG &DAG) const {
  auto *Load = cast<LoadSDNode>(Op);

  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  MVT ContainerVT =
      RISCVTargetLowering::getContainerForFixedLengthVector(DAG, VT, Subtarget);

  SDValue VL =
      DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());

  SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
  SDValue NewLoad = DAG.getMemIntrinsicNode(
      RISCVISD::VLE_VL, DL, VTs, {Load->getChain(), Load->getBasePtr(), VL},
      Load->getMemoryVT(), Load->getMemOperand());

  SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
  return DAG.getMergeValues({Result, Load->getChain()}, DL);
}

// Lower a fixed-length vector store to the VL-carrying RVV store node
// (VSE_VL), converting the stored value to its scalable container type.
SDValue
RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
                                                      SelectionDAG &DAG) const {
  auto *Store = cast<StoreSDNode>(Op);

  SDLoc DL(Op);
  MVT VT = Store->getValue().getSimpleValueType();

  // FIXME: We probably need to zero any extra bits in a byte for mask stores.
  // This is tricky to do.

  MVT ContainerVT =
      RISCVTargetLowering::getContainerForFixedLengthVector(DAG, VT, Subtarget);

  SDValue VL =
      DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());

  SDValue NewValue =
      convertToScalableVector(ContainerVT, Store->getValue(), DAG, Subtarget);
  return DAG.getMemIntrinsicNode(
      RISCVISD::VSE_VL, DL, DAG.getVTList(MVT::Other),
      {Store->getChain(), NewValue, Store->getBasePtr(), VL},
      Store->getMemoryVT(), Store->getMemOperand());
}

// Lower a fixed-length vector SETCC to a masked RVV compare (SETCC_VL).
// Condition codes with no direct RVV equivalent are rewritten in terms of
// supported ones by swapping operands, inverting the result, and/or combining
// two compares with a mask-logic op.
SDValue
RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
                                                      SelectionDAG &DAG) const {
  MVT InVT = Op.getOperand(0).getSimpleValueType();
  MVT ContainerVT = RISCVTargetLowering::getContainerForFixedLengthVector(
      DAG, InVT, Subtarget);

  MVT VT = Op.getSimpleValueType();

  SDValue Op1 =
      convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
  SDValue Op2 =
      convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);

  SDLoc DL(Op);
  SDValue VL =
      DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());

  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();

  // Invert: XOR the final mask with all-ones. LogicOpc: combine two compares
  // with this mask-logic node (set only for the two-compare expansions).
  bool Invert = false;
  Optional<unsigned> LogicOpc;
  if (ContainerVT.isFloatingPoint()) {
    bool Swap = false;
    switch (CC) {
    default:
      break;
    case ISD::SETULE:
    case ISD::SETULT:
      Swap = true;
      LLVM_FALLTHROUGH;
    case ISD::SETUGE:
    case ISD::SETUGT:
      CC = getSetCCInverse(CC, ContainerVT);
      Invert = true;
      break;
    case ISD::SETOGE:
    case ISD::SETOGT:
    case ISD::SETGE:
    case ISD::SETGT:
      Swap = true;
      break;
    case ISD::SETUEQ:
      // Use !((OLT Op1, Op2) || (OLT Op2, Op1))
      Invert = true;
      LogicOpc = RISCVISD::VMOR_VL;
      CC = ISD::SETOLT;
      break;
    case ISD::SETONE:
      // Use ((OLT Op1, Op2) || (OLT Op2, Op1))
      LogicOpc = RISCVISD::VMOR_VL;
      CC = ISD::SETOLT;
      break;
    case ISD::SETO:
      // Use (OEQ Op1, Op1) && (OEQ Op2, Op2)
      LogicOpc = RISCVISD::VMAND_VL;
      CC = ISD::SETOEQ;
      break;
    case ISD::SETUO:
      // Use (UNE Op1, Op1) || (UNE Op2, Op2)
      LogicOpc = RISCVISD::VMOR_VL;
      CC = ISD::SETUNE;
      break;
    }

    if (Swap) {
      CC = getSetCCSwappedOperands(CC);
      std::swap(Op1, Op2);
    }
  }

  MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
  SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);

  // There are 3 cases we need to emit.
  // 1. For (OEQ Op1, Op1) && (OEQ Op2, Op2) or (UNE Op1, Op1) || (UNE Op2, Op2)
  // we need to compare each operand with itself.
  // 2. For (OLT Op1, Op2) || (OLT Op2, Op1) we need to compare Op1 and Op2 in
  // both orders.
  // 3. For any other case we just need one compare with Op1 and Op2.
  SDValue Cmp;
  if (LogicOpc && (CC == ISD::SETOEQ || CC == ISD::SETUNE)) {
    Cmp = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op1, Op1,
                      DAG.getCondCode(CC), Mask, VL);
    SDValue Cmp2 = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op2, Op2,
                               DAG.getCondCode(CC), Mask, VL);
    Cmp = DAG.getNode(*LogicOpc, DL, MaskVT, Cmp, Cmp2, VL);
  } else {
    Cmp = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op1, Op2,
                      DAG.getCondCode(CC), Mask, VL);
    if (LogicOpc) {
      SDValue Cmp2 = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op2, Op1,
                                 DAG.getCondCode(CC), Mask, VL);
      Cmp = DAG.getNode(*LogicOpc, DL, MaskVT, Cmp, Cmp2, VL);
    }
  }

  if (Invert) {
    // Invert the mask by XORing with an all-ones mask.
    SDValue AllOnes = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
    Cmp = DAG.getNode(RISCVISD::VMXOR_VL, DL, MaskVT, Cmp, AllOnes, VL);
  }

  return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
}

// Lower a fixed-length vector logic op, dispatching between the mask-register
// opcode (for i1 element vectors) and the plain vector opcode.
SDValue RISCVTargetLowering::lowerFixedLengthVectorLogicOpToRVV(
    SDValue Op, SelectionDAG &DAG,
    unsigned MaskOpc, unsigned VecOpc) const {
  MVT VT = Op.getSimpleValueType();

  // Mask (i1) vectors use the dedicated mask-logic node, which takes no mask
  // operand; all other element types use the masked vector form.
  if (VT.getVectorElementType() == MVT::i1)
    return lowerToScalableOp(Op, DAG, MaskOpc, /*HasMask*/ false);

  return lowerToScalableOp(Op, DAG, VecOpc, /*HasMask*/ true);
}

// Lower a fixed-length VSELECT to the VL-carrying RVV select node, converting
// the i1 condition vector and both value operands to scalable container types.
SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
    SDValue Op, SelectionDAG &DAG) const {
  MVT VT = Op.getSimpleValueType();
  MVT ContainerVT =
      RISCVTargetLowering::getContainerForFixedLengthVector(DAG, VT, Subtarget);

  MVT I1ContainerVT =
      MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());

  SDValue CC =
      convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
  SDValue Op1 =
      convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
  SDValue Op2 =
      convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);

  SDLoc DL(Op);
  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  SDValue Select =
      DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, Op1, Op2, VL);

  return convertFromScalableVector(VT, Select, DAG, Subtarget);
}

// Generic helper: re-emit a fixed-length vector operation as the equivalent
// VL-carrying RVV node (NewOpc) on the scalable container type, appending the
// mask (when HasMask) and VL operands, then convert the result back.
SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, SelectionDAG &DAG,
                                               unsigned NewOpc,
                                               bool HasMask) const {
  MVT VT = Op.getSimpleValueType();
  assert(useRVVForFixedLengthVectorVT(VT) &&
         "Only expected to lower fixed length vector operation!");
  MVT ContainerVT =
      RISCVTargetLowering::getContainerForFixedLengthVector(DAG, VT, Subtarget);

  // Create list of operands by converting existing ones to scalable types.
  SmallVector<SDValue, 6> Ops;
  for (const SDValue &V : Op->op_values()) {
    assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");

    // Pass through non-vector operands.
    if (!V.getValueType().isVector()) {
      Ops.push_back(V);
      continue;
    }

    // "cast" fixed length vector to a scalable vector.
    assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
           "Only fixed length vectors are supported!");
    Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
  }

  SDLoc DL(Op);
  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
  if (HasMask)
    Ops.push_back(Mask);
  Ops.push_back(VL);

  SDValue ScalableRes = DAG.getNode(NewOpc, DL, ContainerVT, Ops);
  return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
}

// Returns the opcode of the target-specific SDNode that implements the 32-bit
// form of the given Opcode.
static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode");
  case ISD::SHL:
    return RISCVISD::SLLW;
  case ISD::SRA:
    return RISCVISD::SRAW;
  case ISD::SRL:
    return RISCVISD::SRLW;
  case ISD::SDIV:
    return RISCVISD::DIVW;
  case ISD::UDIV:
    return RISCVISD::DIVUW;
  case ISD::UREM:
    return RISCVISD::REMUW;
  case ISD::ROTL:
    return RISCVISD::ROLW;
  case ISD::ROTR:
    return RISCVISD::RORW;
  case RISCVISD::GREVI:
    return RISCVISD::GREVIW;
  case RISCVISD::GORCI:
    return RISCVISD::GORCIW;
  }
}

// Converts the given 32-bit operation to a target-specific SelectionDAG node.
// Because i32 isn't a legal type for RV64, these operations would otherwise
// be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
// later one because the fact the operation was originally of type i32 is
// lost.
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
                                   unsigned ExtOpc = ISD::ANY_EXTEND) {
  SDLoc DL(N);
  RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
  // Extend both operands to i64 (with ExtOpc, any-extend by default) and emit
  // the *W node on i64.
  SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
  SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
  SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
  // ReplaceNodeResults requires we maintain the same type for the return value.
  return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
}

// Converts the given 32-bit operation to a i64 operation with signed extension
// semantic to reduce the signed extension instructions.
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
  SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
  SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
  // Sign-extend from bit 31 so the i64 result carries the i32 result's sign.
  SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
                               DAG.getValueType(MVT::i32));
  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
}

void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
                                             SmallVectorImpl<SDValue> &Results,
                                             SelectionDAG &DAG) const {
  SDLoc DL(N);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Don't know how to custom type legalize this operation!");
  case ISD::STRICT_FP_TO_SINT:
  case ISD::STRICT_FP_TO_UINT:
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT: {
    bool IsStrict = N->isStrictFPOpcode();
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    // For strict nodes operand 0 is the chain; the FP value follows it.
    SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
    // If the FP type needs to be softened, emit a library call using the 'si'
    // version.
    // If we left it to default legalization we'd end up with 'di'. If
    // the FP type doesn't need to be softened just let generic type
    // legalization promote the result type.
    if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
        TargetLowering::TypeSoftenFloat)
      return;
    RTLIB::Libcall LC;
    if (N->getOpcode() == ISD::FP_TO_SINT ||
        N->getOpcode() == ISD::STRICT_FP_TO_SINT)
      LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
    else
      LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
    MakeLibCallOptions CallOptions;
    EVT OpVT = Op0.getValueType();
    CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
    SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
    SDValue Result;
    std::tie(Result, Chain) =
        makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
    Results.push_back(Result);
    if (IsStrict)
      Results.push_back(Chain);
    break;
  }
  case ISD::READCYCLECOUNTER: {
    assert(!Subtarget.is64Bit() &&
           "READCYCLECOUNTER only has custom type legalization on riscv32");

    // READ_CYCLE_WIDE reads the full 64-bit counter as two i32 halves plus a
    // chain; pair the halves back into the expected i64 result.
    SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
    SDValue RCW =
        DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0));

    Results.push_back(
        DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
    Results.push_back(RCW.getValue(2));
    break;
  }
  case ISD::ADD:
  case ISD::SUB:
  case ISD::MUL:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    // Constant RHS is left to generic legalization/pattern matching.
    if (N->getOperand(1).getOpcode() == ISD::Constant)
      return;
    Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
    break;
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    // Constant shift amounts are left to generic legalization.
    if (N->getOperand(1).getOpcode() == ISD::Constant)
      return;
    Results.push_back(customLegalizeToWOp(N, DAG));
    break;
  case ISD::ROTL:
  case ISD::ROTR:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    Results.push_back(customLegalizeToWOp(N, DAG));
    break;
  case ISD::SDIV:
  case ISD::UDIV:
  case ISD::UREM: {
    MVT VT = N->getSimpleValueType(0);
    assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
           Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
           "Unexpected custom legalisation");
    if (N->getOperand(0).getOpcode() == ISD::Constant ||
        N->getOperand(1).getOpcode() == ISD::Constant)
      return;

    // If the input is i32, use ANY_EXTEND since the W instructions don't read
    // the upper 32 bits. For other types we need to sign or zero extend
    // based on the opcode.
    unsigned ExtOpc = ISD::ANY_EXTEND;
    if (VT != MVT::i32)
      ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
                                           : ISD::ZERO_EXTEND;

    Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
    break;
  }
  case ISD::BITCAST: {
    assert(((N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
             Subtarget.hasStdExtF()) ||
            (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh())) &&
           "Unexpected custom legalisation");
    SDValue Op0 = N->getOperand(0);
    if (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh()) {
      if (Op0.getValueType() != MVT::f16)
        return;
      // f16 -> i16 goes via an FMV to XLenVT followed by a truncate.
      SDValue FPConv =
          DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Op0);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
    } else if (N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
               Subtarget.hasStdExtF()) {
      if (Op0.getValueType() != MVT::f32)
        return;
      // f32 -> i32 on RV64 goes via an FMV to i64 followed by a truncate.
      SDValue FPConv =
          DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
    }
    break;
  }
  case RISCVISD::GREVI:
  case RISCVISD::GORCI: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    // This is similar to customLegalizeToWOp, except that we pass the second
    // operand (a TargetConstant) straight through: it is already of type
    // XLenVT.
    SDLoc DL(N);
    RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
    SDValue NewOp0 =
        DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
    SDValue NewRes =
        DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, N->getOperand(1));
    // ReplaceNodeResults requires we maintain the same type for the return
    // value.
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
    break;
  }
  case RISCVISD::SHFLI: {
    // There is no SHFLIW instruction, but we can just promote the operation.
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    SDLoc DL(N);
    SDValue NewOp0 =
        DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
    SDValue NewRes =
        DAG.getNode(RISCVISD::SHFLI, DL, MVT::i64, NewOp0, N->getOperand(1));
    // ReplaceNodeResults requires we maintain the same type for the return
    // value.
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
    break;
  }
  case ISD::BSWAP:
  case ISD::BITREVERSE: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
    SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
                                 N->getOperand(0));
    // GREVIW with imm 31 is a 32-bit bit-reverse; imm 24 is a 32-bit byte
    // swap.
    unsigned Imm = N->getOpcode() == ISD::BITREVERSE ? 31 : 24;
    SDValue GREVIW = DAG.getNode(RISCVISD::GREVIW, DL, MVT::i64, NewOp0,
                                 DAG.getTargetConstant(Imm, DL,
                                                       Subtarget.getXLenVT()));
    // ReplaceNodeResults requires we maintain the same type for the return
    // value.
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, GREVIW));
    break;
  }
  case ISD::FSHL:
  case ISD::FSHR: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           Subtarget.hasStdExtZbt() && "Unexpected custom legalisation");
    SDValue NewOp0 =
        DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
    SDValue NewOp1 =
        DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
    SDValue NewOp2 =
        DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
    // FSLW/FSRW take a 6 bit shift amount but i32 FSHL/FSHR only use 5 bits.
    // Mask the shift amount to 5 bits.
    NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2,
                         DAG.getConstant(0x1f, DL, MVT::i64));
    unsigned Opc =
        N->getOpcode() == ISD::FSHL ? RISCVISD::FSLW : RISCVISD::FSRW;
    SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewOp2);
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp));
    break;
  }
  case ISD::EXTRACT_VECTOR_ELT: {
    // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
    // type is illegal (currently only vXi64 RV32).
    // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
    // transferred to the destination register. We issue two of these from the
    // upper- and lower- halves of the SEW-bit vector element, slid down to the
    // first element.
    SDLoc DL(N);
    SDValue Vec = N->getOperand(0);
    SDValue Idx = N->getOperand(1);

    // The vector type hasn't been legalized yet so we can't issue target
    // specific nodes if it needs legalization.
    // FIXME: We would manually legalize if it's important.
    if (!isTypeLegal(Vec.getValueType()))
      return;

    MVT VecVT = Vec.getSimpleValueType();

    assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
           VecVT.getVectorElementType() == MVT::i64 &&
           "Unexpected EXTRACT_VECTOR_ELT legalization");

    // If this is a fixed vector, we need to convert it to a scalable vector.
    MVT ContainerVT = VecVT;
    if (VecVT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
      Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
    }

    MVT XLenVT = Subtarget.getXLenVT();

    // Use a VL of 1 to avoid processing more elements than we need.
    MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount());
    SDValue VL = DAG.getConstant(1, DL, XLenVT);
    SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);

    // Unless the index is known to be 0, we must slide the vector down to get
    // the desired element into index 0.
    if (!isNullConstant(Idx)) {
      Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
                        DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
    }

    // Extract the lower XLEN bits of the correct vector element.
    SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);

    // To extract the upper XLEN bits of the vector element, shift the first
    // element right by 32 bits and re-extract the lower XLEN bits.
    SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                                     DAG.getConstant(32, DL, XLenVT), VL);
    SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec,
                                 ThirtyTwoV, Mask, VL);

    SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);

    // Combine the two XLEN-sized halves into the requested i64 result.
    Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    switch (IntNo) {
    default:
      llvm_unreachable(
          "Don't know how to custom type legalize this intrinsic!");
    case Intrinsic::riscv_vmv_x_s: {
      EVT VT = N->getValueType(0);
      assert((VT == MVT::i8 || VT == MVT::i16 ||
              (Subtarget.is64Bit() && VT == MVT::i32)) &&
             "Unexpected custom legalisation!");
      // Extract at XLenVT width then truncate down to the narrow result type.
      SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
                                    Subtarget.getXLenVT(), N->getOperand(1));
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
      break;
    }
    }
    break;
  }
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_UMIN:
    // The custom-lowering for these nodes returns a vector whose first element
    // is the result of the reduction. Extract its first element and let the
    // legalization for EXTRACT_VECTOR_ELT do the rest of the job.
    Results.push_back(lowerVECREDUCE(SDValue(N, 0), DAG));
    break;
  }
}

// A structure to hold one of the bit-manipulation patterns below.
Together, a 3314 // SHL and non-SHL pattern may form a bit-manipulation pair on a single source: 3315 // (or (and (shl x, 1), 0xAAAAAAAA), 3316 // (and (srl x, 1), 0x55555555)) 3317 struct RISCVBitmanipPat { 3318 SDValue Op; 3319 unsigned ShAmt; 3320 bool IsSHL; 3321 3322 bool formsPairWith(const RISCVBitmanipPat &Other) const { 3323 return Op == Other.Op && ShAmt == Other.ShAmt && IsSHL != Other.IsSHL; 3324 } 3325 }; 3326 3327 // Matches patterns of the form 3328 // (and (shl x, C2), (C1 << C2)) 3329 // (and (srl x, C2), C1) 3330 // (shl (and x, C1), C2) 3331 // (srl (and x, (C1 << C2)), C2) 3332 // Where C2 is a power of 2 and C1 has at least that many leading zeroes. 3333 // The expected masks for each shift amount are specified in BitmanipMasks where 3334 // BitmanipMasks[log2(C2)] specifies the expected C1 value. 3335 // The max allowed shift amount is either XLen/2 or XLen/4 determined by whether 3336 // BitmanipMasks contains 6 or 5 entries assuming that the maximum possible 3337 // XLen is 64. 3338 static Optional<RISCVBitmanipPat> 3339 matchRISCVBitmanipPat(SDValue Op, ArrayRef<uint64_t> BitmanipMasks) { 3340 assert((BitmanipMasks.size() == 5 || BitmanipMasks.size() == 6) && 3341 "Unexpected number of masks"); 3342 Optional<uint64_t> Mask; 3343 // Optionally consume a mask around the shift operation. 3344 if (Op.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op.getOperand(1))) { 3345 Mask = Op.getConstantOperandVal(1); 3346 Op = Op.getOperand(0); 3347 } 3348 if (Op.getOpcode() != ISD::SHL && Op.getOpcode() != ISD::SRL) 3349 return None; 3350 bool IsSHL = Op.getOpcode() == ISD::SHL; 3351 3352 if (!isa<ConstantSDNode>(Op.getOperand(1))) 3353 return None; 3354 uint64_t ShAmt = Op.getConstantOperandVal(1); 3355 3356 unsigned Width = Op.getValueType() == MVT::i64 ? 
64 : 32; 3357 if (ShAmt >= Width && !isPowerOf2_64(ShAmt)) 3358 return None; 3359 // If we don't have enough masks for 64 bit, then we must be trying to 3360 // match SHFL so we're only allowed to shift 1/4 of the width. 3361 if (BitmanipMasks.size() == 5 && ShAmt >= (Width / 2)) 3362 return None; 3363 3364 SDValue Src = Op.getOperand(0); 3365 3366 // The expected mask is shifted left when the AND is found around SHL 3367 // patterns. 3368 // ((x >> 1) & 0x55555555) 3369 // ((x << 1) & 0xAAAAAAAA) 3370 bool SHLExpMask = IsSHL; 3371 3372 if (!Mask) { 3373 // Sometimes LLVM keeps the mask as an operand of the shift, typically when 3374 // the mask is all ones: consume that now. 3375 if (Src.getOpcode() == ISD::AND && isa<ConstantSDNode>(Src.getOperand(1))) { 3376 Mask = Src.getConstantOperandVal(1); 3377 Src = Src.getOperand(0); 3378 // The expected mask is now in fact shifted left for SRL, so reverse the 3379 // decision. 3380 // ((x & 0xAAAAAAAA) >> 1) 3381 // ((x & 0x55555555) << 1) 3382 SHLExpMask = !SHLExpMask; 3383 } else { 3384 // Use a default shifted mask of all-ones if there's no AND, truncated 3385 // down to the expected width. This simplifies the logic later on. 3386 Mask = maskTrailingOnes<uint64_t>(Width); 3387 *Mask &= (IsSHL ? 
*Mask << ShAmt : *Mask >> ShAmt); 3388 } 3389 } 3390 3391 unsigned MaskIdx = Log2_32(ShAmt); 3392 uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width); 3393 3394 if (SHLExpMask) 3395 ExpMask <<= ShAmt; 3396 3397 if (Mask != ExpMask) 3398 return None; 3399 3400 return RISCVBitmanipPat{Src, (unsigned)ShAmt, IsSHL}; 3401 } 3402 3403 // Matches any of the following bit-manipulation patterns: 3404 // (and (shl x, 1), (0x55555555 << 1)) 3405 // (and (srl x, 1), 0x55555555) 3406 // (shl (and x, 0x55555555), 1) 3407 // (srl (and x, (0x55555555 << 1)), 1) 3408 // where the shift amount and mask may vary thus: 3409 // [1] = 0x55555555 / 0xAAAAAAAA 3410 // [2] = 0x33333333 / 0xCCCCCCCC 3411 // [4] = 0x0F0F0F0F / 0xF0F0F0F0 3412 // [8] = 0x00FF00FF / 0xFF00FF00 3413 // [16] = 0x0000FFFF / 0xFFFFFFFF 3414 // [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64) 3415 static Optional<RISCVBitmanipPat> matchGREVIPat(SDValue Op) { 3416 // These are the unshifted masks which we use to match bit-manipulation 3417 // patterns. They may be shifted left in certain circumstances. 
3418 static const uint64_t BitmanipMasks[] = { 3419 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL, 3420 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL}; 3421 3422 return matchRISCVBitmanipPat(Op, BitmanipMasks); 3423 } 3424 3425 // Match the following pattern as a GREVI(W) operation 3426 // (or (BITMANIP_SHL x), (BITMANIP_SRL x)) 3427 static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG, 3428 const RISCVSubtarget &Subtarget) { 3429 assert(Subtarget.hasStdExtZbp() && "Expected Zbp extenson"); 3430 EVT VT = Op.getValueType(); 3431 3432 if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) { 3433 auto LHS = matchGREVIPat(Op.getOperand(0)); 3434 auto RHS = matchGREVIPat(Op.getOperand(1)); 3435 if (LHS && RHS && LHS->formsPairWith(*RHS)) { 3436 SDLoc DL(Op); 3437 return DAG.getNode( 3438 RISCVISD::GREVI, DL, VT, LHS->Op, 3439 DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT())); 3440 } 3441 } 3442 return SDValue(); 3443 } 3444 3445 // Matches any the following pattern as a GORCI(W) operation 3446 // 1. (or (GREVI x, shamt), x) if shamt is a power of 2 3447 // 2. (or x, (GREVI x, shamt)) if shamt is a power of 2 3448 // 3. (or (or (BITMANIP_SHL x), x), (BITMANIP_SRL x)) 3449 // Note that with the variant of 3., 3450 // (or (or (BITMANIP_SHL x), (BITMANIP_SRL x)), x) 3451 // the inner pattern will first be matched as GREVI and then the outer 3452 // pattern will be matched to GORC via the first rule above. 3453 // 4. 
(or (rotl/rotr x, bitwidth/2), x) 3454 static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG, 3455 const RISCVSubtarget &Subtarget) { 3456 assert(Subtarget.hasStdExtZbp() && "Expected Zbp extenson"); 3457 EVT VT = Op.getValueType(); 3458 3459 if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) { 3460 SDLoc DL(Op); 3461 SDValue Op0 = Op.getOperand(0); 3462 SDValue Op1 = Op.getOperand(1); 3463 3464 auto MatchOROfReverse = [&](SDValue Reverse, SDValue X) { 3465 if (Reverse.getOpcode() == RISCVISD::GREVI && Reverse.getOperand(0) == X && 3466 isPowerOf2_32(Reverse.getConstantOperandVal(1))) 3467 return DAG.getNode(RISCVISD::GORCI, DL, VT, X, Reverse.getOperand(1)); 3468 // We can also form GORCI from ROTL/ROTR by half the bitwidth. 3469 if ((Reverse.getOpcode() == ISD::ROTL || 3470 Reverse.getOpcode() == ISD::ROTR) && 3471 Reverse.getOperand(0) == X && 3472 isa<ConstantSDNode>(Reverse.getOperand(1))) { 3473 uint64_t RotAmt = Reverse.getConstantOperandVal(1); 3474 if (RotAmt == (VT.getSizeInBits() / 2)) 3475 return DAG.getNode( 3476 RISCVISD::GORCI, DL, VT, X, 3477 DAG.getTargetConstant(RotAmt, DL, Subtarget.getXLenVT())); 3478 } 3479 return SDValue(); 3480 }; 3481 3482 // Check for either commutable permutation of (or (GREVI x, shamt), x) 3483 if (SDValue V = MatchOROfReverse(Op0, Op1)) 3484 return V; 3485 if (SDValue V = MatchOROfReverse(Op1, Op0)) 3486 return V; 3487 3488 // OR is commutable so canonicalize its OR operand to the left 3489 if (Op0.getOpcode() != ISD::OR && Op1.getOpcode() == ISD::OR) 3490 std::swap(Op0, Op1); 3491 if (Op0.getOpcode() != ISD::OR) 3492 return SDValue(); 3493 SDValue OrOp0 = Op0.getOperand(0); 3494 SDValue OrOp1 = Op0.getOperand(1); 3495 auto LHS = matchGREVIPat(OrOp0); 3496 // OR is commutable so swap the operands and try again: x might have been 3497 // on the left 3498 if (!LHS) { 3499 std::swap(OrOp0, OrOp1); 3500 LHS = matchGREVIPat(OrOp0); 3501 } 3502 auto RHS = matchGREVIPat(Op1); 3503 if (LHS && RHS 
&& LHS->formsPairWith(*RHS) && LHS->Op == OrOp1) { 3504 return DAG.getNode( 3505 RISCVISD::GORCI, DL, VT, LHS->Op, 3506 DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT())); 3507 } 3508 } 3509 return SDValue(); 3510 } 3511 3512 // Matches any of the following bit-manipulation patterns: 3513 // (and (shl x, 1), (0x22222222 << 1)) 3514 // (and (srl x, 1), 0x22222222) 3515 // (shl (and x, 0x22222222), 1) 3516 // (srl (and x, (0x22222222 << 1)), 1) 3517 // where the shift amount and mask may vary thus: 3518 // [1] = 0x22222222 / 0x44444444 3519 // [2] = 0x0C0C0C0C / 0x3C3C3C3C 3520 // [4] = 0x00F000F0 / 0x0F000F00 3521 // [8] = 0x0000FF00 / 0x00FF0000 3522 // [16] = 0x00000000FFFF0000 / 0x0000FFFF00000000 (for RV64) 3523 static Optional<RISCVBitmanipPat> matchSHFLPat(SDValue Op) { 3524 // These are the unshifted masks which we use to match bit-manipulation 3525 // patterns. They may be shifted left in certain circumstances. 3526 static const uint64_t BitmanipMasks[] = { 3527 0x2222222222222222ULL, 0x0C0C0C0C0C0C0C0CULL, 0x00F000F000F000F0ULL, 3528 0x0000FF000000FF00ULL, 0x00000000FFFF0000ULL}; 3529 3530 return matchRISCVBitmanipPat(Op, BitmanipMasks); 3531 } 3532 3533 // Match (or (or (SHFL_SHL x), (SHFL_SHR x)), (SHFL_AND x) 3534 static SDValue combineORToSHFL(SDValue Op, SelectionDAG &DAG, 3535 const RISCVSubtarget &Subtarget) { 3536 assert(Subtarget.hasStdExtZbp() && "Expected Zbp extenson"); 3537 EVT VT = Op.getValueType(); 3538 3539 if (VT != MVT::i32 && VT != Subtarget.getXLenVT()) 3540 return SDValue(); 3541 3542 SDValue Op0 = Op.getOperand(0); 3543 SDValue Op1 = Op.getOperand(1); 3544 3545 // Or is commutable so canonicalize the second OR to the LHS. 3546 if (Op0.getOpcode() != ISD::OR) 3547 std::swap(Op0, Op1); 3548 if (Op0.getOpcode() != ISD::OR) 3549 return SDValue(); 3550 3551 // We found an inner OR, so our operands are the operands of the inner OR 3552 // and the other operand of the outer OR. 
3553 SDValue A = Op0.getOperand(0); 3554 SDValue B = Op0.getOperand(1); 3555 SDValue C = Op1; 3556 3557 auto Match1 = matchSHFLPat(A); 3558 auto Match2 = matchSHFLPat(B); 3559 3560 // If neither matched, we failed. 3561 if (!Match1 && !Match2) 3562 return SDValue(); 3563 3564 // We had at least one match. if one failed, try the remaining C operand. 3565 if (!Match1) { 3566 std::swap(A, C); 3567 Match1 = matchSHFLPat(A); 3568 if (!Match1) 3569 return SDValue(); 3570 } else if (!Match2) { 3571 std::swap(B, C); 3572 Match2 = matchSHFLPat(B); 3573 if (!Match2) 3574 return SDValue(); 3575 } 3576 assert(Match1 && Match2); 3577 3578 // Make sure our matches pair up. 3579 if (!Match1->formsPairWith(*Match2)) 3580 return SDValue(); 3581 3582 // All the remains is to make sure C is an AND with the same input, that masks 3583 // out the bits that are being shuffled. 3584 if (C.getOpcode() != ISD::AND || !isa<ConstantSDNode>(C.getOperand(1)) || 3585 C.getOperand(0) != Match1->Op) 3586 return SDValue(); 3587 3588 uint64_t Mask = C.getConstantOperandVal(1); 3589 3590 static const uint64_t BitmanipMasks[] = { 3591 0x9999999999999999ULL, 0xC3C3C3C3C3C3C3C3ULL, 0xF00FF00FF00FF00FULL, 3592 0xFF0000FFFF0000FFULL, 0xFFFF00000000FFFFULL, 3593 }; 3594 3595 unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32; 3596 unsigned MaskIdx = Log2_32(Match1->ShAmt); 3597 uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width); 3598 3599 if (Mask != ExpMask) 3600 return SDValue(); 3601 3602 SDLoc DL(Op); 3603 return DAG.getNode( 3604 RISCVISD::SHFLI, DL, VT, Match1->Op, 3605 DAG.getTargetConstant(Match1->ShAmt, DL, Subtarget.getXLenVT())); 3606 } 3607 3608 // Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is 3609 // non-zero, and to x when it is. Any repeated GREVI stage undoes itself. 3610 // Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). Repeated stage does 3611 // not undo itself, but they are redundant. 
static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) {
  unsigned ShAmt1 = N->getConstantOperandVal(1);
  SDValue Src = N->getOperand(0);

  // Only combine two stages of the same operation.
  if (Src.getOpcode() != N->getOpcode())
    return SDValue();

  unsigned ShAmt2 = Src.getConstantOperandVal(1);
  Src = Src.getOperand(0);

  unsigned CombinedShAmt;
  // GORCI stages accumulate (OR); GREVI stages cancel in pairs (XOR).
  if (N->getOpcode() == RISCVISD::GORCI || N->getOpcode() == RISCVISD::GORCIW)
    CombinedShAmt = ShAmt1 | ShAmt2;
  else
    CombinedShAmt = ShAmt1 ^ ShAmt2;

  // All stages cancelled: the whole thing is the identity.
  if (CombinedShAmt == 0)
    return Src;

  SDLoc DL(N);
  return DAG.getNode(N->getOpcode(), DL, N->getValueType(0), Src,
                     DAG.getTargetConstant(CombinedShAmt, DL,
                                           N->getOperand(1).getValueType()));
}

SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  default:
    break;
  case RISCVISD::SplitF64: {
    SDValue Op0 = N->getOperand(0);
    // If the input to SplitF64 is just BuildPairF64 then the operation is
    // redundant. Instead, use BuildPairF64's operands directly.
    if (Op0->getOpcode() == RISCVISD::BuildPairF64)
      return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));

    SDLoc DL(N);

    // It's cheaper to materialise two 32-bit integers than to load a double
    // from the constant pool and transfer it to integer registers through the
    // stack.
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
      APInt V = C->getValueAPF().bitcastToAPInt();
      SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
      SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
      return DCI.CombineTo(N, Lo, Hi);
    }

    // This is a target-specific version of a DAGCombine performed in
    // DAGCombiner::visitBITCAST. It performs the equivalent of:
    // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
    // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
    if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
        !Op0.getNode()->hasOneUse())
      break;
    SDValue NewSplitF64 =
        DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
                    Op0.getOperand(0));
    SDValue Lo = NewSplitF64.getValue(0);
    SDValue Hi = NewSplitF64.getValue(1);
    APInt SignBit = APInt::getSignMask(32);
    if (Op0.getOpcode() == ISD::FNEG) {
      // fneg only flips the sign bit, which lives in the high word.
      SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
                                  DAG.getConstant(SignBit, DL, MVT::i32));
      return DCI.CombineTo(N, Lo, NewHi);
    }
    assert(Op0.getOpcode() == ISD::FABS);
    SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
                                DAG.getConstant(~SignBit, DL, MVT::i32));
    return DCI.CombineTo(N, Lo, NewHi);
  }
  case RISCVISD::SLLW:
  case RISCVISD::SRAW:
  case RISCVISD::SRLW:
  case RISCVISD::ROLW:
  case RISCVISD::RORW: {
    // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);
    APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
    APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
    if (SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI) ||
        SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)) {
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }
    break;
  }
  case RISCVISD::FSL:
  case RISCVISD::FSR: {
    // Only the lower log2(Bitwidth)+1 bits of the shift amount are read.
    SDValue ShAmt = N->getOperand(2);
    unsigned BitWidth = ShAmt.getValueSizeInBits();
    assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
    // 2*BitWidth-1 has log2(BitWidth)+1 low bits set.
    APInt ShAmtMask(BitWidth, (BitWidth * 2) - 1);
    if (SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }
    break;
  }
  case RISCVISD::FSLW:
  case RISCVISD::FSRW: {
    // Only the lower 32 bits of Values and lower 6 bits of shift amount are
    // read.
    SDValue Op0 = N->getOperand(0);
    SDValue Op1 = N->getOperand(1);
    SDValue ShAmt = N->getOperand(2);
    APInt OpMask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
    APInt ShAmtMask = APInt::getLowBitsSet(ShAmt.getValueSizeInBits(), 6);
    if (SimplifyDemandedBits(Op0, OpMask, DCI) ||
        SimplifyDemandedBits(Op1, OpMask, DCI) ||
        SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }
    break;
  }
  case RISCVISD::GREVIW:
  case RISCVISD::GORCIW: {
    // Only the lower 32 bits of the first operand are read
    SDValue Op0 = N->getOperand(0);
    APInt Mask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
    if (SimplifyDemandedBits(Op0, Mask, DCI)) {
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }

    return combineGREVI_GORCI(N, DCI.DAG);
  }
  case RISCVISD::FMV_X_ANYEXTW_RV64: {
    SDLoc DL(N);
    SDValue Op0 = N->getOperand(0);
    // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
    // conversion is unnecessary and can be replaced with an ANY_EXTEND
    // of the FMV_W_X_RV64 operand.
    if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) {
      assert(Op0.getOperand(0).getValueType() == MVT::i64 &&
             "Unexpected value type!");
      return Op0.getOperand(0);
    }

    // This is a target-specific version of a DAGCombine performed in
    // DAGCombiner::visitBITCAST. It performs the equivalent of:
    // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
    // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
    if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
        !Op0.getNode()->hasOneUse())
      break;
    SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64,
                                 Op0.getOperand(0));
    // Sign-extend the 32-bit sign mask so bit 31 of the i64 result is hit.
    APInt SignBit = APInt::getSignMask(32).sext(64);
    if (Op0.getOpcode() == ISD::FNEG)
      return DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV,
                         DAG.getConstant(SignBit, DL, MVT::i64));

    assert(Op0.getOpcode() == ISD::FABS);
    return DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV,
                       DAG.getConstant(~SignBit, DL, MVT::i64));
  }
  case RISCVISD::GREVI:
  case RISCVISD::GORCI:
    return combineGREVI_GORCI(N, DCI.DAG);
  case ISD::OR:
    // Try to form Zbp permutation nodes from OR trees.
    if (auto GREV = combineORToGREV(SDValue(N, 0), DCI.DAG, Subtarget))
      return GREV;
    if (auto GORC = combineORToGORC(SDValue(N, 0), DCI.DAG, Subtarget))
      return GORC;
    if (auto SHFL = combineORToSHFL(SDValue(N, 0), DCI.DAG, Subtarget))
      return SHFL;
    break;
  case RISCVISD::SELECT_CC: {
    // Transform
    // (select_cc (xor X, 1), 0, setne, trueV, falseV) ->
    // (select_cc X, 0, seteq, trueV, falseV) if we can prove X is 0/1.
    // This can occur when legalizing some floating point comparisons.
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);
    auto CCVal = static_cast<ISD::CondCode>(N->getConstantOperandVal(2));
    // Mask of everything above bit 0: zero iff X is known to be 0/1.
    APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
    if (ISD::isIntEqualitySetCC(CCVal) && isNullConstant(RHS) &&
        LHS.getOpcode() == ISD::XOR && isOneConstant(LHS.getOperand(1)) &&
        DAG.MaskedValueIsZero(LHS.getOperand(0), Mask)) {
      SDLoc DL(N);
      CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
      SDValue TargetCC = DAG.getConstant(CCVal, DL, Subtarget.getXLenVT());
      return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
                         {LHS.getOperand(0), RHS, TargetCC, N->getOperand(3),
                          N->getOperand(4)});
    }
    break;
  }
  case ISD::SETCC: {
    // (setcc X, 1, setne) -> (setcc X, 0, seteq) if we can prove X is 0/1.
    // Comparing with 0 may allow us to fold into bnez/beqz.
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);
    if (LHS.getValueType().isScalableVector())
      break;
    auto CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
    APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
    if (isOneConstant(RHS) && ISD::isIntEqualitySetCC(CC) &&
        DAG.MaskedValueIsZero(LHS, Mask)) {
      SDLoc DL(N);
      SDValue Zero = DAG.getConstant(0, DL, LHS.getValueType());
      CC = ISD::getSetCCInverse(CC, LHS.getValueType());
      return DAG.getSetCC(DL, N->getValueType(0), LHS, Zero, CC);
    }
    break;
  }
  case ISD::FCOPYSIGN: {
    EVT VT = N->getValueType(0);
    if (!VT.isVector())
      break;
    // There is a form of VFSGNJ which injects the negated sign of its second
    // operand. Try and bubble any FNEG up after the extend/round to produce
    // this optimized pattern. Avoid modifying cases where FP_ROUND and
    // TRUNC=1.
    SDValue In2 = N->getOperand(1);
    // Avoid cases where the extend/round has multiple uses, as duplicating
    // those is typically more expensive than removing a fneg.
    if (!In2.hasOneUse())
      break;
    if (In2.getOpcode() != ISD::FP_EXTEND &&
        (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
      break;
    In2 = In2.getOperand(0);
    if (In2.getOpcode() != ISD::FNEG)
      break;
    SDLoc DL(N);
    SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
    return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
                       DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
  }
  }

  return SDValue();
}

bool RISCVTargetLowering::isDesirableToCommuteWithShift(
    const SDNode *N, CombineLevel Level) const {
  // The following folds are only desirable if `(OP _, c1 << c2)` can be
  // materialised in fewer instructions than `(OP _, c1)`:
  //
  //   (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
  //   (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
  SDValue N0 = N->getOperand(0);
  EVT Ty = N0.getValueType();
  if (Ty.isScalarInteger() &&
      (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
    auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
    auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (C1 && C2) {
      const APInt &C1Int = C1->getAPIntValue();
      APInt ShiftedC1Int = C1Int << C2->getAPIntValue();

      // We can materialise `c1 << c2` into an add immediate, so it's "free",
      // and the combine should happen, to potentially allow further combines
      // later.
      if (ShiftedC1Int.getMinSignedBits() <= 64 &&
          isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
        return true;

      // We can materialise `c1` in an add immediate, so it's "free", and the
      // combine should be prevented.
      if (C1Int.getMinSignedBits() <= 64 &&
          isLegalAddImmediate(C1Int.getSExtValue()))
        return false;

      // Neither constant will fit into an immediate, so find materialisation
      // costs.
      int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
                                              Subtarget.is64Bit());
      int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
          ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit());

      // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
      // combine should be prevented.
      if (C1Cost < ShiftedC1Cost)
        return false;
    }
  }
  return true;
}

// Try to replace an AND mask constant with a cheaper-to-materialise one by
// also setting bits the caller does not demand, aiming for a mask that fits
// in a sign-extended 12-bit immediate (ANDI) or a 32-bit value.
bool RISCVTargetLowering::targetShrinkDemandedConstant(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    TargetLoweringOpt &TLO) const {
  // Delay this optimization as late as possible.
  if (!TLO.LegalOps)
    return false;

  EVT VT = Op.getValueType();
  if (VT.isVector())
    return false;

  // Only handle AND for now.
  if (Op.getOpcode() != ISD::AND)
    return false;

  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  if (!C)
    return false;

  const APInt &Mask = C->getAPIntValue();

  // Clear all non-demanded bits initially.
  APInt ShrunkMask = Mask & DemandedBits;

  // If the shrunk mask fits in sign extended 12 bits, let the target
  // independent code apply it.
  if (ShrunkMask.isSignedIntN(12))
    return false;

  // Try to make a smaller immediate by setting undemanded bits.

  // We need to be able to make a negative number through a combination of mask
  // and undemanded bits.
  APInt ExpandedMask = Mask | ~DemandedBits;
  if (!ExpandedMask.isNegative())
    return false;

  // What is the fewest number of bits we need to represent the negative number.
  unsigned MinSignedBits = ExpandedMask.getMinSignedBits();

  // Try to make a 12 bit negative immediate. If that fails try to make a 32
  // bit negative immediate unless the shrunk immediate already fits in 32 bits.
  APInt NewMask = ShrunkMask;
  if (MinSignedBits <= 12)
    NewMask.setBitsFrom(11);
  else if (MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
    NewMask.setBitsFrom(31);
  else
    return false;

  // Sanity check that our new mask is a subset of the demanded mask.
  assert(NewMask.isSubsetOf(ExpandedMask));

  // If we aren't changing the mask, just return true to keep it and prevent
  // the caller from optimizing.
  if (NewMask == Mask)
    return true;

  // Replace the constant with the new mask.
  SDLoc DL(Op);
  SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
  SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
  return TLO.CombineTo(Op, NewOp);
}

// Compute known zero/one bits for RISCV-specific nodes.
void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                        KnownBits &Known,
                                                        const APInt &DemandedElts,
                                                        const SelectionDAG &DAG,
                                                        unsigned Depth) const {
  unsigned BitWidth = Known.getBitWidth();
  unsigned Opc = Op.getOpcode();
  assert((Opc >= ISD::BUILTIN_OP_END ||
          Opc == ISD::INTRINSIC_WO_CHAIN ||
          Opc == ISD::INTRINSIC_W_CHAIN ||
          Opc == ISD::INTRINSIC_VOID) &&
         "Should use MaskedValueIsZero if you don't know whether Op"
         " is a target node!");

  Known.resetAll();
  switch (Opc) {
  default: break;
  case RISCVISD::REMUW: {
    KnownBits Known2;
    Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    // We only care about the lower 32 bits.
    Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
    // Restore the original width by sign extending.
    Known = Known.sext(BitWidth);
    break;
  }
  case RISCVISD::DIVUW: {
    KnownBits Known2;
    Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    // We only care about the lower 32 bits.
    Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
    // Restore the original width by sign extending.
    Known = Known.sext(BitWidth);
    break;
  }
  case RISCVISD::READ_VLENB:
    // We assume VLENB is at least 8 bytes.
    // FIXME: The 1.0 draft spec defines minimum VLEN as 128 bits.
    Known.Zero.setLowBits(3);
    break;
  }
}

// Report a lower bound on the number of sign bits for RISCV-specific nodes.
unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
    unsigned Depth) const {
  switch (Op.getOpcode()) {
  default:
    break;
  case RISCVISD::SLLW:
  case RISCVISD::SRAW:
  case RISCVISD::SRLW:
  case RISCVISD::DIVW:
  case RISCVISD::DIVUW:
  case RISCVISD::REMUW:
  case RISCVISD::ROLW:
  case RISCVISD::RORW:
  case RISCVISD::GREVIW:
  case RISCVISD::GORCIW:
  case RISCVISD::FSLW:
  case RISCVISD::FSRW:
    // TODO: As the result is sign-extended, this is conservatively correct. A
    // more precise answer could be calculated for SRAW depending on known
    // bits in the shift amount.
    return 33;
  case RISCVISD::SHFLI: {
    // There is no SHFLIW, but a i64 SHFLI with bit 4 of the control word
    // cleared doesn't affect bit 31. The upper 32 bits will be shuffled, but
    // will stay within the upper 32 bits. If there were more than 32 sign bits
    // before there will be at least 33 sign bits after.
    if (Op.getValueType() == MVT::i64 &&
        (Op.getConstantOperandVal(1) & 0x10) == 0) {
      unsigned Tmp = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
      if (Tmp > 32)
        return 33;
    }
    break;
  }
  case RISCVISD::VMV_X_S:
    // The number of sign bits of the scalar result is computed by obtaining the
    // element type of the input vector operand, subtracting its width from the
    // XLEN, and then adding one (sign bit within the element type). If the
    // element type is wider than XLen, the least-significant XLEN bits are
    // taken.
    if (Op.getOperand(0).getScalarValueSizeInBits() > Subtarget.getXLen())
      return 1;
    return Subtarget.getXLen() - Op.getOperand(0).getScalarValueSizeInBits() + 1;
  }

  return 1;
}

static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
                                                  MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");

  // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
  // Should the count have wrapped while it was being read, we need to try
  // again.
  // ...
  // read:
  // rdcycleh x3 # load high word of cycle
  // rdcycle  x2 # load low word of cycle
  // rdcycleh x4 # load high word of cycle
  // bne x3, x4, read # check if high word reads match, otherwise try again
  // ...

  MachineFunction &MF = *BB->getParent();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = ++BB->getIterator();

  MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MF.insert(It, LoopMBB);

  MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MF.insert(It, DoneMBB);

  // Transfer the remainder of BB and its successor edges to DoneMBB.
  DoneMBB->splice(DoneMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  DoneMBB->transferSuccessorsAndUpdatePHIs(BB);

  BB->addSuccessor(LoopMBB);

  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
  Register LoReg = MI.getOperand(0).getReg();
  Register HiReg = MI.getOperand(1).getReg();
  DebugLoc DL = MI.getDebugLoc();

  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
      .addReg(RISCV::X0);
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding)
      .addReg(RISCV::X0);
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
      .addReg(RISCV::X0);

  // Retry if the high word changed between the two reads (counter wrapped).
  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
      .addReg(HiReg)
      .addReg(ReadAgainReg)
      .addMBB(LoopMBB);

  LoopMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(DoneMBB);

  MI.eraseFromParent();

  return DoneMBB;
}

// Expand SplitF64Pseudo: spill the FPR64 source to the shared MoveF64 stack
// slot and reload it as two 32-bit integer halves (lo at offset 0, hi at 4).
static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
                                             MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");

  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
  Register LoReg = MI.getOperand(0).getReg();
  Register HiReg = MI.getOperand(1).getReg();
  Register SrcReg = MI.getOperand(2).getReg();
  const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);

  TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
                          RI);
  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMOLo =
      MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
  MachineMemOperand *MMOHi = MF.getMachineMemOperand(
      MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMOLo);
  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
      .addFrameIndex(FI)
      .addImm(4)
      .addMemOperand(MMOHi);
  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Expand BuildPairF64Pseudo: store the two 32-bit integer halves to the shared
// MoveF64 stack slot and reload them as a single FPR64 value.
static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
                                                 MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
         "Unexpected instruction");

  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
  Register DstReg = MI.getOperand(0).getReg();
  Register LoReg = MI.getOperand(1).getReg();
  Register HiReg = MI.getOperand(2).getReg();
  const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);

  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMOLo =
      MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
  MachineMemOperand *MMOHi = MF.getMachineMemOperand(
      MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
      .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMOLo);
  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
      .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
      .addFrameIndex(FI)
      .addImm(4)
      .addMemOperand(MMOHi);
  TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI);
  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Returns true if MI is one of the Select_* pseudos that emitSelectPseudo
// expands into explicit control flow.
static bool isSelectPseudo(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case RISCV::Select_GPR_Using_CC_GPR:
  case RISCV::Select_FPR16_Using_CC_GPR:
  case RISCV::Select_FPR32_Using_CC_GPR:
  case RISCV::Select_FPR64_Using_CC_GPR:
    return true;
  }
}

static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
                                           MachineBasicBlock *BB) {
  // To "insert" Select_* instructions, we actually have to insert the triangle
  // control-flow pattern. The incoming instructions know the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and the condcode to use to select the appropriate branch.
  //
  // We produce the following control flow:
  //     HeadMBB
  //     |  \
  //     |  IfFalseMBB
  //     | /
  //    TailMBB
  //
  // When we find a sequence of selects we attempt to optimize their emission
  // by sharing the control flow. Currently we only handle cases where we have
  // multiple selects with the exact same condition (same LHS, RHS and CC).
  // The selects may be interleaved with other instructions if the other
  // instructions meet some requirements we deem safe:
  // - They are debug instructions. Otherwise,
  // - They do not have side-effects, do not access memory and their inputs do
  //   not depend on the results of the select pseudo-instructions.
  // The TrueV/FalseV operands of the selects cannot depend on the result of
  // previous selects in the sequence.
  // These conditions could be further relaxed. See the X86 target for a
  // related approach and more information.
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());

  SmallVector<MachineInstr *, 4> SelectDebugValues;
  SmallSet<Register, 4> SelectDests;
  SelectDests.insert(MI.getOperand(0).getReg());

  MachineInstr *LastSelectPseudo = &MI;

  // Scan forward from MI for further selects with the identical condition
  // that can share this branch, stopping at the first instruction that is
  // not provably safe to keep inside the shared triangle.
  for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
       SequenceMBBI != E; ++SequenceMBBI) {
    if (SequenceMBBI->isDebugInstr())
      continue;
    else if (isSelectPseudo(*SequenceMBBI)) {
      // Stop if the condition differs, or if a TrueV/FalseV operand depends
      // on the result of an earlier select in the sequence.
      if (SequenceMBBI->getOperand(1).getReg() != LHS ||
          SequenceMBBI->getOperand(2).getReg() != RHS ||
          SequenceMBBI->getOperand(3).getImm() != CC ||
          SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
          SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
        break;
      LastSelectPseudo = &*SequenceMBBI;
      SequenceMBBI->collectDebugValues(SelectDebugValues);
      SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
    } else {
      // A non-select instruction may be interleaved only if it has no side
      // effects, does not touch memory, and does not read a select result.
      if (SequenceMBBI->hasUnmodeledSideEffects() ||
          SequenceMBBI->mayLoadOrStore())
        break;
      if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
            return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
          }))
        break;
    }
  }

  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction::iterator I = ++BB->getIterator();

  MachineBasicBlock *HeadMBB = BB;
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);

  F->insert(I, IfFalseMBB);
  F->insert(I, TailMBB);

  // Transfer debug instructions associated with the selects to TailMBB.
  for (MachineInstr *DebugInstr : SelectDebugValues) {
    TailMBB->push_back(DebugInstr->removeFromParent());
  }

  // Move all instructions after the sequence to TailMBB.
  TailMBB->splice(TailMBB->end(), HeadMBB,
                  std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi nodes for the selects.
  TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
  // Set the successors for HeadMBB.
  HeadMBB->addSuccessor(IfFalseMBB);
  HeadMBB->addSuccessor(TailMBB);

  // Insert appropriate branch.
  unsigned Opcode = getBranchOpcodeForIntCondCode(CC);

  BuildMI(HeadMBB, DL, TII.get(Opcode))
      .addReg(LHS)
      .addReg(RHS)
      .addMBB(TailMBB);

  // IfFalseMBB just falls through to TailMBB.
  IfFalseMBB->addSuccessor(TailMBB);

  // Create PHIs for all of the select pseudo-instructions.
  auto SelectMBBI = MI.getIterator();
  auto SelectEnd = std::next(LastSelectPseudo->getIterator());
  auto InsertionPoint = TailMBB->begin();
  while (SelectMBBI != SelectEnd) {
    auto Next = std::next(SelectMBBI);
    if (isSelectPseudo(*SelectMBBI)) {
      // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
      BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
              TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
          .addReg(SelectMBBI->getOperand(4).getReg())
          .addMBB(HeadMBB)
          .addReg(SelectMBBI->getOperand(5).getReg())
          .addMBB(IfFalseMBB);
      SelectMBBI->eraseFromParent();
    }
    SelectMBBI = Next;
  }

  // PHIs were introduced above, so the function no longer satisfies NoPHIs.
  F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
  return TailMBB;
}

// Insert a PseudoVSETVLI (or PseudoVSETIVLI when VL is a small constant)
// before the RVV pseudo MI to establish VL/VTYPE, then neutralise the
// now-redundant VL/SEW operands on the pseudo. VLIndex is -1 when the pseudo
// carries no VL operand.
static MachineBasicBlock *addVSetVL(MachineInstr &MI, MachineBasicBlock *BB,
                                    int VLIndex, unsigned SEWIndex,
                                    RISCVVLMUL VLMul, bool ForceTailAgnostic) {
  MachineFunction
      &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();

  unsigned SEW = MI.getOperand(SEWIndex).getImm();
  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
  // Encode SEW for vtype: 8 -> 0, 16 -> 1, 32 -> 2, 64 -> 3, ...
  RISCVVSEW ElementWidth = static_cast<RISCVVSEW>(Log2_32(SEW / 8));

  MachineRegisterInfo &MRI = MF.getRegInfo();

  // Emit the vsetvli/vsetivli in front of MI and return the builder so the
  // vtype immediate can be appended afterwards.
  auto BuildVSETVLI = [&]() {
    if (VLIndex >= 0) {
      Register DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
      Register VLReg = MI.getOperand(VLIndex).getReg();

      // VL might be a compile time constant, but isel would have to put it
      // in a register. See if VL comes from an ADDI X0, imm.
      if (VLReg.isVirtual()) {
        MachineInstr *Def = MRI.getVRegDef(VLReg);
        if (Def && Def->getOpcode() == RISCV::ADDI &&
            Def->getOperand(1).getReg() == RISCV::X0 &&
            Def->getOperand(2).isImm()) {
          uint64_t Imm = Def->getOperand(2).getImm();
          // VSETIVLI allows a 5-bit zero extended immediate.
          if (isUInt<5>(Imm))
            return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETIVLI))
                .addReg(DestReg, RegState::Define | RegState::Dead)
                .addImm(Imm);
        }
      }

      return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI))
          .addReg(DestReg, RegState::Define | RegState::Dead)
          .addReg(VLReg);
    }

    // With no VL operator in the pseudo, do not modify VL (rd = X0, rs1 = X0).
    return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addReg(RISCV::X0, RegState::Kill);
  };

  MachineInstrBuilder MIB = BuildVSETVLI();

  // Default to tail agnostic unless the destination is tied to a source. In
  // that case the user would have some control over the tail values. The tail
  // policy is also ignored on instructions that only update element 0 like
  // vmv.s.x or reductions so use agnostic there to match the common case.
  // FIXME: This is conservatively correct, but we might want to detect that
  // the input is undefined.
  bool TailAgnostic = true;
  unsigned UseOpIdx;
  if (!ForceTailAgnostic && MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
    TailAgnostic = false;
    // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic.
    const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
    MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg());
    if (UseMI && UseMI->isImplicitDef())
      TailAgnostic = true;
  }

  // For simplicity we reuse the vtype representation here.
  MIB.addImm(RISCVVType::encodeVTYPE(VLMul, ElementWidth,
                                     /*TailAgnostic*/ TailAgnostic,
                                     /*MaskAgnostic*/ false));

  // Remove (now) redundant operands from pseudo
  MI.getOperand(SEWIndex).setImm(-1);
  if (VLIndex >= 0) {
    MI.getOperand(VLIndex).setReg(RISCV::NoRegister);
    MI.getOperand(VLIndex).setIsKill(false);
  }

  return BB;
}

MachineBasicBlock *
RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                 MachineBasicBlock *BB) const {
  uint64_t TSFlags = MI.getDesc().TSFlags;

  // RVV pseudos carrying an SEW operand get a vsetvli inserted in front of
  // them. The SEW operand is the last explicit operand; the VL operand, when
  // present, immediately precedes it.
  if (TSFlags & RISCVII::HasSEWOpMask) {
    unsigned NumOperands = MI.getNumExplicitOperands();
    int VLIndex = (TSFlags & RISCVII::HasVLOpMask) ?
NumOperands - 2 : -1; 4415 unsigned SEWIndex = NumOperands - 1; 4416 bool ForceTailAgnostic = TSFlags & RISCVII::ForceTailAgnosticMask; 4417 4418 RISCVVLMUL VLMul = static_cast<RISCVVLMUL>((TSFlags & RISCVII::VLMulMask) >> 4419 RISCVII::VLMulShift); 4420 return addVSetVL(MI, BB, VLIndex, SEWIndex, VLMul, ForceTailAgnostic); 4421 } 4422 4423 switch (MI.getOpcode()) { 4424 default: 4425 llvm_unreachable("Unexpected instr type to insert"); 4426 case RISCV::ReadCycleWide: 4427 assert(!Subtarget.is64Bit() && 4428 "ReadCycleWrite is only to be used on riscv32"); 4429 return emitReadCycleWidePseudo(MI, BB); 4430 case RISCV::Select_GPR_Using_CC_GPR: 4431 case RISCV::Select_FPR16_Using_CC_GPR: 4432 case RISCV::Select_FPR32_Using_CC_GPR: 4433 case RISCV::Select_FPR64_Using_CC_GPR: 4434 return emitSelectPseudo(MI, BB); 4435 case RISCV::BuildPairF64Pseudo: 4436 return emitBuildPairF64Pseudo(MI, BB); 4437 case RISCV::SplitF64Pseudo: 4438 return emitSplitF64Pseudo(MI, BB); 4439 } 4440 } 4441 4442 // Calling Convention Implementation. 4443 // The expectations for frontend ABI lowering vary from target to target. 4444 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI 4445 // details, but this is a longer term goal. For now, we simply try to keep the 4446 // role of the frontend as simple and well-defined as possible. The rules can 4447 // be summarised as: 4448 // * Never split up large scalar arguments. We handle them here. 4449 // * If a hardfloat calling convention is being used, and the struct may be 4450 // passed in a pair of registers (fp+fp, int+fp), and both registers are 4451 // available, then pass as two separate arguments. If either the GPRs or FPRs 4452 // are exhausted, then pass according to the rule below. 4453 // * If a struct could never be passed in registers or directly in a stack 4454 // slot (as it is larger than 2*XLEN and the floating point rules don't 4455 // apply), then pass it using a pointer with the byval attribute. 
4456 // * If a struct is less than 2*XLEN, then coerce to either a two-element 4457 // word-sized array or a 2*XLEN scalar (depending on alignment). 4458 // * The frontend can determine whether a struct is returned by reference or 4459 // not based on its size and fields. If it will be returned by reference, the 4460 // frontend must modify the prototype so a pointer with the sret annotation is 4461 // passed as the first argument. This is not necessary for large scalar 4462 // returns. 4463 // * Struct return values and varargs should be coerced to structs containing 4464 // register-size fields in the same situations they would be for fixed 4465 // arguments. 4466 4467 static const MCPhysReg ArgGPRs[] = { 4468 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, 4469 RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17 4470 }; 4471 static const MCPhysReg ArgFPR16s[] = { 4472 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, 4473 RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H 4474 }; 4475 static const MCPhysReg ArgFPR32s[] = { 4476 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, 4477 RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F 4478 }; 4479 static const MCPhysReg ArgFPR64s[] = { 4480 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, 4481 RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D 4482 }; 4483 // This is an interim calling convention and it may be changed in the future. 
// Vector argument registers, grouped by LMUL register class.
static const MCPhysReg ArgVRs[] = {
    RISCV::V8,  RISCV::V9,  RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
    RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
    RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2,  RISCV::V10M2, RISCV::V12M2,
                                     RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
                                     RISCV::V20M2, RISCV::V22M2};
static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
                                     RISCV::V20M4};
static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};

// Pass a 2*XLEN argument that has been split into two XLEN values through
// registers or the stack as necessary.
static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
                                ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
                                MVT ValVT2, MVT LocVT2,
                                ISD::ArgFlagsTy ArgFlags2) {
  unsigned XLenInBytes = XLen / 8;
  if (Register Reg = State.AllocateReg(ArgGPRs)) {
    // At least one half can be passed via register.
    State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
                                     VA1.getLocVT(), CCValAssign::Full));
  } else {
    // Both halves must be passed on the stack, with proper alignment.
    Align StackAlign =
        std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign());
    State.addLoc(
        CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
                            State.AllocateStack(XLenInBytes, StackAlign),
                            VA1.getLocVT(), CCValAssign::Full));
    State.addLoc(CCValAssign::getMem(
        ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
        LocVT2, CCValAssign::Full));
    return false;
  }

  if (Register Reg = State.AllocateReg(ArgGPRs)) {
    // The second half can also be passed via register.
    State.addLoc(
        CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
  } else {
    // The second half is passed via the stack, without additional alignment.
    State.addLoc(CCValAssign::getMem(
        ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
        LocVT2, CCValAssign::Full));
  }

  return false;
}

// Implements the RISC-V calling convention. Returns true upon failure.
static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
                     MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
                     ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
                     bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
                     Optional<unsigned> FirstMaskArgument) {
  unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
  assert(XLen == 32 || XLen == 64);
  MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;

  // Any return value split in to more than two values can't be returned
  // directly.
  if (IsRet && ValNo > 1)
    return true;

  // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a
  // variadic argument, or if no F16/F32 argument registers are available.
  bool UseGPRForF16_F32 = true;
  // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a
  // variadic argument, or if no F64 argument registers are available.
  bool UseGPRForF64 = true;

  switch (ABI) {
  default:
    llvm_unreachable("Unexpected ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_LP64:
    break;
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_LP64F:
    UseGPRForF16_F32 = !IsFixed;
    break;
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64D:
    UseGPRForF16_F32 = !IsFixed;
    UseGPRForF64 = !IsFixed;
    break;
  }

  // FPR16, FPR32, and FPR64 alias each other.
  // FPR registers alias across widths, so one exhaustion check (on the f32
  // list) covers all three floating-point register files.
  if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) {
    UseGPRForF16_F32 = true;
    UseGPRForF64 = true;
  }

  // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
  // similar local variables rather than directly checking against the target
  // ABI.

  // When an FP value travels in a GPR it is bit-converted to the integer type
  // of the same width.
  if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::f32)) {
    LocVT = XLenVT;
    LocInfo = CCValAssign::BCvt;
  } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
    LocVT = MVT::i64;
    LocInfo = CCValAssign::BCvt;
  }

  // If this is a variadic argument, the RISC-V calling convention requires
  // that it is assigned an 'even' or 'aligned' register if it has 8-byte
  // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
  // be used regardless of whether the original argument was split during
  // legalisation or not. The argument will not be passed by registers if the
  // original type is larger than 2*XLEN, so the register alignment rule does
  // not apply.
  unsigned TwoXLenInBytes = (2 * XLen) / 8;
  if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
      DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
    unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
    // Skip 'odd' register if necessary.
    if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
      State.AllocateReg(ArgGPRs);
  }

  SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
  SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
      State.getPendingArgFlags();

  assert(PendingLocs.size() == PendingArgFlags.size() &&
         "PendingLocs and PendingArgFlags out of sync");

  // Handle passing f64 on RV32D with a soft float ABI or when floating point
  // registers are exhausted.
  if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
    assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
           "Can't lower f64 if it is split");
    // Depending on available argument GPRS, f64 may be passed in a pair of
    // GPRs, split between a GPR and the stack, or passed completely on the
    // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
    // cases.
    Register Reg = State.AllocateReg(ArgGPRs);
    LocVT = MVT::i32;
    if (!Reg) {
      // No GPR left: the whole f64 goes on the stack as one 8-byte slot.
      unsigned StackOffset = State.AllocateStack(8, Align(8));
      State.addLoc(
          CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
      return false;
    }
    // Reserve a second GPR for the high half if one remains, otherwise the
    // high half takes a 4-byte stack slot.
    if (!State.AllocateReg(ArgGPRs))
      State.AllocateStack(4, Align(4));
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // Split arguments might be passed indirectly, so keep track of the pending
  // values.
  if (ArgFlags.isSplit() || !PendingLocs.empty()) {
    LocVT = XLenVT;
    LocInfo = CCValAssign::Indirect;
    PendingLocs.push_back(
        CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
    PendingArgFlags.push_back(ArgFlags);
    if (!ArgFlags.isSplitEnd()) {
      return false;
    }
  }

  // If the split argument only had two elements, it should be passed directly
  // in registers or on the stack.
  if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) {
    assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
    // Apply the normal calling convention rules to the first half of the
    // split argument.
    CCValAssign VA = PendingLocs[0];
    ISD::ArgFlagsTy AF = PendingArgFlags[0];
    PendingLocs.clear();
    PendingArgFlags.clear();
    return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
                               ArgFlags);
  }

  // Allocate to a register if possible, or else a stack slot.
  Register Reg;
  if (ValVT == MVT::f16 && !UseGPRForF16_F32)
    Reg = State.AllocateReg(ArgFPR16s);
  else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
    Reg = State.AllocateReg(ArgFPR32s);
  else if (ValVT == MVT::f64 && !UseGPRForF64)
    Reg = State.AllocateReg(ArgFPR64s);
  else if (ValVT.isScalableVector()) {
    // Pick the vector argument register list matching the value's register
    // class (LMUL grouping).
    const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT);
    if (RC == &RISCV::VRRegClass) {
      // Assign the first mask argument to V0.
      // This is an interim calling convention and it may be changed in the
      // future.
      if (FirstMaskArgument.hasValue() &&
          ValNo == FirstMaskArgument.getValue()) {
        Reg = State.AllocateReg(RISCV::V0);
      } else {
        Reg = State.AllocateReg(ArgVRs);
      }
    } else if (RC == &RISCV::VRM2RegClass) {
      Reg = State.AllocateReg(ArgVRM2s);
    } else if (RC == &RISCV::VRM4RegClass) {
      Reg = State.AllocateReg(ArgVRM4s);
    } else if (RC == &RISCV::VRM8RegClass) {
      Reg = State.AllocateReg(ArgVRM8s);
    } else {
      llvm_unreachable("Unhandled class register for ValueType");
    }
    if (!Reg) {
      LocInfo = CCValAssign::Indirect;
      // Try using a GPR to pass the address
      Reg = State.AllocateReg(ArgGPRs);
      LocVT = XLenVT;
    }
  } else
    Reg = State.AllocateReg(ArgGPRs);
  unsigned StackOffset =
      Reg ? 0 : State.AllocateStack(XLen / 8, Align(XLen / 8));

  // If we reach this point and PendingLocs is non-empty, we must be at the
  // end of a split argument that must be passed indirectly.
  if (!PendingLocs.empty()) {
    assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
    assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");

    // All pending parts share one location: the register (or stack slot)
    // holding the pointer to the indirectly-passed aggregate.
    for (auto &It : PendingLocs) {
      if (Reg)
        It.convertToReg(Reg);
      else
        It.convertToMem(StackOffset);
      State.addLoc(It);
    }
    PendingLocs.clear();
    PendingArgFlags.clear();
    return false;
  }

  assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
          (TLI.getSubtarget().hasStdExtV() && ValVT.isScalableVector())) &&
         "Expected an XLenVT or scalable vector types at this stage");

  if (Reg) {
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // When a floating-point value is passed on the stack, no bit-conversion is
  // needed.
  if (ValVT.isFloatingPoint()) {
    LocVT = ValVT;
    LocInfo = CCValAssign::Full;
  }
  State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
  return false;
}

// Return the index of the first scalable-vector argument with i1 elements
// (i.e. a mask), if any. CC_RISCV pre-assigns that argument to V0.
template <typename ArgTy>
static Optional<unsigned> preAssignMask(const ArgTy &Args) {
  for (const auto &ArgIdx : enumerate(Args)) {
    MVT ArgVT = ArgIdx.value().VT;
    if (ArgVT.isScalableVector() &&
        ArgVT.getVectorElementType().SimpleTy == MVT::i1)
      return ArgIdx.index();
  }
  return None;
}

// Run CC_RISCV over each incoming argument (or return value when IsRet),
// recording the resulting locations in CCInfo. Aborts on an unhandled type.
void RISCVTargetLowering::analyzeInputArgs(
    MachineFunction &MF, CCState &CCInfo,
    const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const {
  unsigned NumArgs = Ins.size();
  FunctionType *FType = MF.getFunction().getFunctionType();

  Optional<unsigned> FirstMaskArgument;
  if (Subtarget.hasStdExtV())
    FirstMaskArgument = preAssignMask(Ins);

  for (unsigned i = 0; i != NumArgs; ++i) {
    MVT ArgVT = Ins[i].VT;
    ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;

    Type *ArgTy = nullptr;
    if (IsRet)
      ArgTy =
          FType->getReturnType();
    else if (Ins[i].isOrigArg())
      ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());

    RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
    if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
                 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
                 FirstMaskArgument)) {
      LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
                        << EVT(ArgVT).getEVTString() << '\n');
      llvm_unreachable(nullptr);
    }
  }
}

// Run CC_RISCV over each outgoing argument (or return value when IsRet),
// recording the resulting locations in CCInfo. CLI, when non-null, supplies
// the original IR types of call arguments. Aborts on an unhandled type.
void RISCVTargetLowering::analyzeOutputArgs(
    MachineFunction &MF, CCState &CCInfo,
    const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
    CallLoweringInfo *CLI) const {
  unsigned NumArgs = Outs.size();

  Optional<unsigned> FirstMaskArgument;
  if (Subtarget.hasStdExtV())
    FirstMaskArgument = preAssignMask(Outs);

  for (unsigned i = 0; i != NumArgs; i++) {
    MVT ArgVT = Outs[i].VT;
    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
    Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;

    RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
    if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
                 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
                 FirstMaskArgument)) {
      LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
                        << EVT(ArgVT).getEVTString() << "\n");
      llvm_unreachable(nullptr);
    }
  }
}

// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
// values.
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
                                   const CCValAssign &VA, const SDLoc &DL) {
  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
    break;
  case CCValAssign::BCvt:
    // f16 and (on RV64) f32 arrive in wider GPRs, so a plain bitcast does not
    // apply; use the dedicated FMV nodes instead.
    if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
      Val = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, Val);
    else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
      Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
    else
      Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
    break;
  }
  return Val;
}

// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
                                const CCValAssign &VA, const SDLoc &DL,
                                const RISCVTargetLowering &TLI) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  EVT LocVT = VA.getLocVT();
  SDValue Val;
  // Create a live-in vreg for the physical argument register and copy out
  // of it.
  const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
  Register VReg = RegInfo.createVirtualRegister(RC);
  RegInfo.addLiveIn(VA.getLocReg(), VReg);
  Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);

  if (VA.getLocInfo() == CCValAssign::Indirect)
    return Val;

  return convertLocVTToValVT(DAG, Val, VA, DL);
}

// Convert Val from its ValVT to the LocVT form used in the assigned location.
// Inverse of convertLocVTToValVT.
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
                                   const CCValAssign &VA, const SDLoc &DL) {
  EVT LocVT = VA.getLocVT();

  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
    break;
  case CCValAssign::BCvt:
    if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
      Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val);
    else if (VA.getLocVT() == MVT::i64 &&
             VA.getValVT() == MVT::f32)
      Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
    else
      Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
    break;
  }
  return Val;
}

// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
                                const CCValAssign &VA, const SDLoc &DL) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  EVT LocVT = VA.getLocVT();
  EVT ValVT = VA.getValVT();
  EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
  // Materialise a fixed stack object at the caller-assigned offset and load
  // the argument from it.
  int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
                                 VA.getLocMemOffset(), /*Immutable=*/true);
  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
  SDValue Val;

  ISD::LoadExtType ExtType;
  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
  case CCValAssign::Indirect:
  case CCValAssign::BCvt:
    ExtType = ISD::NON_EXTLOAD;
    break;
  }
  Val = DAG.getExtLoad(
      ExtType, DL, LocVT, Chain, FIN,
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
  return Val;
}

// Unpack an f64 argument lowered as two i32 halves on RV32: it may arrive in
// a GPR pair, split between a GPR and the stack (when a7/X17 held the low
// half), or entirely on the stack.
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
                                       const CCValAssign &VA, const SDLoc &DL) {
  assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
         "Unexpected VA");
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  if (VA.isMemLoc()) {
    // f64 is passed on the stack.
    int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
    return DAG.getLoad(MVT::f64, DL, Chain, FIN,
                       MachinePointerInfo::getFixedStack(MF, FI));
  }

  assert(VA.isRegLoc() && "Expected register VA assignment");

  Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
  RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
  SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
  SDValue Hi;
  if (VA.getLocReg() == RISCV::X17) {
    // Second half of f64 is passed on the stack.
    int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
    Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
                     MachinePointerInfo::getFixedStack(MF, FI));
  } else {
    // Second half of f64 is passed in another GPR.
    Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
    RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
    Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
  }
  return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
}

// FastCC has less than 1% performance improvement for some particular
// benchmark. But theoretically, it may have benefit for some cases.
static bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
                            CCValAssign::LocInfo LocInfo,
                            ISD::ArgFlagsTy ArgFlags, CCState &State) {

  if (LocVT == MVT::i32 || LocVT == MVT::i64) {
    // X5 and X6 might be used for save-restore libcall.
    static const MCPhysReg GPRList[] = {
        RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
        RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28,
        RISCV::X29, RISCV::X30, RISCV::X31};
    if (unsigned Reg = State.AllocateReg(GPRList)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f16) {
    static const MCPhysReg FPR16List[] = {
        RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
        RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H,
        RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H,
        RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
    if (unsigned Reg = State.AllocateReg(FPR16List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f32) {
    static const MCPhysReg FPR32List[] = {
        RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
        RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F,
        RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F,
        RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
    if (unsigned Reg = State.AllocateReg(FPR32List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f64) {
    static const MCPhysReg FPR64List[] = {
        RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
        RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D,
        RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D,
        RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
    if (unsigned Reg = State.AllocateReg(FPR64List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  // No register was available: fall back to passing on the stack, 4-byte
  // aligned/sized for 32-bit types and 8-byte for 64-bit types.
  if (LocVT == MVT::i32 || LocVT == MVT::f32) {
    unsigned Offset4 = State.AllocateStack(4, Align(4));
    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
    return false;
  }

  if (LocVT == MVT::i64 || LocVT == MVT::f64) {
    unsigned Offset5 = State.AllocateStack(8, Align(8));
    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
    return false;
  }

  return true; // CC didn't match.
}

// Calling convention used for GHC (Glasgow Haskell Compiler) code: STG
// virtual registers are pinned to specific callee-saved machine registers,
// and running out of registers is a fatal error rather than a stack spill.
static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
                         CCValAssign::LocInfo LocInfo,
                         ISD::ArgFlagsTy ArgFlags, CCState &State) {

  if (LocVT == MVT::i32 || LocVT == MVT::i64) {
    // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
    //                        s1    s2  s3  s4  s5  s6  s7  s8  s9  s10 s11
    static const MCPhysReg GPRList[] = {
        RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
        RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
    if (unsigned Reg = State.AllocateReg(GPRList)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f32) {
    // Pass in STG registers: F1, ..., F6
    //                        fs0 ... fs5
    static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
                                          RISCV::F18_F, RISCV::F19_F,
                                          RISCV::F20_F, RISCV::F21_F};
    if (unsigned Reg = State.AllocateReg(FPR32List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f64) {
    // Pass in STG registers: D1, ..., D6
    //                        fs6 ... fs11
    static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
                                          RISCV::F24_D, RISCV::F25_D,
                                          RISCV::F26_D, RISCV::F27_D};
    if (unsigned Reg = State.AllocateReg(FPR64List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  report_fatal_error("No registers left in GHC calling convention");
  return true;
}

// Transform physical registers into virtual registers.
SDValue RISCVTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  MachineFunction &MF = DAG.getMachineFunction();

  // Only C, Fast and GHC calling conventions are supported; GHC additionally
  // requires both the F and D extensions (its FP arguments are pinned to
  // F/D registers).
  switch (CallConv) {
  default:
    report_fatal_error("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
    break;
  case CallingConv::GHC:
    if (!MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtF] ||
        !MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtD])
      report_fatal_error(
          "GHC calling convention requires the F and D instruction set extensions");
  }

  const Function &Func = MF.getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.arg_empty())
      report_fatal_error(
          "Functions with the interrupt attribute cannot have arguments!");

    StringRef Kind =
        MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
      report_fatal_error(
          "Function interrupt attribute argument not supported!");
  }

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned XLenInBytes = Subtarget.getXLen() / 8;
  // Used with vargs to accumulate store chains.
  std::vector<SDValue> OutChains;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::Fast)
    CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_FastCC);
  else if (CallConv == CallingConv::GHC)
    CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
  else
    analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue;
    // Passing f64 on RV32D with a soft float ABI must be handled as a special
    // case.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
      ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
    else if (VA.isRegLoc())
      ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
    else
      ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);

    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // If the original argument was split and passed by reference (e.g. i128
      // on RV32), we need to load all parts of it here (using the same
      // address). Parts of the same original argument are identified by a
      // shared OrigArgIndex; their offsets from the base address come from
      // PartOffset.
      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
                                   MachinePointerInfo()));
      unsigned ArgIndex = Ins[i].OrigArgIndex;
      assert(Ins[i].PartOffset == 0);
      while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[i + 1];
        unsigned PartOffset = Ins[i + 1].PartOffset;
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
                                      DAG.getIntPtrConstant(PartOffset, DL));
        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
                                     MachinePointerInfo()));
        ++i;
      }
      continue;
    }
    InVals.push_back(ArgValue);
  }

  if (IsVarArg) {
    ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
    const TargetRegisterClass *RC = &RISCV::GPRRegClass;
    MachineFrameInfo &MFI = MF.getFrameInfo();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

    // Offset of the first variable argument from stack pointer, and size of
    // the vararg save area. For now, the varargs save area is either zero or
    // large enough to hold a0-a7.
    int VaArgOffset, VarArgsSaveSize;

    // If all registers are allocated, then all varargs must be passed on the
    // stack and we don't need to save any argregs.
    if (ArgRegs.size() == Idx) {
      VaArgOffset = CCInfo.getNextStackOffset();
      VarArgsSaveSize = 0;
    } else {
      VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
      VaArgOffset = -VarArgsSaveSize;
    }

    // Record the frame index of the first variable argument
    // which is a value necessary to VASTART.
    int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
    RVFI->setVarArgsFrameIndex(FI);

    // If saving an odd number of registers then create an extra stack slot to
    // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
    // offsets to even-numbered registers remain 2*XLEN-aligned.
    if (Idx % 2) {
      MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
      VarArgsSaveSize += XLenInBytes;
    }

    // Copy the integer registers that may have been used for passing varargs
    // to the vararg save area.
    for (unsigned I = Idx; I < ArgRegs.size();
         ++I, VaArgOffset += XLenInBytes) {
      const Register Reg = RegInfo.createVirtualRegister(RC);
      RegInfo.addLiveIn(ArgRegs[I], Reg);
      SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
      FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
      SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
                                   MachinePointerInfo::getFixedStack(MF, FI));
      // Clear the IR Value attached to the store's memory operand; the slot
      // has no corresponding IR-level object.
      cast<StoreSDNode>(Store.getNode())
          ->getMemOperand()
          ->setValue((Value *)nullptr);
      OutChains.push_back(Store);
    }
    RVFI->setVarArgsSaveSize(VarArgsSaveSize);
  }

  // All stores are grouped in one node to allow the matching between
  // the size of Ins and InVals. This only happens for vararg functions.
  if (!OutChains.empty()) {
    OutChains.push_back(Chain);
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  }

  return Chain;
}

/// isEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization.
/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
bool RISCVTargetLowering::isEligibleForTailCallOptimization(
    CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
    const SmallVector<CCValAssign, 16> &ArgLocs) const {

  auto &Callee = CLI.Callee;
  auto CalleeCC = CLI.CallConv;
  auto &Outs = CLI.Outs;
  auto &Caller = MF.getFunction();
  auto CallerCC = Caller.getCallingConv();

  // Exception-handling functions need a special set of instructions to
  // indicate a return to the hardware. Tail-calling another function would
  // probably break this.
  // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
  // should be expanded as new function attributes are introduced.
  if (Caller.hasFnAttribute("interrupt"))
    return false;

  // Do not tail call opt if the stack is used to pass parameters.
  if (CCInfo.getNextStackOffset() != 0)
    return false;

  // Do not tail call opt if any parameters need to be passed indirectly.
  // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
  // passed indirectly. So the address of the value will be passed in a
  // register, or if not available, then the address is put on the stack. In
  // order to pass indirectly, space on the stack often needs to be allocated
  // in order to store the value. In this case the CCInfo.getNextStackOffset()
  // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs
  // are passed CCValAssign::Indirect.
  for (auto &VA : ArgLocs)
    if (VA.getLocInfo() == CCValAssign::Indirect)
      return false;

  // Do not tail call opt if either caller or callee uses struct return
  // semantics.
  auto IsCallerStructRet = Caller.hasStructRetAttr();
  auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
  if (IsCallerStructRet || IsCalleeStructRet)
    return false;

  // Externally-defined functions with weak linkage should not be
  // tail-called. The behaviour of branch instructions in this situation (as
  // used for tail calls) is implementation-defined, so we cannot rely on the
  // linker replacing the tail call with a return.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = G->getGlobal();
    if (GV->hasExternalWeakLinkage())
      return false;
  }

  // The callee has to preserve all registers the caller needs to preserve.
  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  if (CalleeCC != CallerCC) {
    const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
      return false;
  }

  // Byval parameters hand the function a pointer directly into the stack area
  // we want to reuse during a tail call. Working around this *is* possible
  // but less efficient and uglier in LowerCall.
  for (auto &Arg : Outs)
    if (Arg.Flags.isByVal())
      return false;

  return true;
}

// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
// and output parameter nodes.
SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                       SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();

  MachineFunction &MF = DAG.getMachineFunction();

  // Analyze the operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::Fast)
    ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC);
  else if (CallConv == CallingConv::GHC)
    ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
  else
    analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);

  // Check if it's really possible to do a tail call.
  if (IsTailCall)
    IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);

  if (IsTailCall)
    ++NumTailCalls;
  else if (CLI.CB && CLI.CB->isMustTailCall())
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getNextStackOffset();

  // Create local copies for byval args
  SmallVector<SDValue, 8> ByValArgs;
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    if (!Flags.isByVal())
      continue;

    SDValue Arg = OutVals[i];
    unsigned Size = Flags.getByValSize();
    Align Alignment = Flags.getNonZeroByValAlign();

    int FI =
        MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
    SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
    SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);

    Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
                          /*IsVolatile=*/false,
                          /*AlwaysInline=*/false, IsTailCall,
                          MachinePointerInfo(), MachinePointerInfo());
    ByValArgs.push_back(FIPtr);
  }

  if (!IsTailCall)
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);

  // Copy argument values to their designated locations.
  SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    // Handle passing f64 on RV32D with a soft float ABI as a special case.
    bool IsF64OnRV32DSoftABI =
        VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
    if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
      // Split the f64 into two i32 halves via SplitF64; the low half goes in
      // the assigned GPR, the high half in the next GPR or on the stack.
      SDValue SplitF64 = DAG.getNode(
          RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);

      Register RegLo = VA.getLocReg();
      RegsToPass.push_back(std::make_pair(RegLo, Lo));

      if (RegLo == RISCV::X17) {
        // Second half of f64 is passed on the stack.
        // Work out the address of the stack slot.
        if (!StackPtr.getNode())
          StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
        // Emit the store.
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
      } else {
        // Second half of f64 is passed in another GPR.
        assert(RegLo < RISCV::X31 && "Invalid register pair");
        Register RegHigh = RegLo + 1;
        RegsToPass.push_back(std::make_pair(RegHigh, Hi));
      }
      continue;
    }

    // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
    // as any other MemLoc.

    // Promote the value if needed.
    // For now, only handle fully promoted and indirect arguments.
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
      SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT);
      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
                       MachinePointerInfo::getFixedStack(MF, FI)));
      // If the original argument was split (e.g. i128), we need
      // to store all parts of it here (and pass just one address).
      unsigned ArgIndex = Outs[i].OrigArgIndex;
      assert(Outs[i].PartOffset == 0);
      while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
        SDValue PartValue = OutVals[i + 1];
        unsigned PartOffset = Outs[i + 1].PartOffset;
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
                                      DAG.getIntPtrConstant(PartOffset, DL));
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, PartValue, Address,
                         MachinePointerInfo::getFixedStack(MF, FI)));
        ++i;
      }
      ArgValue = SpillSlot;
    } else {
      ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
    }

    // Use local copy if it is a byval arg.
    if (Flags.isByVal())
      ArgValue = ByValArgs[j++];

    if (VA.isRegLoc()) {
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");
      assert(!IsTailCall && "Tail call not allowed if stack is used "
                            "for passing parameters");

      // Work out the address of the stack slot.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
      SDValue Address =
          DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                      DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));

      // Emit the store.
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
    }
  }

  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  SDValue Glue;

  // Build a sequence of copy-to-reg nodes, chained and glued together.
  for (auto &Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
    Glue = Chain.getValue(1);
  }

  // Validate that none of the argument registers have been marked as
  // reserved, if so report an error. Do the same for the return address if this
  // is not a tailcall.
  validateCCReservedRegs(RegsToPass, MF);
  if (!IsTailCall &&
      MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
    MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
        MF.getFunction(),
        "Return address register required, but has been reserved."});

  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
  // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
  // split it and then direct call can be matched by PseudoCALL.
  if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = S->getGlobal();

    unsigned OpFlags = RISCVII::MO_CALL;
    if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
      OpFlags = RISCVII::MO_PLT;

    Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    unsigned OpFlags = RISCVII::MO_CALL;

    if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
                                                 nullptr))
      OpFlags = RISCVII::MO_PLT;

    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
  }

  // The first call operand is the chain and the second is the target address.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (auto &Reg : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));

  if (!IsTailCall) {
    // Add a register mask operand representing the call-preserved registers.
    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
    const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
    assert(Mask && "Missing call preserved mask for calling convention");
    Ops.push_back(DAG.getRegisterMask(Mask));
  }

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);

  if (IsTailCall) {
    MF.getFrameInfo().setHasTailCall();
    return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
  }

  Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
  Glue = Chain.getValue(1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumBytes, DL, PtrVT, true),
                             DAG.getConstant(0, DL, PtrVT, true),
                             Glue, DL);
  Glue = Chain.getValue(1);

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
  analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true);

  // Copy all of the result registers out of their specified physreg.
  for (auto &VA : RVLocs) {
    // Copy the value out
    SDValue RetValue =
        DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
    // Glue the RetValue to the end of the call sequence
    Chain = RetValue.getValue(1);
    Glue = RetValue.getValue(2);

    // f64 returned in a GPR pair under the RV32 soft-float ABIs: fetch the
    // second half from a1 and recombine with BuildPairF64.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
      SDValue RetValue2 =
          DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
      Chain = RetValue2.getValue(1);
      Glue = RetValue2.getValue(2);
      RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
                             RetValue2);
    }

    RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);

    InVals.push_back(RetValue);
  }

  return Chain;
}

// Return true if all return values can be lowered without demoting to an
// sret (i.e. every out value finds a location under the C return convention).
bool RISCVTargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);

  Optional<unsigned> FirstMaskArgument;
  if (Subtarget.hasStdExtV())
    FirstMaskArgument = preAssignMask(Outs);

  // CC_RISCV returning true means this value could not be assigned a
  // location, so the return cannot be lowered directly.
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    MVT VT = Outs[i].VT;
    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
    RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
    if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
                 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr,
                 *this, FirstMaskArgument))
      return false;
  }
  return true;
}

SDValue
RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                 bool IsVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
                                 const SDLoc &DL, SelectionDAG &DAG) const {
  const MachineFunction &MF = DAG.getMachineFunction();
  const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();

  // Stores the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;

  // Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
                    nullptr);

  if (CallConv == CallingConv::GHC && !RVLocs.empty())
    report_fatal_error("GHC functions return void only");

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
    SDValue Val = OutVals[i];
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      // Handle returning f64 on RV32D with a soft float ABI.
      assert(VA.isRegLoc() && "Expected return via registers");
      // Split the f64 into two i32 halves and return them in a GPR pair.
      SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
                                     DAG.getVTList(MVT::i32, MVT::i32), Val);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);
      Register RegLo = VA.getLocReg();
      assert(RegLo < RISCV::X31 && "Invalid register pair");
      Register RegHi = RegLo + 1;

      if (STI.isRegisterReservedByUser(RegLo) ||
          STI.isRegisterReservedByUser(RegHi))
        MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
            MF.getFunction(),
            "Return value register required, but has been reserved."});

      Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
      Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
    } else {
      // Handle a 'normal' return.
      Val = convertValVTToLocVT(DAG, Val, VA, DL);
      Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);

      if (STI.isRegisterReservedByUser(VA.getLocReg()))
        MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
            MF.getFunction(),
            "Return value register required, but has been reserved."});

      // Guarantee that all emitted copies are stuck together.
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
    }
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue node if we have it.
  if (Glue.getNode()) {
    RetOps.push_back(Glue);
  }

  // Interrupt service routines use different return instructions.
  const Function &Func = DAG.getMachineFunction().getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.getReturnType()->isVoidTy())
      report_fatal_error(
          "Functions with the interrupt attribute must have void return type!");

    MachineFunction &MF = DAG.getMachineFunction();
    StringRef Kind =
        MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    // Pick the privilege-level-specific return node: uret/sret/mret.
    unsigned RetOpc;
    if (Kind == "user")
      RetOpc = RISCVISD::URET_FLAG;
    else if (Kind == "supervisor")
      RetOpc = RISCVISD::SRET_FLAG;
    else
      RetOpc = RISCVISD::MRET_FLAG;

    return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
  }

  return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
}

// Diagnose (rather than miscompile) argument registers that the user has
// marked as reserved via -ffixed-* style options.
void RISCVTargetLowering::validateCCReservedRegs(
    const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
    MachineFunction &MF) const {
  const Function &F = MF.getFunction();
  const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();

  if (llvm::any_of(Regs, [&STI](auto Reg) {
        return STI.isRegisterReservedByUser(Reg.first);
      }))
    F.getContext().diagnose(DiagnosticInfoUnsupported{
        F, "Argument register required, but has been reserved."});
}

bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  return CI->isTailCall();
}

// Map a RISCVISD opcode to its printable name for debug output.
const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
#define NODE_NAME_CASE(NODE)                                                   \
  case RISCVISD::NODE:                                                         \
    return "RISCVISD::" #NODE;
  // clang-format off
  switch ((RISCVISD::NodeType)Opcode) {
  case RISCVISD::FIRST_NUMBER:
    break;
  NODE_NAME_CASE(RET_FLAG)
  NODE_NAME_CASE(URET_FLAG)
  NODE_NAME_CASE(SRET_FLAG)
  NODE_NAME_CASE(MRET_FLAG)
  NODE_NAME_CASE(CALL)
  NODE_NAME_CASE(SELECT_CC)
  NODE_NAME_CASE(BuildPairF64)
  NODE_NAME_CASE(SplitF64)
  NODE_NAME_CASE(TAIL)
  NODE_NAME_CASE(SLLW)
  NODE_NAME_CASE(SRAW)
  NODE_NAME_CASE(SRLW)
  NODE_NAME_CASE(DIVW)
  NODE_NAME_CASE(DIVUW)
  NODE_NAME_CASE(REMUW)
  NODE_NAME_CASE(ROLW)
  NODE_NAME_CASE(RORW)
  NODE_NAME_CASE(FSLW)
  NODE_NAME_CASE(FSRW)
  NODE_NAME_CASE(FSL)
  NODE_NAME_CASE(FSR)
  NODE_NAME_CASE(FMV_H_X)
  NODE_NAME_CASE(FMV_X_ANYEXTH)
  NODE_NAME_CASE(FMV_W_X_RV64)
  NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
  NODE_NAME_CASE(READ_CYCLE_WIDE)
  NODE_NAME_CASE(GREVI)
  NODE_NAME_CASE(GREVIW)
  NODE_NAME_CASE(GORCI)
  NODE_NAME_CASE(GORCIW)
  NODE_NAME_CASE(SHFLI)
  NODE_NAME_CASE(VMV_V_X_VL)
  NODE_NAME_CASE(VFMV_V_F_VL)
  NODE_NAME_CASE(VMV_X_S)
  NODE_NAME_CASE(VMV_S_XF_VL)
  NODE_NAME_CASE(SPLAT_VECTOR_I64)
  NODE_NAME_CASE(READ_VLENB)
  NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
  NODE_NAME_CASE(VLEFF)
  NODE_NAME_CASE(VLEFF_MASK)
  NODE_NAME_CASE(VSLIDEUP_VL)
  NODE_NAME_CASE(VSLIDEDOWN_VL)
  NODE_NAME_CASE(VID_VL)
  NODE_NAME_CASE(VFNCVT_ROD_VL)
  NODE_NAME_CASE(VECREDUCE_ADD)
  NODE_NAME_CASE(VECREDUCE_UMAX)
  NODE_NAME_CASE(VECREDUCE_SMAX)
  NODE_NAME_CASE(VECREDUCE_UMIN)
  NODE_NAME_CASE(VECREDUCE_SMIN)
  NODE_NAME_CASE(VECREDUCE_AND)
  NODE_NAME_CASE(VECREDUCE_OR)
  NODE_NAME_CASE(VECREDUCE_XOR)
  NODE_NAME_CASE(VECREDUCE_FADD)
  NODE_NAME_CASE(VECREDUCE_SEQ_FADD)
  NODE_NAME_CASE(ADD_VL)
  NODE_NAME_CASE(AND_VL)
  NODE_NAME_CASE(MUL_VL)
  NODE_NAME_CASE(OR_VL)
  NODE_NAME_CASE(SDIV_VL)
  NODE_NAME_CASE(SHL_VL)
  NODE_NAME_CASE(SREM_VL)
  NODE_NAME_CASE(SRA_VL)
  NODE_NAME_CASE(SRL_VL)
  NODE_NAME_CASE(SUB_VL)
  NODE_NAME_CASE(UDIV_VL)
  NODE_NAME_CASE(UREM_VL)
  NODE_NAME_CASE(XOR_VL)
  NODE_NAME_CASE(FADD_VL)
  NODE_NAME_CASE(FSUB_VL)
  NODE_NAME_CASE(FMUL_VL)
  NODE_NAME_CASE(FDIV_VL)
  NODE_NAME_CASE(FNEG_VL)
  NODE_NAME_CASE(FABS_VL)
  NODE_NAME_CASE(FSQRT_VL)
  NODE_NAME_CASE(FMA_VL)
  NODE_NAME_CASE(SMIN_VL)
  NODE_NAME_CASE(SMAX_VL)
  NODE_NAME_CASE(UMIN_VL)
  NODE_NAME_CASE(UMAX_VL)
  NODE_NAME_CASE(MULHS_VL)
  NODE_NAME_CASE(MULHU_VL)
  NODE_NAME_CASE(FP_TO_SINT_VL)
  NODE_NAME_CASE(FP_TO_UINT_VL)
  NODE_NAME_CASE(SINT_TO_FP_VL)
  NODE_NAME_CASE(UINT_TO_FP_VL)
  NODE_NAME_CASE(FP_EXTEND_VL)
  NODE_NAME_CASE(FP_ROUND_VL)
  NODE_NAME_CASE(SETCC_VL)
  NODE_NAME_CASE(VSELECT_VL)
  NODE_NAME_CASE(VMAND_VL)
  NODE_NAME_CASE(VMOR_VL)
  NODE_NAME_CASE(VMXOR_VL)
  NODE_NAME_CASE(VMCLR_VL)
  NODE_NAME_CASE(VMSET_VL)
  NODE_NAME_CASE(VRGATHER_VX_VL)
  NODE_NAME_CASE(VSEXT_VL)
  NODE_NAME_CASE(VZEXT_VL)
  NODE_NAME_CASE(VLE_VL)
  NODE_NAME_CASE(VSE_VL)
  }
  // clang-format on
  return nullptr;
#undef NODE_NAME_CASE
}

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
RISCVTargetLowering::ConstraintType
RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:
      break;
    case 'f':
      return C_RegisterClass;
    case 'I':
    case 'J':
    case 'K':
      return C_Immediate;
    case 'A':
      return C_Memory;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

std::pair<unsigned, const TargetRegisterClass *>
RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                  StringRef Constraint,
                                                  MVT VT) const {
  // First, see if this is a constraint that directly corresponds to a
  // RISCV register class.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      return std::make_pair(0U, &RISCV::GPRRegClass);
    case 'f':
      // 'f' maps to the narrowest FP register class whose element type
      // matches VT, gated on the corresponding extension being present.
      if (Subtarget.hasStdExtZfh() && VT == MVT::f16)
        return std::make_pair(0U, &RISCV::FPR16RegClass);
      if (Subtarget.hasStdExtF() && VT == MVT::f32)
        return std::make_pair(0U, &RISCV::FPR32RegClass);
      if (Subtarget.hasStdExtD() && VT == MVT::f64)
        return std::make_pair(0U, &RISCV::FPR64RegClass);
      break;
    default:
      break;
    }
  }

  // Clang will correctly decode the usage of register name aliases into their
  // official names. However, other frontends like `rustc` do not. This allows
  // users of these frontends to use the ABI names for registers in LLVM-style
  // register constraints.
  unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
                               .Case("{zero}", RISCV::X0)
                               .Case("{ra}", RISCV::X1)
                               .Case("{sp}", RISCV::X2)
                               .Case("{gp}", RISCV::X3)
                               .Case("{tp}", RISCV::X4)
                               .Case("{t0}", RISCV::X5)
                               .Case("{t1}", RISCV::X6)
                               .Case("{t2}", RISCV::X7)
                               .Cases("{s0}", "{fp}", RISCV::X8)
                               .Case("{s1}", RISCV::X9)
                               .Case("{a0}", RISCV::X10)
                               .Case("{a1}", RISCV::X11)
                               .Case("{a2}", RISCV::X12)
                               .Case("{a3}", RISCV::X13)
                               .Case("{a4}", RISCV::X14)
                               .Case("{a5}", RISCV::X15)
                               .Case("{a6}", RISCV::X16)
                               .Case("{a7}", RISCV::X17)
                               .Case("{s2}", RISCV::X18)
                               .Case("{s3}", RISCV::X19)
                               .Case("{s4}", RISCV::X20)
                               .Case("{s5}", RISCV::X21)
                               .Case("{s6}", RISCV::X22)
                               .Case("{s7}", RISCV::X23)
                               .Case("{s8}", RISCV::X24)
                               .Case("{s9}", RISCV::X25)
                               .Case("{s10}", RISCV::X26)
                               .Case("{s11}", RISCV::X27)
                               .Case("{t3}", RISCV::X28)
                               .Case("{t4}", RISCV::X29)
                               .Case("{t5}", RISCV::X30)
                               .Case("{t6}", RISCV::X31)
                               .Default(RISCV::NoRegister);
  if (XRegFromAlias != RISCV::NoRegister)
    return std::make_pair(XRegFromAlias,
&RISCV::GPRRegClass); 5892 5893 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the 5894 // TableGen record rather than the AsmName to choose registers for InlineAsm 5895 // constraints, plus we want to match those names to the widest floating point 5896 // register type available, manually select floating point registers here. 5897 // 5898 // The second case is the ABI name of the register, so that frontends can also 5899 // use the ABI names in register constraint lists. 5900 if (Subtarget.hasStdExtF()) { 5901 unsigned FReg = StringSwitch<unsigned>(Constraint.lower()) 5902 .Cases("{f0}", "{ft0}", RISCV::F0_F) 5903 .Cases("{f1}", "{ft1}", RISCV::F1_F) 5904 .Cases("{f2}", "{ft2}", RISCV::F2_F) 5905 .Cases("{f3}", "{ft3}", RISCV::F3_F) 5906 .Cases("{f4}", "{ft4}", RISCV::F4_F) 5907 .Cases("{f5}", "{ft5}", RISCV::F5_F) 5908 .Cases("{f6}", "{ft6}", RISCV::F6_F) 5909 .Cases("{f7}", "{ft7}", RISCV::F7_F) 5910 .Cases("{f8}", "{fs0}", RISCV::F8_F) 5911 .Cases("{f9}", "{fs1}", RISCV::F9_F) 5912 .Cases("{f10}", "{fa0}", RISCV::F10_F) 5913 .Cases("{f11}", "{fa1}", RISCV::F11_F) 5914 .Cases("{f12}", "{fa2}", RISCV::F12_F) 5915 .Cases("{f13}", "{fa3}", RISCV::F13_F) 5916 .Cases("{f14}", "{fa4}", RISCV::F14_F) 5917 .Cases("{f15}", "{fa5}", RISCV::F15_F) 5918 .Cases("{f16}", "{fa6}", RISCV::F16_F) 5919 .Cases("{f17}", "{fa7}", RISCV::F17_F) 5920 .Cases("{f18}", "{fs2}", RISCV::F18_F) 5921 .Cases("{f19}", "{fs3}", RISCV::F19_F) 5922 .Cases("{f20}", "{fs4}", RISCV::F20_F) 5923 .Cases("{f21}", "{fs5}", RISCV::F21_F) 5924 .Cases("{f22}", "{fs6}", RISCV::F22_F) 5925 .Cases("{f23}", "{fs7}", RISCV::F23_F) 5926 .Cases("{f24}", "{fs8}", RISCV::F24_F) 5927 .Cases("{f25}", "{fs9}", RISCV::F25_F) 5928 .Cases("{f26}", "{fs10}", RISCV::F26_F) 5929 .Cases("{f27}", "{fs11}", RISCV::F27_F) 5930 .Cases("{f28}", "{ft8}", RISCV::F28_F) 5931 .Cases("{f29}", "{ft9}", RISCV::F29_F) 5932 .Cases("{f30}", "{ft10}", RISCV::F30_F) 5933 .Cases("{f31}", "{ft11}", RISCV::F31_F) 5934 
.Default(RISCV::NoRegister); 5935 if (FReg != RISCV::NoRegister) { 5936 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg"); 5937 if (Subtarget.hasStdExtD()) { 5938 unsigned RegNo = FReg - RISCV::F0_F; 5939 unsigned DReg = RISCV::F0_D + RegNo; 5940 return std::make_pair(DReg, &RISCV::FPR64RegClass); 5941 } 5942 return std::make_pair(FReg, &RISCV::FPR32RegClass); 5943 } 5944 } 5945 5946 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); 5947 } 5948 5949 unsigned 5950 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const { 5951 // Currently only support length 1 constraints. 5952 if (ConstraintCode.size() == 1) { 5953 switch (ConstraintCode[0]) { 5954 case 'A': 5955 return InlineAsm::Constraint_A; 5956 default: 5957 break; 5958 } 5959 } 5960 5961 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); 5962 } 5963 5964 void RISCVTargetLowering::LowerAsmOperandForConstraint( 5965 SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops, 5966 SelectionDAG &DAG) const { 5967 // Currently only support length 1 constraints. 5968 if (Constraint.length() == 1) { 5969 switch (Constraint[0]) { 5970 case 'I': 5971 // Validate & create a 12-bit signed immediate operand. 5972 if (auto *C = dyn_cast<ConstantSDNode>(Op)) { 5973 uint64_t CVal = C->getSExtValue(); 5974 if (isInt<12>(CVal)) 5975 Ops.push_back( 5976 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT())); 5977 } 5978 return; 5979 case 'J': 5980 // Validate & create an integer zero operand. 5981 if (auto *C = dyn_cast<ConstantSDNode>(Op)) 5982 if (C->getZExtValue() == 0) 5983 Ops.push_back( 5984 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT())); 5985 return; 5986 case 'K': 5987 // Validate & create a 5-bit unsigned immediate operand. 
5988 if (auto *C = dyn_cast<ConstantSDNode>(Op)) { 5989 uint64_t CVal = C->getZExtValue(); 5990 if (isUInt<5>(CVal)) 5991 Ops.push_back( 5992 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT())); 5993 } 5994 return; 5995 default: 5996 break; 5997 } 5998 } 5999 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 6000 } 6001 6002 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder, 6003 Instruction *Inst, 6004 AtomicOrdering Ord) const { 6005 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent) 6006 return Builder.CreateFence(Ord); 6007 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord)) 6008 return Builder.CreateFence(AtomicOrdering::Release); 6009 return nullptr; 6010 } 6011 6012 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder, 6013 Instruction *Inst, 6014 AtomicOrdering Ord) const { 6015 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord)) 6016 return Builder.CreateFence(AtomicOrdering::Acquire); 6017 return nullptr; 6018 } 6019 6020 TargetLowering::AtomicExpansionKind 6021 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { 6022 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating 6023 // point operations can't be used in an lr/sc sequence without breaking the 6024 // forward-progress guarantee. 
6025 if (AI->isFloatingPointOperation()) 6026 return AtomicExpansionKind::CmpXChg; 6027 6028 unsigned Size = AI->getType()->getPrimitiveSizeInBits(); 6029 if (Size == 8 || Size == 16) 6030 return AtomicExpansionKind::MaskedIntrinsic; 6031 return AtomicExpansionKind::None; 6032 } 6033 6034 static Intrinsic::ID 6035 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) { 6036 if (XLen == 32) { 6037 switch (BinOp) { 6038 default: 6039 llvm_unreachable("Unexpected AtomicRMW BinOp"); 6040 case AtomicRMWInst::Xchg: 6041 return Intrinsic::riscv_masked_atomicrmw_xchg_i32; 6042 case AtomicRMWInst::Add: 6043 return Intrinsic::riscv_masked_atomicrmw_add_i32; 6044 case AtomicRMWInst::Sub: 6045 return Intrinsic::riscv_masked_atomicrmw_sub_i32; 6046 case AtomicRMWInst::Nand: 6047 return Intrinsic::riscv_masked_atomicrmw_nand_i32; 6048 case AtomicRMWInst::Max: 6049 return Intrinsic::riscv_masked_atomicrmw_max_i32; 6050 case AtomicRMWInst::Min: 6051 return Intrinsic::riscv_masked_atomicrmw_min_i32; 6052 case AtomicRMWInst::UMax: 6053 return Intrinsic::riscv_masked_atomicrmw_umax_i32; 6054 case AtomicRMWInst::UMin: 6055 return Intrinsic::riscv_masked_atomicrmw_umin_i32; 6056 } 6057 } 6058 6059 if (XLen == 64) { 6060 switch (BinOp) { 6061 default: 6062 llvm_unreachable("Unexpected AtomicRMW BinOp"); 6063 case AtomicRMWInst::Xchg: 6064 return Intrinsic::riscv_masked_atomicrmw_xchg_i64; 6065 case AtomicRMWInst::Add: 6066 return Intrinsic::riscv_masked_atomicrmw_add_i64; 6067 case AtomicRMWInst::Sub: 6068 return Intrinsic::riscv_masked_atomicrmw_sub_i64; 6069 case AtomicRMWInst::Nand: 6070 return Intrinsic::riscv_masked_atomicrmw_nand_i64; 6071 case AtomicRMWInst::Max: 6072 return Intrinsic::riscv_masked_atomicrmw_max_i64; 6073 case AtomicRMWInst::Min: 6074 return Intrinsic::riscv_masked_atomicrmw_min_i64; 6075 case AtomicRMWInst::UMax: 6076 return Intrinsic::riscv_masked_atomicrmw_umax_i64; 6077 case AtomicRMWInst::UMin: 6078 return 
Intrinsic::riscv_masked_atomicrmw_umin_i64; 6079 } 6080 } 6081 6082 llvm_unreachable("Unexpected XLen\n"); 6083 } 6084 6085 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic( 6086 IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, 6087 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const { 6088 unsigned XLen = Subtarget.getXLen(); 6089 Value *Ordering = 6090 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering())); 6091 Type *Tys[] = {AlignedAddr->getType()}; 6092 Function *LrwOpScwLoop = Intrinsic::getDeclaration( 6093 AI->getModule(), 6094 getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys); 6095 6096 if (XLen == 64) { 6097 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty()); 6098 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); 6099 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty()); 6100 } 6101 6102 Value *Result; 6103 6104 // Must pass the shift amount needed to sign extend the loaded value prior 6105 // to performing a signed comparison for min/max. ShiftAmt is the number of 6106 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which 6107 // is the number of bits to left+right shift the value in order to 6108 // sign-extend. 
6109 if (AI->getOperation() == AtomicRMWInst::Min || 6110 AI->getOperation() == AtomicRMWInst::Max) { 6111 const DataLayout &DL = AI->getModule()->getDataLayout(); 6112 unsigned ValWidth = 6113 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType()); 6114 Value *SextShamt = 6115 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt); 6116 Result = Builder.CreateCall(LrwOpScwLoop, 6117 {AlignedAddr, Incr, Mask, SextShamt, Ordering}); 6118 } else { 6119 Result = 6120 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering}); 6121 } 6122 6123 if (XLen == 64) 6124 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); 6125 return Result; 6126 } 6127 6128 TargetLowering::AtomicExpansionKind 6129 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR( 6130 AtomicCmpXchgInst *CI) const { 6131 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits(); 6132 if (Size == 8 || Size == 16) 6133 return AtomicExpansionKind::MaskedIntrinsic; 6134 return AtomicExpansionKind::None; 6135 } 6136 6137 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( 6138 IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, 6139 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { 6140 unsigned XLen = Subtarget.getXLen(); 6141 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord)); 6142 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32; 6143 if (XLen == 64) { 6144 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty()); 6145 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty()); 6146 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); 6147 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64; 6148 } 6149 Type *Tys[] = {AlignedAddr->getType()}; 6150 Function *MaskedCmpXchg = 6151 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys); 6152 Value *Result = Builder.CreateCall( 6153 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering}); 6154 if (XLen == 64) 6155 
Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); 6156 return Result; 6157 } 6158 6159 bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, 6160 EVT VT) const { 6161 VT = VT.getScalarType(); 6162 6163 if (!VT.isSimple()) 6164 return false; 6165 6166 switch (VT.getSimpleVT().SimpleTy) { 6167 case MVT::f16: 6168 return Subtarget.hasStdExtZfh(); 6169 case MVT::f32: 6170 return Subtarget.hasStdExtF(); 6171 case MVT::f64: 6172 return Subtarget.hasStdExtD(); 6173 default: 6174 break; 6175 } 6176 6177 return false; 6178 } 6179 6180 Register RISCVTargetLowering::getExceptionPointerRegister( 6181 const Constant *PersonalityFn) const { 6182 return RISCV::X10; 6183 } 6184 6185 Register RISCVTargetLowering::getExceptionSelectorRegister( 6186 const Constant *PersonalityFn) const { 6187 return RISCV::X11; 6188 } 6189 6190 bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const { 6191 // Return false to suppress the unnecessary extensions if the LibCall 6192 // arguments or return value is f32 type for LP64 ABI. 6193 RISCVABI::ABI ABI = Subtarget.getTargetABI(); 6194 if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32)) 6195 return false; 6196 6197 return true; 6198 } 6199 6200 bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const { 6201 if (Subtarget.is64Bit() && Type == MVT::i32) 6202 return true; 6203 6204 return IsSigned; 6205 } 6206 6207 bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT, 6208 SDValue C) const { 6209 // Check integral scalar types. 6210 if (VT.isScalarInteger()) { 6211 // Omit the optimization if the sub target has the M extension and the data 6212 // size exceeds XLen. 6213 if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen()) 6214 return false; 6215 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) { 6216 // Break the MUL to a SLLI and an ADD/SUB. 
6217 const APInt &Imm = ConstNode->getAPIntValue(); 6218 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() || 6219 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2()) 6220 return true; 6221 // Omit the following optimization if the sub target has the M extension 6222 // and the data size >= XLen. 6223 if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen()) 6224 return false; 6225 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs 6226 // a pair of LUI/ADDI. 6227 if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) { 6228 APInt ImmS = Imm.ashr(Imm.countTrailingZeros()); 6229 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() || 6230 (1 - ImmS).isPowerOf2()) 6231 return true; 6232 } 6233 } 6234 } 6235 6236 return false; 6237 } 6238 6239 bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const { 6240 if (!Subtarget.useRVVForFixedLengthVectors()) 6241 return false; 6242 6243 if (!VT.isFixedLengthVector()) 6244 return false; 6245 6246 // Don't use RVV for vectors we cannot scalarize if required. 6247 switch (VT.getVectorElementType().SimpleTy) { 6248 // i1 is supported but has different rules. 6249 default: 6250 return false; 6251 case MVT::i1: 6252 // Masks can only use a single register. 6253 if (VT.getVectorNumElements() > Subtarget.getMinRVVVectorSizeInBits()) 6254 return false; 6255 break; 6256 case MVT::i8: 6257 case MVT::i16: 6258 case MVT::i32: 6259 case MVT::i64: 6260 break; 6261 case MVT::f16: 6262 if (!Subtarget.hasStdExtZfh()) 6263 return false; 6264 break; 6265 case MVT::f32: 6266 if (!Subtarget.hasStdExtF()) 6267 return false; 6268 break; 6269 case MVT::f64: 6270 if (!Subtarget.hasStdExtD()) 6271 return false; 6272 break; 6273 } 6274 6275 unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT); 6276 // Don't use RVV for types that don't fit. 
6277 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors()) 6278 return false; 6279 6280 // TODO: Perhaps an artificial restriction, but worth having whilst getting 6281 // the base fixed length RVV support in place. 6282 if (!VT.isPow2VectorType()) 6283 return false; 6284 6285 return true; 6286 } 6287 6288 bool RISCVTargetLowering::allowsMisalignedMemoryAccesses( 6289 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, 6290 bool *Fast) const { 6291 if (!VT.isScalableVector()) 6292 return false; 6293 6294 EVT ElemVT = VT.getVectorElementType(); 6295 if (Alignment >= ElemVT.getStoreSize()) { 6296 if (Fast) 6297 *Fast = true; 6298 return true; 6299 } 6300 6301 return false; 6302 } 6303 6304 #define GET_REGISTER_MATCHER 6305 #include "RISCVGenAsmMatcher.inc" 6306 6307 Register 6308 RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT, 6309 const MachineFunction &MF) const { 6310 Register Reg = MatchRegisterAltName(RegName); 6311 if (Reg == RISCV::NoRegister) 6312 Reg = MatchRegisterName(RegName); 6313 if (Reg == RISCV::NoRegister) 6314 report_fatal_error( 6315 Twine("Invalid register name \"" + StringRef(RegName) + "\".")); 6316 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF); 6317 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg)) 6318 report_fatal_error(Twine("Trying to obtain non-reserved register \"" + 6319 StringRef(RegName) + "\".")); 6320 return Reg; 6321 } 6322 6323 namespace llvm { 6324 namespace RISCVVIntrinsicsTable { 6325 6326 #define GET_RISCVVIntrinsicsTable_IMPL 6327 #include "RISCVGenSearchableTables.inc" 6328 6329 } // namespace RISCVVIntrinsicsTable 6330 6331 } // namespace llvm 6332