//===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISCV uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "RISCVISelLowering.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVRegisterInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-lower"

STATISTIC(NumTailCalls, "Number of tail calls");

RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                         const RISCVSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  if (Subtarget.isRV32E())
    report_fatal_error("Codegen not yet implemented for RV32E");

  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");

  if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
      !Subtarget.hasStdExtF()) {
    errs() << "Hard-float 'f' ABI can't be used for a target that "
              "doesn't support the F instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
             !Subtarget.hasStdExtD()) {
    errs() << "Hard-float 'd' ABI can't be used for a target that "
              "doesn't support the D instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  }

  switch (ABI) {
  default:
    report_fatal_error("Don't know how to lower this ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64:
  case RISCVABI::ABI_LP64F:
  case RISCVABI::ABI_LP64D:
    break;
  }

  MVT XLenVT = Subtarget.getXLenVT();

  // Set up the register classes.
  addRegisterClass(XLenVT, &RISCV::GPRRegClass);

  if (Subtarget.hasStdExtZfh())
    addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
  if (Subtarget.hasStdExtF())
    addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
  if (Subtarget.hasStdExtD())
    addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);

  static const MVT::SimpleValueType BoolVecVTs[] = {
      MVT::nxv1i1,  MVT::nxv2i1,  MVT::nxv4i1, MVT::nxv8i1,
      MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
  static const MVT::SimpleValueType IntVecVTs[] = {
      MVT::nxv1i8,  MVT::nxv2i8,   MVT::nxv4i8,   MVT::nxv8i8,  MVT::nxv16i8,
      MVT::nxv32i8, MVT::nxv64i8,  MVT::nxv1i16,  MVT::nxv2i16, MVT::nxv4i16,
      MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
      MVT::nxv4i32, MVT::nxv8i32,  MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
      MVT::nxv4i64, MVT::nxv8i64};
  static const MVT::SimpleValueType F16VecVTs[] = {
      MVT::nxv1f16, MVT::nxv2f16,  MVT::nxv4f16,
      MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
  static const MVT::SimpleValueType F32VecVTs[] = {
      MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
  static const MVT::SimpleValueType F64VecVTs[] = {
      MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};

  if (Subtarget.hasStdExtV()) {
    auto addRegClassForRVV = [this](MVT VT) {
      unsigned Size = VT.getSizeInBits().getKnownMinValue();
      assert(Size <= 512 && isPowerOf2_32(Size));
      const TargetRegisterClass *RC;
      if (Size <= 64)
        RC = &RISCV::VRRegClass;
      else if (Size == 128)
        RC = &RISCV::VRM2RegClass;
      else if (Size == 256)
        RC = &RISCV::VRM4RegClass;
      else
        RC = &RISCV::VRM8RegClass;

      addRegisterClass(VT, RC);
    };

    for (MVT VT : BoolVecVTs)
      addRegClassForRVV(VT);
    for (MVT VT : IntVecVTs)
      addRegClassForRVV(VT);

    if (Subtarget.hasStdExtZfh())
      for (MVT VT : F16VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasStdExtF())
      for (MVT VT : F32VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasStdExtD())
      for (MVT VT : F64VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.useRVVForFixedLengthVectors()) {
      auto addRegClassForFixedVectors = [this](MVT VT) {
        unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
        const TargetRegisterClass *RC;
        if (LMul == 1)
          RC = &RISCV::VRRegClass;
        else if (LMul == 2)
          RC = &RISCV::VRM2RegClass;
        else if (LMul == 4)
          RC = &RISCV::VRM4RegClass;
        else if (LMul == 8)
          RC = &RISCV::VRM8RegClass;
        else
          llvm_unreachable("Unexpected LMul!");

        addRegisterClass(VT, RC);
      };
      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);

      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);
    }
  }

  // Compute derived properties from the register classes.
  computeRegisterProperties(STI.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(RISCV::X2);

  for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
    setLoadExtAction(N, XLenVT, MVT::i1, Promote);

  // TODO: add all necessary setOperationAction calls.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, XLenVT, Expand);
  setOperationAction(ISD::SELECT_CC, XLenVT, Expand);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  if (!Subtarget.hasStdExtZbb()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  }

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::ADD, MVT::i32, Custom);
    setOperationAction(ISD::SUB, MVT::i32, Custom);
    setOperationAction(ISD::SHL, MVT::i32, Custom);
    setOperationAction(ISD::SRA, MVT::i32, Custom);
    setOperationAction(ISD::SRL, MVT::i32, Custom);
  }

  if (!Subtarget.hasStdExtM()) {
    setOperationAction(ISD::MUL, XLenVT, Expand);
    setOperationAction(ISD::MULHS, XLenVT, Expand);
    setOperationAction(ISD::MULHU, XLenVT, Expand);
    setOperationAction(ISD::SDIV, XLenVT, Expand);
    setOperationAction(ISD::UDIV, XLenVT, Expand);
    setOperationAction(ISD::SREM, XLenVT, Expand);
    setOperationAction(ISD::UREM, XLenVT, Expand);
  }

  if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) {
    setOperationAction(ISD::MUL, MVT::i32, Custom);

    setOperationAction(ISD::SDIV, MVT::i8, Custom);
    setOperationAction(ISD::UDIV, MVT::i8, Custom);
    setOperationAction(ISD::UREM, MVT::i8, Custom);
    setOperationAction(ISD::SDIV, MVT::i16, Custom);
    setOperationAction(ISD::UDIV, MVT::i16, Custom);
    setOperationAction(ISD::UREM, MVT::i16, Custom);
    setOperationAction(ISD::SDIV, MVT::i32, Custom);
    setOperationAction(ISD::UDIV, MVT::i32, Custom);
    setOperationAction(ISD::UREM, MVT::i32, Custom);
  }

  setOperationAction(ISD::SDIVREM, XLenVT, Expand);
  setOperationAction(ISD::UDIVREM, XLenVT, Expand);
  setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
  setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);

  setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
  setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
  setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);

  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::ROTL, MVT::i32, Custom);
      setOperationAction(ISD::ROTR, MVT::i32, Custom);
    }
  } else {
    setOperationAction(ISD::ROTL, XLenVT, Expand);
    setOperationAction(ISD::ROTR, XLenVT, Expand);
  }

  if (Subtarget.hasStdExtZbp()) {
    // Custom lower bswap/bitreverse so we can convert them to GREVI to enable
    // more combining.
    setOperationAction(ISD::BITREVERSE, XLenVT, Custom);
    setOperationAction(ISD::BSWAP, XLenVT, Custom);

    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
      setOperationAction(ISD::BSWAP, MVT::i32, Custom);
    }
  } else {
    // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
    // pattern match it directly in isel.
    setOperationAction(ISD::BSWAP, XLenVT,
                       Subtarget.hasStdExtZbb() ? Legal : Expand);
  }

  if (Subtarget.hasStdExtZbb()) {
    setOperationAction(ISD::SMIN, XLenVT, Legal);
    setOperationAction(ISD::SMAX, XLenVT, Legal);
    setOperationAction(ISD::UMIN, XLenVT, Legal);
    setOperationAction(ISD::UMAX, XLenVT, Legal);
  } else {
    setOperationAction(ISD::CTTZ, XLenVT, Expand);
    setOperationAction(ISD::CTLZ, XLenVT, Expand);
    setOperationAction(ISD::CTPOP, XLenVT, Expand);
  }

  if (Subtarget.hasStdExtZbt()) {
    setOperationAction(ISD::FSHL, XLenVT, Custom);
    setOperationAction(ISD::FSHR, XLenVT, Custom);
    setOperationAction(ISD::SELECT, XLenVT, Legal);

    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::FSHL, MVT::i32, Custom);
      setOperationAction(ISD::FSHR, MVT::i32, Custom);
    }
  } else {
    setOperationAction(ISD::SELECT, XLenVT, Custom);
  }

  ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
      ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
      ISD::SETGE,  ISD::SETNE,  ISD::SETO,   ISD::SETUO};

  ISD::NodeType FPOpToExpand[] = {
      ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM,
      ISD::FP16_TO_FP, ISD::FP_TO_FP16};

  if (Subtarget.hasStdExtZfh())
    setOperationAction(ISD::BITCAST, MVT::i16, Custom);

  if (Subtarget.hasStdExtZfh()) {
    setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f16, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
    setOperationAction(ISD::SELECT, MVT::f16, Custom);
    setOperationAction(ISD::BR_CC, MVT::f16, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f16, Expand);
  }

  if (Subtarget.hasStdExtF()) {
    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Custom);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  }

  if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);

  if (Subtarget.hasStdExtD()) {
    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Custom);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
  }

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
  }
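
  // Addresses (global addresses, block addresses, constant pools, jump tables
  // and TLS addresses) are custom-lowered so the appropriate materialisation
  // sequence can be chosen per relocation model and code model; see getAddr
  // and the TLS helpers below.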
  setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
  setOperationAction(ISD::BlockAddress, XLenVT, Custom);
  setOperationAction(ISD::ConstantPool, XLenVT, Custom);
  setOperationAction(ISD::JumpTable, XLenVT, Custom);

  setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);

  // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
  // Unfortunately this can't be determined just from the ISA naming string.
  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
                     Subtarget.is64Bit() ? Legal : Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);
  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  if (Subtarget.hasStdExtA()) {
    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
    setMinCmpXchgSizeInBits(32);
  } else {
    setMaxAtomicSizeInBitsSupported(0);
  }

  setBooleanContents(ZeroOrOneBooleanContent);

  if (Subtarget.hasStdExtV()) {
    setBooleanVectorContents(ZeroOrOneBooleanContent);

    setOperationAction(ISD::VSCALE, XLenVT, Custom);

    // RVV intrinsics may have illegal operands.
    // We also need to custom legalize vmv.x.s.
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);

    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);

    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
      setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
    } else {
      // We must custom-lower certain vXi64 operations on RV32 due to the
      // vector element type being illegal.
      setOperationAction(ISD::SPLAT_VECTOR, MVT::i64, Custom);
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i64, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::i64, Custom);

      setOperationAction(ISD::VECREDUCE_ADD, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_AND, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_OR, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_XOR, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_SMAX, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_SMIN, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_UMAX, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_UMIN, MVT::i64, Custom);
    }

    for (MVT VT : BoolVecVTs) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);

      // Mask VTs are custom-expanded into a series of standard nodes
      setOperationAction(ISD::TRUNCATE, VT, Custom);
    }

    for (MVT VT : IntVecVTs) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);

      setOperationAction(ISD::SMIN, VT, Legal);
      setOperationAction(ISD::SMAX, VT, Legal);
      setOperationAction(ISD::UMIN, VT, Legal);
      setOperationAction(ISD::UMAX, VT, Legal);

      setOperationAction(ISD::ROTL, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      // Custom-lower extensions and truncations from/to mask types.
      setOperationAction(ISD::ANY_EXTEND, VT, Custom);
      setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
      setOperationAction(ISD::ZERO_EXTEND, VT, Custom);

      // RVV has native int->float & float->int conversions where the
      // element type sizes are within one power-of-two of each other. Any
      // wider distances between type sizes have to be lowered as sequences
      // which progressively narrow the gap in stages.
      setOperationAction(ISD::SINT_TO_FP, VT, Custom);
      setOperationAction(ISD::UINT_TO_FP, VT, Custom);
      setOperationAction(ISD::FP_TO_SINT, VT, Custom);
      setOperationAction(ISD::FP_TO_UINT, VT, Custom);

      // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR"
      // nodes which truncate by one power of two at a time.
      setOperationAction(ISD::TRUNCATE, VT, Custom);

      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);

      // Custom-lower reduction operations to set up the corresponding custom
      // nodes' operands.
      setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
      setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
      setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
      setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
      setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
      setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
      setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
      setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);

      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
    }

    // Expand various CCs to best match the RVV ISA, which natively supports
    // UNE but no other unordered comparisons, and supports all ordered
    // comparisons except ONE. Additionally, we expand GT,OGT,GE,OGE for
    // optimization purposes; they are expanded to their swapped-operand CCs
    // (LT,OLT,LE,OLE), and we pattern-match those back to the "original",
    // swapping operands once more. This way we catch both operations and both
    // "vf" and "fv" forms with fewer patterns.
    ISD::CondCode VFPCCToExpand[] = {
        ISD::SETO,   ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
        ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
        ISD::SETGT,  ISD::SETOGT, ISD::SETGE,  ISD::SETOGE,
    };

    // Sets common operation actions on RVV floating-point vector types.
    const auto SetCommonVFPActions = [&](MVT VT) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
      // RVV has native FP_ROUND & FP_EXTEND conversions where the element
      // type sizes are within one power-of-two of each other. Therefore
      // conversions between vXf16 and vXf64 must be lowered as sequences
      // which convert via vXf32.
      setOperationAction(ISD::FP_ROUND, VT, Custom);
      setOperationAction(ISD::FP_EXTEND, VT, Custom);
      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
      // Expand various condition codes (explained above).
      for (auto CC : VFPCCToExpand)
        setCondCodeAction(CC, VT, Expand);

      setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
      setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
      setOperationAction(ISD::FCOPYSIGN, VT, Legal);

      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
    };

    if (Subtarget.hasStdExtZfh())
      for (MVT VT : F16VecVTs)
        SetCommonVFPActions(VT);

    if (Subtarget.hasStdExtF())
      for (MVT VT : F32VecVTs)
        SetCommonVFPActions(VT);

    if (Subtarget.hasStdExtD())
      for (MVT VT : F64VecVTs)
        SetCommonVFPActions(VT);

    if (Subtarget.useRVVForFixedLengthVectors()) {
      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
        if (!useRVVForFixedLengthVectorVT(VT))
          continue;

        // By default everything must be expanded.
        for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
          setOperationAction(Op, VT, Expand);

        // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
        setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);

        setOperationAction(ISD::BUILD_VECTOR, VT, Custom);

        setOperationAction(ISD::LOAD, VT, Custom);
        setOperationAction(ISD::STORE, VT, Custom);

        // The operations below differ between mask vectors and other vectors.
        if (VT.getVectorElementType() == MVT::i1) {
          setOperationAction(ISD::AND, VT, Custom);
          setOperationAction(ISD::OR, VT, Custom);
          setOperationAction(ISD::XOR, VT, Custom);
          setOperationAction(ISD::SETCC, VT, Custom);
          continue;
        }

        setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);

        setOperationAction(ISD::ADD, VT, Custom);
        setOperationAction(ISD::MUL, VT, Custom);
        setOperationAction(ISD::SUB, VT, Custom);
        setOperationAction(ISD::AND, VT, Custom);
        setOperationAction(ISD::OR, VT, Custom);
        setOperationAction(ISD::XOR, VT, Custom);
        setOperationAction(ISD::SDIV, VT, Custom);
        setOperationAction(ISD::SREM, VT, Custom);
        setOperationAction(ISD::UDIV, VT, Custom);
        setOperationAction(ISD::UREM, VT, Custom);
        setOperationAction(ISD::SHL, VT, Custom);
        setOperationAction(ISD::SRA, VT, Custom);
        setOperationAction(ISD::SRL, VT, Custom);

        setOperationAction(ISD::SMIN, VT, Custom);
        setOperationAction(ISD::SMAX, VT, Custom);
        setOperationAction(ISD::UMIN, VT, Custom);
        setOperationAction(ISD::UMAX, VT, Custom);

        setOperationAction(ISD::MULHS, VT, Custom);
        setOperationAction(ISD::MULHU, VT, Custom);

        setOperationAction(ISD::VSELECT, VT, Custom);

        setOperationAction(ISD::ANY_EXTEND, VT, Custom);
        setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
        setOperationAction(ISD::ZERO_EXTEND, VT, Custom);

        setOperationAction(ISD::BITCAST, VT, Custom);
      }

      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
        if (!useRVVForFixedLengthVectorVT(VT))
          continue;

        // By default everything must be expanded.
        for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
          setOperationAction(Op, VT, Expand);

        // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
        setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);

        setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
        setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);

        setOperationAction(ISD::LOAD, VT, Custom);
        setOperationAction(ISD::STORE, VT, Custom);
        setOperationAction(ISD::FADD, VT, Custom);
        setOperationAction(ISD::FSUB, VT, Custom);
        setOperationAction(ISD::FMUL, VT, Custom);
        setOperationAction(ISD::FDIV, VT, Custom);
        setOperationAction(ISD::FNEG, VT, Custom);
        setOperationAction(ISD::FABS, VT, Custom);
        setOperationAction(ISD::FSQRT, VT, Custom);
        setOperationAction(ISD::FMA, VT, Custom);

        for (auto CC : VFPCCToExpand)
          setCondCodeAction(CC, VT, Expand);

        setOperationAction(ISD::VSELECT, VT, Custom);

        setOperationAction(ISD::BITCAST, VT, Custom);
      }
    }
  }

  // Function alignments.
  const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
  setMinFunctionAlignment(FunctionAlignment);
  setPrefFunctionAlignment(FunctionAlignment);

  setMinimumJumpTableEntries(5);

  // Jumps are expensive, compared to logic
  setJumpIsExpensive();

  // We can use any register for comparisons
  setHasMultipleConditionRegisters();

  setTargetDAGCombine(ISD::SETCC);
  if (Subtarget.hasStdExtZbp()) {
    setTargetDAGCombine(ISD::OR);
  }
  if (Subtarget.hasStdExtV())
    setTargetDAGCombine(ISD::FCOPYSIGN);
}

EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
                                            LLVMContext &Context,
                                            EVT VT) const {
  if (!VT.isVector())
    return getPointerTy(DL);
  if (Subtarget.hasStdExtV() &&
      (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
    return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
  return VT.changeVectorElementTypeToInteger();
}

bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                             const CallInst &I,
                                             MachineFunction &MF,
                                             unsigned Intrinsic) const {
  switch (Intrinsic) {
  default:
    return false;
  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
  case Intrinsic::riscv_masked_atomicrmw_add_i32:
  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
  case Intrinsic::riscv_masked_atomicrmw_max_i32:
  case Intrinsic::riscv_masked_atomicrmw_min_i32:
  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
  case Intrinsic::riscv_masked_cmpxchg_i32:
    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::getVT(PtrTy->getElementType());
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    return true;
  }
}

bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                const AddrMode &AM, Type *Ty,
                                                unsigned AS,
                                                Instruction *I) const {
  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // Require a 12-bit signed offset.
  if (!isInt<12>(AM.BaseOffs))
    return false;

  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (!AM.HasBaseReg) // allow "r+i".
      break;
    return false; // disallow "r+r" or "r+r+i".
  default:
    return false;
  }

  return true;
}
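
// Compare and add immediates both use the 12-bit signed I-type immediate
// field, so values in [-2048, 2047] are legal (e.g. 2047 is, 2048 is not).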
bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

// On RV32, 64-bit integers are split into their high and low parts and held
// in two different registers, so the trunc is free since the low register can
// just be used.
bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
  if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
    return false;
  unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
  unsigned DestBits = DstTy->getPrimitiveSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
  if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
      !SrcVT.isInteger() || !DstVT.isInteger())
    return false;
  unsigned SrcBits = SrcVT.getSizeInBits();
  unsigned DestBits = DstVT.getSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Zexts are free if they can be combined with a load.
  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
         (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  return TargetLowering::isZExtFree(Val, VT2);
}

bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}

bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
  return Subtarget.hasStdExtZbb();
}

bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
  return Subtarget.hasStdExtZbb();
}

bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                       bool ForCodeSize) const {
  if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
    return false;
  if (VT == MVT::f32 && !Subtarget.hasStdExtF())
    return false;
  if (VT == MVT::f64 && !Subtarget.hasStdExtD())
    return false;
  if (Imm.isNegZero())
    return false;
  return Imm.isZero();
}

bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
  return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) ||
         (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
         (VT == MVT::f64 && Subtarget.hasStdExtD());
}

// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly in the RISC-V
// ISA.
static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
  switch (CC) {
  default:
    break;
  case ISD::SETGT:
  case ISD::SETLE:
  case ISD::SETUGT:
  case ISD::SETULE:
    CC = ISD::getSetCCSwappedOperands(CC);
    std::swap(LHS, RHS);
    break;
  }
}

// Return the RISC-V branch opcode that matches the given DAG integer
// condition code. The CondCode must be one of those supported by the RISC-V
// ISA (see normaliseSetCC).
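// For example, a SETGT is first normalised to a SETLT with swapped operands,
// which then maps to BLT here.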
static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported CondCode");
  case ISD::SETEQ:
    return RISCV::BEQ;
  case ISD::SETNE:
    return RISCV::BNE;
  case ISD::SETLT:
    return RISCV::BLT;
  case ISD::SETGE:
    return RISCV::BGE;
  case ISD::SETULT:
    return RISCV::BLTU;
  case ISD::SETUGE:
    return RISCV::BGEU;
  }
}

RISCVVLMUL RISCVTargetLowering::getLMUL(MVT VT) {
  unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
  if (VT.getVectorElementType() == MVT::i1)
    KnownSize *= 8;

  switch (KnownSize) {
  default:
    llvm_unreachable("Invalid LMUL.");
  case 8:
    return RISCVVLMUL::LMUL_F8;
  case 16:
    return RISCVVLMUL::LMUL_F4;
  case 32:
    return RISCVVLMUL::LMUL_F2;
  case 64:
    return RISCVVLMUL::LMUL_1;
  case 128:
    return RISCVVLMUL::LMUL_2;
  case 256:
    return RISCVVLMUL::LMUL_4;
  case 512:
    return RISCVVLMUL::LMUL_8;
  }
}

unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVVLMUL LMul) {
  switch (LMul) {
  default:
    llvm_unreachable("Invalid LMUL.");
  case RISCVVLMUL::LMUL_F8:
  case RISCVVLMUL::LMUL_F4:
  case RISCVVLMUL::LMUL_F2:
  case RISCVVLMUL::LMUL_1:
    return RISCV::VRRegClassID;
  case RISCVVLMUL::LMUL_2:
    return RISCV::VRM2RegClassID;
  case RISCVVLMUL::LMUL_4:
    return RISCV::VRM4RegClassID;
  case RISCVVLMUL::LMUL_8:
    return RISCV::VRM8RegClassID;
  }
}

unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
  RISCVVLMUL LMUL = getLMUL(VT);
  if (LMUL == RISCVVLMUL::LMUL_F8 || LMUL == RISCVVLMUL::LMUL_F4 ||
      LMUL == RISCVVLMUL::LMUL_F2 || LMUL == RISCVVLMUL::LMUL_1) {
    static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm1_0 + Index;
  }
  if (LMUL == RISCVVLMUL::LMUL_2) {
    static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm2_0 + Index;
  }
  if (LMUL == RISCVVLMUL::LMUL_4) {
    static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm4_0 + Index;
  }
  llvm_unreachable("Invalid vector type.");
}

unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
  if (VT.getVectorElementType() == MVT::i1)
    return RISCV::VRRegClassID;
  return getRegClassIDForLMUL(getLMUL(VT));
}

// Attempt to decompose a subvector insert/extract between VecVT and
// SubVecVT via subregister indices. Returns the subregister index that
// can perform the subvector insert/extract with the given element index, as
// well as the index corresponding to any leftover subvectors that must be
// further inserted/extracted within the register class for SubVecVT.
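// For example, inserting an nxv1i32 into an nxv2i32 (both held in a single
// VR) produces RISCV::NoSubRegister and returns the element index unchanged
// for the caller to handle.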
std::pair<unsigned, unsigned>
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
    MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
    const RISCVRegisterInfo *TRI) {
  static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
                 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
                 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
                "Register classes not ordered");
  unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
  unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
  // Try to compose a subregister index that takes us from the incoming
  // LMUL>1 register class down to the outgoing one. At each step we halve
  // the LMUL:
  //   nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
  // Note that this is not guaranteed to find a subregister index, such as
  // when we are extracting from one VR type to another.
  unsigned SubRegIdx = RISCV::NoSubRegister;
  for (const unsigned RCID :
       {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
    if (VecRegClassID > RCID && SubRegClassID <= RCID) {
      VecVT = VecVT.getHalfNumVectorElementsVT();
      bool IsHi =
          InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
      SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
                                            getSubregIndexByMVT(VecVT, IsHi));
      if (IsHi)
        InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
    }
  return {SubRegIdx, InsertExtractIdx};
}

// Return the largest legal scalable vector type that matches VT's element
// type.
static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
                                            const RISCVSubtarget &Subtarget) {
  assert(VT.isFixedLengthVector() &&
         DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
         "Expected legal fixed length vector!");

  unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
  assert(LMul <= 8 && isPowerOf2_32(LMul) && "Unexpected LMUL!");

  MVT EltVT = VT.getVectorElementType();
  switch (EltVT.SimpleTy) {
  default:
    llvm_unreachable("unexpected element type for RVV container");
  case MVT::i1: {
    // Masks are calculated assuming 8-bit elements since that's when we need
    // the most elements.
    unsigned EltsPerBlock = RISCV::RVVBitsPerBlock / 8;
    return MVT::getScalableVectorVT(MVT::i1, LMul * EltsPerBlock);
  }
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
  case MVT::f16:
  case MVT::f32:
  case MVT::f64: {
    unsigned EltsPerBlock = RISCV::RVVBitsPerBlock / EltVT.getSizeInBits();
    return MVT::getScalableVectorVT(EltVT, LMul * EltsPerBlock);
  }
  }
}

// Grow V to consume an entire RVV register.
static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
                                       const RISCVSubtarget &Subtarget) {
  assert(VT.isScalableVector() &&
         "Expected to convert into a scalable vector!");
  assert(V.getValueType().isFixedLengthVector() &&
         "Expected a fixed length vector operand!");
  SDLoc DL(V);
  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
}

// Shrink V so it's just big enough to maintain a VT's worth of data.
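// For example, a fixed-length v4i32 assigned LMUL 1 is operated on in its
// nxv2i32 container (see getContainerForFixedLengthVector above) and is
// narrowed back to v4i32 here by extracting the subvector at index 0.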
static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
                                         const RISCVSubtarget &Subtarget) {
  assert(VT.isFixedLengthVector() &&
         "Expected to convert into a fixed length vector!");
  assert(V.getValueType().isScalableVector() &&
         "Expected a scalable vector operand!");
  SDLoc DL(V);
  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
}

// Gets the two common "VL" operands: an all-ones mask and the vector length.
// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
// the vector type that it is contained in.
static std::pair<SDValue, SDValue>
getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
                const RISCVSubtarget &Subtarget) {
  assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
  MVT XLenVT = Subtarget.getXLenVT();
  SDValue VL = VecVT.isFixedLengthVector()
                   ? DAG.getConstant(VecVT.getVectorNumElements(), DL, XLenVT)
                   : DAG.getRegister(RISCV::X0, XLenVT);
  MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
  SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
  return {Mask, VL};
}

// As above but assuming the given type is a scalable vector type.
static std::pair<SDValue, SDValue>
getDefaultScalableVLOps(MVT VecVT, SDLoc DL, SelectionDAG &DAG,
                        const RISCVSubtarget &Subtarget) {
  assert(VecVT.isScalableVector() && "Expecting a scalable vector");
  return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
}

static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  MVT VT = Op.getSimpleValueType();
  assert(VT.isFixedLengthVector() && "Unexpected vector!");

  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);

  SDLoc DL(Op);
  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  if (VT.getVectorElementType() == MVT::i1) {
    if (ISD::isBuildVectorAllZeros(Op.getNode())) {
      SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
      return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
    }

    if (ISD::isBuildVectorAllOnes(Op.getNode())) {
      SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
      return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
    }

    return SDValue();
  }

  if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
    unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
                                        : RISCVISD::VMV_V_X_VL;
    Splat = DAG.getNode(Opc, DL, ContainerVT, Splat, VL);
    return convertFromScalableVector(VT, Splat, DAG, Subtarget);
  }

  // Try to match an index sequence, which we can lower directly to the vid
  // instruction. An all-undef vector is matched by getSplatValue, above.
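  // For example, (build_vector 0, 1, undef, 3) qualifies and becomes a single
  // vid.v (RISCVISD::VID_VL) on the container type.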
  if (VT.isInteger()) {
    bool IsVID = true;
    for (unsigned i = 0, e = Op.getNumOperands(); i < e && IsVID; i++)
      IsVID &= Op.getOperand(i).isUndef() ||
               (isa<ConstantSDNode>(Op.getOperand(i)) &&
                Op.getConstantOperandVal(i) == i);

    if (IsVID) {
      SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
      return convertFromScalableVector(VT, VID, DAG, Subtarget);
    }
  }

  return SDValue();
}

static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
                                   const RISCVSubtarget &Subtarget) {
  SDValue V1 = Op.getOperand(0);
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());

  if (SVN->isSplat()) {
    int Lane = SVN->getSplatIndex();
    if (Lane >= 0) {
      MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);

      V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
      assert(Lane < (int)VT.getVectorNumElements() && "Unexpected lane!");

      SDValue Mask, VL;
      std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
      MVT XLenVT = Subtarget.getXLenVT();
      SDValue Gather =
          DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, V1,
                      DAG.getConstant(Lane, DL, XLenVT), Mask, VL);
      return convertFromScalableVector(VT, Gather, DAG, Subtarget);
    }
  }

  return SDValue();
}

SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
                                            SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default:
    report_fatal_error("unimplemented operand");
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::JumpTable:
    return lowerJumpTable(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::SELECT:
    return lowerSELECT(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, true);
  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, false);
  case ISD::BITCAST: {
    SDValue Op0 = Op.getOperand(0);
    // We can handle fixed length vector bitcasts with a simple replacement
    // in isel.
    if (Op.getValueType().isFixedLengthVector()) {
      if (Op0.getValueType().isFixedLengthVector())
        return Op;
      return SDValue();
    }
    assert(((Subtarget.is64Bit() && Subtarget.hasStdExtF()) ||
            Subtarget.hasStdExtZfh()) &&
           "Unexpected custom legalisation");
    SDLoc DL(Op);
    if (Op.getValueType() == MVT::f16 && Subtarget.hasStdExtZfh()) {
      if (Op0.getValueType() != MVT::i16)
        return SDValue();
      SDValue NewOp0 =
          DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Op0);
      SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
      return FPConv;
    } else if (Op.getValueType() == MVT::f32 && Subtarget.is64Bit() &&
               Subtarget.hasStdExtF()) {
      if (Op0.getValueType() != MVT::i32)
        return SDValue();
      SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
      SDValue FPConv =
          DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
      return FPConv;
    }
    return SDValue();
  }
  case ISD::INTRINSIC_WO_CHAIN:
    return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return LowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::BSWAP:
  case ISD::BITREVERSE: {
    // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
    assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
    MVT VT = Op.getSimpleValueType();
    SDLoc DL(Op);
    // Start with the maximum immediate value which is the bitwidth - 1.
    unsigned Imm = VT.getSizeInBits() - 1;
    // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
    if (Op.getOpcode() == ISD::BSWAP)
      Imm &= ~0x7U;
    return DAG.getNode(RISCVISD::GREVI, DL, VT, Op.getOperand(0),
                       DAG.getTargetConstant(Imm, DL, Subtarget.getXLenVT()));
  }
  case ISD::FSHL:
  case ISD::FSHR: {
    MVT VT = Op.getSimpleValueType();
    assert(VT == Subtarget.getXLenVT() && "Unexpected custom legalization");
    SDLoc DL(Op);
    // FSL/FSR take a log2(XLen)+1 bit shift amount but XLenVT FSHL/FSHR only
    // use log2(XLen) bits. Mask the shift amount accordingly.
    unsigned ShAmtWidth = Subtarget.getXLen() - 1;
    SDValue ShAmt = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(2),
                                DAG.getConstant(ShAmtWidth, DL, VT));
    unsigned Opc = Op.getOpcode() == ISD::FSHL ? RISCVISD::FSL : RISCVISD::FSR;
    return DAG.getNode(Opc, DL, VT, Op.getOperand(0), Op.getOperand(1), ShAmt);
  }
  case ISD::TRUNCATE: {
    SDLoc DL(Op);
    EVT VT = Op.getValueType();
    // Only custom-lower vector truncates
    if (!VT.isVector())
      return Op;

    // Truncates to mask types are handled differently
    if (VT.getVectorElementType() == MVT::i1)
      return lowerVectorMaskTrunc(Op, DAG);

    // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
    // truncates as a series of "RISCVISD::TRUNCATE_VECTOR" nodes which
    // truncate by one power of two at a time.
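    // For example, an nxv2i64 -> nxv2i8 truncate is emitted as the chain
    // nxv2i64 -> nxv2i32 -> nxv2i16 -> nxv2i8.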
    EVT DstEltVT = VT.getVectorElementType();

    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    EVT SrcEltVT = SrcVT.getVectorElementType();

    assert(DstEltVT.bitsLT(SrcEltVT) &&
           isPowerOf2_64(DstEltVT.getSizeInBits()) &&
           isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
           "Unexpected vector truncate lowering");

    SDValue Result = Src;
    LLVMContext &Context = *DAG.getContext();
    const ElementCount Count = SrcVT.getVectorElementCount();
    do {
      SrcEltVT = EVT::getIntegerVT(Context, SrcEltVT.getSizeInBits() / 2);
      EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
      Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR, DL, ResultVT, Result);
    } while (SrcEltVT != DstEltVT);

    return Result;
  }
  case ISD::ANY_EXTEND:
  case ISD::ZERO_EXTEND:
    return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
  case ISD::SIGN_EXTEND:
    return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
  case ISD::SPLAT_VECTOR:
    return lowerSPLATVECTOR(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::VSCALE: {
    MVT VT = Op.getSimpleValueType();
    SDLoc DL(Op);
    SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
    // We define our scalable vector types for lmul=1 to use a 64 bit known
    // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
    // vscale as VLENB / 8.
    SDValue VScale =
        DAG.getNode(ISD::SRL, DL, VT, VLENB, DAG.getConstant(3, DL, VT));
    return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
  }
  case ISD::FP_EXTEND: {
    // RVV can only do fp_extend to types double the size of the source. We
    // custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going
    // via f32.
    MVT VT = Op.getSimpleValueType();
    MVT SrcVT = Op.getOperand(0).getSimpleValueType();
    // We only need to close the gap between vXf16->vXf64.
    if (!VT.isVector() || VT.getVectorElementType() != MVT::f64 ||
        SrcVT.getVectorElementType() != MVT::f16)
      return Op;
    SDLoc DL(Op);
    MVT InterVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
    SDValue IntermediateRound =
        DAG.getFPExtendOrRound(Op.getOperand(0), DL, InterVT);
    return DAG.getFPExtendOrRound(IntermediateRound, DL, VT);
  }
  case ISD::FP_ROUND: {
    // RVV can only do fp_round to types half the size of the source. We
    // custom-lower f64->f16 rounds via RVV's round-to-odd float
    // conversion instruction.
    MVT VT = Op.getSimpleValueType();
    MVT SrcVT = Op.getOperand(0).getSimpleValueType();
    // We only need to close the gap between vXf64<->vXf16.
    if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 ||
        SrcVT.getVectorElementType() != MVT::f64)
      return Op;
    SDLoc DL(Op);
    MVT InterVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
    SDValue IntermediateRound =
        DAG.getNode(RISCVISD::VFNCVT_ROD, DL, InterVT, Op.getOperand(0));
    return DAG.getFPExtendOrRound(IntermediateRound, DL, VT);
  }
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP: {
    // RVV can only do fp<->int conversions to types half/double the size of
    // the source. We custom-lower any conversions that do two hops into
    // sequences.
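    // For example, an nxv2i8 -> nxv2f32 conversion (a 4x widening) becomes a
    // sign/zero extend to nxv2i32 followed by one int->fp conversion, while
    // nxv2f32 -> nxv2i8 becomes one fp->int conversion to nxv2i16 followed by
    // a truncate.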
    MVT VT = Op.getSimpleValueType();
    if (!VT.isVector())
      return Op;
    SDLoc DL(Op);
    SDValue Src = Op.getOperand(0);
    MVT EltVT = VT.getVectorElementType();
    MVT SrcEltVT = Src.getSimpleValueType().getVectorElementType();
    unsigned EltSize = EltVT.getSizeInBits();
    unsigned SrcEltSize = SrcEltVT.getSizeInBits();
    assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
           "Unexpected vector element types");
    bool IsInt2FP = SrcEltVT.isInteger();
    // Widening conversions
    if (EltSize > SrcEltSize && (EltSize / SrcEltSize >= 4)) {
      if (IsInt2FP) {
        // Do a regular integer sign/zero extension then convert to float.
        MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltVT.getSizeInBits()),
                                      VT.getVectorElementCount());
        unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP
                                 ? ISD::ZERO_EXTEND
                                 : ISD::SIGN_EXTEND;
        SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
        return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
      }
      // FP2Int
      assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
      // Do one doubling fp_extend then complete the operation by converting
      // to int.
      MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
      SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
      return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
    }

    // Narrowing conversions
    if (SrcEltSize > EltSize && (SrcEltSize / EltSize >= 4)) {
      if (IsInt2FP) {
        // One narrowing int_to_fp, then an fp_round.
        assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
        MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
        SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
        return DAG.getFPExtendOrRound(Int2FP, DL, VT);
      }
      // FP2Int
      // One narrowing fp_to_int, then truncate the integer. If the float isn't
      // representable by the integer, the result is poison.
      MVT IVecVT =
          MVT::getVectorVT(MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2),
                           VT.getVectorElementCount());
      SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
      return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
    }

    return Op;
  }
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_UMIN:
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
    return lowerVECREDUCE(Op, DAG);
  case ISD::VECREDUCE_FADD:
  case ISD::VECREDUCE_SEQ_FADD:
    return lowerFPVECREDUCE(Op, DAG);
  case ISD::INSERT_SUBVECTOR:
    return lowerINSERT_SUBVECTOR(Op, DAG);
  case ISD::EXTRACT_SUBVECTOR:
    return lowerEXTRACT_SUBVECTOR(Op, DAG);
  case ISD::BUILD_VECTOR:
    return lowerBUILD_VECTOR(Op, DAG, Subtarget);
  case ISD::VECTOR_SHUFFLE:
    return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
  case ISD::LOAD:
    return lowerFixedLengthVectorLoadToRVV(Op, DAG);
  case ISD::STORE:
    return lowerFixedLengthVectorStoreToRVV(Op, DAG);
  case ISD::SETCC:
    return lowerFixedLengthVectorSetccToRVV(Op, DAG);
  case ISD::ADD:
    return lowerToScalableOp(Op, DAG, RISCVISD::ADD_VL);
  case ISD::SUB:
    return lowerToScalableOp(Op, DAG, RISCVISD::SUB_VL);
  case ISD::MUL:
    return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL);
  case ISD::MULHS:
    return lowerToScalableOp(Op, DAG, RISCVISD::MULHS_VL);
  case ISD::MULHU:
    return lowerToScalableOp(Op, DAG, RISCVISD::MULHU_VL);
  case ISD::AND:
    return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMAND_VL,
                                              RISCVISD::AND_VL);
  case ISD::OR:
    return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMOR_VL,
                                              RISCVISD::OR_VL);
  case ISD::XOR:
    return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMXOR_VL,
                                              RISCVISD::XOR_VL);
  case ISD::SDIV:
    return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL);
  case ISD::SREM:
    return lowerToScalableOp(Op, DAG, RISCVISD::SREM_VL);
  case ISD::UDIV:
    return lowerToScalableOp(Op, DAG, RISCVISD::UDIV_VL);
  case ISD::UREM:
    return lowerToScalableOp(Op, DAG, RISCVISD::UREM_VL);
  case ISD::SHL:
    return lowerToScalableOp(Op, DAG, RISCVISD::SHL_VL);
  case ISD::SRA:
    return lowerToScalableOp(Op, DAG, RISCVISD::SRA_VL);
  case ISD::SRL:
    return lowerToScalableOp(Op, DAG, RISCVISD::SRL_VL);
  case ISD::FADD:
    return lowerToScalableOp(Op, DAG, RISCVISD::FADD_VL);
  case ISD::FSUB:
    return lowerToScalableOp(Op, DAG, RISCVISD::FSUB_VL);
  case ISD::FMUL:
    return lowerToScalableOp(Op, DAG, RISCVISD::FMUL_VL);
  case ISD::FDIV:
    return lowerToScalableOp(Op, DAG, RISCVISD::FDIV_VL);
  case ISD::FNEG:
    return lowerToScalableOp(Op, DAG, RISCVISD::FNEG_VL);
  case ISD::FABS:
    return lowerToScalableOp(Op, DAG, RISCVISD::FABS_VL);
  case ISD::FSQRT:
    return lowerToScalableOp(Op, DAG, RISCVISD::FSQRT_VL);
  case ISD::FMA:
    return lowerToScalableOp(Op, DAG, RISCVISD::FMA_VL);
  case ISD::SMIN:
    return lowerToScalableOp(Op, DAG, RISCVISD::SMIN_VL);
  case ISD::SMAX:
    return lowerToScalableOp(Op, DAG, RISCVISD::SMAX_VL);
  case ISD::UMIN:
    return lowerToScalableOp(Op, DAG, RISCVISD::UMIN_VL);
  case ISD::UMAX:
    return lowerToScalableOp(Op, DAG, RISCVISD::UMAX_VL);
  case ISD::VSELECT:
    return lowerFixedLengthVectorSelectToRVV(Op, DAG);
  }
}
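
// getTargetNode is overloaded for each addressable node kind so that getAddr
// below can build the lo/hi/GOT materialisation sequences once, as a template,
// for global addresses, block addresses, constant pools and jump tables alike.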
static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
}

static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
                                   Flags);
}

static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
                                   N->getOffset(), Flags);
}

static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
}

template <class NodeTy>
SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                     bool IsLocal) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());

  if (isPositionIndependent()) {
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    if (IsLocal)
      // Use PC-relative addressing to access the symbol. This generates the
      // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
      // %pcrel_lo(auipc)).
      return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);

    // Use PC-relative addressing to access the GOT for this symbol, then load
    // the address from the GOT. This generates the pattern (PseudoLA sym),
    // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
    return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
  }

  switch (getTargetMachine().getCodeModel()) {
  default:
    report_fatal_error("Unsupported code model for lowering");
  case CodeModel::Small: {
    // Generate a sequence for accessing addresses within the first 2 GiB of
    // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
    SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
    SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
    SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
    return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0);
  }
  case CodeModel::Medium: {
    // Generate a sequence for accessing addresses within any 2GiB range within
    // the address space. This generates the pattern (PseudoLLA sym), which
    // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
  }
  }
}

SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT Ty = Op.getValueType();
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  int64_t Offset = N->getOffset();
  MVT XLenVT = Subtarget.getXLenVT();

  const GlobalValue *GV = N->getGlobal();
  bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
  SDValue Addr = getAddr(N, DAG, IsLocal);

  // In order to maximise the opportunity for common subexpression elimination,
  // emit a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
Later peephole optimisations may choose to 1486 // fold it back in when profitable. 1487 if (Offset != 0) 1488 return DAG.getNode(ISD::ADD, DL, Ty, Addr, 1489 DAG.getConstant(Offset, DL, XLenVT)); 1490 return Addr; 1491 } 1492 1493 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op, 1494 SelectionDAG &DAG) const { 1495 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op); 1496 1497 return getAddr(N, DAG); 1498 } 1499 1500 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op, 1501 SelectionDAG &DAG) const { 1502 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); 1503 1504 return getAddr(N, DAG); 1505 } 1506 1507 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op, 1508 SelectionDAG &DAG) const { 1509 JumpTableSDNode *N = cast<JumpTableSDNode>(Op); 1510 1511 return getAddr(N, DAG); 1512 } 1513 1514 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, 1515 SelectionDAG &DAG, 1516 bool UseGOT) const { 1517 SDLoc DL(N); 1518 EVT Ty = getPointerTy(DAG.getDataLayout()); 1519 const GlobalValue *GV = N->getGlobal(); 1520 MVT XLenVT = Subtarget.getXLenVT(); 1521 1522 if (UseGOT) { 1523 // Use PC-relative addressing to access the GOT for this TLS symbol, then 1524 // load the address from the GOT and add the thread pointer. This generates 1525 // the pattern (PseudoLA_TLS_IE sym), which expands to 1526 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)). 1527 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); 1528 SDValue Load = 1529 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0); 1530 1531 // Add the thread pointer. 1532 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); 1533 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg); 1534 } 1535 1536 // Generate a sequence for accessing the address relative to the thread 1537 // pointer, with the appropriate adjustment for the thread pointer offset. 1538 // This generates the pattern 1539 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym)) 1540 SDValue AddrHi = 1541 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI); 1542 SDValue AddrAdd = 1543 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD); 1544 SDValue AddrLo = 1545 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO); 1546 1547 SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0); 1548 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); 1549 SDValue MNAdd = SDValue( 1550 DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd), 1551 0); 1552 return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0); 1553 } 1554 1555 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, 1556 SelectionDAG &DAG) const { 1557 SDLoc DL(N); 1558 EVT Ty = getPointerTy(DAG.getDataLayout()); 1559 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits()); 1560 const GlobalValue *GV = N->getGlobal(); 1561 1562 // Use a PC-relative addressing mode to access the global dynamic GOT address. 1563 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to 1564 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)). 1565 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); 1566 SDValue Load = 1567 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0); 1568 1569 // Prepare argument list to generate call. 1570 ArgListTy Args; 1571 ArgListEntry Entry; 1572 Entry.Node = Load; 1573 Entry.Ty = CallTy; 1574 Args.push_back(Entry); 1575 1576 // Setup call to __tls_get_addr. 
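// (A rough sketch of the final sequence, with register choices left to the
// register allocator:
//   la.tls.gd a0, sym          ; the PseudoLA_TLS_GD emitted above
//   call __tls_get_addr@plt
// and the returned address lands in a0.)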
1577 TargetLowering::CallLoweringInfo CLI(DAG); 1578 CLI.setDebugLoc(DL) 1579 .setChain(DAG.getEntryNode()) 1580 .setLibCallee(CallingConv::C, CallTy, 1581 DAG.getExternalSymbol("__tls_get_addr", Ty), 1582 std::move(Args)); 1583 1584 return LowerCallTo(CLI).first; 1585 } 1586 1587 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op, 1588 SelectionDAG &DAG) const { 1589 SDLoc DL(Op); 1590 EVT Ty = Op.getValueType(); 1591 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 1592 int64_t Offset = N->getOffset(); 1593 MVT XLenVT = Subtarget.getXLenVT(); 1594 1595 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal()); 1596 1597 if (DAG.getMachineFunction().getFunction().getCallingConv() == 1598 CallingConv::GHC) 1599 report_fatal_error("In GHC calling convention TLS is not supported"); 1600 1601 SDValue Addr; 1602 switch (Model) { 1603 case TLSModel::LocalExec: 1604 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false); 1605 break; 1606 case TLSModel::InitialExec: 1607 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true); 1608 break; 1609 case TLSModel::LocalDynamic: 1610 case TLSModel::GeneralDynamic: 1611 Addr = getDynamicTLSAddr(N, DAG); 1612 break; 1613 } 1614 1615 // In order to maximise the opportunity for common subexpression elimination, 1616 // emit a separate ADD node for the global address offset instead of folding 1617 // it in the global address node. Later peephole optimisations may choose to 1618 // fold it back in when profitable. 1619 if (Offset != 0) 1620 return DAG.getNode(ISD::ADD, DL, Ty, Addr, 1621 DAG.getConstant(Offset, DL, XLenVT)); 1622 return Addr; 1623 } 1624 1625 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { 1626 SDValue CondV = Op.getOperand(0); 1627 SDValue TrueV = Op.getOperand(1); 1628 SDValue FalseV = Op.getOperand(2); 1629 SDLoc DL(Op); 1630 MVT XLenVT = Subtarget.getXLenVT(); 1631 1632 // If the result type is XLenVT and CondV is the output of a SETCC node 1633 // which also operated on XLenVT inputs, then merge the SETCC node into the 1634 // lowered RISCVISD::SELECT_CC to take advantage of the integer 1635 // compare+branch instructions. 
i.e.: 1636 // (select (setcc lhs, rhs, cc), truev, falsev) 1637 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev) 1638 if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC && 1639 CondV.getOperand(0).getSimpleValueType() == XLenVT) { 1640 SDValue LHS = CondV.getOperand(0); 1641 SDValue RHS = CondV.getOperand(1); 1642 auto CC = cast<CondCodeSDNode>(CondV.getOperand(2)); 1643 ISD::CondCode CCVal = CC->get(); 1644 1645 normaliseSetCC(LHS, RHS, CCVal); 1646 1647 SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT); 1648 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV}; 1649 return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops); 1650 } 1651 1652 // Otherwise: 1653 // (select condv, truev, falsev) 1654 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev) 1655 SDValue Zero = DAG.getConstant(0, DL, XLenVT); 1656 SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT); 1657 1658 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV}; 1659 1660 return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops); 1661 } 1662 1663 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { 1664 MachineFunction &MF = DAG.getMachineFunction(); 1665 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>(); 1666 1667 SDLoc DL(Op); 1668 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), 1669 getPointerTy(MF.getDataLayout())); 1670 1671 // vastart just stores the address of the VarArgsFrameIndex slot into the 1672 // memory location argument. 1673 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 1674 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1), 1675 MachinePointerInfo(SV)); 1676 } 1677 1678 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op, 1679 SelectionDAG &DAG) const { 1680 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 1681 MachineFunction &MF = DAG.getMachineFunction(); 1682 MachineFrameInfo &MFI = MF.getFrameInfo(); 1683 MFI.setFrameAddressIsTaken(true); 1684 Register FrameReg = RI.getFrameRegister(MF); 1685 int XLenInBytes = Subtarget.getXLen() / 8; 1686 1687 EVT VT = Op.getValueType(); 1688 SDLoc DL(Op); 1689 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT); 1690 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 1691 while (Depth--) { 1692 int Offset = -(XLenInBytes * 2); 1693 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr, 1694 DAG.getIntPtrConstant(Offset, DL)); 1695 FrameAddr = 1696 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo()); 1697 } 1698 return FrameAddr; 1699 } 1700 1701 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op, 1702 SelectionDAG &DAG) const { 1703 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 1704 MachineFunction &MF = DAG.getMachineFunction(); 1705 MachineFrameInfo &MFI = MF.getFrameInfo(); 1706 MFI.setReturnAddressIsTaken(true); 1707 MVT XLenVT = Subtarget.getXLenVT(); 1708 int XLenInBytes = Subtarget.getXLen() / 8; 1709 1710 if (verifyReturnAddressArgumentIsConstant(Op, DAG)) 1711 return SDValue(); 1712 1713 EVT VT = Op.getValueType(); 1714 SDLoc DL(Op); 1715 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 1716 if (Depth) { 1717 int Off = -XLenInBytes; 1718 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG); 1719 SDValue Offset = DAG.getConstant(Off, DL, VT); 1720 return DAG.getLoad(VT, DL, DAG.getEntryNode(), 1721 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), 1722 MachinePointerInfo()); 1723 } 1724 
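// (The Depth > 0 path above assumes the standard RISC-V frame record layout:
// the return address is spilled at fp - XLEN/8 and the saved frame pointer at
// fp - 2*XLEN/8, which is the same assumption lowerFRAMEADDR makes when
// walking frames.)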
1725 // Return the value of the return address register, marking it an implicit 1726 // live-in. 1727 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT)); 1728 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT); 1729 } 1730 1731 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op, 1732 SelectionDAG &DAG) const { 1733 SDLoc DL(Op); 1734 SDValue Lo = Op.getOperand(0); 1735 SDValue Hi = Op.getOperand(1); 1736 SDValue Shamt = Op.getOperand(2); 1737 EVT VT = Lo.getValueType(); 1738 1739 // if Shamt-XLEN < 0: // Shamt < XLEN 1740 // Lo = Lo << Shamt 1741 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt)) 1742 // else: 1743 // Lo = 0 1744 // Hi = Lo << (Shamt-XLEN) 1745 1746 SDValue Zero = DAG.getConstant(0, DL, VT); 1747 SDValue One = DAG.getConstant(1, DL, VT); 1748 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); 1749 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); 1750 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); 1751 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); 1752 1753 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); 1754 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One); 1755 SDValue ShiftRightLo = 1756 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt); 1757 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); 1758 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); 1759 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen); 1760 1761 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); 1762 1763 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero); 1764 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 1765 1766 SDValue Parts[2] = {Lo, Hi}; 1767 return DAG.getMergeValues(Parts, DL); 1768 } 1769 1770 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, 1771 bool IsSRA) const { 1772 SDLoc DL(Op); 1773 SDValue Lo = Op.getOperand(0); 1774 SDValue Hi = Op.getOperand(1); 1775 SDValue Shamt = Op.getOperand(2); 1776 EVT VT = Lo.getValueType(); 1777 1778 // SRA expansion: 1779 // if Shamt-XLEN < 0: // Shamt < XLEN 1780 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt)) 1781 // Hi = Hi >>s Shamt 1782 // else: 1783 // Lo = Hi >>s (Shamt-XLEN); 1784 // Hi = Hi >>s (XLEN-1) 1785 // 1786 // SRL expansion: 1787 // if Shamt-XLEN < 0: // Shamt < XLEN 1788 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt)) 1789 // Hi = Hi >>u Shamt 1790 // else: 1791 // Lo = Hi >>u (Shamt-XLEN); 1792 // Hi = 0; 1793 1794 unsigned ShiftRightOp = IsSRA ? 
ISD::SRA : ISD::SRL; 1795 1796 SDValue Zero = DAG.getConstant(0, DL, VT); 1797 SDValue One = DAG.getConstant(1, DL, VT); 1798 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); 1799 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); 1800 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); 1801 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); 1802 1803 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt); 1804 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One); 1805 SDValue ShiftLeftHi = 1806 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt); 1807 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi); 1808 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt); 1809 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen); 1810 SDValue HiFalse = 1811 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero; 1812 1813 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); 1814 1815 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse); 1816 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 1817 1818 SDValue Parts[2] = {Lo, Hi}; 1819 return DAG.getMergeValues(Parts, DL); 1820 } 1821 1822 // Custom-lower a SPLAT_VECTOR where XLEN<SEW, as the SEW element type is 1823 // illegal (currently only vXi64 RV32). 1824 // FIXME: We could also catch non-constant sign-extended i32 values and lower 1825 // them to SPLAT_VECTOR_I64 1826 SDValue RISCVTargetLowering::lowerSPLATVECTOR(SDValue Op, 1827 SelectionDAG &DAG) const { 1828 SDLoc DL(Op); 1829 EVT VecVT = Op.getValueType(); 1830 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 && 1831 "Unexpected SPLAT_VECTOR lowering"); 1832 SDValue SplatVal = Op.getOperand(0); 1833 1834 // If we can prove that the value is a sign-extended 32-bit value, lower this 1835 // as a custom node in order to try and match RVV vector/scalar instructions. 
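// For example (a sketch), splatting the constant -1 into an nxv1i64 vector on
// RV32 takes this path and becomes (SPLAT_VECTOR_I64 -1), which can match a
// single vmv.v.i rather than the multi-instruction expansion below.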
1836 if (auto *CVal = dyn_cast<ConstantSDNode>(SplatVal)) { 1837 if (isInt<32>(CVal->getSExtValue())) 1838 return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, 1839 DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32)); 1840 } 1841 1842 if (SplatVal.getOpcode() == ISD::SIGN_EXTEND && 1843 SplatVal.getOperand(0).getValueType() == MVT::i32) { 1844 return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, 1845 SplatVal.getOperand(0)); 1846 } 1847 1848 // Else, on RV32 we lower an i64-element SPLAT_VECTOR thus, being careful not 1849 // to accidentally sign-extend the 32-bit halves to the e64 SEW: 1850 // vmv.v.x vX, hi 1851 // vsll.vx vX, vX, /*32*/ 1852 // vmv.v.x vY, lo 1853 // vsll.vx vY, vY, /*32*/ 1854 // vsrl.vx vY, vY, /*32*/ 1855 // vor.vv vX, vX, vY 1856 SDValue One = DAG.getConstant(1, DL, MVT::i32); 1857 SDValue Zero = DAG.getConstant(0, DL, MVT::i32); 1858 SDValue ThirtyTwoV = DAG.getConstant(32, DL, VecVT); 1859 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, SplatVal, Zero); 1860 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, SplatVal, One); 1861 1862 Lo = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo); 1863 Lo = DAG.getNode(ISD::SHL, DL, VecVT, Lo, ThirtyTwoV); 1864 Lo = DAG.getNode(ISD::SRL, DL, VecVT, Lo, ThirtyTwoV); 1865 1866 if (isNullConstant(Hi)) 1867 return Lo; 1868 1869 Hi = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Hi); 1870 Hi = DAG.getNode(ISD::SHL, DL, VecVT, Hi, ThirtyTwoV); 1871 1872 return DAG.getNode(ISD::OR, DL, VecVT, Lo, Hi); 1873 } 1874 1875 // Custom-lower extensions from mask vectors by using a vselect either with 1 1876 // for zero/any-extension or -1 for sign-extension: 1877 // (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0) 1878 // Note that any-extension is lowered identically to zero-extension. 1879 SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG, 1880 int64_t ExtTrueVal) const { 1881 SDLoc DL(Op); 1882 MVT VecVT = Op.getSimpleValueType(); 1883 SDValue Src = Op.getOperand(0); 1884 // Only custom-lower extensions from mask types 1885 if (!Src.getValueType().isVector() || 1886 Src.getValueType().getVectorElementType() != MVT::i1) 1887 return Op; 1888 1889 MVT XLenVT = Subtarget.getXLenVT(); 1890 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT); 1891 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT); 1892 1893 if (VecVT.isScalableVector()) { 1894 // Be careful not to introduce illegal scalar types at this stage, and be 1895 // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is 1896 // illegal and must be expanded. Since we know that the constants are 1897 // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly. 
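// For instance, (sext nxv2i1 %m to nxv2i64) becomes
// (vselect %m, splat(-1), splat(0)), which is expected to select to something
// like a vmv.v.i of 0 followed by vmerge.vim (a sketch; the exact instructions
// are chosen by isel).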
1898 bool IsRV32E64 = 1899 !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64; 1900 1901 if (!IsRV32E64) { 1902 SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero); 1903 SplatTrueVal = DAG.getSplatVector(VecVT, DL, SplatTrueVal); 1904 } else { 1905 SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero); 1906 SplatTrueVal = 1907 DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatTrueVal); 1908 } 1909 1910 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero); 1911 } 1912 1913 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget); 1914 MVT I1ContainerVT = 1915 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 1916 1917 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget); 1918 1919 SDValue Mask, VL; 1920 std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 1921 1922 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero, VL); 1923 SplatTrueVal = 1924 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatTrueVal, VL); 1925 SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, 1926 SplatTrueVal, SplatZero, VL); 1927 1928 return convertFromScalableVector(VecVT, Select, DAG, Subtarget); 1929 } 1930 1931 // Custom-lower truncations from vectors to mask vectors by using a mask and a 1932 // setcc operation: 1933 // (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne) 1934 SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op, 1935 SelectionDAG &DAG) const { 1936 SDLoc DL(Op); 1937 EVT MaskVT = Op.getValueType(); 1938 // Only expect to custom-lower truncations to mask types 1939 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 && 1940 "Unexpected type for vector mask lowering"); 1941 SDValue Src = Op.getOperand(0); 1942 EVT VecVT = Src.getValueType(); 1943 1944 // Be careful not to introduce illegal scalar types at this stage, and be 1945 // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is 1946 // illegal and must be expanded. Since we know that the constants are 1947 // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly. 1948 bool IsRV32E64 = 1949 !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64; 1950 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT()); 1951 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); 1952 1953 if (!IsRV32E64) { 1954 SplatOne = DAG.getSplatVector(VecVT, DL, SplatOne); 1955 SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero); 1956 } else { 1957 SplatOne = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatOne); 1958 SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero); 1959 } 1960 1961 SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne); 1962 1963 return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE); 1964 } 1965 1966 SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, 1967 SelectionDAG &DAG) const { 1968 SDLoc DL(Op); 1969 MVT VecVT = Op.getSimpleValueType(); 1970 SDValue Vec = Op.getOperand(0); 1971 SDValue Val = Op.getOperand(1); 1972 SDValue Idx = Op.getOperand(2); 1973 1974 // Custom-legalize INSERT_VECTOR_ELT where XLEN>=SEW, so that the vector is 1975 // first slid down into position, the value is inserted into the first 1976 // position, and the vector is slid back up. We do this to simplify patterns. 
1977 // (slideup vec, (insertelt (slidedown impdef, vec, idx), val, 0), idx), 1978 if (Subtarget.is64Bit() || Val.getValueType() != MVT::i64) { 1979 if (isNullConstant(Idx)) 1980 return Op; 1981 SDValue Mask, VL; 1982 std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget); 1983 SDValue Slidedown = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VecVT, 1984 DAG.getUNDEF(VecVT), Vec, Idx, Mask, VL); 1985 SDValue InsertElt0 = 1986 DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecVT, Slidedown, Val, 1987 DAG.getConstant(0, DL, Subtarget.getXLenVT())); 1988 1989 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VecVT, Vec, InsertElt0, Idx, 1990 Mask, VL); 1991 } 1992 1993 if (!VecVT.isScalableVector()) 1994 return SDValue(); 1995 1996 // Custom-legalize INSERT_VECTOR_ELT where XLEN<SEW, as the SEW element type 1997 // is illegal (currently only vXi64 RV32). 1998 // Since there is no easy way of getting a single element into a vector when 1999 // XLEN<SEW, we lower the operation to the following sequence: 2000 // splat vVal, rVal 2001 // vid.v vVid 2002 // vmseq.vx mMask, vVid, rIdx 2003 // vmerge.vvm vDest, vSrc, vVal, mMask 2004 // This essentially merges the original vector with the inserted element by 2005 // using a mask whose only set bit is that corresponding to the insert 2006 // index. 2007 SDValue SplattedVal = DAG.getSplatVector(VecVT, DL, Val); 2008 SDValue SplattedIdx = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Idx); 2009 2010 SDValue Mask, VL; 2011 std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget); 2012 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VecVT, Mask, VL); 2013 auto SetCCVT = 2014 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VecVT); 2015 SDValue SelectCond = DAG.getSetCC(DL, SetCCVT, VID, SplattedIdx, ISD::SETEQ); 2016 2017 return DAG.getNode(ISD::VSELECT, DL, VecVT, SelectCond, SplattedVal, Vec); 2018 } 2019 2020 // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then 2021 // extract the first element: (extractelt (slidedown vec, idx), 0). For integer 2022 // types this is done using VMV_X_S to allow us to glean information about the 2023 // sign bits of the result. 2024 SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, 2025 SelectionDAG &DAG) const { 2026 SDLoc DL(Op); 2027 SDValue Idx = Op.getOperand(1); 2028 SDValue Vec = Op.getOperand(0); 2029 EVT EltVT = Op.getValueType(); 2030 MVT VecVT = Vec.getSimpleValueType(); 2031 MVT XLenVT = Subtarget.getXLenVT(); 2032 2033 // If the index is 0, the vector is already in the right position. 2034 if (!isNullConstant(Idx)) { 2035 SDValue Mask, VL; 2036 std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget); 2037 Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VecVT, DAG.getUNDEF(VecVT), 2038 Vec, Idx, Mask, VL); 2039 } 2040 2041 if (!EltVT.isInteger()) { 2042 // Floating-point extracts are handled in TableGen. 2043 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, 2044 DAG.getConstant(0, DL, XLenVT)); 2045 } 2046 2047 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec); 2048 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0); 2049 } 2050 2051 SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, 2052 SelectionDAG &DAG) const { 2053 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 2054 SDLoc DL(Op); 2055 2056 if (Subtarget.hasStdExtV()) { 2057 // Some RVV intrinsics may claim that they want an integer operand to be 2058 // extended. 
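// For example, if the intrinsic table marks the scalar operand of a vadd.vx
// style intrinsic, an i32 scalar on RV64 is widened here to XLenVT (i64) so
// that the operand has a legal type before instruction selection.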
2059 if (const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II = 2060 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo)) { 2061 if (II->ExtendedOperand) { 2062 assert(II->ExtendedOperand < Op.getNumOperands()); 2063 SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end()); 2064 SDValue &ScalarOp = Operands[II->ExtendedOperand]; 2065 EVT OpVT = ScalarOp.getValueType(); 2066 if (OpVT == MVT::i8 || OpVT == MVT::i16 || 2067 (OpVT == MVT::i32 && Subtarget.is64Bit())) { 2068 // If the operand is a constant, sign extend to increase our chances 2069 // of being able to use a .vi instruction. ANY_EXTEND would become 2070 // a zero extend and the simm5 check in isel would fail. 2071 // FIXME: Should we ignore the upper bits in isel instead? 2072 unsigned ExtOpc = isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND 2073 : ISD::ANY_EXTEND; 2074 ScalarOp = DAG.getNode(ExtOpc, DL, Subtarget.getXLenVT(), ScalarOp); 2075 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(), 2076 Operands); 2077 } 2078 } 2079 } 2080 } 2081 2082 switch (IntNo) { 2083 default: 2084 return SDValue(); // Don't custom lower most intrinsics. 2085 case Intrinsic::thread_pointer: { 2086 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2087 return DAG.getRegister(RISCV::X4, PtrVT); 2088 } 2089 case Intrinsic::riscv_vmv_x_s: 2090 assert(Op.getValueType() == Subtarget.getXLenVT() && "Unexpected VT!"); 2091 return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(), 2092 Op.getOperand(1)); 2093 case Intrinsic::riscv_vmv_v_x: { 2094 SDValue Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), 2095 Op.getOperand(1)); 2096 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, Op.getValueType(), 2097 Scalar, Op.getOperand(2)); 2098 } 2099 case Intrinsic::riscv_vfmv_v_f: 2100 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(), 2101 Op.getOperand(1), Op.getOperand(2)); 2102 } 2103 } 2104 2105 SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, 2106 SelectionDAG &DAG) const { 2107 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 2108 SDLoc DL(Op); 2109 2110 if (Subtarget.hasStdExtV()) { 2111 // Some RVV intrinsics may claim that they want an integer operand to be 2112 // extended. 2113 if (const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II = 2114 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo)) { 2115 if (II->ExtendedOperand) { 2116 // The operands start from the second argument in INTRINSIC_W_CHAIN. 2117 unsigned ExtendOp = II->ExtendedOperand + 1; 2118 assert(ExtendOp < Op.getNumOperands()); 2119 SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end()); 2120 SDValue &ScalarOp = Operands[ExtendOp]; 2121 EVT OpVT = ScalarOp.getValueType(); 2122 if (OpVT == MVT::i8 || OpVT == MVT::i16 || 2123 (OpVT == MVT::i32 && Subtarget.is64Bit())) { 2124 // If the operand is a constant, sign extend to increase our chances 2125 // of being able to use a .vi instruction. ANY_EXTEND would become 2126 // a zero extend and the simm5 check in isel would fail. 2127 // FIXME: Should we ignore the upper bits in isel instead? 2128 unsigned ExtOpc = isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND 2129 : ISD::ANY_EXTEND; 2130 ScalarOp = DAG.getNode(ExtOpc, DL, Subtarget.getXLenVT(), ScalarOp); 2131 return DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, Op->getVTList(), 2132 Operands); 2133 } 2134 } 2135 } 2136 } 2137 2138 return SDValue(); // Don't custom lower most intrinsics.
2139 } 2140 2141 static std::pair<unsigned, uint64_t> 2142 getRVVReductionOpAndIdentityVal(unsigned ISDOpcode, unsigned EltSizeBits) { 2143 switch (ISDOpcode) { 2144 default: 2145 llvm_unreachable("Unhandled reduction"); 2146 case ISD::VECREDUCE_ADD: 2147 return {RISCVISD::VECREDUCE_ADD, 0}; 2148 case ISD::VECREDUCE_UMAX: 2149 return {RISCVISD::VECREDUCE_UMAX, 0}; 2150 case ISD::VECREDUCE_SMAX: 2151 return {RISCVISD::VECREDUCE_SMAX, minIntN(EltSizeBits)}; 2152 case ISD::VECREDUCE_UMIN: 2153 return {RISCVISD::VECREDUCE_UMIN, maxUIntN(EltSizeBits)}; 2154 case ISD::VECREDUCE_SMIN: 2155 return {RISCVISD::VECREDUCE_SMIN, maxIntN(EltSizeBits)}; 2156 case ISD::VECREDUCE_AND: 2157 return {RISCVISD::VECREDUCE_AND, -1}; 2158 case ISD::VECREDUCE_OR: 2159 return {RISCVISD::VECREDUCE_OR, 0}; 2160 case ISD::VECREDUCE_XOR: 2161 return {RISCVISD::VECREDUCE_XOR, 0}; 2162 } 2163 } 2164 2165 // Take a (supported) standard ISD reduction opcode and transform it to a RISCV 2166 // reduction opcode. Note that this returns a vector type, which must be 2167 // further processed to access the scalar result in element 0. 2168 SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op, 2169 SelectionDAG &DAG) const { 2170 SDLoc DL(Op); 2171 assert(Op.getValueType().isSimple() && 2172 Op.getOperand(0).getValueType().isSimple() && 2173 "Unexpected vector-reduce lowering"); 2174 MVT VecEltVT = Op.getOperand(0).getSimpleValueType().getVectorElementType(); 2175 unsigned RVVOpcode; 2176 uint64_t IdentityVal; 2177 std::tie(RVVOpcode, IdentityVal) = 2178 getRVVReductionOpAndIdentityVal(Op.getOpcode(), VecEltVT.getSizeInBits()); 2179 // We have to perform a bit of a dance to get from our vector type to the 2180 // correct LMUL=1 vector type. We divide our minimum VLEN (64) by the vector 2181 // element type to find the type which fills a single register. Be careful to 2182 // use the operand's vector element type rather than the reduction's value 2183 // type, as that has likely been extended to XLEN. 2184 unsigned NumElts = 64 / VecEltVT.getSizeInBits(); 2185 MVT M1VT = MVT::getScalableVectorVT(VecEltVT, NumElts); 2186 SDValue IdentitySplat = 2187 DAG.getSplatVector(M1VT, DL, DAG.getConstant(IdentityVal, DL, VecEltVT)); 2188 SDValue Reduction = 2189 DAG.getNode(RVVOpcode, DL, M1VT, Op.getOperand(0), IdentitySplat); 2190 SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction, 2191 DAG.getConstant(0, DL, Subtarget.getXLenVT())); 2192 return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType()); 2193 } 2194 2195 // Given a reduction op, this function returns the matching reduction opcode, 2196 // the vector SDValue and the scalar SDValue required to lower this to a 2197 // RISCVISD node. 2198 static std::tuple<unsigned, SDValue, SDValue> 2199 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) { 2200 SDLoc DL(Op); 2201 switch (Op.getOpcode()) { 2202 default: 2203 llvm_unreachable("Unhandled reduction"); 2204 case ISD::VECREDUCE_FADD: 2205 return std::make_tuple(RISCVISD::VECREDUCE_FADD, Op.getOperand(0), 2206 DAG.getConstantFP(0.0, DL, EltVT)); 2207 case ISD::VECREDUCE_SEQ_FADD: 2208 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD, Op.getOperand(1), 2209 Op.getOperand(0)); 2210 } 2211 } 2212 2213 SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op, 2214 SelectionDAG &DAG) const { 2215 SDLoc DL(Op); 2216 MVT VecEltVT = Op.getSimpleValueType(); 2217 // We have to perform a bit of a dance to get from our vector type to the 2218 // correct LMUL=1 vector type. See above for an explanation. 
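// E.g. for an f32 reduction this gives NumElts = 64 / 32 = 2 and
// M1VT = nxv2f32, mirroring the 64-bit minimum-VLEN block used above.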
2219 unsigned NumElts = 64 / VecEltVT.getSizeInBits(); 2220 MVT M1VT = MVT::getScalableVectorVT(VecEltVT, NumElts); 2221 2222 unsigned RVVOpcode; 2223 SDValue VectorVal, ScalarVal; 2224 std::tie(RVVOpcode, VectorVal, ScalarVal) = 2225 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT); 2226 2227 SDValue ScalarSplat = DAG.getSplatVector(M1VT, DL, ScalarVal); 2228 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, VectorVal, ScalarSplat); 2229 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction, 2230 DAG.getConstant(0, DL, Subtarget.getXLenVT())); 2231 } 2232 2233 static MVT getLMUL1VT(MVT VT) { 2234 assert(VT.getVectorElementType().getSizeInBits() <= 64 && 2235 "Unexpected vector MVT"); 2236 return MVT::getScalableVectorVT( 2237 VT.getVectorElementType(), 2238 RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits()); 2239 } 2240 2241 SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op, 2242 SelectionDAG &DAG) const { 2243 SDValue Vec = Op.getOperand(0); 2244 SDValue SubVec = Op.getOperand(1); 2245 MVT VecVT = Vec.getSimpleValueType(); 2246 MVT SubVecVT = SubVec.getSimpleValueType(); 2247 2248 // TODO: Only handle scalable->scalable inserts for now, and revisit this for 2249 // fixed-length vectors later. 2250 if (!SubVecVT.isScalableVector() || !VecVT.isScalableVector()) 2251 return Op; 2252 2253 SDLoc DL(Op); 2254 unsigned OrigIdx = Op.getConstantOperandVal(2); 2255 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); 2256 2257 unsigned SubRegIdx, RemIdx; 2258 std::tie(SubRegIdx, RemIdx) = 2259 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 2260 VecVT, SubVecVT, OrigIdx, TRI); 2261 2262 RISCVVLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT); 2263 bool IsSubVecPartReg = SubVecLMUL == RISCVVLMUL::LMUL_F2 || 2264 SubVecLMUL == RISCVVLMUL::LMUL_F4 || 2265 SubVecLMUL == RISCVVLMUL::LMUL_F8; 2266 2267 // If the Idx has been completely eliminated and this subvector's size is a 2268 // vector register or a multiple thereof, or the surrounding elements are 2269 // undef, then this is a subvector insert which naturally aligns to a vector 2270 // register. These can easily be handled using subregister manipulation. 2271 // If the subvector is smaller than a vector register, then the insertion 2272 // must preserve the undisturbed elements of the register. We do this by 2273 // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type 2274 // (which resolves to a subregister copy), performing a VSLIDEUP to place the 2275 // subvector within the vector register, and an INSERT_SUBVECTOR of that 2276 // LMUL=1 type back into the larger vector (resolving to another subregister 2277 // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type 2278 // to avoid allocating a large register group to hold our subvector. 2279 if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef())) 2280 return Op; 2281 2282 // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements 2283 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy 2284 // (in our case undisturbed). This means we can set up a subvector insertion 2285 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the 2286 // size of the subvector. 2287 MVT XLenVT = Subtarget.getXLenVT(); 2288 MVT InterSubVT = getLMUL1VT(VecVT); 2289 2290 // Extract a subvector equal to the nearest full vector register type. This 2291 // should resolve to a EXTRACT_SUBREG instruction. 
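// The sequence built below is roughly (a sketch; the exact types come from
// the decomposition above):
//   AlignedPart = extract_subvector Vec, AlignedIdx        ; subregister copy
//   Widened     = insert_subvector undef, SubVec, 0
//   Slid        = vslideup AlignedPart, Widened, RemIdx * vscale
//   Result      = insert_subvector Vec, Slid, AlignedIdx   ; subregister insert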
2292 unsigned AlignedIdx = OrigIdx - RemIdx; 2293 SDValue AlignedExtract = 2294 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec, 2295 DAG.getConstant(AlignedIdx, DL, XLenVT)); 2296 2297 SDValue SlideupAmt = DAG.getConstant(RemIdx, DL, XLenVT); 2298 // For scalable vectors this must be further multiplied by vscale. 2299 SlideupAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlideupAmt); 2300 2301 SDValue Mask, VL; 2302 std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget); 2303 2304 // Construct the vector length corresponding to RemIdx + length(SubVecVT). 2305 VL = DAG.getConstant(SubVecVT.getVectorMinNumElements(), DL, XLenVT); 2306 VL = DAG.getNode(ISD::VSCALE, DL, XLenVT, VL); 2307 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL); 2308 2309 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT, 2310 DAG.getUNDEF(InterSubVT), SubVec, 2311 DAG.getConstant(0, DL, XLenVT)); 2312 2313 SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, InterSubVT, 2314 AlignedExtract, SubVec, SlideupAmt, Mask, VL); 2315 2316 // Insert this subvector into the correct vector register. This should 2317 // resolve to an INSERT_SUBREG instruction. 2318 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, Slideup, 2319 DAG.getConstant(AlignedIdx, DL, XLenVT)); 2320 } 2321 2322 SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op, 2323 SelectionDAG &DAG) const { 2324 SDValue Vec = Op.getOperand(0); 2325 MVT SubVecVT = Op.getSimpleValueType(); 2326 MVT VecVT = Vec.getSimpleValueType(); 2327 2328 // TODO: Only handle scalable->scalable extracts for now, and revisit this 2329 // for fixed-length vectors later. 2330 if (!SubVecVT.isScalableVector() || !VecVT.isScalableVector()) 2331 return Op; 2332 2333 SDLoc DL(Op); 2334 unsigned OrigIdx = Op.getConstantOperandVal(1); 2335 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); 2336 2337 unsigned SubRegIdx, RemIdx; 2338 std::tie(SubRegIdx, RemIdx) = 2339 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 2340 VecVT, SubVecVT, OrigIdx, TRI); 2341 2342 // If the Idx has been completely eliminated then this is a subvector extract 2343 // which naturally aligns to a vector register. These can easily be handled 2344 // using subregister manipulation. 2345 if (RemIdx == 0) 2346 return Op; 2347 2348 // Else we must shift our vector register directly to extract the subvector. 2349 // Do this using VSLIDEDOWN. 2350 MVT XLenVT = Subtarget.getXLenVT(); 2351 2352 // Extract a subvector equal to the nearest full vector register type. This 2353 // should resolve to a EXTRACT_SUBREG instruction. 2354 unsigned AlignedIdx = OrigIdx - RemIdx; 2355 MVT InterSubVT = getLMUL1VT(VecVT); 2356 SDValue AlignedExtract = 2357 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec, 2358 DAG.getConstant(AlignedIdx, DL, XLenVT)); 2359 2360 // Slide this vector register down by the desired number of elements in order 2361 // to place the desired subvector starting at element 0. 2362 SDValue SlidedownAmt = DAG.getConstant(RemIdx, DL, XLenVT); 2363 // For scalable vectors this must be further multiplied by vscale. 2364 SlidedownAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlidedownAmt); 2365 2366 SDValue Mask, VL; 2367 std::tie(Mask, VL) = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget); 2368 SDValue Slidedown = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, InterSubVT, 2369 DAG.getUNDEF(InterSubVT), AlignedExtract, 2370 SlidedownAmt, Mask, VL); 2371 2372 // Now the vector is in the right position, extract our final subvector. 
This 2373 // should resolve to a COPY. 2374 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown, 2375 DAG.getConstant(0, DL, XLenVT)); 2376 } 2377 2378 SDValue 2379 RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op, 2380 SelectionDAG &DAG) const { 2381 auto *Load = cast<LoadSDNode>(Op); 2382 2383 SDLoc DL(Op); 2384 MVT VT = Op.getSimpleValueType(); 2385 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 2386 2387 SDValue VL = 2388 DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT()); 2389 2390 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 2391 SDValue NewLoad = DAG.getMemIntrinsicNode( 2392 RISCVISD::VLE_VL, DL, VTs, {Load->getChain(), Load->getBasePtr(), VL}, 2393 Load->getMemoryVT(), Load->getMemOperand()); 2394 2395 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget); 2396 return DAG.getMergeValues({Result, Load->getChain()}, DL); 2397 } 2398 2399 SDValue 2400 RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op, 2401 SelectionDAG &DAG) const { 2402 auto *Store = cast<StoreSDNode>(Op); 2403 2404 SDLoc DL(Op); 2405 MVT VT = Store->getValue().getSimpleValueType(); 2406 2407 // FIXME: We probably need to zero any extra bits in a byte for mask stores. 2408 // This is tricky to do. 2409 2410 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 2411 2412 SDValue VL = 2413 DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT()); 2414 2415 SDValue NewValue = 2416 convertToScalableVector(ContainerVT, Store->getValue(), DAG, Subtarget); 2417 return DAG.getMemIntrinsicNode( 2418 RISCVISD::VSE_VL, DL, DAG.getVTList(MVT::Other), 2419 {Store->getChain(), NewValue, Store->getBasePtr(), VL}, 2420 Store->getMemoryVT(), Store->getMemOperand()); 2421 } 2422 2423 SDValue 2424 RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op, 2425 SelectionDAG &DAG) const { 2426 MVT InVT = Op.getOperand(0).getSimpleValueType(); 2427 MVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT, Subtarget); 2428 2429 MVT VT = Op.getSimpleValueType(); 2430 2431 SDValue Op1 = 2432 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget); 2433 SDValue Op2 = 2434 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget); 2435 2436 SDLoc DL(Op); 2437 SDValue VL = 2438 DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT()); 2439 2440 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); 2441 2442 bool Invert = false; 2443 Optional<unsigned> LogicOpc; 2444 if (ContainerVT.isFloatingPoint()) { 2445 bool Swap = false; 2446 switch (CC) { 2447 default: 2448 break; 2449 case ISD::SETULE: 2450 case ISD::SETULT: 2451 Swap = true; 2452 LLVM_FALLTHROUGH; 2453 case ISD::SETUGE: 2454 case ISD::SETUGT: 2455 CC = getSetCCInverse(CC, ContainerVT); 2456 Invert = true; 2457 break; 2458 case ISD::SETOGE: 2459 case ISD::SETOGT: 2460 case ISD::SETGE: 2461 case ISD::SETGT: 2462 Swap = true; 2463 break; 2464 case ISD::SETUEQ: 2465 // Use !((OLT Op1, Op2) || (OLT Op2, Op1)) 2466 Invert = true; 2467 LogicOpc = RISCVISD::VMOR_VL; 2468 CC = ISD::SETOLT; 2469 break; 2470 case ISD::SETONE: 2471 // Use ((OLT Op1, Op2) || (OLT Op2, Op1)) 2472 LogicOpc = RISCVISD::VMOR_VL; 2473 CC = ISD::SETOLT; 2474 break; 2475 case ISD::SETO: 2476 // Use (OEQ Op1, Op1) && (OEQ Op2, Op2) 2477 LogicOpc = RISCVISD::VMAND_VL; 2478 CC = ISD::SETOEQ; 2479 break; 2480 case ISD::SETUO: 2481 // Use (UNE Op1, Op1) || (UNE Op2, Op2) 2482 LogicOpc = RISCVISD::VMOR_VL; 2483 CC = 
ISD::SETUNE; 2484 break; 2485 } 2486 2487 if (Swap) { 2488 CC = getSetCCSwappedOperands(CC); 2489 std::swap(Op1, Op2); 2490 } 2491 } 2492 2493 MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 2494 SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); 2495 2496 // There are 3 cases we need to emit. 2497 // 1. For (OEQ Op1, Op1) && (OEQ Op2, Op2) or (UNE Op1, Op1) || (UNE Op2, Op2) 2498 // we need to compare each operand with itself. 2499 // 2. For (OLT Op1, Op2) || (OLT Op2, Op1) we need to compare Op1 and Op2 in 2500 // both orders. 2501 // 3. For any other case we just need one compare with Op1 and Op2. 2502 SDValue Cmp; 2503 if (LogicOpc && (CC == ISD::SETOEQ || CC == ISD::SETUNE)) { 2504 Cmp = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op1, Op1, 2505 DAG.getCondCode(CC), Mask, VL); 2506 SDValue Cmp2 = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op2, Op2, 2507 DAG.getCondCode(CC), Mask, VL); 2508 Cmp = DAG.getNode(*LogicOpc, DL, MaskVT, Cmp, Cmp2, VL); 2509 } else { 2510 Cmp = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op1, Op2, 2511 DAG.getCondCode(CC), Mask, VL); 2512 if (LogicOpc) { 2513 SDValue Cmp2 = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op2, Op1, 2514 DAG.getCondCode(CC), Mask, VL); 2515 Cmp = DAG.getNode(*LogicOpc, DL, MaskVT, Cmp, Cmp2, VL); 2516 } 2517 } 2518 2519 if (Invert) { 2520 SDValue AllOnes = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); 2521 Cmp = DAG.getNode(RISCVISD::VMXOR_VL, DL, MaskVT, Cmp, AllOnes, VL); 2522 } 2523 2524 return convertFromScalableVector(VT, Cmp, DAG, Subtarget); 2525 } 2526 2527 SDValue RISCVTargetLowering::lowerFixedLengthVectorLogicOpToRVV( 2528 SDValue Op, SelectionDAG &DAG, unsigned MaskOpc, unsigned VecOpc) const { 2529 MVT VT = Op.getSimpleValueType(); 2530 2531 if (VT.getVectorElementType() == MVT::i1) 2532 return lowerToScalableOp(Op, DAG, MaskOpc, /*HasMask*/ false); 2533 2534 return lowerToScalableOp(Op, DAG, VecOpc, /*HasMask*/ true); 2535 } 2536 2537 SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV( 2538 SDValue Op, SelectionDAG &DAG) const { 2539 MVT VT = Op.getSimpleValueType(); 2540 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 2541 2542 MVT I1ContainerVT = 2543 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 2544 2545 SDValue CC = 2546 convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget); 2547 SDValue Op1 = 2548 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget); 2549 SDValue Op2 = 2550 convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget); 2551 2552 SDLoc DL(Op); 2553 SDValue Mask, VL; 2554 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 2555 2556 SDValue Select = 2557 DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, Op1, Op2, VL); 2558 2559 return convertFromScalableVector(VT, Select, DAG, Subtarget); 2560 } 2561 2562 SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, SelectionDAG &DAG, 2563 unsigned NewOpc, 2564 bool HasMask) const { 2565 MVT VT = Op.getSimpleValueType(); 2566 assert(useRVVForFixedLengthVectorVT(VT) && 2567 "Only expected to lower fixed length vector operation!"); 2568 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 2569 2570 // Create list of operands by converting existing ones to scalable types. 2571 SmallVector<SDValue, 6> Ops; 2572 for (const SDValue &V : Op->op_values()) { 2573 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!"); 2574 2575 // Pass through non-vector operands. 
2576 if (!V.getValueType().isVector()) { 2577 Ops.push_back(V); 2578 continue; 2579 } 2580 2581 // "cast" fixed length vector to a scalable vector. 2582 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) && 2583 "Only fixed length vectors are supported!"); 2584 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget)); 2585 } 2586 2587 SDLoc DL(Op); 2588 SDValue Mask, VL; 2589 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 2590 if (HasMask) 2591 Ops.push_back(Mask); 2592 Ops.push_back(VL); 2593 2594 SDValue ScalableRes = DAG.getNode(NewOpc, DL, ContainerVT, Ops); 2595 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget); 2596 } 2597 2598 // Returns the opcode of the target-specific SDNode that implements the 32-bit 2599 // form of the given Opcode. 2600 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) { 2601 switch (Opcode) { 2602 default: 2603 llvm_unreachable("Unexpected opcode"); 2604 case ISD::SHL: 2605 return RISCVISD::SLLW; 2606 case ISD::SRA: 2607 return RISCVISD::SRAW; 2608 case ISD::SRL: 2609 return RISCVISD::SRLW; 2610 case ISD::SDIV: 2611 return RISCVISD::DIVW; 2612 case ISD::UDIV: 2613 return RISCVISD::DIVUW; 2614 case ISD::UREM: 2615 return RISCVISD::REMUW; 2616 case ISD::ROTL: 2617 return RISCVISD::ROLW; 2618 case ISD::ROTR: 2619 return RISCVISD::RORW; 2620 case RISCVISD::GREVI: 2621 return RISCVISD::GREVIW; 2622 case RISCVISD::GORCI: 2623 return RISCVISD::GORCIW; 2624 } 2625 } 2626 2627 // Converts the given 32-bit operation to a target-specific SelectionDAG node. 2628 // Because i32 isn't a legal type for RV64, these operations would otherwise 2629 // be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W 2630 // later on because the fact that the operation was originally of type i32 is 2631 // lost. 2632 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, 2633 unsigned ExtOpc = ISD::ANY_EXTEND) { 2634 SDLoc DL(N); 2635 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode()); 2636 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0)); 2637 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1)); 2638 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1); 2639 // ReplaceNodeResults requires we maintain the same type for the return value. 2640 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes); 2641 } 2642 2643 // Converts the given 32-bit operation to an i64 operation with sign-extension 2644 // semantics to reduce the number of sign-extension instructions.
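// For example, (i32 (add x, y)) on RV64 becomes roughly
//   (trunc (sext_inreg (add (any_extend x), (any_extend y)), i32))
// which instruction selection can then match to a single ADDW.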
2645 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) { 2646 SDLoc DL(N); 2647 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 2648 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 2649 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1); 2650 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp, 2651 DAG.getValueType(MVT::i32)); 2652 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes); 2653 } 2654 2655 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, 2656 SmallVectorImpl<SDValue> &Results, 2657 SelectionDAG &DAG) const { 2658 SDLoc DL(N); 2659 switch (N->getOpcode()) { 2660 default: 2661 llvm_unreachable("Don't know how to custom type legalize this operation!"); 2662 case ISD::STRICT_FP_TO_SINT: 2663 case ISD::STRICT_FP_TO_UINT: 2664 case ISD::FP_TO_SINT: 2665 case ISD::FP_TO_UINT: { 2666 bool IsStrict = N->isStrictFPOpcode(); 2667 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 2668 "Unexpected custom legalisation"); 2669 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0); 2670 // If the FP type needs to be softened, emit a library call using the 'si' 2671 // version. If we left it to default legalization we'd end up with 'di'. If 2672 // the FP type doesn't need to be softened just let generic type 2673 // legalization promote the result type. 2674 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) != 2675 TargetLowering::TypeSoftenFloat) 2676 return; 2677 RTLIB::Libcall LC; 2678 if (N->getOpcode() == ISD::FP_TO_SINT || 2679 N->getOpcode() == ISD::STRICT_FP_TO_SINT) 2680 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0)); 2681 else 2682 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0)); 2683 MakeLibCallOptions CallOptions; 2684 EVT OpVT = Op0.getValueType(); 2685 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true); 2686 SDValue Chain = IsStrict ? 
N->getOperand(0) : SDValue(); 2687 SDValue Result; 2688 std::tie(Result, Chain) = 2689 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain); 2690 Results.push_back(Result); 2691 if (IsStrict) 2692 Results.push_back(Chain); 2693 break; 2694 } 2695 case ISD::READCYCLECOUNTER: { 2696 assert(!Subtarget.is64Bit() && 2697 "READCYCLECOUNTER only has custom type legalization on riscv32"); 2698 2699 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); 2700 SDValue RCW = 2701 DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0)); 2702 2703 Results.push_back( 2704 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1))); 2705 Results.push_back(RCW.getValue(2)); 2706 break; 2707 } 2708 case ISD::ADD: 2709 case ISD::SUB: 2710 case ISD::MUL: 2711 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 2712 "Unexpected custom legalisation"); 2713 if (N->getOperand(1).getOpcode() == ISD::Constant) 2714 return; 2715 Results.push_back(customLegalizeToWOpWithSExt(N, DAG)); 2716 break; 2717 case ISD::SHL: 2718 case ISD::SRA: 2719 case ISD::SRL: 2720 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 2721 "Unexpected custom legalisation"); 2722 if (N->getOperand(1).getOpcode() == ISD::Constant) 2723 return; 2724 Results.push_back(customLegalizeToWOp(N, DAG)); 2725 break; 2726 case ISD::ROTL: 2727 case ISD::ROTR: 2728 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 2729 "Unexpected custom legalisation"); 2730 Results.push_back(customLegalizeToWOp(N, DAG)); 2731 break; 2732 case ISD::SDIV: 2733 case ISD::UDIV: 2734 case ISD::UREM: { 2735 MVT VT = N->getSimpleValueType(0); 2736 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) && 2737 Subtarget.is64Bit() && Subtarget.hasStdExtM() && 2738 "Unexpected custom legalisation"); 2739 if (N->getOperand(0).getOpcode() == ISD::Constant || 2740 N->getOperand(1).getOpcode() == ISD::Constant) 2741 return; 2742 2743 // If the input is i32, use ANY_EXTEND since the W instructions don't read 2744 // the upper 32 bits. For other types we need to sign or zero extend 2745 // based on the opcode. 2746 unsigned ExtOpc = ISD::ANY_EXTEND; 2747 if (VT != MVT::i32) 2748 ExtOpc = N->getOpcode() == ISD::SDIV ? 
ISD::SIGN_EXTEND 2749 : ISD::ZERO_EXTEND; 2750 2751 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc)); 2752 break; 2753 } 2754 case ISD::BITCAST: { 2755 assert(((N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 2756 Subtarget.hasStdExtF()) || 2757 (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh())) && 2758 "Unexpected custom legalisation"); 2759 SDValue Op0 = N->getOperand(0); 2760 if (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh()) { 2761 if (Op0.getValueType() != MVT::f16) 2762 return; 2763 SDValue FPConv = 2764 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Op0); 2765 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv)); 2766 } else if (N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 2767 Subtarget.hasStdExtF()) { 2768 if (Op0.getValueType() != MVT::f32) 2769 return; 2770 SDValue FPConv = 2771 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0); 2772 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv)); 2773 } 2774 break; 2775 } 2776 case RISCVISD::GREVI: 2777 case RISCVISD::GORCI: { 2778 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 2779 "Unexpected custom legalisation"); 2780 // This is similar to customLegalizeToWOp, except that we pass the second 2781 // operand (a TargetConstant) straight through: it is already of type 2782 // XLenVT. 2783 SDLoc DL(N); 2784 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode()); 2785 SDValue NewOp0 = 2786 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 2787 SDValue NewRes = 2788 DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, N->getOperand(1)); 2789 // ReplaceNodeResults requires we maintain the same type for the return 2790 // value. 2791 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes)); 2792 break; 2793 } 2794 case RISCVISD::SHFLI: { 2795 // There is no SHFLIW instruction, but we can just promote the operation. 2796 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 2797 "Unexpected custom legalisation"); 2798 SDLoc DL(N); 2799 SDValue NewOp0 = 2800 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 2801 SDValue NewRes = 2802 DAG.getNode(RISCVISD::SHFLI, DL, MVT::i64, NewOp0, N->getOperand(1)); 2803 // ReplaceNodeResults requires we maintain the same type for the return 2804 // value. 2805 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes)); 2806 break; 2807 } 2808 case ISD::BSWAP: 2809 case ISD::BITREVERSE: { 2810 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 2811 Subtarget.hasStdExtZbp() && "Unexpected custom legalisation"); 2812 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, 2813 N->getOperand(0)); 2814 unsigned Imm = N->getOpcode() == ISD::BITREVERSE ? 31 : 24; 2815 SDValue GREVIW = DAG.getNode(RISCVISD::GREVIW, DL, MVT::i64, NewOp0, 2816 DAG.getTargetConstant(Imm, DL, 2817 Subtarget.getXLenVT())); 2818 // ReplaceNodeResults requires we maintain the same type for the return 2819 // value. 
2820 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, GREVIW)); 2821 break; 2822 } 2823 case ISD::FSHL: 2824 case ISD::FSHR: { 2825 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 2826 Subtarget.hasStdExtZbt() && "Unexpected custom legalisation"); 2827 SDValue NewOp0 = 2828 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 2829 SDValue NewOp1 = 2830 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 2831 SDValue NewOp2 = 2832 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); 2833 // FSLW/FSRW take a 6 bit shift amount but i32 FSHL/FSHR only use 5 bits. 2834 // Mask the shift amount to 5 bits. 2835 NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2, 2836 DAG.getConstant(0x1f, DL, MVT::i64)); 2837 unsigned Opc = 2838 N->getOpcode() == ISD::FSHL ? RISCVISD::FSLW : RISCVISD::FSRW; 2839 SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewOp2); 2840 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp)); 2841 break; 2842 } 2843 case ISD::EXTRACT_VECTOR_ELT: { 2844 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element 2845 // type is illegal (currently only vXi64 RV32). 2846 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are 2847 // transferred to the destination register. We issue two of these from the 2848 // upper- and lower- halves of the SEW-bit vector element, slid down to the 2849 // first element. 2850 SDLoc DL(N); 2851 SDValue Vec = N->getOperand(0); 2852 SDValue Idx = N->getOperand(1); 2853 EVT VecVT = Vec.getValueType(); 2854 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 && 2855 VecVT.getVectorElementType() == MVT::i64 && 2856 "Unexpected EXTRACT_VECTOR_ELT legalization"); 2857 2858 if (!VecVT.isScalableVector()) 2859 return; 2860 2861 SDValue Slidedown = Vec; 2862 MVT XLenVT = Subtarget.getXLenVT(); 2863 // Unless the index is known to be 0, we must slide the vector down to get 2864 // the desired element into index 0. 2865 if (!isNullConstant(Idx)) { 2866 SDValue Mask, VL; 2867 std::tie(Mask, VL) = 2868 getDefaultScalableVLOps(VecVT.getSimpleVT(), DL, DAG, Subtarget); 2869 Slidedown = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VecVT, 2870 DAG.getUNDEF(VecVT), Vec, Idx, Mask, VL); 2871 } 2872 2873 // Extract the lower XLEN bits of the correct vector element. 2874 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Slidedown, Idx); 2875 2876 // To extract the upper XLEN bits of the vector element, shift the first 2877 // element right by 32 bits and re-extract the lower XLEN bits. 
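// In RVV terms the tail of this sequence is roughly (register names are
// illustrative only):
//   vsrl.vx vTmp, vSlidedown, a0   ; a0 holds 32
//   vmv.x.s rHi, vTmp
// and the two halves are then combined with BUILD_PAIR below.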
2878 SDValue ThirtyTwoV = 2879 DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, 2880 DAG.getConstant(32, DL, Subtarget.getXLenVT())); 2881 SDValue LShr32 = DAG.getNode(ISD::SRL, DL, VecVT, Slidedown, ThirtyTwoV); 2882 2883 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32, Idx); 2884 2885 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi)); 2886 break; 2887 } 2888 case ISD::INTRINSIC_WO_CHAIN: { 2889 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 2890 switch (IntNo) { 2891 default: 2892 llvm_unreachable( 2893 "Don't know how to custom type legalize this intrinsic!"); 2894 case Intrinsic::riscv_vmv_x_s: { 2895 EVT VT = N->getValueType(0); 2896 assert((VT == MVT::i8 || VT == MVT::i16 || 2897 (Subtarget.is64Bit() && VT == MVT::i32)) && 2898 "Unexpected custom legalisation!"); 2899 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL, 2900 Subtarget.getXLenVT(), N->getOperand(1)); 2901 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract)); 2902 break; 2903 } 2904 } 2905 break; 2906 } 2907 case ISD::VECREDUCE_ADD: 2908 case ISD::VECREDUCE_AND: 2909 case ISD::VECREDUCE_OR: 2910 case ISD::VECREDUCE_XOR: 2911 case ISD::VECREDUCE_SMAX: 2912 case ISD::VECREDUCE_UMAX: 2913 case ISD::VECREDUCE_SMIN: 2914 case ISD::VECREDUCE_UMIN: 2915 // The custom-lowering for these nodes returns a vector whose first element 2916 // is the result of the reduction. Extract its first element and let the 2917 // legalization for EXTRACT_VECTOR_ELT do the rest of the job. 2918 Results.push_back(lowerVECREDUCE(SDValue(N, 0), DAG)); 2919 break; 2920 } 2921 } 2922 2923 // A structure to hold one of the bit-manipulation patterns below. Together, a 2924 // SHL and non-SHL pattern may form a bit-manipulation pair on a single source: 2925 // (or (and (shl x, 1), 0xAAAAAAAA), 2926 // (and (srl x, 1), 0x55555555)) 2927 struct RISCVBitmanipPat { 2928 SDValue Op; 2929 unsigned ShAmt; 2930 bool IsSHL; 2931 2932 bool formsPairWith(const RISCVBitmanipPat &Other) const { 2933 return Op == Other.Op && ShAmt == Other.ShAmt && IsSHL != Other.IsSHL; 2934 } 2935 }; 2936 2937 // Matches patterns of the form 2938 // (and (shl x, C2), (C1 << C2)) 2939 // (and (srl x, C2), C1) 2940 // (shl (and x, C1), C2) 2941 // (srl (and x, (C1 << C2)), C2) 2942 // Where C2 is a power of 2 and C1 has at least that many leading zeroes. 2943 // The expected masks for each shift amount are specified in BitmanipMasks where 2944 // BitmanipMasks[log2(C2)] specifies the expected C1 value. 2945 // The max allowed shift amount is either XLen/2 or XLen/4 determined by whether 2946 // BitmanipMasks contains 6 or 5 entries assuming that the maximum possible 2947 // XLen is 64. 2948 static Optional<RISCVBitmanipPat> 2949 matchRISCVBitmanipPat(SDValue Op, ArrayRef<uint64_t> BitmanipMasks) { 2950 assert((BitmanipMasks.size() == 5 || BitmanipMasks.size() == 6) && 2951 "Unexpected number of masks"); 2952 Optional<uint64_t> Mask; 2953 // Optionally consume a mask around the shift operation. 2954 if (Op.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op.getOperand(1))) { 2955 Mask = Op.getConstantOperandVal(1); 2956 Op = Op.getOperand(0); 2957 } 2958 if (Op.getOpcode() != ISD::SHL && Op.getOpcode() != ISD::SRL) 2959 return None; 2960 bool IsSHL = Op.getOpcode() == ISD::SHL; 2961 2962 if (!isa<ConstantSDNode>(Op.getOperand(1))) 2963 return None; 2964 uint64_t ShAmt = Op.getConstantOperandVal(1); 2965 2966 unsigned Width = Op.getValueType() == MVT::i64 ? 
      64 : 32;
  if (ShAmt >= Width || !isPowerOf2_64(ShAmt))
    return None;
  // If we don't have enough masks for 64 bit, then we must be trying to
  // match SHFL so we're only allowed to shift 1/4 of the width.
  if (BitmanipMasks.size() == 5 && ShAmt >= (Width / 2))
    return None;

  SDValue Src = Op.getOperand(0);

  // The expected mask is shifted left when the AND is found around SHL
  // patterns.
  //   ((x >> 1) & 0x55555555)
  //   ((x << 1) & 0xAAAAAAAA)
  bool SHLExpMask = IsSHL;

  if (!Mask) {
    // Sometimes LLVM keeps the mask as an operand of the shift, typically when
    // the mask is all ones: consume that now.
    if (Src.getOpcode() == ISD::AND && isa<ConstantSDNode>(Src.getOperand(1))) {
      Mask = Src.getConstantOperandVal(1);
      Src = Src.getOperand(0);
      // The expected mask is now in fact shifted left for SRL, so reverse the
      // decision.
      //   ((x & 0xAAAAAAAA) >> 1)
      //   ((x & 0x55555555) << 1)
      SHLExpMask = !SHLExpMask;
    } else {
      // Use a default shifted mask of all-ones if there's no AND, truncated
      // down to the expected width. This simplifies the logic later on.
      Mask = maskTrailingOnes<uint64_t>(Width);
      *Mask &= (IsSHL ? *Mask << ShAmt : *Mask >> ShAmt);
    }
  }

  unsigned MaskIdx = Log2_32(ShAmt);
  uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);

  if (SHLExpMask)
    ExpMask <<= ShAmt;

  if (Mask != ExpMask)
    return None;

  return RISCVBitmanipPat{Src, (unsigned)ShAmt, IsSHL};
}

// Matches any of the following bit-manipulation patterns:
//   (and (shl x, 1), (0x55555555 << 1))
//   (and (srl x, 1), 0x55555555)
//   (shl (and x, 0x55555555), 1)
//   (srl (and x, (0x55555555 << 1)), 1)
// where the shift amount and mask may vary thus:
//   [1]  = 0x55555555 / 0xAAAAAAAA
//   [2]  = 0x33333333 / 0xCCCCCCCC
//   [4]  = 0x0F0F0F0F / 0xF0F0F0F0
//   [8]  = 0x00FF00FF / 0xFF00FF00
//   [16] = 0x0000FFFF / 0xFFFF0000
//   [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64)
static Optional<RISCVBitmanipPat> matchGREVIPat(SDValue Op) {
  // These are the unshifted masks which we use to match bit-manipulation
  // patterns. They may be shifted left in certain circumstances.
  static const uint64_t BitmanipMasks[] = {
      0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
      0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};

  return matchRISCVBitmanipPat(Op, BitmanipMasks);
}

// Match the following pattern as a GREVI(W) operation
//   (or (BITMANIP_SHL x), (BITMANIP_SRL x))
static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG,
                               const RISCVSubtarget &Subtarget) {
  assert(Subtarget.hasStdExtZbp() && "Expected Zbp extension");
  EVT VT = Op.getValueType();

  if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
    auto LHS = matchGREVIPat(Op.getOperand(0));
    auto RHS = matchGREVIPat(Op.getOperand(1));
    if (LHS && RHS && LHS->formsPairWith(*RHS)) {
      SDLoc DL(Op);
      return DAG.getNode(
          RISCVISD::GREVI, DL, VT, LHS->Op,
          DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT()));
    }
  }
  return SDValue();
}

// Matches any of the following patterns as a GORCI(W) operation
// 1. (or (GREVI x, shamt), x) if shamt is a power of 2
// 2. (or x, (GREVI x, shamt)) if shamt is a power of 2
// 3. (or (or (BITMANIP_SHL x), x), (BITMANIP_SRL x))
//    Note that with the variant of 3.,
//      (or (or (BITMANIP_SHL x), (BITMANIP_SRL x)), x)
//    the inner pattern will first be matched as GREVI and then the outer
//    pattern will be matched to GORC via the first rule above.
// 4. (or (rotl/rotr x, bitwidth/2), x)
static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG,
                               const RISCVSubtarget &Subtarget) {
  assert(Subtarget.hasStdExtZbp() && "Expected Zbp extension");
  EVT VT = Op.getValueType();

  if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
    SDLoc DL(Op);
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    auto MatchOROfReverse = [&](SDValue Reverse, SDValue X) {
      if (Reverse.getOpcode() == RISCVISD::GREVI && Reverse.getOperand(0) == X &&
          isPowerOf2_32(Reverse.getConstantOperandVal(1)))
        return DAG.getNode(RISCVISD::GORCI, DL, VT, X, Reverse.getOperand(1));
      // We can also form GORCI from ROTL/ROTR by half the bitwidth.
      if ((Reverse.getOpcode() == ISD::ROTL ||
           Reverse.getOpcode() == ISD::ROTR) &&
          Reverse.getOperand(0) == X &&
          isa<ConstantSDNode>(Reverse.getOperand(1))) {
        uint64_t RotAmt = Reverse.getConstantOperandVal(1);
        if (RotAmt == (VT.getSizeInBits() / 2))
          return DAG.getNode(
              RISCVISD::GORCI, DL, VT, X,
              DAG.getTargetConstant(RotAmt, DL, Subtarget.getXLenVT()));
      }
      return SDValue();
    };

    // Check for either commutable permutation of (or (GREVI x, shamt), x)
    if (SDValue V = MatchOROfReverse(Op0, Op1))
      return V;
    if (SDValue V = MatchOROfReverse(Op1, Op0))
      return V;

    // OR is commutable so canonicalize its OR operand to the left
    if (Op0.getOpcode() != ISD::OR && Op1.getOpcode() == ISD::OR)
      std::swap(Op0, Op1);
    if (Op0.getOpcode() != ISD::OR)
      return SDValue();
    SDValue OrOp0 = Op0.getOperand(0);
    SDValue OrOp1 = Op0.getOperand(1);
    auto LHS = matchGREVIPat(OrOp0);
    // OR is commutable so swap the operands and try again: x might have been
    // on the left
    if (!LHS) {
      std::swap(OrOp0, OrOp1);
      LHS = matchGREVIPat(OrOp0);
    }
    auto RHS = matchGREVIPat(Op1);
    if (LHS && RHS && LHS->formsPairWith(*RHS) && LHS->Op == OrOp1) {
      return DAG.getNode(
          RISCVISD::GORCI, DL, VT, LHS->Op,
          DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT()));
    }
  }
  return SDValue();
}

// Matches any of the following bit-manipulation patterns:
//   (and (shl x, 1), (0x22222222 << 1))
//   (and (srl x, 1), 0x22222222)
//   (shl (and x, 0x22222222), 1)
//   (srl (and x, (0x22222222 << 1)), 1)
// where the shift amount and mask may vary thus:
//   [1]  = 0x22222222 / 0x44444444
//   [2]  = 0x0C0C0C0C / 0x30303030
//   [4]  = 0x00F000F0 / 0x0F000F00
//   [8]  = 0x0000FF00 / 0x00FF0000
//   [16] = 0x00000000FFFF0000 / 0x0000FFFF00000000 (for RV64)
static Optional<RISCVBitmanipPat> matchSHFLPat(SDValue Op) {
  // These are the unshifted masks which we use to match bit-manipulation
  // patterns. They may be shifted left in certain circumstances.
  static const uint64_t BitmanipMasks[] = {
      0x2222222222222222ULL, 0x0C0C0C0C0C0C0C0CULL, 0x00F000F000F000F0ULL,
      0x0000FF000000FF00ULL, 0x00000000FFFF0000ULL};

  return matchRISCVBitmanipPat(Op, BitmanipMasks);
}

// Match (or (or (SHFL_SHL x), (SHFL_SHR x)), (SHFL_AND x))
static SDValue combineORToSHFL(SDValue Op, SelectionDAG &DAG,
                               const RISCVSubtarget &Subtarget) {
  assert(Subtarget.hasStdExtZbp() && "Expected Zbp extension");
  EVT VT = Op.getValueType();

  if (VT != MVT::i32 && VT != Subtarget.getXLenVT())
    return SDValue();

  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);

  // Or is commutable so canonicalize the second OR to the LHS.
  if (Op0.getOpcode() != ISD::OR)
    std::swap(Op0, Op1);
  if (Op0.getOpcode() != ISD::OR)
    return SDValue();

  // We found an inner OR, so our operands are the operands of the inner OR
  // and the other operand of the outer OR.
  SDValue A = Op0.getOperand(0);
  SDValue B = Op0.getOperand(1);
  SDValue C = Op1;

  auto Match1 = matchSHFLPat(A);
  auto Match2 = matchSHFLPat(B);

  // If neither matched, we failed.
  if (!Match1 && !Match2)
    return SDValue();

  // We had at least one match. If one failed, try the remaining C operand.
  if (!Match1) {
    std::swap(A, C);
    Match1 = matchSHFLPat(A);
    if (!Match1)
      return SDValue();
  } else if (!Match2) {
    std::swap(B, C);
    Match2 = matchSHFLPat(B);
    if (!Match2)
      return SDValue();
  }
  assert(Match1 && Match2);

  // Make sure our matches pair up.
  if (!Match1->formsPairWith(*Match2))
    return SDValue();

  // All that remains is to make sure C is an AND with the same input, that
  // masks out the bits that are being shuffled.
  if (C.getOpcode() != ISD::AND || !isa<ConstantSDNode>(C.getOperand(1)) ||
      C.getOperand(0) != Match1->Op)
    return SDValue();

  uint64_t Mask = C.getConstantOperandVal(1);

  static const uint64_t BitmanipMasks[] = {
      0x9999999999999999ULL, 0xC3C3C3C3C3C3C3C3ULL, 0xF00FF00FF00FF00FULL,
      0xFF0000FFFF0000FFULL, 0xFFFF00000000FFFFULL,
  };

  unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
  unsigned MaskIdx = Log2_32(Match1->ShAmt);
  uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);

  if (Mask != ExpMask)
    return SDValue();

  SDLoc DL(Op);
  return DAG.getNode(
      RISCVISD::SHFLI, DL, VT, Match1->Op,
      DAG.getTargetConstant(Match1->ShAmt, DL, Subtarget.getXLenVT()));
}

// Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is
// non-zero, and to x when it is. Any repeated GREVI stage undoes itself.
// Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). Repeated stages do
// not undo themselves, but they are redundant.
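// For example, (GREVI (GREVI x, 1), 2) is equivalent to (GREVI x, 3), and
// (GREVI (GREVI x, 3), 3) is simply x, since each GREV stage is an involution
// and independent stages commute. Likewise (GORCI (GORCI x, 1), 3) is
// equivalent to (GORCI x, 3).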
3222 static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) { 3223 unsigned ShAmt1 = N->getConstantOperandVal(1); 3224 SDValue Src = N->getOperand(0); 3225 3226 if (Src.getOpcode() != N->getOpcode()) 3227 return SDValue(); 3228 3229 unsigned ShAmt2 = Src.getConstantOperandVal(1); 3230 Src = Src.getOperand(0); 3231 3232 unsigned CombinedShAmt; 3233 if (N->getOpcode() == RISCVISD::GORCI || N->getOpcode() == RISCVISD::GORCIW) 3234 CombinedShAmt = ShAmt1 | ShAmt2; 3235 else 3236 CombinedShAmt = ShAmt1 ^ ShAmt2; 3237 3238 if (CombinedShAmt == 0) 3239 return Src; 3240 3241 SDLoc DL(N); 3242 return DAG.getNode(N->getOpcode(), DL, N->getValueType(0), Src, 3243 DAG.getTargetConstant(CombinedShAmt, DL, 3244 N->getOperand(1).getValueType())); 3245 } 3246 3247 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, 3248 DAGCombinerInfo &DCI) const { 3249 SelectionDAG &DAG = DCI.DAG; 3250 3251 switch (N->getOpcode()) { 3252 default: 3253 break; 3254 case RISCVISD::SplitF64: { 3255 SDValue Op0 = N->getOperand(0); 3256 // If the input to SplitF64 is just BuildPairF64 then the operation is 3257 // redundant. Instead, use BuildPairF64's operands directly. 3258 if (Op0->getOpcode() == RISCVISD::BuildPairF64) 3259 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1)); 3260 3261 SDLoc DL(N); 3262 3263 // It's cheaper to materialise two 32-bit integers than to load a double 3264 // from the constant pool and transfer it to integer registers through the 3265 // stack. 3266 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) { 3267 APInt V = C->getValueAPF().bitcastToAPInt(); 3268 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32); 3269 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32); 3270 return DCI.CombineTo(N, Lo, Hi); 3271 } 3272 3273 // This is a target-specific version of a DAGCombine performed in 3274 // DAGCombiner::visitBITCAST. It performs the equivalent of: 3275 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 3276 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 3277 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || 3278 !Op0.getNode()->hasOneUse()) 3279 break; 3280 SDValue NewSplitF64 = 3281 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), 3282 Op0.getOperand(0)); 3283 SDValue Lo = NewSplitF64.getValue(0); 3284 SDValue Hi = NewSplitF64.getValue(1); 3285 APInt SignBit = APInt::getSignMask(32); 3286 if (Op0.getOpcode() == ISD::FNEG) { 3287 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi, 3288 DAG.getConstant(SignBit, DL, MVT::i32)); 3289 return DCI.CombineTo(N, Lo, NewHi); 3290 } 3291 assert(Op0.getOpcode() == ISD::FABS); 3292 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi, 3293 DAG.getConstant(~SignBit, DL, MVT::i32)); 3294 return DCI.CombineTo(N, Lo, NewHi); 3295 } 3296 case RISCVISD::SLLW: 3297 case RISCVISD::SRAW: 3298 case RISCVISD::SRLW: 3299 case RISCVISD::ROLW: 3300 case RISCVISD::RORW: { 3301 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read. 
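    // For example, an explicit (and y, 31) wrapped around the shift amount is
    // redundant here and gets removed by the SimplifyDemandedBits calls below.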
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);
    APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
    APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
    if (SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI) ||
        SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)) {
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }
    break;
  }
  case RISCVISD::FSL:
  case RISCVISD::FSR: {
    // Only the lower log2(Bitwidth)+1 bits of the shift amount are read.
    SDValue ShAmt = N->getOperand(2);
    unsigned BitWidth = ShAmt.getValueSizeInBits();
    assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
    APInt ShAmtMask(BitWidth, (BitWidth * 2) - 1);
    if (SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }
    break;
  }
  case RISCVISD::FSLW:
  case RISCVISD::FSRW: {
    // Only the lower 32 bits of Values and lower 6 bits of shift amount are
    // read.
    SDValue Op0 = N->getOperand(0);
    SDValue Op1 = N->getOperand(1);
    SDValue ShAmt = N->getOperand(2);
    APInt OpMask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
    APInt ShAmtMask = APInt::getLowBitsSet(ShAmt.getValueSizeInBits(), 6);
    if (SimplifyDemandedBits(Op0, OpMask, DCI) ||
        SimplifyDemandedBits(Op1, OpMask, DCI) ||
        SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }
    break;
  }
  case RISCVISD::GREVIW:
  case RISCVISD::GORCIW: {
    // Only the lower 32 bits of the first operand are read.
    SDValue Op0 = N->getOperand(0);
    APInt Mask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
    if (SimplifyDemandedBits(Op0, Mask, DCI)) {
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }

    return combineGREVI_GORCI(N, DCI.DAG);
  }
  case RISCVISD::FMV_X_ANYEXTW_RV64: {
    SDLoc DL(N);
    SDValue Op0 = N->getOperand(0);
    // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
    // conversion is unnecessary and can be replaced with an ANY_EXTEND
    // of the FMV_W_X_RV64 operand.
    if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) {
      assert(Op0.getOperand(0).getValueType() == MVT::i64 &&
             "Unexpected value type!");
      return Op0.getOperand(0);
    }

    // This is a target-specific version of a DAGCombine performed in
    // DAGCombiner::visitBITCAST.
It performs the equivalent of: 3373 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 3374 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 3375 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || 3376 !Op0.getNode()->hasOneUse()) 3377 break; 3378 SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, 3379 Op0.getOperand(0)); 3380 APInt SignBit = APInt::getSignMask(32).sext(64); 3381 if (Op0.getOpcode() == ISD::FNEG) 3382 return DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV, 3383 DAG.getConstant(SignBit, DL, MVT::i64)); 3384 3385 assert(Op0.getOpcode() == ISD::FABS); 3386 return DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV, 3387 DAG.getConstant(~SignBit, DL, MVT::i64)); 3388 } 3389 case RISCVISD::GREVI: 3390 case RISCVISD::GORCI: 3391 return combineGREVI_GORCI(N, DCI.DAG); 3392 case ISD::OR: 3393 if (auto GREV = combineORToGREV(SDValue(N, 0), DCI.DAG, Subtarget)) 3394 return GREV; 3395 if (auto GORC = combineORToGORC(SDValue(N, 0), DCI.DAG, Subtarget)) 3396 return GORC; 3397 if (auto SHFL = combineORToSHFL(SDValue(N, 0), DCI.DAG, Subtarget)) 3398 return SHFL; 3399 break; 3400 case RISCVISD::SELECT_CC: { 3401 // Transform 3402 // (select_cc (xor X, 1), 0, setne, trueV, falseV) -> 3403 // (select_cc X, 0, seteq, trueV, falseV) if we can prove X is 0/1. 3404 // This can occur when legalizing some floating point comparisons. 3405 SDValue LHS = N->getOperand(0); 3406 SDValue RHS = N->getOperand(1); 3407 auto CCVal = static_cast<ISD::CondCode>(N->getConstantOperandVal(2)); 3408 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1); 3409 if (ISD::isIntEqualitySetCC(CCVal) && isNullConstant(RHS) && 3410 LHS.getOpcode() == ISD::XOR && isOneConstant(LHS.getOperand(1)) && 3411 DAG.MaskedValueIsZero(LHS.getOperand(0), Mask)) { 3412 SDLoc DL(N); 3413 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); 3414 SDValue TargetCC = DAG.getConstant(CCVal, DL, Subtarget.getXLenVT()); 3415 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0), 3416 {LHS.getOperand(0), RHS, TargetCC, N->getOperand(3), 3417 N->getOperand(4)}); 3418 } 3419 break; 3420 } 3421 case ISD::SETCC: { 3422 // (setcc X, 1, setne) -> (setcc X, 0, seteq) if we can prove X is 0/1. 3423 // Comparing with 0 may allow us to fold into bnez/beqz. 3424 SDValue LHS = N->getOperand(0); 3425 SDValue RHS = N->getOperand(1); 3426 if (LHS.getValueType().isScalableVector()) 3427 break; 3428 auto CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); 3429 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1); 3430 if (isOneConstant(RHS) && ISD::isIntEqualitySetCC(CC) && 3431 DAG.MaskedValueIsZero(LHS, Mask)) { 3432 SDLoc DL(N); 3433 SDValue Zero = DAG.getConstant(0, DL, LHS.getValueType()); 3434 CC = ISD::getSetCCInverse(CC, LHS.getValueType()); 3435 return DAG.getSetCC(DL, N->getValueType(0), LHS, Zero, CC); 3436 } 3437 break; 3438 } 3439 case ISD::FCOPYSIGN: { 3440 EVT VT = N->getValueType(0); 3441 if (!VT.isVector()) 3442 break; 3443 // There is a form of VFSGNJ which injects the negated sign of its second 3444 // operand. Try and bubble any FNEG up after the extend/round to produce 3445 // this optimized pattern. Avoid modifying cases where FP_ROUND and 3446 // TRUNC=1. 3447 SDValue In2 = N->getOperand(1); 3448 // Avoid cases where the extend/round has multiple uses, as duplicating 3449 // those is typically more expensive than removing a fneg. 
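    // Concretely, the rewrite performed below is
    //   (fcopysign x, (fpext/fpround (fneg y))) ->
    //   (fcopysign x, (fneg (fpext/fpround y)))
    // so instruction selection can pick the sign-negating vfsgnjn form.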
3450 if (!In2.hasOneUse()) 3451 break; 3452 if (In2.getOpcode() != ISD::FP_EXTEND && 3453 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0)) 3454 break; 3455 In2 = In2.getOperand(0); 3456 if (In2.getOpcode() != ISD::FNEG) 3457 break; 3458 SDLoc DL(N); 3459 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT); 3460 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0), 3461 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound)); 3462 } 3463 } 3464 3465 return SDValue(); 3466 } 3467 3468 bool RISCVTargetLowering::isDesirableToCommuteWithShift( 3469 const SDNode *N, CombineLevel Level) const { 3470 // The following folds are only desirable if `(OP _, c1 << c2)` can be 3471 // materialised in fewer instructions than `(OP _, c1)`: 3472 // 3473 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) 3474 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2) 3475 SDValue N0 = N->getOperand(0); 3476 EVT Ty = N0.getValueType(); 3477 if (Ty.isScalarInteger() && 3478 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) { 3479 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 3480 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)); 3481 if (C1 && C2) { 3482 const APInt &C1Int = C1->getAPIntValue(); 3483 APInt ShiftedC1Int = C1Int << C2->getAPIntValue(); 3484 3485 // We can materialise `c1 << c2` into an add immediate, so it's "free", 3486 // and the combine should happen, to potentially allow further combines 3487 // later. 3488 if (ShiftedC1Int.getMinSignedBits() <= 64 && 3489 isLegalAddImmediate(ShiftedC1Int.getSExtValue())) 3490 return true; 3491 3492 // We can materialise `c1` in an add immediate, so it's "free", and the 3493 // combine should be prevented. 3494 if (C1Int.getMinSignedBits() <= 64 && 3495 isLegalAddImmediate(C1Int.getSExtValue())) 3496 return false; 3497 3498 // Neither constant will fit into an immediate, so find materialisation 3499 // costs. 3500 int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), 3501 Subtarget.is64Bit()); 3502 int ShiftedC1Cost = RISCVMatInt::getIntMatCost( 3503 ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit()); 3504 3505 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the 3506 // combine should be prevented. 3507 if (C1Cost < ShiftedC1Cost) 3508 return false; 3509 } 3510 } 3511 return true; 3512 } 3513 3514 bool RISCVTargetLowering::targetShrinkDemandedConstant( 3515 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, 3516 TargetLoweringOpt &TLO) const { 3517 // Delay this optimization as late as possible. 3518 if (!TLO.LegalOps) 3519 return false; 3520 3521 EVT VT = Op.getValueType(); 3522 if (VT.isVector()) 3523 return false; 3524 3525 // Only handle AND for now. 3526 if (Op.getOpcode() != ISD::AND) 3527 return false; 3528 3529 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); 3530 if (!C) 3531 return false; 3532 3533 const APInt &Mask = C->getAPIntValue(); 3534 3535 // Clear all non-demanded bits initially. 3536 APInt ShrunkMask = Mask & DemandedBits; 3537 3538 // If the shrunk mask fits in sign extended 12 bits, let the target 3539 // independent code apply it. 3540 if (ShrunkMask.isSignedIntN(12)) 3541 return false; 3542 3543 // Try to make a smaller immediate by setting undemanded bits. 3544 3545 // We need to be able to make a negative number through a combination of mask 3546 // and undemanded bits. 
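  // For example, for an i32 AND where both the demanded bits and the mask are
  // 0xFF0, ANDI cannot encode 0xFF0 directly, but 0xFFFFFFF0 (-16) agrees with
  // it on every demanded bit and does fit in a signed 12-bit immediate.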
3547 APInt ExpandedMask = Mask | ~DemandedBits; 3548 if (!ExpandedMask.isNegative()) 3549 return false; 3550 3551 // What is the fewest number of bits we need to represent the negative number. 3552 unsigned MinSignedBits = ExpandedMask.getMinSignedBits(); 3553 3554 // Try to make a 12 bit negative immediate. If that fails try to make a 32 3555 // bit negative immediate unless the shrunk immediate already fits in 32 bits. 3556 APInt NewMask = ShrunkMask; 3557 if (MinSignedBits <= 12) 3558 NewMask.setBitsFrom(11); 3559 else if (MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32)) 3560 NewMask.setBitsFrom(31); 3561 else 3562 return false; 3563 3564 // Sanity check that our new mask is a subset of the demanded mask. 3565 assert(NewMask.isSubsetOf(ExpandedMask)); 3566 3567 // If we aren't changing the mask, just return true to keep it and prevent 3568 // the caller from optimizing. 3569 if (NewMask == Mask) 3570 return true; 3571 3572 // Replace the constant with the new mask. 3573 SDLoc DL(Op); 3574 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT); 3575 SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC); 3576 return TLO.CombineTo(Op, NewOp); 3577 } 3578 3579 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, 3580 KnownBits &Known, 3581 const APInt &DemandedElts, 3582 const SelectionDAG &DAG, 3583 unsigned Depth) const { 3584 unsigned BitWidth = Known.getBitWidth(); 3585 unsigned Opc = Op.getOpcode(); 3586 assert((Opc >= ISD::BUILTIN_OP_END || 3587 Opc == ISD::INTRINSIC_WO_CHAIN || 3588 Opc == ISD::INTRINSIC_W_CHAIN || 3589 Opc == ISD::INTRINSIC_VOID) && 3590 "Should use MaskedValueIsZero if you don't know whether Op" 3591 " is a target node!"); 3592 3593 Known.resetAll(); 3594 switch (Opc) { 3595 default: break; 3596 case RISCVISD::REMUW: { 3597 KnownBits Known2; 3598 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 3599 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 3600 // We only care about the lower 32 bits. 3601 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32)); 3602 // Restore the original width by sign extending. 3603 Known = Known.sext(BitWidth); 3604 break; 3605 } 3606 case RISCVISD::DIVUW: { 3607 KnownBits Known2; 3608 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 3609 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 3610 // We only care about the lower 32 bits. 3611 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32)); 3612 // Restore the original width by sign extending. 3613 Known = Known.sext(BitWidth); 3614 break; 3615 } 3616 case RISCVISD::READ_VLENB: 3617 // We assume VLENB is at least 8 bytes. 3618 // FIXME: The 1.0 draft spec defines minimum VLEN as 128 bits. 3619 Known.Zero.setLowBits(3); 3620 break; 3621 } 3622 } 3623 3624 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( 3625 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, 3626 unsigned Depth) const { 3627 switch (Op.getOpcode()) { 3628 default: 3629 break; 3630 case RISCVISD::SLLW: 3631 case RISCVISD::SRAW: 3632 case RISCVISD::SRLW: 3633 case RISCVISD::DIVW: 3634 case RISCVISD::DIVUW: 3635 case RISCVISD::REMUW: 3636 case RISCVISD::ROLW: 3637 case RISCVISD::RORW: 3638 case RISCVISD::GREVIW: 3639 case RISCVISD::GORCIW: 3640 case RISCVISD::FSLW: 3641 case RISCVISD::FSRW: 3642 // TODO: As the result is sign-extended, this is conservatively correct. 
A 3643 // more precise answer could be calculated for SRAW depending on known 3644 // bits in the shift amount. 3645 return 33; 3646 case RISCVISD::SHFLI: { 3647 // There is no SHFLIW, but a i64 SHFLI with bit 4 of the control word 3648 // cleared doesn't affect bit 31. The upper 32 bits will be shuffled, but 3649 // will stay within the upper 32 bits. If there were more than 32 sign bits 3650 // before there will be at least 33 sign bits after. 3651 if (Op.getValueType() == MVT::i64 && 3652 (Op.getConstantOperandVal(1) & 0x10) == 0) { 3653 unsigned Tmp = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1); 3654 if (Tmp > 32) 3655 return 33; 3656 } 3657 break; 3658 } 3659 case RISCVISD::VMV_X_S: 3660 // The number of sign bits of the scalar result is computed by obtaining the 3661 // element type of the input vector operand, subtracting its width from the 3662 // XLEN, and then adding one (sign bit within the element type). If the 3663 // element type is wider than XLen, the least-significant XLEN bits are 3664 // taken. 3665 if (Op.getOperand(0).getScalarValueSizeInBits() > Subtarget.getXLen()) 3666 return 1; 3667 return Subtarget.getXLen() - Op.getOperand(0).getScalarValueSizeInBits() + 1; 3668 } 3669 3670 return 1; 3671 } 3672 3673 static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI, 3674 MachineBasicBlock *BB) { 3675 assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction"); 3676 3677 // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves. 3678 // Should the count have wrapped while it was being read, we need to try 3679 // again. 3680 // ... 3681 // read: 3682 // rdcycleh x3 # load high word of cycle 3683 // rdcycle x2 # load low word of cycle 3684 // rdcycleh x4 # load high word of cycle 3685 // bne x3, x4, read # check if high word reads match, otherwise try again 3686 // ... 3687 3688 MachineFunction &MF = *BB->getParent(); 3689 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 3690 MachineFunction::iterator It = ++BB->getIterator(); 3691 3692 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB); 3693 MF.insert(It, LoopMBB); 3694 3695 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB); 3696 MF.insert(It, DoneMBB); 3697 3698 // Transfer the remainder of BB and its successor edges to DoneMBB. 
3699 DoneMBB->splice(DoneMBB->begin(), BB, 3700 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 3701 DoneMBB->transferSuccessorsAndUpdatePHIs(BB); 3702 3703 BB->addSuccessor(LoopMBB); 3704 3705 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 3706 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 3707 Register LoReg = MI.getOperand(0).getReg(); 3708 Register HiReg = MI.getOperand(1).getReg(); 3709 DebugLoc DL = MI.getDebugLoc(); 3710 3711 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); 3712 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg) 3713 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) 3714 .addReg(RISCV::X0); 3715 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg) 3716 .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding) 3717 .addReg(RISCV::X0); 3718 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg) 3719 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) 3720 .addReg(RISCV::X0); 3721 3722 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE)) 3723 .addReg(HiReg) 3724 .addReg(ReadAgainReg) 3725 .addMBB(LoopMBB); 3726 3727 LoopMBB->addSuccessor(LoopMBB); 3728 LoopMBB->addSuccessor(DoneMBB); 3729 3730 MI.eraseFromParent(); 3731 3732 return DoneMBB; 3733 } 3734 3735 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI, 3736 MachineBasicBlock *BB) { 3737 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction"); 3738 3739 MachineFunction &MF = *BB->getParent(); 3740 DebugLoc DL = MI.getDebugLoc(); 3741 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 3742 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 3743 Register LoReg = MI.getOperand(0).getReg(); 3744 Register HiReg = MI.getOperand(1).getReg(); 3745 Register SrcReg = MI.getOperand(2).getReg(); 3746 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass; 3747 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); 3748 3749 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC, 3750 RI); 3751 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 3752 MachineMemOperand *MMOLo = 3753 MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8)); 3754 MachineMemOperand *MMOHi = MF.getMachineMemOperand( 3755 MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8)); 3756 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg) 3757 .addFrameIndex(FI) 3758 .addImm(0) 3759 .addMemOperand(MMOLo); 3760 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg) 3761 .addFrameIndex(FI) 3762 .addImm(4) 3763 .addMemOperand(MMOHi); 3764 MI.eraseFromParent(); // The pseudo instruction is gone now. 
3765 return BB; 3766 } 3767 3768 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI, 3769 MachineBasicBlock *BB) { 3770 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo && 3771 "Unexpected instruction"); 3772 3773 MachineFunction &MF = *BB->getParent(); 3774 DebugLoc DL = MI.getDebugLoc(); 3775 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 3776 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 3777 Register DstReg = MI.getOperand(0).getReg(); 3778 Register LoReg = MI.getOperand(1).getReg(); 3779 Register HiReg = MI.getOperand(2).getReg(); 3780 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass; 3781 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); 3782 3783 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 3784 MachineMemOperand *MMOLo = 3785 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8)); 3786 MachineMemOperand *MMOHi = MF.getMachineMemOperand( 3787 MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8)); 3788 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 3789 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill())) 3790 .addFrameIndex(FI) 3791 .addImm(0) 3792 .addMemOperand(MMOLo); 3793 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 3794 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill())) 3795 .addFrameIndex(FI) 3796 .addImm(4) 3797 .addMemOperand(MMOHi); 3798 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI); 3799 MI.eraseFromParent(); // The pseudo instruction is gone now. 3800 return BB; 3801 } 3802 3803 static bool isSelectPseudo(MachineInstr &MI) { 3804 switch (MI.getOpcode()) { 3805 default: 3806 return false; 3807 case RISCV::Select_GPR_Using_CC_GPR: 3808 case RISCV::Select_FPR16_Using_CC_GPR: 3809 case RISCV::Select_FPR32_Using_CC_GPR: 3810 case RISCV::Select_FPR64_Using_CC_GPR: 3811 return true; 3812 } 3813 } 3814 3815 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI, 3816 MachineBasicBlock *BB) { 3817 // To "insert" Select_* instructions, we actually have to insert the triangle 3818 // control-flow pattern. The incoming instructions know the destination vreg 3819 // to set, the condition code register to branch on, the true/false values to 3820 // select between, and the condcode to use to select the appropriate branch. 3821 // 3822 // We produce the following control flow: 3823 // HeadMBB 3824 // | \ 3825 // | IfFalseMBB 3826 // | / 3827 // TailMBB 3828 // 3829 // When we find a sequence of selects we attempt to optimize their emission 3830 // by sharing the control flow. Currently we only handle cases where we have 3831 // multiple selects with the exact same condition (same LHS, RHS and CC). 3832 // The selects may be interleaved with other instructions if the other 3833 // instructions meet some requirements we deem safe: 3834 // - They are debug instructions. Otherwise, 3835 // - They do not have side-effects, do not access memory and their inputs do 3836 // not depend on the results of the select pseudo-instructions. 3837 // The TrueV/FalseV operands of the selects cannot depend on the result of 3838 // previous selects in the sequence. 3839 // These conditions could be further relaxed. See the X86 target for a 3840 // related approach and more information. 
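  //
  // As an illustration (with made-up virtual register names), a sequence
  // such as
  //   %x = Select_GPR_Using_CC_GPR %a, %b, cc, %t1, %f1
  //   %y = Select_GPR_Using_CC_GPR %a, %b, cc, %t2, %f2
  // shares a single conditional branch, and both results become PHIs in
  // TailMBB.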
3841 Register LHS = MI.getOperand(1).getReg(); 3842 Register RHS = MI.getOperand(2).getReg(); 3843 auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm()); 3844 3845 SmallVector<MachineInstr *, 4> SelectDebugValues; 3846 SmallSet<Register, 4> SelectDests; 3847 SelectDests.insert(MI.getOperand(0).getReg()); 3848 3849 MachineInstr *LastSelectPseudo = &MI; 3850 3851 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI); 3852 SequenceMBBI != E; ++SequenceMBBI) { 3853 if (SequenceMBBI->isDebugInstr()) 3854 continue; 3855 else if (isSelectPseudo(*SequenceMBBI)) { 3856 if (SequenceMBBI->getOperand(1).getReg() != LHS || 3857 SequenceMBBI->getOperand(2).getReg() != RHS || 3858 SequenceMBBI->getOperand(3).getImm() != CC || 3859 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) || 3860 SelectDests.count(SequenceMBBI->getOperand(5).getReg())) 3861 break; 3862 LastSelectPseudo = &*SequenceMBBI; 3863 SequenceMBBI->collectDebugValues(SelectDebugValues); 3864 SelectDests.insert(SequenceMBBI->getOperand(0).getReg()); 3865 } else { 3866 if (SequenceMBBI->hasUnmodeledSideEffects() || 3867 SequenceMBBI->mayLoadOrStore()) 3868 break; 3869 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) { 3870 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg()); 3871 })) 3872 break; 3873 } 3874 } 3875 3876 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); 3877 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 3878 DebugLoc DL = MI.getDebugLoc(); 3879 MachineFunction::iterator I = ++BB->getIterator(); 3880 3881 MachineBasicBlock *HeadMBB = BB; 3882 MachineFunction *F = BB->getParent(); 3883 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB); 3884 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB); 3885 3886 F->insert(I, IfFalseMBB); 3887 F->insert(I, TailMBB); 3888 3889 // Transfer debug instructions associated with the selects to TailMBB. 3890 for (MachineInstr *DebugInstr : SelectDebugValues) { 3891 TailMBB->push_back(DebugInstr->removeFromParent()); 3892 } 3893 3894 // Move all instructions after the sequence to TailMBB. 3895 TailMBB->splice(TailMBB->end(), HeadMBB, 3896 std::next(LastSelectPseudo->getIterator()), HeadMBB->end()); 3897 // Update machine-CFG edges by transferring all successors of the current 3898 // block to the new block which will contain the Phi nodes for the selects. 3899 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB); 3900 // Set the successors for HeadMBB. 3901 HeadMBB->addSuccessor(IfFalseMBB); 3902 HeadMBB->addSuccessor(TailMBB); 3903 3904 // Insert appropriate branch. 3905 unsigned Opcode = getBranchOpcodeForIntCondCode(CC); 3906 3907 BuildMI(HeadMBB, DL, TII.get(Opcode)) 3908 .addReg(LHS) 3909 .addReg(RHS) 3910 .addMBB(TailMBB); 3911 3912 // IfFalseMBB just falls through to TailMBB. 3913 IfFalseMBB->addSuccessor(TailMBB); 3914 3915 // Create PHIs for all of the select pseudo-instructions. 
3916 auto SelectMBBI = MI.getIterator(); 3917 auto SelectEnd = std::next(LastSelectPseudo->getIterator()); 3918 auto InsertionPoint = TailMBB->begin(); 3919 while (SelectMBBI != SelectEnd) { 3920 auto Next = std::next(SelectMBBI); 3921 if (isSelectPseudo(*SelectMBBI)) { 3922 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ] 3923 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(), 3924 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg()) 3925 .addReg(SelectMBBI->getOperand(4).getReg()) 3926 .addMBB(HeadMBB) 3927 .addReg(SelectMBBI->getOperand(5).getReg()) 3928 .addMBB(IfFalseMBB); 3929 SelectMBBI->eraseFromParent(); 3930 } 3931 SelectMBBI = Next; 3932 } 3933 3934 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs); 3935 return TailMBB; 3936 } 3937 3938 static MachineBasicBlock *addVSetVL(MachineInstr &MI, MachineBasicBlock *BB, 3939 int VLIndex, unsigned SEWIndex, 3940 RISCVVLMUL VLMul, bool ForceTailAgnostic) { 3941 MachineFunction &MF = *BB->getParent(); 3942 DebugLoc DL = MI.getDebugLoc(); 3943 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 3944 3945 unsigned SEW = MI.getOperand(SEWIndex).getImm(); 3946 assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW"); 3947 RISCVVSEW ElementWidth = static_cast<RISCVVSEW>(Log2_32(SEW / 8)); 3948 3949 MachineRegisterInfo &MRI = MF.getRegInfo(); 3950 3951 // VL and VTYPE are alive here. 3952 MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI)); 3953 3954 if (VLIndex >= 0) { 3955 // Set VL (rs1 != X0). 3956 Register DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); 3957 MIB.addReg(DestReg, RegState::Define | RegState::Dead) 3958 .addReg(MI.getOperand(VLIndex).getReg()); 3959 } else 3960 // With no VL operator in the pseudo, do not modify VL (rd = X0, rs1 = X0). 3961 MIB.addReg(RISCV::X0, RegState::Define | RegState::Dead) 3962 .addReg(RISCV::X0, RegState::Kill); 3963 3964 // Default to tail agnostic unless the destination is tied to a source. In 3965 // that case the user would have some control over the tail values. The tail 3966 // policy is also ignored on instructions that only update element 0 like 3967 // vmv.s.x or reductions so use agnostic there to match the common case. 3968 // FIXME: This is conservatively correct, but we might want to detect that 3969 // the input is undefined. 3970 bool TailAgnostic = true; 3971 unsigned UseOpIdx; 3972 if (!ForceTailAgnostic && MI.isRegTiedToUseOperand(0, &UseOpIdx)) { 3973 TailAgnostic = false; 3974 // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic. 3975 const MachineOperand &UseMO = MI.getOperand(UseOpIdx); 3976 MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg()); 3977 if (UseMI && UseMI->isImplicitDef()) 3978 TailAgnostic = true; 3979 } 3980 3981 // For simplicity we reuse the vtype representation here. 
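  // The immediate built below packs VLMUL, SEW and the tail/mask agnostic
  // bits in the same layout the vsetvli instruction expects for its vtype
  // operand.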
  MIB.addImm(RISCVVType::encodeVTYPE(VLMul, ElementWidth,
                                     /*TailAgnostic*/ TailAgnostic,
                                     /*MaskAgnostic*/ false));

  // Remove (now) redundant operands from pseudo
  MI.getOperand(SEWIndex).setImm(-1);
  if (VLIndex >= 0) {
    MI.getOperand(VLIndex).setReg(RISCV::NoRegister);
    MI.getOperand(VLIndex).setIsKill(false);
  }

  return BB;
}

MachineBasicBlock *
RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                 MachineBasicBlock *BB) const {
  uint64_t TSFlags = MI.getDesc().TSFlags;

  if (TSFlags & RISCVII::HasSEWOpMask) {
    unsigned NumOperands = MI.getNumExplicitOperands();
    int VLIndex = (TSFlags & RISCVII::HasVLOpMask) ? NumOperands - 2 : -1;
    unsigned SEWIndex = NumOperands - 1;
    bool ForceTailAgnostic = TSFlags & RISCVII::ForceTailAgnosticMask;

    RISCVVLMUL VLMul = static_cast<RISCVVLMUL>((TSFlags & RISCVII::VLMulMask) >>
                                               RISCVII::VLMulShift);
    return addVSetVL(MI, BB, VLIndex, SEWIndex, VLMul, ForceTailAgnostic);
  }

  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected instr type to insert");
  case RISCV::ReadCycleWide:
    assert(!Subtarget.is64Bit() &&
           "ReadCycleWide is only to be used on riscv32");
    return emitReadCycleWidePseudo(MI, BB);
  case RISCV::Select_GPR_Using_CC_GPR:
  case RISCV::Select_FPR16_Using_CC_GPR:
  case RISCV::Select_FPR32_Using_CC_GPR:
  case RISCV::Select_FPR64_Using_CC_GPR:
    return emitSelectPseudo(MI, BB);
  case RISCV::BuildPairF64Pseudo:
    return emitBuildPairF64Pseudo(MI, BB);
  case RISCV::SplitF64Pseudo:
    return emitSplitF64Pseudo(MI, BB);
  }
}

// Calling Convention Implementation.
// The expectations for frontend ABI lowering vary from target to target.
// Ideally, an LLVM frontend would be able to avoid worrying about many ABI
// details, but this is a longer term goal. For now, we simply try to keep the
// role of the frontend as simple and well-defined as possible. The rules can
// be summarised as:
// * Never split up large scalar arguments. We handle them here.
// * If a hardfloat calling convention is being used, and the struct may be
//   passed in a pair of registers (fp+fp, int+fp), and both registers are
//   available, then pass as two separate arguments. If either the GPRs or FPRs
//   are exhausted, then pass according to the rule below.
// * If a struct could never be passed in registers or directly in a stack
//   slot (as it is larger than 2*XLEN and the floating point rules don't
//   apply), then pass it using a pointer with the byval attribute.
// * If a struct is less than 2*XLEN, then coerce to either a two-element
//   word-sized array or a 2*XLEN scalar (depending on alignment).
// * The frontend can determine whether a struct is returned by reference or
//   not based on its size and fields. If it will be returned by reference, the
//   frontend must modify the prototype so a pointer with the sret annotation is
//   passed as the first argument. This is not necessary for large scalar
//   returns.
// * Struct return values and varargs should be coerced to structs containing
//   register-size fields in the same situations they would be for fixed
//   arguments.
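//
// For example, under the hard-float ilp32d/lp64d ABIs a struct containing one
// float field and one int field is passed in an FPR plus a GPR while both
// register files still have argument registers available; once either is
// exhausted it is passed according to the integer calling convention instead.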
4055 4056 static const MCPhysReg ArgGPRs[] = { 4057 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, 4058 RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17 4059 }; 4060 static const MCPhysReg ArgFPR16s[] = { 4061 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, 4062 RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H 4063 }; 4064 static const MCPhysReg ArgFPR32s[] = { 4065 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, 4066 RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F 4067 }; 4068 static const MCPhysReg ArgFPR64s[] = { 4069 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, 4070 RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D 4071 }; 4072 // This is an interim calling convention and it may be changed in the future. 4073 static const MCPhysReg ArgVRs[] = { 4074 RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13, 4075 RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19, 4076 RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23}; 4077 static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2, 4078 RISCV::V14M2, RISCV::V16M2, RISCV::V18M2, 4079 RISCV::V20M2, RISCV::V22M2}; 4080 static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4, 4081 RISCV::V20M4}; 4082 static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8}; 4083 4084 // Pass a 2*XLEN argument that has been split into two XLEN values through 4085 // registers or the stack as necessary. 4086 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, 4087 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, 4088 MVT ValVT2, MVT LocVT2, 4089 ISD::ArgFlagsTy ArgFlags2) { 4090 unsigned XLenInBytes = XLen / 8; 4091 if (Register Reg = State.AllocateReg(ArgGPRs)) { 4092 // At least one half can be passed via register. 4093 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg, 4094 VA1.getLocVT(), CCValAssign::Full)); 4095 } else { 4096 // Both halves must be passed on the stack, with proper alignment. 4097 Align StackAlign = 4098 std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign()); 4099 State.addLoc( 4100 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(), 4101 State.AllocateStack(XLenInBytes, StackAlign), 4102 VA1.getLocVT(), CCValAssign::Full)); 4103 State.addLoc(CCValAssign::getMem( 4104 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)), 4105 LocVT2, CCValAssign::Full)); 4106 return false; 4107 } 4108 4109 if (Register Reg = State.AllocateReg(ArgGPRs)) { 4110 // The second half can also be passed via register. 4111 State.addLoc( 4112 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full)); 4113 } else { 4114 // The second half is passed via the stack, without additional alignment. 4115 State.addLoc(CCValAssign::getMem( 4116 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)), 4117 LocVT2, CCValAssign::Full)); 4118 } 4119 4120 return false; 4121 } 4122 4123 // Implements the RISC-V calling convention. Returns true upon failure. 4124 static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, 4125 MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, 4126 ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, 4127 bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, 4128 Optional<unsigned> FirstMaskArgument) { 4129 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits(); 4130 assert(XLen == 32 || XLen == 64); 4131 MVT XLenVT = XLen == 32 ? 
MVT::i32 : MVT::i64; 4132 4133 // Any return value split in to more than two values can't be returned 4134 // directly. 4135 if (IsRet && ValNo > 1) 4136 return true; 4137 4138 // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a 4139 // variadic argument, or if no F16/F32 argument registers are available. 4140 bool UseGPRForF16_F32 = true; 4141 // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a 4142 // variadic argument, or if no F64 argument registers are available. 4143 bool UseGPRForF64 = true; 4144 4145 switch (ABI) { 4146 default: 4147 llvm_unreachable("Unexpected ABI"); 4148 case RISCVABI::ABI_ILP32: 4149 case RISCVABI::ABI_LP64: 4150 break; 4151 case RISCVABI::ABI_ILP32F: 4152 case RISCVABI::ABI_LP64F: 4153 UseGPRForF16_F32 = !IsFixed; 4154 break; 4155 case RISCVABI::ABI_ILP32D: 4156 case RISCVABI::ABI_LP64D: 4157 UseGPRForF16_F32 = !IsFixed; 4158 UseGPRForF64 = !IsFixed; 4159 break; 4160 } 4161 4162 // FPR16, FPR32, and FPR64 alias each other. 4163 if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) { 4164 UseGPRForF16_F32 = true; 4165 UseGPRForF64 = true; 4166 } 4167 4168 // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and 4169 // similar local variables rather than directly checking against the target 4170 // ABI. 4171 4172 if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::f32)) { 4173 LocVT = XLenVT; 4174 LocInfo = CCValAssign::BCvt; 4175 } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) { 4176 LocVT = MVT::i64; 4177 LocInfo = CCValAssign::BCvt; 4178 } 4179 4180 // If this is a variadic argument, the RISC-V calling convention requires 4181 // that it is assigned an 'even' or 'aligned' register if it has 8-byte 4182 // alignment (RV32) or 16-byte alignment (RV64). An aligned register should 4183 // be used regardless of whether the original argument was split during 4184 // legalisation or not. The argument will not be passed by registers if the 4185 // original type is larger than 2*XLEN, so the register alignment rule does 4186 // not apply. 4187 unsigned TwoXLenInBytes = (2 * XLen) / 8; 4188 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes && 4189 DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) { 4190 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs); 4191 // Skip 'odd' register if necessary. 4192 if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1) 4193 State.AllocateReg(ArgGPRs); 4194 } 4195 4196 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs(); 4197 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags = 4198 State.getPendingArgFlags(); 4199 4200 assert(PendingLocs.size() == PendingArgFlags.size() && 4201 "PendingLocs and PendingArgFlags out of sync"); 4202 4203 // Handle passing f64 on RV32D with a soft float ABI or when floating point 4204 // registers are exhausted. 4205 if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) { 4206 assert(!ArgFlags.isSplit() && PendingLocs.empty() && 4207 "Can't lower f64 if it is split"); 4208 // Depending on available argument GPRS, f64 may be passed in a pair of 4209 // GPRs, split between a GPR and the stack, or passed completely on the 4210 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these 4211 // cases. 
4212 Register Reg = State.AllocateReg(ArgGPRs); 4213 LocVT = MVT::i32; 4214 if (!Reg) { 4215 unsigned StackOffset = State.AllocateStack(8, Align(8)); 4216 State.addLoc( 4217 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 4218 return false; 4219 } 4220 if (!State.AllocateReg(ArgGPRs)) 4221 State.AllocateStack(4, Align(4)); 4222 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 4223 return false; 4224 } 4225 4226 // Split arguments might be passed indirectly, so keep track of the pending 4227 // values. 4228 if (ArgFlags.isSplit() || !PendingLocs.empty()) { 4229 LocVT = XLenVT; 4230 LocInfo = CCValAssign::Indirect; 4231 PendingLocs.push_back( 4232 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); 4233 PendingArgFlags.push_back(ArgFlags); 4234 if (!ArgFlags.isSplitEnd()) { 4235 return false; 4236 } 4237 } 4238 4239 // If the split argument only had two elements, it should be passed directly 4240 // in registers or on the stack. 4241 if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) { 4242 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()"); 4243 // Apply the normal calling convention rules to the first half of the 4244 // split argument. 4245 CCValAssign VA = PendingLocs[0]; 4246 ISD::ArgFlagsTy AF = PendingArgFlags[0]; 4247 PendingLocs.clear(); 4248 PendingArgFlags.clear(); 4249 return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT, 4250 ArgFlags); 4251 } 4252 4253 // Allocate to a register if possible, or else a stack slot. 4254 Register Reg; 4255 if (ValVT == MVT::f16 && !UseGPRForF16_F32) 4256 Reg = State.AllocateReg(ArgFPR16s); 4257 else if (ValVT == MVT::f32 && !UseGPRForF16_F32) 4258 Reg = State.AllocateReg(ArgFPR32s); 4259 else if (ValVT == MVT::f64 && !UseGPRForF64) 4260 Reg = State.AllocateReg(ArgFPR64s); 4261 else if (ValVT.isScalableVector()) { 4262 const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT); 4263 if (RC == &RISCV::VRRegClass) { 4264 // Assign the first mask argument to V0. 4265 // This is an interim calling convention and it may be changed in the 4266 // future. 4267 if (FirstMaskArgument.hasValue() && 4268 ValNo == FirstMaskArgument.getValue()) { 4269 Reg = State.AllocateReg(RISCV::V0); 4270 } else { 4271 Reg = State.AllocateReg(ArgVRs); 4272 } 4273 } else if (RC == &RISCV::VRM2RegClass) { 4274 Reg = State.AllocateReg(ArgVRM2s); 4275 } else if (RC == &RISCV::VRM4RegClass) { 4276 Reg = State.AllocateReg(ArgVRM4s); 4277 } else if (RC == &RISCV::VRM8RegClass) { 4278 Reg = State.AllocateReg(ArgVRM8s); 4279 } else { 4280 llvm_unreachable("Unhandled class register for ValueType"); 4281 } 4282 if (!Reg) { 4283 LocInfo = CCValAssign::Indirect; 4284 // Try using a GPR to pass the address 4285 Reg = State.AllocateReg(ArgGPRs); 4286 LocVT = XLenVT; 4287 } 4288 } else 4289 Reg = State.AllocateReg(ArgGPRs); 4290 unsigned StackOffset = 4291 Reg ? 0 : State.AllocateStack(XLen / 8, Align(XLen / 8)); 4292 4293 // If we reach this point and PendingLocs is non-empty, we must be at the 4294 // end of a split argument that must be passed indirectly. 
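  // For example, an i128 argument on RV32 is legalized into four i32 pieces;
  // every piece is given the same register or stack slot here and the actual
  // value is passed by address.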
4295 if (!PendingLocs.empty()) { 4296 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()"); 4297 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()"); 4298 4299 for (auto &It : PendingLocs) { 4300 if (Reg) 4301 It.convertToReg(Reg); 4302 else 4303 It.convertToMem(StackOffset); 4304 State.addLoc(It); 4305 } 4306 PendingLocs.clear(); 4307 PendingArgFlags.clear(); 4308 return false; 4309 } 4310 4311 assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT || 4312 (TLI.getSubtarget().hasStdExtV() && ValVT.isScalableVector())) && 4313 "Expected an XLenVT or scalable vector types at this stage"); 4314 4315 if (Reg) { 4316 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 4317 return false; 4318 } 4319 4320 // When a floating-point value is passed on the stack, no bit-conversion is 4321 // needed. 4322 if (ValVT.isFloatingPoint()) { 4323 LocVT = ValVT; 4324 LocInfo = CCValAssign::Full; 4325 } 4326 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 4327 return false; 4328 } 4329 4330 template <typename ArgTy> 4331 static Optional<unsigned> preAssignMask(const ArgTy &Args) { 4332 for (const auto &ArgIdx : enumerate(Args)) { 4333 MVT ArgVT = ArgIdx.value().VT; 4334 if (ArgVT.isScalableVector() && 4335 ArgVT.getVectorElementType().SimpleTy == MVT::i1) 4336 return ArgIdx.index(); 4337 } 4338 return None; 4339 } 4340 4341 void RISCVTargetLowering::analyzeInputArgs( 4342 MachineFunction &MF, CCState &CCInfo, 4343 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const { 4344 unsigned NumArgs = Ins.size(); 4345 FunctionType *FType = MF.getFunction().getFunctionType(); 4346 4347 Optional<unsigned> FirstMaskArgument; 4348 if (Subtarget.hasStdExtV()) 4349 FirstMaskArgument = preAssignMask(Ins); 4350 4351 for (unsigned i = 0; i != NumArgs; ++i) { 4352 MVT ArgVT = Ins[i].VT; 4353 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; 4354 4355 Type *ArgTy = nullptr; 4356 if (IsRet) 4357 ArgTy = FType->getReturnType(); 4358 else if (Ins[i].isOrigArg()) 4359 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex()); 4360 4361 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 4362 if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, 4363 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this, 4364 FirstMaskArgument)) { 4365 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " 4366 << EVT(ArgVT).getEVTString() << '\n'); 4367 llvm_unreachable(nullptr); 4368 } 4369 } 4370 } 4371 4372 void RISCVTargetLowering::analyzeOutputArgs( 4373 MachineFunction &MF, CCState &CCInfo, 4374 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet, 4375 CallLoweringInfo *CLI) const { 4376 unsigned NumArgs = Outs.size(); 4377 4378 Optional<unsigned> FirstMaskArgument; 4379 if (Subtarget.hasStdExtV()) 4380 FirstMaskArgument = preAssignMask(Outs); 4381 4382 for (unsigned i = 0; i != NumArgs; i++) { 4383 MVT ArgVT = Outs[i].VT; 4384 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 4385 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr; 4386 4387 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 4388 if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, 4389 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this, 4390 FirstMaskArgument)) { 4391 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " 4392 << EVT(ArgVT).getEVTString() << "\n"); 4393 llvm_unreachable(nullptr); 4394 } 4395 } 4396 } 4397 4398 // Convert Val to a ValVT. 
Should not be called for CCValAssign::Indirect 4399 // values. 4400 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, 4401 const CCValAssign &VA, const SDLoc &DL) { 4402 switch (VA.getLocInfo()) { 4403 default: 4404 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 4405 case CCValAssign::Full: 4406 break; 4407 case CCValAssign::BCvt: 4408 if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16) 4409 Val = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, Val); 4410 else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 4411 Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val); 4412 else 4413 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); 4414 break; 4415 } 4416 return Val; 4417 } 4418 4419 // The caller is responsible for loading the full value if the argument is 4420 // passed with CCValAssign::Indirect. 4421 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, 4422 const CCValAssign &VA, const SDLoc &DL, 4423 const RISCVTargetLowering &TLI) { 4424 MachineFunction &MF = DAG.getMachineFunction(); 4425 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 4426 EVT LocVT = VA.getLocVT(); 4427 SDValue Val; 4428 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT()); 4429 Register VReg = RegInfo.createVirtualRegister(RC); 4430 RegInfo.addLiveIn(VA.getLocReg(), VReg); 4431 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); 4432 4433 if (VA.getLocInfo() == CCValAssign::Indirect) 4434 return Val; 4435 4436 return convertLocVTToValVT(DAG, Val, VA, DL); 4437 } 4438 4439 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, 4440 const CCValAssign &VA, const SDLoc &DL) { 4441 EVT LocVT = VA.getLocVT(); 4442 4443 switch (VA.getLocInfo()) { 4444 default: 4445 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 4446 case CCValAssign::Full: 4447 break; 4448 case CCValAssign::BCvt: 4449 if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16) 4450 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val); 4451 else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 4452 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val); 4453 else 4454 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val); 4455 break; 4456 } 4457 return Val; 4458 } 4459 4460 // The caller is responsible for loading the full value if the argument is 4461 // passed with CCValAssign::Indirect. 
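// unpackFromMemLoc recreates the fixed stack object assigned to the argument
// and loads from it; all of the LocInfo kinds handled below use a plain
// (non-extending) load.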
4462 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, 4463 const CCValAssign &VA, const SDLoc &DL) { 4464 MachineFunction &MF = DAG.getMachineFunction(); 4465 MachineFrameInfo &MFI = MF.getFrameInfo(); 4466 EVT LocVT = VA.getLocVT(); 4467 EVT ValVT = VA.getValVT(); 4468 EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)); 4469 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8, 4470 VA.getLocMemOffset(), /*Immutable=*/true); 4471 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 4472 SDValue Val; 4473 4474 ISD::LoadExtType ExtType; 4475 switch (VA.getLocInfo()) { 4476 default: 4477 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 4478 case CCValAssign::Full: 4479 case CCValAssign::Indirect: 4480 case CCValAssign::BCvt: 4481 ExtType = ISD::NON_EXTLOAD; 4482 break; 4483 } 4484 Val = DAG.getExtLoad( 4485 ExtType, DL, LocVT, Chain, FIN, 4486 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT); 4487 return Val; 4488 } 4489 4490 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, 4491 const CCValAssign &VA, const SDLoc &DL) { 4492 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 && 4493 "Unexpected VA"); 4494 MachineFunction &MF = DAG.getMachineFunction(); 4495 MachineFrameInfo &MFI = MF.getFrameInfo(); 4496 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 4497 4498 if (VA.isMemLoc()) { 4499 // f64 is passed on the stack. 4500 int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true); 4501 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); 4502 return DAG.getLoad(MVT::f64, DL, Chain, FIN, 4503 MachinePointerInfo::getFixedStack(MF, FI)); 4504 } 4505 4506 assert(VA.isRegLoc() && "Expected register VA assignment"); 4507 4508 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 4509 RegInfo.addLiveIn(VA.getLocReg(), LoVReg); 4510 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32); 4511 SDValue Hi; 4512 if (VA.getLocReg() == RISCV::X17) { 4513 // Second half of f64 is passed on the stack. 4514 int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true); 4515 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); 4516 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN, 4517 MachinePointerInfo::getFixedStack(MF, FI)); 4518 } else { 4519 // Second half of f64 is passed in another GPR. 4520 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 4521 RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg); 4522 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32); 4523 } 4524 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi); 4525 } 4526 4527 // FastCC gives less than a 1% performance improvement on some particular 4528 // benchmarks, but it may theoretically benefit other cases. 4529 static bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT, 4530 CCValAssign::LocInfo LocInfo, 4531 ISD::ArgFlagsTy ArgFlags, CCState &State) { 4532 4533 if (LocVT == MVT::i32 || LocVT == MVT::i64) { 4534 // X5 and X6 might be used for save-restore libcall.
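// They are therefore omitted from the allocation order below, which uses
// a0-a7 first and then the remaining caller-saved temporaries (t2, t3-t6).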
4535 static const MCPhysReg GPRList[] = { 4536 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, 4537 RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28, 4538 RISCV::X29, RISCV::X30, RISCV::X31}; 4539 if (unsigned Reg = State.AllocateReg(GPRList)) { 4540 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 4541 return false; 4542 } 4543 } 4544 4545 if (LocVT == MVT::f16) { 4546 static const MCPhysReg FPR16List[] = { 4547 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H, 4548 RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H, 4549 RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H, 4550 RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H}; 4551 if (unsigned Reg = State.AllocateReg(FPR16List)) { 4552 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 4553 return false; 4554 } 4555 } 4556 4557 if (LocVT == MVT::f32) { 4558 static const MCPhysReg FPR32List[] = { 4559 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F, 4560 RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F, 4561 RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F, 4562 RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F}; 4563 if (unsigned Reg = State.AllocateReg(FPR32List)) { 4564 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 4565 return false; 4566 } 4567 } 4568 4569 if (LocVT == MVT::f64) { 4570 static const MCPhysReg FPR64List[] = { 4571 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D, 4572 RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D, 4573 RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D, 4574 RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D}; 4575 if (unsigned Reg = State.AllocateReg(FPR64List)) { 4576 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 4577 return false; 4578 } 4579 } 4580 4581 if (LocVT == MVT::i32 || LocVT == MVT::f32) { 4582 unsigned Offset4 = State.AllocateStack(4, Align(4)); 4583 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo)); 4584 return false; 4585 } 4586 4587 if (LocVT == MVT::i64 || LocVT == MVT::f64) { 4588 unsigned Offset5 = State.AllocateStack(8, Align(8)); 4589 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo)); 4590 return false; 4591 } 4592 4593 return true; // CC didn't match. 4594 } 4595 4596 static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, 4597 CCValAssign::LocInfo LocInfo, 4598 ISD::ArgFlagsTy ArgFlags, CCState &State) { 4599 4600 if (LocVT == MVT::i32 || LocVT == MVT::i64) { 4601 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim 4602 // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11 4603 static const MCPhysReg GPRList[] = { 4604 RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22, 4605 RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27}; 4606 if (unsigned Reg = State.AllocateReg(GPRList)) { 4607 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 4608 return false; 4609 } 4610 } 4611 4612 if (LocVT == MVT::f32) { 4613 // Pass in STG registers: F1, ..., F6 4614 // fs0 ... 
fs5 4615 static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F, 4616 RISCV::F18_F, RISCV::F19_F, 4617 RISCV::F20_F, RISCV::F21_F}; 4618 if (unsigned Reg = State.AllocateReg(FPR32List)) { 4619 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 4620 return false; 4621 } 4622 } 4623 4624 if (LocVT == MVT::f64) { 4625 // Pass in STG registers: D1, ..., D6 4626 // fs6 ... fs11 4627 static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D, 4628 RISCV::F24_D, RISCV::F25_D, 4629 RISCV::F26_D, RISCV::F27_D}; 4630 if (unsigned Reg = State.AllocateReg(FPR64List)) { 4631 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 4632 return false; 4633 } 4634 } 4635 4636 report_fatal_error("No registers left in GHC calling convention"); 4637 return true; 4638 } 4639 4640 // Transform physical registers into virtual registers. 4641 SDValue RISCVTargetLowering::LowerFormalArguments( 4642 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, 4643 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, 4644 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 4645 4646 MachineFunction &MF = DAG.getMachineFunction(); 4647 4648 switch (CallConv) { 4649 default: 4650 report_fatal_error("Unsupported calling convention"); 4651 case CallingConv::C: 4652 case CallingConv::Fast: 4653 break; 4654 case CallingConv::GHC: 4655 if (!MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtF] || 4656 !MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtD]) 4657 report_fatal_error( 4658 "GHC calling convention requires the F and D instruction set extensions"); 4659 } 4660 4661 const Function &Func = MF.getFunction(); 4662 if (Func.hasFnAttribute("interrupt")) { 4663 if (!Func.arg_empty()) 4664 report_fatal_error( 4665 "Functions with the interrupt attribute cannot have arguments!"); 4666 4667 StringRef Kind = 4668 MF.getFunction().getFnAttribute("interrupt").getValueAsString(); 4669 4670 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine")) 4671 report_fatal_error( 4672 "Function interrupt attribute argument not supported!"); 4673 } 4674 4675 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 4676 MVT XLenVT = Subtarget.getXLenVT(); 4677 unsigned XLenInBytes = Subtarget.getXLen() / 8; 4678 // Used with varargs to accumulate store chains. 4679 std::vector<SDValue> OutChains; 4680 4681 // Assign locations to all of the incoming arguments. 4682 SmallVector<CCValAssign, 16> ArgLocs; 4683 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 4684 4685 if (CallConv == CallingConv::Fast) 4686 CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_FastCC); 4687 else if (CallConv == CallingConv::GHC) 4688 CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC); 4689 else 4690 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false); 4691 4692 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 4693 CCValAssign &VA = ArgLocs[i]; 4694 SDValue ArgValue; 4695 // Passing f64 on RV32D with a soft float ABI must be handled as a special 4696 // case. 4697 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) 4698 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL); 4699 else if (VA.isRegLoc()) 4700 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this); 4701 else 4702 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL); 4703 4704 if (VA.getLocInfo() == CCValAssign::Indirect) { 4705 // If the original argument was split and passed by reference (e.g. i128 4706 // on RV32), we need to load all parts of it here (using the same 4707 // address).
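// For example, an i128 argument on RV32 is split into four i32 parts: the
// address arrives in a single GPR (or stack slot), the first part is loaded
// directly from it, and each remaining part is loaded at its PartOffset from
// that same base address.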
4708 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue, 4709 MachinePointerInfo())); 4710 unsigned ArgIndex = Ins[i].OrigArgIndex; 4711 assert(Ins[i].PartOffset == 0); 4712 while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) { 4713 CCValAssign &PartVA = ArgLocs[i + 1]; 4714 unsigned PartOffset = Ins[i + 1].PartOffset; 4715 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, 4716 DAG.getIntPtrConstant(PartOffset, DL)); 4717 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address, 4718 MachinePointerInfo())); 4719 ++i; 4720 } 4721 continue; 4722 } 4723 InVals.push_back(ArgValue); 4724 } 4725 4726 if (IsVarArg) { 4727 ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs); 4728 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs); 4729 const TargetRegisterClass *RC = &RISCV::GPRRegClass; 4730 MachineFrameInfo &MFI = MF.getFrameInfo(); 4731 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 4732 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); 4733 4734 // Offset of the first variable argument from stack pointer, and size of 4735 // the vararg save area. For now, the varargs save area is either zero or 4736 // large enough to hold a0-a7. 4737 int VaArgOffset, VarArgsSaveSize; 4738 4739 // If all registers are allocated, then all varargs must be passed on the 4740 // stack and we don't need to save any argregs. 4741 if (ArgRegs.size() == Idx) { 4742 VaArgOffset = CCInfo.getNextStackOffset(); 4743 VarArgsSaveSize = 0; 4744 } else { 4745 VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx); 4746 VaArgOffset = -VarArgsSaveSize; 4747 } 4748 4749 // Record the frame index of the first variable argument, 4750 // which is needed to implement VASTART. 4751 int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true); 4752 RVFI->setVarArgsFrameIndex(FI); 4753 4754 // If saving an odd number of registers then create an extra stack slot to 4755 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures 4756 // offsets to even-numbered registers remain 2*XLEN-aligned. 4757 if (Idx % 2) { 4758 MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true); 4759 VarArgsSaveSize += XLenInBytes; 4760 } 4761 4762 // Copy the integer registers that may have been used for passing varargs 4763 // to the vararg save area. 4764 for (unsigned I = Idx; I < ArgRegs.size(); 4765 ++I, VaArgOffset += XLenInBytes) { 4766 const Register Reg = RegInfo.createVirtualRegister(RC); 4767 RegInfo.addLiveIn(ArgRegs[I], Reg); 4768 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT); 4769 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true); 4770 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 4771 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff, 4772 MachinePointerInfo::getFixedStack(MF, FI)); 4773 cast<StoreSDNode>(Store.getNode()) 4774 ->getMemOperand() 4775 ->setValue((Value *)nullptr); 4776 OutChains.push_back(Store); 4777 } 4778 RVFI->setVarArgsSaveSize(VarArgsSaveSize); 4779 } 4780 4781 // All stores are grouped in one node to allow the matching between 4782 // the size of Ins and InVals. This only happens for vararg functions. 4783 if (!OutChains.empty()) { 4784 OutChains.push_back(Chain); 4785 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains); 4786 } 4787 4788 return Chain; 4789 } 4790 4791 /// isEligibleForTailCallOptimization - Check whether the call is eligible 4792 /// for tail call optimization.
4793 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization. 4794 bool RISCVTargetLowering::isEligibleForTailCallOptimization( 4795 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, 4796 const SmallVector<CCValAssign, 16> &ArgLocs) const { 4797 4798 auto &Callee = CLI.Callee; 4799 auto CalleeCC = CLI.CallConv; 4800 auto &Outs = CLI.Outs; 4801 auto &Caller = MF.getFunction(); 4802 auto CallerCC = Caller.getCallingConv(); 4803 4804 // Exception-handling functions need a special set of instructions to 4805 // indicate a return to the hardware. Tail-calling another function would 4806 // probably break this. 4807 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This 4808 // should be expanded as new function attributes are introduced. 4809 if (Caller.hasFnAttribute("interrupt")) 4810 return false; 4811 4812 // Do not tail call opt if the stack is used to pass parameters. 4813 if (CCInfo.getNextStackOffset() != 0) 4814 return false; 4815 4816 // Do not tail call opt if any parameters need to be passed indirectly. 4817 // Values larger than 2*XLEN (e.g. fp128 long doubles and i128) are passed 4818 // indirectly: the address of the value goes in a register or, if no register 4819 // is available, on the stack. Passing indirectly usually also requires 4820 // allocating stack space to hold the value itself, so the 4821 // CCInfo.getNextStackOffset() != 0 check above is not sufficient and we must 4822 // also reject the call if any entry in ArgLocs is marked 4823 // CCValAssign::Indirect. 4824 for (auto &VA : ArgLocs) 4825 if (VA.getLocInfo() == CCValAssign::Indirect) 4826 return false; 4827 4828 // Do not tail call opt if either caller or callee uses struct return 4829 // semantics. 4830 auto IsCallerStructRet = Caller.hasStructRetAttr(); 4831 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); 4832 if (IsCallerStructRet || IsCalleeStructRet) 4833 return false; 4834 4835 // Externally-defined functions with weak linkage should not be 4836 // tail-called. The behaviour of branch instructions in this situation (as 4837 // used for tail calls) is implementation-defined, so we cannot rely on the 4838 // linker replacing the tail call with a return. 4839 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 4840 const GlobalValue *GV = G->getGlobal(); 4841 if (GV->hasExternalWeakLinkage()) 4842 return false; 4843 } 4844 4845 // The callee has to preserve all registers the caller needs to preserve. 4846 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); 4847 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); 4848 if (CalleeCC != CallerCC) { 4849 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); 4850 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) 4851 return false; 4852 } 4853 4854 // Byval parameters hand the function a pointer directly into the stack area 4855 // we want to reuse during a tail call. Working around this *is* possible 4856 // but less efficient and uglier in LowerCall. 4857 for (auto &Arg : Outs) 4858 if (Arg.Flags.isByVal()) 4859 return false; 4860 4861 return true; 4862 } 4863 4864 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input 4865 // and output parameter nodes.
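// For tail calls the callseq_start/callseq_end pair is omitted and a
// RISCVISD::TAIL node is emitted in place of RISCVISD::CALL.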
4866 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, 4867 SmallVectorImpl<SDValue> &InVals) const { 4868 SelectionDAG &DAG = CLI.DAG; 4869 SDLoc &DL = CLI.DL; 4870 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 4871 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 4872 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 4873 SDValue Chain = CLI.Chain; 4874 SDValue Callee = CLI.Callee; 4875 bool &IsTailCall = CLI.IsTailCall; 4876 CallingConv::ID CallConv = CLI.CallConv; 4877 bool IsVarArg = CLI.IsVarArg; 4878 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 4879 MVT XLenVT = Subtarget.getXLenVT(); 4880 4881 MachineFunction &MF = DAG.getMachineFunction(); 4882 4883 // Analyze the operands of the call, assigning locations to each operand. 4884 SmallVector<CCValAssign, 16> ArgLocs; 4885 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 4886 4887 if (CallConv == CallingConv::Fast) 4888 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC); 4889 else if (CallConv == CallingConv::GHC) 4890 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC); 4891 else 4892 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI); 4893 4894 // Check if it's really possible to do a tail call. 4895 if (IsTailCall) 4896 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs); 4897 4898 if (IsTailCall) 4899 ++NumTailCalls; 4900 else if (CLI.CB && CLI.CB->isMustTailCall()) 4901 report_fatal_error("failed to perform tail call elimination on a call " 4902 "site marked musttail"); 4903 4904 // Get a count of how many bytes are to be pushed on the stack. 4905 unsigned NumBytes = ArgCCInfo.getNextStackOffset(); 4906 4907 // Create local copies for byval args 4908 SmallVector<SDValue, 8> ByValArgs; 4909 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 4910 ISD::ArgFlagsTy Flags = Outs[i].Flags; 4911 if (!Flags.isByVal()) 4912 continue; 4913 4914 SDValue Arg = OutVals[i]; 4915 unsigned Size = Flags.getByValSize(); 4916 Align Alignment = Flags.getNonZeroByValAlign(); 4917 4918 int FI = 4919 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false); 4920 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 4921 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT); 4922 4923 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment, 4924 /*IsVolatile=*/false, 4925 /*AlwaysInline=*/false, IsTailCall, 4926 MachinePointerInfo(), MachinePointerInfo()); 4927 ByValArgs.push_back(FIPtr); 4928 } 4929 4930 if (!IsTailCall) 4931 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); 4932 4933 // Copy argument values to their designated locations. 4934 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass; 4935 SmallVector<SDValue, 8> MemOpChains; 4936 SDValue StackPtr; 4937 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) { 4938 CCValAssign &VA = ArgLocs[i]; 4939 SDValue ArgValue = OutVals[i]; 4940 ISD::ArgFlagsTy Flags = Outs[i].Flags; 4941 4942 // Handle passing f64 on RV32D with a soft float ABI as a special case. 4943 bool IsF64OnRV32DSoftABI = 4944 VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64; 4945 if (IsF64OnRV32DSoftABI && VA.isRegLoc()) { 4946 SDValue SplitF64 = DAG.getNode( 4947 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue); 4948 SDValue Lo = SplitF64.getValue(0); 4949 SDValue Hi = SplitF64.getValue(1); 4950 4951 Register RegLo = VA.getLocReg(); 4952 RegsToPass.push_back(std::make_pair(RegLo, Lo)); 4953 4954 if (RegLo == RISCV::X17) { 4955 // Second half of f64 is passed on the stack. 
4956 // Work out the address of the stack slot. 4957 if (!StackPtr.getNode()) 4958 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); 4959 // Emit the store. 4960 MemOpChains.push_back( 4961 DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo())); 4962 } else { 4963 // Second half of f64 is passed in another GPR. 4964 assert(RegLo < RISCV::X31 && "Invalid register pair"); 4965 Register RegHigh = RegLo + 1; 4966 RegsToPass.push_back(std::make_pair(RegHigh, Hi)); 4967 } 4968 continue; 4969 } 4970 4971 // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way 4972 // as any other MemLoc. 4973 4974 // Promote the value if needed. 4975 // For now, only handle fully promoted and indirect arguments. 4976 if (VA.getLocInfo() == CCValAssign::Indirect) { 4977 // Store the argument in a stack slot and pass its address. 4978 SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT); 4979 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); 4980 MemOpChains.push_back( 4981 DAG.getStore(Chain, DL, ArgValue, SpillSlot, 4982 MachinePointerInfo::getFixedStack(MF, FI))); 4983 // If the original argument was split (e.g. i128), we need 4984 // to store all parts of it here (and pass just one address). 4985 unsigned ArgIndex = Outs[i].OrigArgIndex; 4986 assert(Outs[i].PartOffset == 0); 4987 while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) { 4988 SDValue PartValue = OutVals[i + 1]; 4989 unsigned PartOffset = Outs[i + 1].PartOffset; 4990 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, 4991 DAG.getIntPtrConstant(PartOffset, DL)); 4992 MemOpChains.push_back( 4993 DAG.getStore(Chain, DL, PartValue, Address, 4994 MachinePointerInfo::getFixedStack(MF, FI))); 4995 ++i; 4996 } 4997 ArgValue = SpillSlot; 4998 } else { 4999 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL); 5000 } 5001 5002 // Use local copy if it is a byval arg. 5003 if (Flags.isByVal()) 5004 ArgValue = ByValArgs[j++]; 5005 5006 if (VA.isRegLoc()) { 5007 // Queue up the argument copies and emit them at the end. 5008 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); 5009 } else { 5010 assert(VA.isMemLoc() && "Argument not register or memory"); 5011 assert(!IsTailCall && "Tail call not allowed if stack is used " 5012 "for passing parameters"); 5013 5014 // Work out the address of the stack slot. 5015 if (!StackPtr.getNode()) 5016 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); 5017 SDValue Address = 5018 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, 5019 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL)); 5020 5021 // Emit the store. 5022 MemOpChains.push_back( 5023 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo())); 5024 } 5025 } 5026 5027 // Join the stores, which are independent of one another. 5028 if (!MemOpChains.empty()) 5029 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); 5030 5031 SDValue Glue; 5032 5033 // Build a sequence of copy-to-reg nodes, chained and glued together. 5034 for (auto &Reg : RegsToPass) { 5035 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue); 5036 Glue = Chain.getValue(1); 5037 } 5038 5039 // Validate that none of the argument registers have been marked as 5040 // reserved, if so report an error. Do the same for the return address if this 5041 // is not a tailcall. 
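// (X1 is the return address register "ra", which a non-tail call clobbers.)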
5042 validateCCReservedRegs(RegsToPass, MF); 5043 if (!IsTailCall && 5044 MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1)) 5045 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 5046 MF.getFunction(), 5047 "Return address register required, but has been reserved."}); 5048 5049 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a 5050 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't 5051 // split it and then direct call can be matched by PseudoCALL. 5052 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) { 5053 const GlobalValue *GV = S->getGlobal(); 5054 5055 unsigned OpFlags = RISCVII::MO_CALL; 5056 if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)) 5057 OpFlags = RISCVII::MO_PLT; 5058 5059 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags); 5060 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 5061 unsigned OpFlags = RISCVII::MO_CALL; 5062 5063 if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(), 5064 nullptr)) 5065 OpFlags = RISCVII::MO_PLT; 5066 5067 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags); 5068 } 5069 5070 // The first call operand is the chain and the second is the target address. 5071 SmallVector<SDValue, 8> Ops; 5072 Ops.push_back(Chain); 5073 Ops.push_back(Callee); 5074 5075 // Add argument registers to the end of the list so that they are 5076 // known live into the call. 5077 for (auto &Reg : RegsToPass) 5078 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType())); 5079 5080 if (!IsTailCall) { 5081 // Add a register mask operand representing the call-preserved registers. 5082 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 5083 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); 5084 assert(Mask && "Missing call preserved mask for calling convention"); 5085 Ops.push_back(DAG.getRegisterMask(Mask)); 5086 } 5087 5088 // Glue the call to the argument copies, if any. 5089 if (Glue.getNode()) 5090 Ops.push_back(Glue); 5091 5092 // Emit the call. 5093 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 5094 5095 if (IsTailCall) { 5096 MF.getFrameInfo().setHasTailCall(); 5097 return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops); 5098 } 5099 5100 Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops); 5101 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); 5102 Glue = Chain.getValue(1); 5103 5104 // Mark the end of the call, which is glued to the call itself. 5105 Chain = DAG.getCALLSEQ_END(Chain, 5106 DAG.getConstant(NumBytes, DL, PtrVT, true), 5107 DAG.getConstant(0, DL, PtrVT, true), 5108 Glue, DL); 5109 Glue = Chain.getValue(1); 5110 5111 // Assign locations to each value returned by this call. 5112 SmallVector<CCValAssign, 16> RVLocs; 5113 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); 5114 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true); 5115 5116 // Copy all of the result registers out of their specified physreg. 
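// An f64 returned on RV32 with a soft-float ABI comes back split across the
// first two argument GPRs and is reassembled with RISCVISD::BuildPairF64.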
5117 for (auto &VA : RVLocs) { 5118 // Copy the value out 5119 SDValue RetValue = 5120 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue); 5121 // Glue the RetValue to the end of the call sequence 5122 Chain = RetValue.getValue(1); 5123 Glue = RetValue.getValue(2); 5124 5125 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { 5126 assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment"); 5127 SDValue RetValue2 = 5128 DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue); 5129 Chain = RetValue2.getValue(1); 5130 Glue = RetValue2.getValue(2); 5131 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue, 5132 RetValue2); 5133 } 5134 5135 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL); 5136 5137 InVals.push_back(RetValue); 5138 } 5139 5140 return Chain; 5141 } 5142 5143 bool RISCVTargetLowering::CanLowerReturn( 5144 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, 5145 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { 5146 SmallVector<CCValAssign, 16> RVLocs; 5147 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); 5148 5149 Optional<unsigned> FirstMaskArgument; 5150 if (Subtarget.hasStdExtV()) 5151 FirstMaskArgument = preAssignMask(Outs); 5152 5153 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 5154 MVT VT = Outs[i].VT; 5155 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 5156 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 5157 if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full, 5158 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr, 5159 *this, FirstMaskArgument)) 5160 return false; 5161 } 5162 return true; 5163 } 5164 5165 SDValue 5166 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, 5167 bool IsVarArg, 5168 const SmallVectorImpl<ISD::OutputArg> &Outs, 5169 const SmallVectorImpl<SDValue> &OutVals, 5170 const SDLoc &DL, SelectionDAG &DAG) const { 5171 const MachineFunction &MF = DAG.getMachineFunction(); 5172 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>(); 5173 5174 // Stores the assignment of the return value to a location. 5175 SmallVector<CCValAssign, 16> RVLocs; 5176 5177 // Info about the registers and stack slot. 5178 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, 5179 *DAG.getContext()); 5180 5181 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true, 5182 nullptr); 5183 5184 if (CallConv == CallingConv::GHC && !RVLocs.empty()) 5185 report_fatal_error("GHC functions return void only"); 5186 5187 SDValue Glue; 5188 SmallVector<SDValue, 4> RetOps(1, Chain); 5189 5190 // Copy the result values into the output registers. 5191 for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) { 5192 SDValue Val = OutVals[i]; 5193 CCValAssign &VA = RVLocs[i]; 5194 assert(VA.isRegLoc() && "Can only return in registers!"); 5195 5196 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { 5197 // Handle returning f64 on RV32D with a soft float ABI. 
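// The value is split with RISCVISD::SplitF64 and returned in a pair of
// consecutive GPRs: the low half in VA's register, the high half in the next.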
5198 assert(VA.isRegLoc() && "Expected return via registers"); 5199 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL, 5200 DAG.getVTList(MVT::i32, MVT::i32), Val); 5201 SDValue Lo = SplitF64.getValue(0); 5202 SDValue Hi = SplitF64.getValue(1); 5203 Register RegLo = VA.getLocReg(); 5204 assert(RegLo < RISCV::X31 && "Invalid register pair"); 5205 Register RegHi = RegLo + 1; 5206 5207 if (STI.isRegisterReservedByUser(RegLo) || 5208 STI.isRegisterReservedByUser(RegHi)) 5209 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 5210 MF.getFunction(), 5211 "Return value register required, but has been reserved."}); 5212 5213 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue); 5214 Glue = Chain.getValue(1); 5215 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32)); 5216 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue); 5217 Glue = Chain.getValue(1); 5218 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32)); 5219 } else { 5220 // Handle a 'normal' return. 5221 Val = convertValVTToLocVT(DAG, Val, VA, DL); 5222 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue); 5223 5224 if (STI.isRegisterReservedByUser(VA.getLocReg())) 5225 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 5226 MF.getFunction(), 5227 "Return value register required, but has been reserved."}); 5228 5229 // Guarantee that all emitted copies are stuck together. 5230 Glue = Chain.getValue(1); 5231 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 5232 } 5233 } 5234 5235 RetOps[0] = Chain; // Update chain. 5236 5237 // Add the glue node if we have it. 5238 if (Glue.getNode()) { 5239 RetOps.push_back(Glue); 5240 } 5241 5242 // Interrupt service routines use different return instructions. 5243 const Function &Func = DAG.getMachineFunction().getFunction(); 5244 if (Func.hasFnAttribute("interrupt")) { 5245 if (!Func.getReturnType()->isVoidTy()) 5246 report_fatal_error( 5247 "Functions with the interrupt attribute must have void return type!"); 5248 5249 MachineFunction &MF = DAG.getMachineFunction(); 5250 StringRef Kind = 5251 MF.getFunction().getFnAttribute("interrupt").getValueAsString(); 5252 5253 unsigned RetOpc; 5254 if (Kind == "user") 5255 RetOpc = RISCVISD::URET_FLAG; 5256 else if (Kind == "supervisor") 5257 RetOpc = RISCVISD::SRET_FLAG; 5258 else 5259 RetOpc = RISCVISD::MRET_FLAG; 5260 5261 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps); 5262 } 5263 5264 return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps); 5265 } 5266 5267 void RISCVTargetLowering::validateCCReservedRegs( 5268 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs, 5269 MachineFunction &MF) const { 5270 const Function &F = MF.getFunction(); 5271 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>(); 5272 5273 if (llvm::any_of(Regs, [&STI](auto Reg) { 5274 return STI.isRegisterReservedByUser(Reg.first); 5275 })) 5276 F.getContext().diagnose(DiagnosticInfoUnsupported{ 5277 F, "Argument register required, but has been reserved."}); 5278 } 5279 5280 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { 5281 return CI->isTailCall(); 5282 } 5283 5284 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { 5285 #define NODE_NAME_CASE(NODE) \ 5286 case RISCVISD::NODE: \ 5287 return "RISCVISD::" #NODE; 5288 // clang-format off 5289 switch ((RISCVISD::NodeType)Opcode) { 5290 case RISCVISD::FIRST_NUMBER: 5291 break; 5292 NODE_NAME_CASE(RET_FLAG) 5293 NODE_NAME_CASE(URET_FLAG) 5294 NODE_NAME_CASE(SRET_FLAG) 5295 
NODE_NAME_CASE(MRET_FLAG) 5296 NODE_NAME_CASE(CALL) 5297 NODE_NAME_CASE(SELECT_CC) 5298 NODE_NAME_CASE(BuildPairF64) 5299 NODE_NAME_CASE(SplitF64) 5300 NODE_NAME_CASE(TAIL) 5301 NODE_NAME_CASE(SLLW) 5302 NODE_NAME_CASE(SRAW) 5303 NODE_NAME_CASE(SRLW) 5304 NODE_NAME_CASE(DIVW) 5305 NODE_NAME_CASE(DIVUW) 5306 NODE_NAME_CASE(REMUW) 5307 NODE_NAME_CASE(ROLW) 5308 NODE_NAME_CASE(RORW) 5309 NODE_NAME_CASE(FSLW) 5310 NODE_NAME_CASE(FSRW) 5311 NODE_NAME_CASE(FSL) 5312 NODE_NAME_CASE(FSR) 5313 NODE_NAME_CASE(FMV_H_X) 5314 NODE_NAME_CASE(FMV_X_ANYEXTH) 5315 NODE_NAME_CASE(FMV_W_X_RV64) 5316 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64) 5317 NODE_NAME_CASE(READ_CYCLE_WIDE) 5318 NODE_NAME_CASE(GREVI) 5319 NODE_NAME_CASE(GREVIW) 5320 NODE_NAME_CASE(GORCI) 5321 NODE_NAME_CASE(GORCIW) 5322 NODE_NAME_CASE(SHFLI) 5323 NODE_NAME_CASE(VMV_V_X_VL) 5324 NODE_NAME_CASE(VFMV_V_F_VL) 5325 NODE_NAME_CASE(VMV_X_S) 5326 NODE_NAME_CASE(SPLAT_VECTOR_I64) 5327 NODE_NAME_CASE(READ_VLENB) 5328 NODE_NAME_CASE(TRUNCATE_VECTOR) 5329 NODE_NAME_CASE(VSLIDEUP_VL) 5330 NODE_NAME_CASE(VSLIDEDOWN_VL) 5331 NODE_NAME_CASE(VID_VL) 5332 NODE_NAME_CASE(VFNCVT_ROD) 5333 NODE_NAME_CASE(VECREDUCE_ADD) 5334 NODE_NAME_CASE(VECREDUCE_UMAX) 5335 NODE_NAME_CASE(VECREDUCE_SMAX) 5336 NODE_NAME_CASE(VECREDUCE_UMIN) 5337 NODE_NAME_CASE(VECREDUCE_SMIN) 5338 NODE_NAME_CASE(VECREDUCE_AND) 5339 NODE_NAME_CASE(VECREDUCE_OR) 5340 NODE_NAME_CASE(VECREDUCE_XOR) 5341 NODE_NAME_CASE(VECREDUCE_FADD) 5342 NODE_NAME_CASE(VECREDUCE_SEQ_FADD) 5343 NODE_NAME_CASE(ADD_VL) 5344 NODE_NAME_CASE(AND_VL) 5345 NODE_NAME_CASE(MUL_VL) 5346 NODE_NAME_CASE(OR_VL) 5347 NODE_NAME_CASE(SDIV_VL) 5348 NODE_NAME_CASE(SHL_VL) 5349 NODE_NAME_CASE(SREM_VL) 5350 NODE_NAME_CASE(SRA_VL) 5351 NODE_NAME_CASE(SRL_VL) 5352 NODE_NAME_CASE(SUB_VL) 5353 NODE_NAME_CASE(UDIV_VL) 5354 NODE_NAME_CASE(UREM_VL) 5355 NODE_NAME_CASE(XOR_VL) 5356 NODE_NAME_CASE(FADD_VL) 5357 NODE_NAME_CASE(FSUB_VL) 5358 NODE_NAME_CASE(FMUL_VL) 5359 NODE_NAME_CASE(FDIV_VL) 5360 NODE_NAME_CASE(FNEG_VL) 5361 NODE_NAME_CASE(FABS_VL) 5362 NODE_NAME_CASE(FSQRT_VL) 5363 NODE_NAME_CASE(FMA_VL) 5364 NODE_NAME_CASE(SMIN_VL) 5365 NODE_NAME_CASE(SMAX_VL) 5366 NODE_NAME_CASE(UMIN_VL) 5367 NODE_NAME_CASE(UMAX_VL) 5368 NODE_NAME_CASE(MULHS_VL) 5369 NODE_NAME_CASE(MULHU_VL) 5370 NODE_NAME_CASE(SETCC_VL) 5371 NODE_NAME_CASE(VSELECT_VL) 5372 NODE_NAME_CASE(VMAND_VL) 5373 NODE_NAME_CASE(VMOR_VL) 5374 NODE_NAME_CASE(VMXOR_VL) 5375 NODE_NAME_CASE(VMCLR_VL) 5376 NODE_NAME_CASE(VMSET_VL) 5377 NODE_NAME_CASE(VRGATHER_VX_VL) 5378 NODE_NAME_CASE(VLE_VL) 5379 NODE_NAME_CASE(VSE_VL) 5380 } 5381 // clang-format on 5382 return nullptr; 5383 #undef NODE_NAME_CASE 5384 } 5385 5386 /// getConstraintType - Given a constraint letter, return the type of 5387 /// constraint it is for this target. 5388 RISCVTargetLowering::ConstraintType 5389 RISCVTargetLowering::getConstraintType(StringRef Constraint) const { 5390 if (Constraint.size() == 1) { 5391 switch (Constraint[0]) { 5392 default: 5393 break; 5394 case 'f': 5395 return C_RegisterClass; 5396 case 'I': 5397 case 'J': 5398 case 'K': 5399 return C_Immediate; 5400 case 'A': 5401 return C_Memory; 5402 } 5403 } 5404 return TargetLowering::getConstraintType(Constraint); 5405 } 5406 5407 std::pair<unsigned, const TargetRegisterClass *> 5408 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, 5409 StringRef Constraint, 5410 MVT VT) const { 5411 // First, see if this is a constraint that directly corresponds to a 5412 // RISCV register class. 
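// "r" maps to the GPR class; "f" maps to the FP register class matching the
// requested VT, provided the corresponding extension (Zfh/F/D) is enabled.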
5413 if (Constraint.size() == 1) { 5414 switch (Constraint[0]) { 5415 case 'r': 5416 return std::make_pair(0U, &RISCV::GPRRegClass); 5417 case 'f': 5418 if (Subtarget.hasStdExtZfh() && VT == MVT::f16) 5419 return std::make_pair(0U, &RISCV::FPR16RegClass); 5420 if (Subtarget.hasStdExtF() && VT == MVT::f32) 5421 return std::make_pair(0U, &RISCV::FPR32RegClass); 5422 if (Subtarget.hasStdExtD() && VT == MVT::f64) 5423 return std::make_pair(0U, &RISCV::FPR64RegClass); 5424 break; 5425 default: 5426 break; 5427 } 5428 } 5429 5430 // Clang will correctly decode the usage of register name aliases into their 5431 // official names. However, other frontends like `rustc` do not. This allows 5432 // users of these frontends to use the ABI names for registers in LLVM-style 5433 // register constraints. 5434 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower()) 5435 .Case("{zero}", RISCV::X0) 5436 .Case("{ra}", RISCV::X1) 5437 .Case("{sp}", RISCV::X2) 5438 .Case("{gp}", RISCV::X3) 5439 .Case("{tp}", RISCV::X4) 5440 .Case("{t0}", RISCV::X5) 5441 .Case("{t1}", RISCV::X6) 5442 .Case("{t2}", RISCV::X7) 5443 .Cases("{s0}", "{fp}", RISCV::X8) 5444 .Case("{s1}", RISCV::X9) 5445 .Case("{a0}", RISCV::X10) 5446 .Case("{a1}", RISCV::X11) 5447 .Case("{a2}", RISCV::X12) 5448 .Case("{a3}", RISCV::X13) 5449 .Case("{a4}", RISCV::X14) 5450 .Case("{a5}", RISCV::X15) 5451 .Case("{a6}", RISCV::X16) 5452 .Case("{a7}", RISCV::X17) 5453 .Case("{s2}", RISCV::X18) 5454 .Case("{s3}", RISCV::X19) 5455 .Case("{s4}", RISCV::X20) 5456 .Case("{s5}", RISCV::X21) 5457 .Case("{s6}", RISCV::X22) 5458 .Case("{s7}", RISCV::X23) 5459 .Case("{s8}", RISCV::X24) 5460 .Case("{s9}", RISCV::X25) 5461 .Case("{s10}", RISCV::X26) 5462 .Case("{s11}", RISCV::X27) 5463 .Case("{t3}", RISCV::X28) 5464 .Case("{t4}", RISCV::X29) 5465 .Case("{t5}", RISCV::X30) 5466 .Case("{t6}", RISCV::X31) 5467 .Default(RISCV::NoRegister); 5468 if (XRegFromAlias != RISCV::NoRegister) 5469 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass); 5470 5471 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the 5472 // TableGen record rather than the AsmName to choose registers for InlineAsm 5473 // constraints, plus we want to match those names to the widest floating point 5474 // register type available, manually select floating point registers here. 5475 // 5476 // The second case is the ABI name of the register, so that frontends can also 5477 // use the ABI names in register constraint lists. 
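// e.g. both "{f10}" and "{fa0}" resolve to the same physical register.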
5478 if (Subtarget.hasStdExtF()) { 5479 unsigned FReg = StringSwitch<unsigned>(Constraint.lower()) 5480 .Cases("{f0}", "{ft0}", RISCV::F0_F) 5481 .Cases("{f1}", "{ft1}", RISCV::F1_F) 5482 .Cases("{f2}", "{ft2}", RISCV::F2_F) 5483 .Cases("{f3}", "{ft3}", RISCV::F3_F) 5484 .Cases("{f4}", "{ft4}", RISCV::F4_F) 5485 .Cases("{f5}", "{ft5}", RISCV::F5_F) 5486 .Cases("{f6}", "{ft6}", RISCV::F6_F) 5487 .Cases("{f7}", "{ft7}", RISCV::F7_F) 5488 .Cases("{f8}", "{fs0}", RISCV::F8_F) 5489 .Cases("{f9}", "{fs1}", RISCV::F9_F) 5490 .Cases("{f10}", "{fa0}", RISCV::F10_F) 5491 .Cases("{f11}", "{fa1}", RISCV::F11_F) 5492 .Cases("{f12}", "{fa2}", RISCV::F12_F) 5493 .Cases("{f13}", "{fa3}", RISCV::F13_F) 5494 .Cases("{f14}", "{fa4}", RISCV::F14_F) 5495 .Cases("{f15}", "{fa5}", RISCV::F15_F) 5496 .Cases("{f16}", "{fa6}", RISCV::F16_F) 5497 .Cases("{f17}", "{fa7}", RISCV::F17_F) 5498 .Cases("{f18}", "{fs2}", RISCV::F18_F) 5499 .Cases("{f19}", "{fs3}", RISCV::F19_F) 5500 .Cases("{f20}", "{fs4}", RISCV::F20_F) 5501 .Cases("{f21}", "{fs5}", RISCV::F21_F) 5502 .Cases("{f22}", "{fs6}", RISCV::F22_F) 5503 .Cases("{f23}", "{fs7}", RISCV::F23_F) 5504 .Cases("{f24}", "{fs8}", RISCV::F24_F) 5505 .Cases("{f25}", "{fs9}", RISCV::F25_F) 5506 .Cases("{f26}", "{fs10}", RISCV::F26_F) 5507 .Cases("{f27}", "{fs11}", RISCV::F27_F) 5508 .Cases("{f28}", "{ft8}", RISCV::F28_F) 5509 .Cases("{f29}", "{ft9}", RISCV::F29_F) 5510 .Cases("{f30}", "{ft10}", RISCV::F30_F) 5511 .Cases("{f31}", "{ft11}", RISCV::F31_F) 5512 .Default(RISCV::NoRegister); 5513 if (FReg != RISCV::NoRegister) { 5514 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg"); 5515 if (Subtarget.hasStdExtD()) { 5516 unsigned RegNo = FReg - RISCV::F0_F; 5517 unsigned DReg = RISCV::F0_D + RegNo; 5518 return std::make_pair(DReg, &RISCV::FPR64RegClass); 5519 } 5520 return std::make_pair(FReg, &RISCV::FPR32RegClass); 5521 } 5522 } 5523 5524 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); 5525 } 5526 5527 unsigned 5528 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const { 5529 // Currently only support length 1 constraints. 5530 if (ConstraintCode.size() == 1) { 5531 switch (ConstraintCode[0]) { 5532 case 'A': 5533 return InlineAsm::Constraint_A; 5534 default: 5535 break; 5536 } 5537 } 5538 5539 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); 5540 } 5541 5542 void RISCVTargetLowering::LowerAsmOperandForConstraint( 5543 SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops, 5544 SelectionDAG &DAG) const { 5545 // Currently only support length 1 constraints. 5546 if (Constraint.length() == 1) { 5547 switch (Constraint[0]) { 5548 case 'I': 5549 // Validate & create a 12-bit signed immediate operand. 5550 if (auto *C = dyn_cast<ConstantSDNode>(Op)) { 5551 uint64_t CVal = C->getSExtValue(); 5552 if (isInt<12>(CVal)) 5553 Ops.push_back( 5554 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT())); 5555 } 5556 return; 5557 case 'J': 5558 // Validate & create an integer zero operand. 5559 if (auto *C = dyn_cast<ConstantSDNode>(Op)) 5560 if (C->getZExtValue() == 0) 5561 Ops.push_back( 5562 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT())); 5563 return; 5564 case 'K': 5565 // Validate & create a 5-bit unsigned immediate operand. 
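// (The 'K' constraint is typically used for CSR-immediate operands, e.g. the
// uimm field of csrrwi/csrrsi/csrrci.)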
5566 if (auto *C = dyn_cast<ConstantSDNode>(Op)) { 5567 uint64_t CVal = C->getZExtValue(); 5568 if (isUInt<5>(CVal)) 5569 Ops.push_back( 5570 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT())); 5571 } 5572 return; 5573 default: 5574 break; 5575 } 5576 } 5577 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 5578 } 5579 5580 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder, 5581 Instruction *Inst, 5582 AtomicOrdering Ord) const { 5583 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent) 5584 return Builder.CreateFence(Ord); 5585 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord)) 5586 return Builder.CreateFence(AtomicOrdering::Release); 5587 return nullptr; 5588 } 5589 5590 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder, 5591 Instruction *Inst, 5592 AtomicOrdering Ord) const { 5593 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord)) 5594 return Builder.CreateFence(AtomicOrdering::Acquire); 5595 return nullptr; 5596 } 5597 5598 TargetLowering::AtomicExpansionKind 5599 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { 5600 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating 5601 // point operations can't be used in an lr/sc sequence without breaking the 5602 // forward-progress guarantee. 5603 if (AI->isFloatingPointOperation()) 5604 return AtomicExpansionKind::CmpXChg; 5605 5606 unsigned Size = AI->getType()->getPrimitiveSizeInBits(); 5607 if (Size == 8 || Size == 16) 5608 return AtomicExpansionKind::MaskedIntrinsic; 5609 return AtomicExpansionKind::None; 5610 } 5611 5612 static Intrinsic::ID 5613 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) { 5614 if (XLen == 32) { 5615 switch (BinOp) { 5616 default: 5617 llvm_unreachable("Unexpected AtomicRMW BinOp"); 5618 case AtomicRMWInst::Xchg: 5619 return Intrinsic::riscv_masked_atomicrmw_xchg_i32; 5620 case AtomicRMWInst::Add: 5621 return Intrinsic::riscv_masked_atomicrmw_add_i32; 5622 case AtomicRMWInst::Sub: 5623 return Intrinsic::riscv_masked_atomicrmw_sub_i32; 5624 case AtomicRMWInst::Nand: 5625 return Intrinsic::riscv_masked_atomicrmw_nand_i32; 5626 case AtomicRMWInst::Max: 5627 return Intrinsic::riscv_masked_atomicrmw_max_i32; 5628 case AtomicRMWInst::Min: 5629 return Intrinsic::riscv_masked_atomicrmw_min_i32; 5630 case AtomicRMWInst::UMax: 5631 return Intrinsic::riscv_masked_atomicrmw_umax_i32; 5632 case AtomicRMWInst::UMin: 5633 return Intrinsic::riscv_masked_atomicrmw_umin_i32; 5634 } 5635 } 5636 5637 if (XLen == 64) { 5638 switch (BinOp) { 5639 default: 5640 llvm_unreachable("Unexpected AtomicRMW BinOp"); 5641 case AtomicRMWInst::Xchg: 5642 return Intrinsic::riscv_masked_atomicrmw_xchg_i64; 5643 case AtomicRMWInst::Add: 5644 return Intrinsic::riscv_masked_atomicrmw_add_i64; 5645 case AtomicRMWInst::Sub: 5646 return Intrinsic::riscv_masked_atomicrmw_sub_i64; 5647 case AtomicRMWInst::Nand: 5648 return Intrinsic::riscv_masked_atomicrmw_nand_i64; 5649 case AtomicRMWInst::Max: 5650 return Intrinsic::riscv_masked_atomicrmw_max_i64; 5651 case AtomicRMWInst::Min: 5652 return Intrinsic::riscv_masked_atomicrmw_min_i64; 5653 case AtomicRMWInst::UMax: 5654 return Intrinsic::riscv_masked_atomicrmw_umax_i64; 5655 case AtomicRMWInst::UMin: 5656 return Intrinsic::riscv_masked_atomicrmw_umin_i64; 5657 } 5658 } 5659 5660 llvm_unreachable("Unexpected XLen\n"); 5661 } 5662 5663 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic( 5664 IRBuilder<> &Builder, 
AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, 5665 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const { 5666 unsigned XLen = Subtarget.getXLen(); 5667 Value *Ordering = 5668 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering())); 5669 Type *Tys[] = {AlignedAddr->getType()}; 5670 Function *LrwOpScwLoop = Intrinsic::getDeclaration( 5671 AI->getModule(), 5672 getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys); 5673 5674 if (XLen == 64) { 5675 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty()); 5676 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); 5677 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty()); 5678 } 5679 5680 Value *Result; 5681 5682 // Must pass the shift amount needed to sign extend the loaded value prior 5683 // to performing a signed comparison for min/max. ShiftAmt is the number of 5684 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which 5685 // is the number of bits to left+right shift the value in order to 5686 // sign-extend. 5687 if (AI->getOperation() == AtomicRMWInst::Min || 5688 AI->getOperation() == AtomicRMWInst::Max) { 5689 const DataLayout &DL = AI->getModule()->getDataLayout(); 5690 unsigned ValWidth = 5691 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType()); 5692 Value *SextShamt = 5693 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt); 5694 Result = Builder.CreateCall(LrwOpScwLoop, 5695 {AlignedAddr, Incr, Mask, SextShamt, Ordering}); 5696 } else { 5697 Result = 5698 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering}); 5699 } 5700 5701 if (XLen == 64) 5702 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); 5703 return Result; 5704 } 5705 5706 TargetLowering::AtomicExpansionKind 5707 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR( 5708 AtomicCmpXchgInst *CI) const { 5709 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits(); 5710 if (Size == 8 || Size == 16) 5711 return AtomicExpansionKind::MaskedIntrinsic; 5712 return AtomicExpansionKind::None; 5713 } 5714 5715 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( 5716 IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, 5717 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { 5718 unsigned XLen = Subtarget.getXLen(); 5719 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord)); 5720 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32; 5721 if (XLen == 64) { 5722 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty()); 5723 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty()); 5724 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); 5725 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64; 5726 } 5727 Type *Tys[] = {AlignedAddr->getType()}; 5728 Function *MaskedCmpXchg = 5729 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys); 5730 Value *Result = Builder.CreateCall( 5731 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering}); 5732 if (XLen == 64) 5733 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); 5734 return Result; 5735 } 5736 5737 bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, 5738 EVT VT) const { 5739 VT = VT.getScalarType(); 5740 5741 if (!VT.isSimple()) 5742 return false; 5743 5744 switch (VT.getSimpleVT().SimpleTy) { 5745 case MVT::f16: 5746 return Subtarget.hasStdExtZfh(); 5747 case MVT::f32: 5748 return Subtarget.hasStdExtF(); 5749 case MVT::f64: 5750 return Subtarget.hasStdExtD(); 5751 default: 5752 break; 
5753 } 5754 5755 return false; 5756 } 5757 5758 Register RISCVTargetLowering::getExceptionPointerRegister( 5759 const Constant *PersonalityFn) const { 5760 return RISCV::X10; 5761 } 5762 5763 Register RISCVTargetLowering::getExceptionSelectorRegister( 5764 const Constant *PersonalityFn) const { 5765 return RISCV::X11; 5766 } 5767 5768 bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const { 5769 // Return false to suppress the unnecessary extensions if the LibCall 5770 // arguments or return value is f32 type for LP64 ABI. 5771 RISCVABI::ABI ABI = Subtarget.getTargetABI(); 5772 if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32)) 5773 return false; 5774 5775 return true; 5776 } 5777 5778 bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const { 5779 if (Subtarget.is64Bit() && Type == MVT::i32) 5780 return true; 5781 5782 return IsSigned; 5783 } 5784 5785 bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT, 5786 SDValue C) const { 5787 // Check integral scalar types. 5788 if (VT.isScalarInteger()) { 5789 // Omit the optimization if the sub target has the M extension and the data 5790 // size exceeds XLen. 5791 if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen()) 5792 return false; 5793 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) { 5794 // Break the MUL to a SLLI and an ADD/SUB. 5795 const APInt &Imm = ConstNode->getAPIntValue(); 5796 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() || 5797 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2()) 5798 return true; 5799 // Omit the following optimization if the sub target has the M extension 5800 // and the data size >= XLen. 5801 if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen()) 5802 return false; 5803 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs 5804 // a pair of LUI/ADDI. 5805 if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) { 5806 APInt ImmS = Imm.ashr(Imm.countTrailingZeros()); 5807 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() || 5808 (1 - ImmS).isPowerOf2()) 5809 return true; 5810 } 5811 } 5812 } 5813 5814 return false; 5815 } 5816 5817 bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const { 5818 if (!Subtarget.useRVVForFixedLengthVectors()) 5819 return false; 5820 5821 if (!VT.isFixedLengthVector()) 5822 return false; 5823 5824 // Don't use RVV for vectors we cannot scalarize if required. 5825 switch (VT.getVectorElementType().SimpleTy) { 5826 // i1 is supported but has different rules. 5827 default: 5828 return false; 5829 case MVT::i1: 5830 // Masks can only use a single register. 5831 if (VT.getVectorNumElements() > Subtarget.getMinRVVVectorSizeInBits()) 5832 return false; 5833 break; 5834 case MVT::i8: 5835 case MVT::i16: 5836 case MVT::i32: 5837 case MVT::i64: 5838 break; 5839 case MVT::f16: 5840 if (!Subtarget.hasStdExtZfh()) 5841 return false; 5842 break; 5843 case MVT::f32: 5844 if (!Subtarget.hasStdExtF()) 5845 return false; 5846 break; 5847 case MVT::f64: 5848 if (!Subtarget.hasStdExtD()) 5849 return false; 5850 break; 5851 } 5852 5853 unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT); 5854 // Don't use RVV for types that don't fit. 5855 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors()) 5856 return false; 5857 5858 // TODO: Perhaps an artificial restriction, but worth having whilst getting 5859 // the base fixed length RVV support in place. 
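// (i.e. only power-of-two element counts are accepted for now.)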
5860 if (!VT.isPow2VectorType()) 5861 return false; 5862 5863 return true; 5864 } 5865 5866 bool RISCVTargetLowering::allowsMisalignedMemoryAccesses( 5867 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, 5868 bool *Fast) const { 5869 if (!VT.isScalableVector()) 5870 return false; 5871 5872 EVT ElemVT = VT.getVectorElementType(); 5873 if (Alignment >= ElemVT.getStoreSize()) { 5874 if (Fast) 5875 *Fast = true; 5876 return true; 5877 } 5878 5879 return false; 5880 } 5881 5882 #define GET_REGISTER_MATCHER 5883 #include "RISCVGenAsmMatcher.inc" 5884 5885 Register 5886 RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT, 5887 const MachineFunction &MF) const { 5888 Register Reg = MatchRegisterAltName(RegName); 5889 if (Reg == RISCV::NoRegister) 5890 Reg = MatchRegisterName(RegName); 5891 if (Reg == RISCV::NoRegister) 5892 report_fatal_error( 5893 Twine("Invalid register name \"" + StringRef(RegName) + "\".")); 5894 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF); 5895 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg)) 5896 report_fatal_error(Twine("Trying to obtain non-reserved register \"" + 5897 StringRef(RegName) + "\".")); 5898 return Reg; 5899 } 5900 5901 namespace llvm { 5902 namespace RISCVVIntrinsicsTable { 5903 5904 #define GET_RISCVVIntrinsicsTable_IMPL 5905 #include "RISCVGenSearchableTables.inc" 5906 5907 } // namespace RISCVVIntrinsicsTable 5908 5909 } // namespace llvm 5910