1 //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the interfaces that RISCV uses to lower LLVM code into a 10 // selection DAG. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "RISCVISelLowering.h" 15 #include "MCTargetDesc/RISCVMatInt.h" 16 #include "RISCV.h" 17 #include "RISCVMachineFunctionInfo.h" 18 #include "RISCVRegisterInfo.h" 19 #include "RISCVSubtarget.h" 20 #include "RISCVTargetMachine.h" 21 #include "llvm/ADT/SmallSet.h" 22 #include "llvm/ADT/Statistic.h" 23 #include "llvm/CodeGen/CallingConvLower.h" 24 #include "llvm/CodeGen/MachineFrameInfo.h" 25 #include "llvm/CodeGen/MachineFunction.h" 26 #include "llvm/CodeGen/MachineInstrBuilder.h" 27 #include "llvm/CodeGen/MachineRegisterInfo.h" 28 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 29 #include "llvm/CodeGen/ValueTypes.h" 30 #include "llvm/IR/DiagnosticInfo.h" 31 #include "llvm/IR/DiagnosticPrinter.h" 32 #include "llvm/IR/IntrinsicsRISCV.h" 33 #include "llvm/Support/Debug.h" 34 #include "llvm/Support/ErrorHandling.h" 35 #include "llvm/Support/KnownBits.h" 36 #include "llvm/Support/MathExtras.h" 37 #include "llvm/Support/raw_ostream.h" 38 39 using namespace llvm; 40 41 #define DEBUG_TYPE "riscv-lower" 42 43 STATISTIC(NumTailCalls, "Number of tail calls"); 44 45 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, 46 const RISCVSubtarget &STI) 47 : TargetLowering(TM), Subtarget(STI) { 48 49 if (Subtarget.isRV32E()) 50 report_fatal_error("Codegen not yet implemented for RV32E"); 51 52 RISCVABI::ABI ABI = Subtarget.getTargetABI(); 53 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI"); 54 55 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) && 56 !Subtarget.hasStdExtF()) { 57 errs() << "Hard-float 'f' ABI can't be used for a target that " 58 "doesn't support the F instruction set extension (ignoring " 59 "target-abi)\n"; 60 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32; 61 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) && 62 !Subtarget.hasStdExtD()) { 63 errs() << "Hard-float 'd' ABI can't be used for a target that " 64 "doesn't support the D instruction set extension (ignoring " 65 "target-abi)\n"; 66 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32; 67 } 68 69 switch (ABI) { 70 default: 71 report_fatal_error("Don't know how to lower this ABI"); 72 case RISCVABI::ABI_ILP32: 73 case RISCVABI::ABI_ILP32F: 74 case RISCVABI::ABI_ILP32D: 75 case RISCVABI::ABI_LP64: 76 case RISCVABI::ABI_LP64F: 77 case RISCVABI::ABI_LP64D: 78 break; 79 } 80 81 MVT XLenVT = Subtarget.getXLenVT(); 82 83 // Set up the register classes. 
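  // The XLEN-wide GPR class is always available; the FP register classes
  // below are only added (making f16/f32/f64 legal) when the corresponding
  // Zfh/F/D extension is enabled.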
84 addRegisterClass(XLenVT, &RISCV::GPRRegClass); 85 86 if (Subtarget.hasStdExtZfh()) 87 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass); 88 if (Subtarget.hasStdExtF()) 89 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass); 90 if (Subtarget.hasStdExtD()) 91 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass); 92 93 static const MVT::SimpleValueType BoolVecVTs[] = { 94 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1, 95 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1}; 96 static const MVT::SimpleValueType IntVecVTs[] = { 97 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8, 98 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16, 99 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32, 100 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64, 101 MVT::nxv4i64, MVT::nxv8i64}; 102 static const MVT::SimpleValueType F16VecVTs[] = { 103 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16, 104 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16}; 105 static const MVT::SimpleValueType F32VecVTs[] = { 106 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32}; 107 static const MVT::SimpleValueType F64VecVTs[] = { 108 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64}; 109 110 if (Subtarget.hasStdExtV()) { 111 auto addRegClassForRVV = [this](MVT VT) { 112 unsigned Size = VT.getSizeInBits().getKnownMinValue(); 113 assert(Size <= 512 && isPowerOf2_32(Size)); 114 const TargetRegisterClass *RC; 115 if (Size <= 64) 116 RC = &RISCV::VRRegClass; 117 else if (Size == 128) 118 RC = &RISCV::VRM2RegClass; 119 else if (Size == 256) 120 RC = &RISCV::VRM4RegClass; 121 else 122 RC = &RISCV::VRM8RegClass; 123 124 addRegisterClass(VT, RC); 125 }; 126 127 for (MVT VT : BoolVecVTs) 128 addRegClassForRVV(VT); 129 for (MVT VT : IntVecVTs) 130 addRegClassForRVV(VT); 131 132 if (Subtarget.hasStdExtZfh()) 133 for (MVT VT : F16VecVTs) 134 addRegClassForRVV(VT); 135 136 if (Subtarget.hasStdExtF()) 137 for (MVT VT : F32VecVTs) 138 addRegClassForRVV(VT); 139 140 if (Subtarget.hasStdExtD()) 141 for (MVT VT : F64VecVTs) 142 addRegClassForRVV(VT); 143 144 if (Subtarget.useRVVForFixedLengthVectors()) { 145 auto addRegClassForFixedVectors = [this](MVT VT) { 146 unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT); 147 const TargetRegisterClass *RC; 148 if (LMul == 1) 149 RC = &RISCV::VRRegClass; 150 else if (LMul == 2) 151 RC = &RISCV::VRM2RegClass; 152 else if (LMul == 4) 153 RC = &RISCV::VRM4RegClass; 154 else if (LMul == 8) 155 RC = &RISCV::VRM8RegClass; 156 else 157 llvm_unreachable("Unexpected LMul!"); 158 159 addRegisterClass(VT, RC); 160 }; 161 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) 162 if (useRVVForFixedLengthVectorVT(VT)) 163 addRegClassForFixedVectors(VT); 164 165 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) 166 if (useRVVForFixedLengthVectorVT(VT)) 167 addRegClassForFixedVectors(VT); 168 } 169 } 170 171 // Compute derived properties from the register classes. 172 computeRegisterProperties(STI.getRegisterInfo()); 173 174 setStackPointerRegisterToSaveRestore(RISCV::X2); 175 176 for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) 177 setLoadExtAction(N, XLenVT, MVT::i1, Promote); 178 179 // TODO: add all necessary setOperationAction calls. 
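  // Operation actions default to Legal, so the calls below only need to mark
  // what the base ISA cannot select directly, either expanding to a generic
  // sequence or custom-lowering via LowerOperation/ReplaceNodeResults.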
180 setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand); 181 182 setOperationAction(ISD::BR_JT, MVT::Other, Expand); 183 setOperationAction(ISD::BR_CC, XLenVT, Expand); 184 setOperationAction(ISD::SELECT_CC, XLenVT, Expand); 185 186 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); 187 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); 188 189 setOperationAction(ISD::VASTART, MVT::Other, Custom); 190 setOperationAction(ISD::VAARG, MVT::Other, Expand); 191 setOperationAction(ISD::VACOPY, MVT::Other, Expand); 192 setOperationAction(ISD::VAEND, MVT::Other, Expand); 193 194 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 195 if (!Subtarget.hasStdExtZbb()) { 196 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); 197 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); 198 } 199 200 if (Subtarget.is64Bit()) { 201 setOperationAction(ISD::ADD, MVT::i32, Custom); 202 setOperationAction(ISD::SUB, MVT::i32, Custom); 203 setOperationAction(ISD::SHL, MVT::i32, Custom); 204 setOperationAction(ISD::SRA, MVT::i32, Custom); 205 setOperationAction(ISD::SRL, MVT::i32, Custom); 206 } 207 208 if (!Subtarget.hasStdExtM()) { 209 setOperationAction(ISD::MUL, XLenVT, Expand); 210 setOperationAction(ISD::MULHS, XLenVT, Expand); 211 setOperationAction(ISD::MULHU, XLenVT, Expand); 212 setOperationAction(ISD::SDIV, XLenVT, Expand); 213 setOperationAction(ISD::UDIV, XLenVT, Expand); 214 setOperationAction(ISD::SREM, XLenVT, Expand); 215 setOperationAction(ISD::UREM, XLenVT, Expand); 216 } 217 218 if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) { 219 setOperationAction(ISD::MUL, MVT::i32, Custom); 220 221 setOperationAction(ISD::SDIV, MVT::i8, Custom); 222 setOperationAction(ISD::UDIV, MVT::i8, Custom); 223 setOperationAction(ISD::UREM, MVT::i8, Custom); 224 setOperationAction(ISD::SDIV, MVT::i16, Custom); 225 setOperationAction(ISD::UDIV, MVT::i16, Custom); 226 setOperationAction(ISD::UREM, MVT::i16, Custom); 227 setOperationAction(ISD::SDIV, MVT::i32, Custom); 228 setOperationAction(ISD::UDIV, MVT::i32, Custom); 229 setOperationAction(ISD::UREM, MVT::i32, Custom); 230 } 231 232 setOperationAction(ISD::SDIVREM, XLenVT, Expand); 233 setOperationAction(ISD::UDIVREM, XLenVT, Expand); 234 setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand); 235 setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand); 236 237 setOperationAction(ISD::SHL_PARTS, XLenVT, Custom); 238 setOperationAction(ISD::SRL_PARTS, XLenVT, Custom); 239 setOperationAction(ISD::SRA_PARTS, XLenVT, Custom); 240 241 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) { 242 if (Subtarget.is64Bit()) { 243 setOperationAction(ISD::ROTL, MVT::i32, Custom); 244 setOperationAction(ISD::ROTR, MVT::i32, Custom); 245 } 246 } else { 247 setOperationAction(ISD::ROTL, XLenVT, Expand); 248 setOperationAction(ISD::ROTR, XLenVT, Expand); 249 } 250 251 if (Subtarget.hasStdExtZbp()) { 252 // Custom lower bswap/bitreverse so we can convert them to GREVI to enable 253 // more combining. 254 setOperationAction(ISD::BITREVERSE, XLenVT, Custom); 255 setOperationAction(ISD::BSWAP, XLenVT, Custom); 256 257 if (Subtarget.is64Bit()) { 258 setOperationAction(ISD::BITREVERSE, MVT::i32, Custom); 259 setOperationAction(ISD::BSWAP, MVT::i32, Custom); 260 } 261 } else { 262 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll 263 // pattern match it directly in isel. 264 setOperationAction(ISD::BSWAP, XLenVT, 265 Subtarget.hasStdExtZbb() ? 
Legal : Expand); 266 } 267 268 if (Subtarget.hasStdExtZbb()) { 269 setOperationAction(ISD::SMIN, XLenVT, Legal); 270 setOperationAction(ISD::SMAX, XLenVT, Legal); 271 setOperationAction(ISD::UMIN, XLenVT, Legal); 272 setOperationAction(ISD::UMAX, XLenVT, Legal); 273 } else { 274 setOperationAction(ISD::CTTZ, XLenVT, Expand); 275 setOperationAction(ISD::CTLZ, XLenVT, Expand); 276 setOperationAction(ISD::CTPOP, XLenVT, Expand); 277 } 278 279 if (Subtarget.hasStdExtZbt()) { 280 setOperationAction(ISD::FSHL, XLenVT, Custom); 281 setOperationAction(ISD::FSHR, XLenVT, Custom); 282 setOperationAction(ISD::SELECT, XLenVT, Legal); 283 284 if (Subtarget.is64Bit()) { 285 setOperationAction(ISD::FSHL, MVT::i32, Custom); 286 setOperationAction(ISD::FSHR, MVT::i32, Custom); 287 } 288 } else { 289 setOperationAction(ISD::SELECT, XLenVT, Custom); 290 } 291 292 ISD::CondCode FPCCToExpand[] = { 293 ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT, 294 ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT, 295 ISD::SETGE, ISD::SETNE, ISD::SETO, ISD::SETUO}; 296 297 ISD::NodeType FPOpToExpand[] = { 298 ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FP16_TO_FP, 299 ISD::FP_TO_FP16}; 300 301 if (Subtarget.hasStdExtZfh()) 302 setOperationAction(ISD::BITCAST, MVT::i16, Custom); 303 304 if (Subtarget.hasStdExtZfh()) { 305 setOperationAction(ISD::FMINNUM, MVT::f16, Legal); 306 setOperationAction(ISD::FMAXNUM, MVT::f16, Legal); 307 for (auto CC : FPCCToExpand) 308 setCondCodeAction(CC, MVT::f16, Expand); 309 setOperationAction(ISD::SELECT_CC, MVT::f16, Expand); 310 setOperationAction(ISD::SELECT, MVT::f16, Custom); 311 setOperationAction(ISD::BR_CC, MVT::f16, Expand); 312 for (auto Op : FPOpToExpand) 313 setOperationAction(Op, MVT::f16, Expand); 314 } 315 316 if (Subtarget.hasStdExtF()) { 317 setOperationAction(ISD::FMINNUM, MVT::f32, Legal); 318 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); 319 for (auto CC : FPCCToExpand) 320 setCondCodeAction(CC, MVT::f32, Expand); 321 setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); 322 setOperationAction(ISD::SELECT, MVT::f32, Custom); 323 setOperationAction(ISD::BR_CC, MVT::f32, Expand); 324 for (auto Op : FPOpToExpand) 325 setOperationAction(Op, MVT::f32, Expand); 326 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); 327 setTruncStoreAction(MVT::f32, MVT::f16, Expand); 328 } 329 330 if (Subtarget.hasStdExtF() && Subtarget.is64Bit()) 331 setOperationAction(ISD::BITCAST, MVT::i32, Custom); 332 333 if (Subtarget.hasStdExtD()) { 334 setOperationAction(ISD::FMINNUM, MVT::f64, Legal); 335 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); 336 for (auto CC : FPCCToExpand) 337 setCondCodeAction(CC, MVT::f64, Expand); 338 setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); 339 setOperationAction(ISD::SELECT, MVT::f64, Custom); 340 setOperationAction(ISD::BR_CC, MVT::f64, Expand); 341 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); 342 setTruncStoreAction(MVT::f64, MVT::f32, Expand); 343 for (auto Op : FPOpToExpand) 344 setOperationAction(Op, MVT::f64, Expand); 345 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); 346 setTruncStoreAction(MVT::f64, MVT::f16, Expand); 347 } 348 349 if (Subtarget.is64Bit()) { 350 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); 351 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); 352 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom); 353 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom); 354 } 355 356 setOperationAction(ISD::GlobalAddress, 
XLenVT, Custom); 357 setOperationAction(ISD::BlockAddress, XLenVT, Custom); 358 setOperationAction(ISD::ConstantPool, XLenVT, Custom); 359 setOperationAction(ISD::JumpTable, XLenVT, Custom); 360 361 setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom); 362 363 // TODO: On M-mode only targets, the cycle[h] CSR may not be present. 364 // Unfortunately this can't be determined just from the ISA naming string. 365 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, 366 Subtarget.is64Bit() ? Legal : Custom); 367 368 setOperationAction(ISD::TRAP, MVT::Other, Legal); 369 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); 370 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 371 372 if (Subtarget.hasStdExtA()) { 373 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen()); 374 setMinCmpXchgSizeInBits(32); 375 } else { 376 setMaxAtomicSizeInBitsSupported(0); 377 } 378 379 setBooleanContents(ZeroOrOneBooleanContent); 380 381 if (Subtarget.hasStdExtV()) { 382 setBooleanVectorContents(ZeroOrOneBooleanContent); 383 384 setOperationAction(ISD::VSCALE, XLenVT, Custom); 385 386 // RVV intrinsics may have illegal operands. 387 // We also need to custom legalize vmv.x.s. 388 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom); 389 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom); 390 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom); 391 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom); 392 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom); 393 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom); 394 395 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); 396 397 if (Subtarget.is64Bit()) { 398 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); 399 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); 400 } else { 401 // We must custom-lower certain vXi64 operations on RV32 due to the vector 402 // element type being illegal. 403 setOperationAction(ISD::SPLAT_VECTOR, MVT::i64, Custom); 404 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i64, Custom); 405 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::i64, Custom); 406 407 setOperationAction(ISD::VECREDUCE_ADD, MVT::i64, Custom); 408 setOperationAction(ISD::VECREDUCE_AND, MVT::i64, Custom); 409 setOperationAction(ISD::VECREDUCE_OR, MVT::i64, Custom); 410 setOperationAction(ISD::VECREDUCE_XOR, MVT::i64, Custom); 411 setOperationAction(ISD::VECREDUCE_SMAX, MVT::i64, Custom); 412 setOperationAction(ISD::VECREDUCE_SMIN, MVT::i64, Custom); 413 setOperationAction(ISD::VECREDUCE_UMAX, MVT::i64, Custom); 414 setOperationAction(ISD::VECREDUCE_UMIN, MVT::i64, Custom); 415 } 416 417 for (MVT VT : BoolVecVTs) { 418 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); 419 420 // Mask VTs are custom-expanded into a series of standard nodes 421 setOperationAction(ISD::TRUNCATE, VT, Custom); 422 } 423 424 for (MVT VT : IntVecVTs) { 425 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); 426 427 setOperationAction(ISD::SMIN, VT, Legal); 428 setOperationAction(ISD::SMAX, VT, Legal); 429 setOperationAction(ISD::UMIN, VT, Legal); 430 setOperationAction(ISD::UMAX, VT, Legal); 431 432 setOperationAction(ISD::ROTL, VT, Expand); 433 setOperationAction(ISD::ROTR, VT, Expand); 434 435 // Custom-lower extensions and truncations from/to mask types. 
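      // An extension whose source is a mask vector goes through
      // lowerVectorMaskExt (see LowerOperation), which uses 1 as the extended
      // value for any/zero-extension and -1 for sign-extension.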
436 setOperationAction(ISD::ANY_EXTEND, VT, Custom); 437 setOperationAction(ISD::SIGN_EXTEND, VT, Custom); 438 setOperationAction(ISD::ZERO_EXTEND, VT, Custom); 439 440 // RVV has native int->float & float->int conversions where the 441 // element type sizes are within one power-of-two of each other. Any 442 // wider distances between type sizes have to be lowered as sequences 443 // which progressively narrow the gap in stages. 444 setOperationAction(ISD::SINT_TO_FP, VT, Custom); 445 setOperationAction(ISD::UINT_TO_FP, VT, Custom); 446 setOperationAction(ISD::FP_TO_SINT, VT, Custom); 447 setOperationAction(ISD::FP_TO_UINT, VT, Custom); 448 449 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR" 450 // nodes which truncate by one power of two at a time. 451 setOperationAction(ISD::TRUNCATE, VT, Custom); 452 453 // Custom-lower insert/extract operations to simplify patterns. 454 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); 455 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); 456 457 // Custom-lower reduction operations to set up the corresponding custom 458 // nodes' operands. 459 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); 460 setOperationAction(ISD::VECREDUCE_AND, VT, Custom); 461 setOperationAction(ISD::VECREDUCE_OR, VT, Custom); 462 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); 463 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom); 464 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom); 465 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom); 466 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom); 467 } 468 469 // Expand various CCs to best match the RVV ISA, which natively supports UNE 470 // but no other unordered comparisons, and supports all ordered comparisons 471 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization 472 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE), 473 // and we pattern-match those back to the "original", swapping operands once 474 // more. This way we catch both operations and both "vf" and "fv" forms with 475 // fewer patterns. 476 ISD::CondCode VFPCCToExpand[] = { 477 ISD::SETO, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT, 478 ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO, 479 ISD::SETGT, ISD::SETOGT, ISD::SETGE, ISD::SETOGE, 480 }; 481 482 // Sets common operation actions on RVV floating-point vector types. 483 const auto SetCommonVFPActions = [&](MVT VT) { 484 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); 485 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type 486 // sizes are within one power-of-two of each other. Therefore conversions 487 // between vXf16 and vXf64 must be lowered as sequences which convert via 488 // vXf32. 489 setOperationAction(ISD::FP_ROUND, VT, Custom); 490 setOperationAction(ISD::FP_EXTEND, VT, Custom); 491 // Custom-lower insert/extract operations to simplify patterns. 492 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); 493 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); 494 // Expand various condition codes (explained above). 
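      // For example a (setogt x, y) is expanded to (setolt y, x); isel then
      // swaps the operands back when matching, so a single set of patterns
      // covers both the "vf" and "fv" forms.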
495 for (auto CC : VFPCCToExpand) 496 setCondCodeAction(CC, VT, Expand); 497 498 setOperationAction(ISD::VECREDUCE_FADD, VT, Custom); 499 setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom); 500 }; 501 502 if (Subtarget.hasStdExtZfh()) 503 for (MVT VT : F16VecVTs) 504 SetCommonVFPActions(VT); 505 506 if (Subtarget.hasStdExtF()) 507 for (MVT VT : F32VecVTs) 508 SetCommonVFPActions(VT); 509 510 if (Subtarget.hasStdExtD()) 511 for (MVT VT : F64VecVTs) 512 SetCommonVFPActions(VT); 513 514 if (Subtarget.useRVVForFixedLengthVectors()) { 515 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) { 516 if (!useRVVForFixedLengthVectorVT(VT)) 517 continue; 518 519 // By default everything must be expanded. 520 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) 521 setOperationAction(Op, VT, Expand); 522 523 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed. 524 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); 525 526 setOperationAction(ISD::BUILD_VECTOR, VT, Custom); 527 528 setOperationAction(ISD::LOAD, VT, Custom); 529 setOperationAction(ISD::STORE, VT, Custom); 530 531 // Operations below are not valid for masks. 532 if (VT.getVectorElementType() == MVT::i1) 533 continue; 534 535 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); 536 537 setOperationAction(ISD::ADD, VT, Custom); 538 setOperationAction(ISD::MUL, VT, Custom); 539 setOperationAction(ISD::SUB, VT, Custom); 540 setOperationAction(ISD::AND, VT, Custom); 541 setOperationAction(ISD::OR, VT, Custom); 542 setOperationAction(ISD::XOR, VT, Custom); 543 setOperationAction(ISD::SDIV, VT, Custom); 544 setOperationAction(ISD::SREM, VT, Custom); 545 setOperationAction(ISD::UDIV, VT, Custom); 546 setOperationAction(ISD::UREM, VT, Custom); 547 setOperationAction(ISD::SHL, VT, Custom); 548 setOperationAction(ISD::SRA, VT, Custom); 549 setOperationAction(ISD::SRL, VT, Custom); 550 } 551 552 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) { 553 if (!useRVVForFixedLengthVectorVT(VT)) 554 continue; 555 556 // By default everything must be expanded. 557 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) 558 setOperationAction(Op, VT, Expand); 559 560 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed. 561 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); 562 563 setOperationAction(ISD::BUILD_VECTOR, VT, Custom); 564 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); 565 566 setOperationAction(ISD::LOAD, VT, Custom); 567 setOperationAction(ISD::STORE, VT, Custom); 568 setOperationAction(ISD::FADD, VT, Custom); 569 setOperationAction(ISD::FSUB, VT, Custom); 570 setOperationAction(ISD::FMUL, VT, Custom); 571 setOperationAction(ISD::FDIV, VT, Custom); 572 setOperationAction(ISD::FNEG, VT, Custom); 573 setOperationAction(ISD::FMA, VT, Custom); 574 } 575 } 576 } 577 578 // Function alignments. 579 const Align FunctionAlignment(Subtarget.hasStdExtC() ? 
2 : 4); 580 setMinFunctionAlignment(FunctionAlignment); 581 setPrefFunctionAlignment(FunctionAlignment); 582 583 setMinimumJumpTableEntries(5); 584 585 // Jumps are expensive, compared to logic 586 setJumpIsExpensive(); 587 588 // We can use any register for comparisons 589 setHasMultipleConditionRegisters(); 590 591 setTargetDAGCombine(ISD::SETCC); 592 if (Subtarget.hasStdExtZbp()) { 593 setTargetDAGCombine(ISD::OR); 594 } 595 } 596 597 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, 598 LLVMContext &Context, 599 EVT VT) const { 600 if (!VT.isVector()) 601 return getPointerTy(DL); 602 if (Subtarget.hasStdExtV() && VT.isScalableVector()) 603 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount()); 604 return VT.changeVectorElementTypeToInteger(); 605 } 606 607 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, 608 const CallInst &I, 609 MachineFunction &MF, 610 unsigned Intrinsic) const { 611 switch (Intrinsic) { 612 default: 613 return false; 614 case Intrinsic::riscv_masked_atomicrmw_xchg_i32: 615 case Intrinsic::riscv_masked_atomicrmw_add_i32: 616 case Intrinsic::riscv_masked_atomicrmw_sub_i32: 617 case Intrinsic::riscv_masked_atomicrmw_nand_i32: 618 case Intrinsic::riscv_masked_atomicrmw_max_i32: 619 case Intrinsic::riscv_masked_atomicrmw_min_i32: 620 case Intrinsic::riscv_masked_atomicrmw_umax_i32: 621 case Intrinsic::riscv_masked_atomicrmw_umin_i32: 622 case Intrinsic::riscv_masked_cmpxchg_i32: 623 PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType()); 624 Info.opc = ISD::INTRINSIC_W_CHAIN; 625 Info.memVT = MVT::getVT(PtrTy->getElementType()); 626 Info.ptrVal = I.getArgOperand(0); 627 Info.offset = 0; 628 Info.align = Align(4); 629 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore | 630 MachineMemOperand::MOVolatile; 631 return true; 632 } 633 } 634 635 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL, 636 const AddrMode &AM, Type *Ty, 637 unsigned AS, 638 Instruction *I) const { 639 // No global is ever allowed as a base. 640 if (AM.BaseGV) 641 return false; 642 643 // Require a 12-bit signed offset. 644 if (!isInt<12>(AM.BaseOffs)) 645 return false; 646 647 switch (AM.Scale) { 648 case 0: // "r+i" or just "i", depending on HasBaseReg. 649 break; 650 case 1: 651 if (!AM.HasBaseReg) // allow "r+i". 652 break; 653 return false; // disallow "r+r" or "r+r+i". 654 default: 655 return false; 656 } 657 658 return true; 659 } 660 661 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 662 return isInt<12>(Imm); 663 } 664 665 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const { 666 return isInt<12>(Imm); 667 } 668 669 // On RV32, 64-bit integers are split into their high and low parts and held 670 // in two different registers, so the trunc is free since the low register can 671 // just be used. 
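// For example, on RV32 a (trunc i64 %x to i32) needs no instruction at all:
// the result is simply the register that already holds the low word of %x.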
672 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const { 673 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy()) 674 return false; 675 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); 676 unsigned DestBits = DstTy->getPrimitiveSizeInBits(); 677 return (SrcBits == 64 && DestBits == 32); 678 } 679 680 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const { 681 if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() || 682 !SrcVT.isInteger() || !DstVT.isInteger()) 683 return false; 684 unsigned SrcBits = SrcVT.getSizeInBits(); 685 unsigned DestBits = DstVT.getSizeInBits(); 686 return (SrcBits == 64 && DestBits == 32); 687 } 688 689 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { 690 // Zexts are free if they can be combined with a load. 691 if (auto *LD = dyn_cast<LoadSDNode>(Val)) { 692 EVT MemVT = LD->getMemoryVT(); 693 if ((MemVT == MVT::i8 || MemVT == MVT::i16 || 694 (Subtarget.is64Bit() && MemVT == MVT::i32)) && 695 (LD->getExtensionType() == ISD::NON_EXTLOAD || 696 LD->getExtensionType() == ISD::ZEXTLOAD)) 697 return true; 698 } 699 700 return TargetLowering::isZExtFree(Val, VT2); 701 } 702 703 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const { 704 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64; 705 } 706 707 bool RISCVTargetLowering::isCheapToSpeculateCttz() const { 708 return Subtarget.hasStdExtZbb(); 709 } 710 711 bool RISCVTargetLowering::isCheapToSpeculateCtlz() const { 712 return Subtarget.hasStdExtZbb(); 713 } 714 715 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, 716 bool ForCodeSize) const { 717 if (VT == MVT::f16 && !Subtarget.hasStdExtZfh()) 718 return false; 719 if (VT == MVT::f32 && !Subtarget.hasStdExtF()) 720 return false; 721 if (VT == MVT::f64 && !Subtarget.hasStdExtD()) 722 return false; 723 if (Imm.isNegZero()) 724 return false; 725 return Imm.isZero(); 726 } 727 728 bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const { 729 return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) || 730 (VT == MVT::f32 && Subtarget.hasStdExtF()) || 731 (VT == MVT::f64 && Subtarget.hasStdExtD()); 732 } 733 734 // Changes the condition code and swaps operands if necessary, so the SetCC 735 // operation matches one of the comparisons supported directly in the RISC-V 736 // ISA. 737 static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) { 738 switch (CC) { 739 default: 740 break; 741 case ISD::SETGT: 742 case ISD::SETLE: 743 case ISD::SETUGT: 744 case ISD::SETULE: 745 CC = ISD::getSetCCSwappedOperands(CC); 746 std::swap(LHS, RHS); 747 break; 748 } 749 } 750 751 // Return the RISC-V branch opcode that matches the given DAG integer 752 // condition code. The CondCode must be one of those supported by the RISC-V 753 // ISA (see normaliseSetCC). 754 static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) { 755 switch (CC) { 756 default: 757 llvm_unreachable("Unsupported CondCode"); 758 case ISD::SETEQ: 759 return RISCV::BEQ; 760 case ISD::SETNE: 761 return RISCV::BNE; 762 case ISD::SETLT: 763 return RISCV::BLT; 764 case ISD::SETGE: 765 return RISCV::BGE; 766 case ISD::SETULT: 767 return RISCV::BLTU; 768 case ISD::SETUGE: 769 return RISCV::BGEU; 770 } 771 } 772 773 // Return the largest legal scalable vector type that matches VT's element type. 
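// For example, an LMUL=1 fixed vector with i32 elements is given an nxv2i32
// container: RVVBitsPerBlock (64) / 32 yields two elements per block, and
// larger LMULs scale the element count accordingly.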
774 static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT, 775 const RISCVSubtarget &Subtarget) { 776 assert(VT.isFixedLengthVector() && 777 DAG.getTargetLoweringInfo().isTypeLegal(VT) && 778 "Expected legal fixed length vector!"); 779 780 unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT); 781 assert(LMul <= 8 && isPowerOf2_32(LMul) && "Unexpected LMUL!"); 782 783 MVT EltVT = VT.getVectorElementType(); 784 switch (EltVT.SimpleTy) { 785 default: 786 llvm_unreachable("unexpected element type for RVV container"); 787 case MVT::i1: { 788 // Masks are calculated assuming 8-bit elements since that's when we need 789 // the most elements. 790 unsigned EltsPerBlock = RISCV::RVVBitsPerBlock / 8; 791 return MVT::getScalableVectorVT(MVT::i1, LMul * EltsPerBlock); 792 } 793 case MVT::i8: 794 case MVT::i16: 795 case MVT::i32: 796 case MVT::i64: 797 case MVT::f16: 798 case MVT::f32: 799 case MVT::f64: { 800 unsigned EltsPerBlock = RISCV::RVVBitsPerBlock / EltVT.getSizeInBits(); 801 return MVT::getScalableVectorVT(EltVT, LMul * EltsPerBlock); 802 } 803 } 804 } 805 806 // Grow V to consume an entire RVV register. 807 static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG, 808 const RISCVSubtarget &Subtarget) { 809 assert(VT.isScalableVector() && 810 "Expected to convert into a scalable vector!"); 811 assert(V.getValueType().isFixedLengthVector() && 812 "Expected a fixed length vector operand!"); 813 SDLoc DL(V); 814 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); 815 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero); 816 } 817 818 // Shrink V so it's just big enough to maintain a VT's worth of data. 819 static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG, 820 const RISCVSubtarget &Subtarget) { 821 assert(VT.isFixedLengthVector() && 822 "Expected to convert into a fixed length vector!"); 823 assert(V.getValueType().isScalableVector() && 824 "Expected a scalable vector operand!"); 825 SDLoc DL(V); 826 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); 827 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero); 828 } 829 830 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, 831 const RISCVSubtarget &Subtarget) { 832 MVT VT = Op.getSimpleValueType(); 833 assert(VT.isFixedLengthVector() && "Unexpected vector!"); 834 835 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 836 837 SDLoc DL(Op); 838 SDValue VL = 839 DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT()); 840 841 if (VT.getVectorElementType() == MVT::i1) { 842 if (ISD::isBuildVectorAllZeros(Op.getNode())) { 843 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL); 844 return convertFromScalableVector(VT, VMClr, DAG, Subtarget); 845 } 846 847 if (ISD::isBuildVectorAllOnes(Op.getNode())) { 848 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL); 849 return convertFromScalableVector(VT, VMSet, DAG, Subtarget); 850 } 851 852 return SDValue(); 853 } 854 855 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) { 856 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL 857 : RISCVISD::VMV_V_X_VL; 858 Splat = DAG.getNode(Opc, DL, ContainerVT, Splat, VL); 859 return convertFromScalableVector(VT, Splat, DAG, Subtarget); 860 } 861 862 // Try and match an index sequence, which we can lower directly to the vid 863 // instruction. An all-undef vector is matched by getSplatValue, above. 
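  // For example (build_vector 0, 1, undef, 3) passes the check below and is
  // emitted as a single vid.v under an all-ones mask.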
864 bool IsVID = true; 865 if (VT.isInteger()) 866 for (unsigned i = 0, e = Op.getNumOperands(); i < e && IsVID; i++) 867 IsVID &= Op.getOperand(i).isUndef() || 868 (isa<ConstantSDNode>(Op.getOperand(i)) && 869 Op.getConstantOperandVal(i) == i); 870 871 if (IsVID) { 872 MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 873 SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); 874 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL); 875 return convertFromScalableVector(VT, VID, DAG, Subtarget); 876 } 877 878 return SDValue(); 879 } 880 881 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, 882 const RISCVSubtarget &Subtarget) { 883 SDValue V1 = Op.getOperand(0); 884 SDLoc DL(Op); 885 MVT VT = Op.getSimpleValueType(); 886 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode()); 887 888 if (SVN->isSplat()) { 889 int Lane = SVN->getSplatIndex(); 890 if (Lane >= 0) { 891 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 892 893 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget); 894 assert(Lane < (int)VT.getVectorNumElements() && "Unexpected lane!"); 895 896 MVT XLenVT = Subtarget.getXLenVT(); 897 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT); 898 MVT MaskVT = 899 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 900 SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); 901 SDValue Gather = 902 DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, V1, 903 DAG.getConstant(Lane, DL, XLenVT), Mask, VL); 904 return convertFromScalableVector(VT, Gather, DAG, Subtarget); 905 } 906 } 907 908 return SDValue(); 909 } 910 911 SDValue RISCVTargetLowering::LowerOperation(SDValue Op, 912 SelectionDAG &DAG) const { 913 switch (Op.getOpcode()) { 914 default: 915 report_fatal_error("unimplemented operand"); 916 case ISD::GlobalAddress: 917 return lowerGlobalAddress(Op, DAG); 918 case ISD::BlockAddress: 919 return lowerBlockAddress(Op, DAG); 920 case ISD::ConstantPool: 921 return lowerConstantPool(Op, DAG); 922 case ISD::JumpTable: 923 return lowerJumpTable(Op, DAG); 924 case ISD::GlobalTLSAddress: 925 return lowerGlobalTLSAddress(Op, DAG); 926 case ISD::SELECT: 927 return lowerSELECT(Op, DAG); 928 case ISD::VASTART: 929 return lowerVASTART(Op, DAG); 930 case ISD::FRAMEADDR: 931 return lowerFRAMEADDR(Op, DAG); 932 case ISD::RETURNADDR: 933 return lowerRETURNADDR(Op, DAG); 934 case ISD::SHL_PARTS: 935 return lowerShiftLeftParts(Op, DAG); 936 case ISD::SRA_PARTS: 937 return lowerShiftRightParts(Op, DAG, true); 938 case ISD::SRL_PARTS: 939 return lowerShiftRightParts(Op, DAG, false); 940 case ISD::BITCAST: { 941 assert(((Subtarget.is64Bit() && Subtarget.hasStdExtF()) || 942 Subtarget.hasStdExtZfh()) && 943 "Unexpected custom legalisation"); 944 SDLoc DL(Op); 945 SDValue Op0 = Op.getOperand(0); 946 if (Op.getValueType() == MVT::f16 && Subtarget.hasStdExtZfh()) { 947 if (Op0.getValueType() != MVT::i16) 948 return SDValue(); 949 SDValue NewOp0 = 950 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Op0); 951 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0); 952 return FPConv; 953 } else if (Op.getValueType() == MVT::f32 && Subtarget.is64Bit() && 954 Subtarget.hasStdExtF()) { 955 if (Op0.getValueType() != MVT::i32) 956 return SDValue(); 957 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); 958 SDValue FPConv = 959 DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0); 960 return FPConv; 961 } 962 return 
SDValue();
  }
  case ISD::INTRINSIC_WO_CHAIN:
    return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return LowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::BSWAP:
  case ISD::BITREVERSE: {
    // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
    assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
    MVT VT = Op.getSimpleValueType();
    SDLoc DL(Op);
    // Start with the maximum immediate value which is the bitwidth - 1.
    unsigned Imm = VT.getSizeInBits() - 1;
    // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
    if (Op.getOpcode() == ISD::BSWAP)
      Imm &= ~0x7U;
    return DAG.getNode(RISCVISD::GREVI, DL, VT, Op.getOperand(0),
                       DAG.getTargetConstant(Imm, DL, Subtarget.getXLenVT()));
  }
  case ISD::FSHL:
  case ISD::FSHR: {
    MVT VT = Op.getSimpleValueType();
    assert(VT == Subtarget.getXLenVT() && "Unexpected custom legalization");
    SDLoc DL(Op);
    // FSL/FSR take a log2(XLen)+1 bit shift amount but XLenVT FSHL/FSHR only
    // use log2(XLen) bits. Mask the shift amount accordingly.
    unsigned ShAmtWidth = Subtarget.getXLen() - 1;
    SDValue ShAmt = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(2),
                                DAG.getConstant(ShAmtWidth, DL, VT));
    unsigned Opc = Op.getOpcode() == ISD::FSHL ? RISCVISD::FSL : RISCVISD::FSR;
    return DAG.getNode(Opc, DL, VT, Op.getOperand(0), Op.getOperand(1), ShAmt);
  }
  case ISD::TRUNCATE: {
    SDLoc DL(Op);
    EVT VT = Op.getValueType();
    // Only custom-lower vector truncates
    if (!VT.isVector())
      return Op;

    // Truncates to mask types are handled differently
    if (VT.getVectorElementType() == MVT::i1)
      return lowerVectorMaskTrunc(Op, DAG);

    // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
    // truncates as a series of "RISCVISD::TRUNCATE_VECTOR" nodes which
    // truncate by one power of two at a time.
    EVT DstEltVT = VT.getVectorElementType();

    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    EVT SrcEltVT = SrcVT.getVectorElementType();

    assert(DstEltVT.bitsLT(SrcEltVT) &&
           isPowerOf2_64(DstEltVT.getSizeInBits()) &&
           isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
           "Unexpected vector truncate lowering");

    SDValue Result = Src;
    LLVMContext &Context = *DAG.getContext();
    const ElementCount Count = SrcVT.getVectorElementCount();
    do {
      SrcEltVT = EVT::getIntegerVT(Context, SrcEltVT.getSizeInBits() / 2);
      EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
      Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR, DL, ResultVT, Result);
    } while (SrcEltVT != DstEltVT);

    return Result;
  }
  case ISD::ANY_EXTEND:
  case ISD::ZERO_EXTEND:
    return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
  case ISD::SIGN_EXTEND:
    return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
  case ISD::SPLAT_VECTOR:
    return lowerSPLATVECTOR(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::VSCALE: {
    MVT VT = Op.getSimpleValueType();
    SDLoc DL(Op);
    SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
    // We define our scalable vector types for lmul=1 to use a 64 bit known
    // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
    // vscale as VLENB / 8.
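    // For example, with VLEN=128 VLENB is 16 and vscale = 16 >> 3 = 2.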
1049 SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB, 1050 DAG.getConstant(3, DL, VT)); 1051 return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0)); 1052 } 1053 case ISD::FP_EXTEND: { 1054 // RVV can only do fp_extend to types double the size as the source. We 1055 // custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going 1056 // via f32. 1057 MVT VT = Op.getSimpleValueType(); 1058 MVT SrcVT = Op.getOperand(0).getSimpleValueType(); 1059 // We only need to close the gap between vXf16->vXf64. 1060 if (!VT.isVector() || VT.getVectorElementType() != MVT::f64 || 1061 SrcVT.getVectorElementType() != MVT::f16) 1062 return Op; 1063 SDLoc DL(Op); 1064 MVT InterVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 1065 SDValue IntermediateRound = 1066 DAG.getFPExtendOrRound(Op.getOperand(0), DL, InterVT); 1067 return DAG.getFPExtendOrRound(IntermediateRound, DL, VT); 1068 } 1069 case ISD::FP_ROUND: { 1070 // RVV can only do fp_round to types half the size as the source. We 1071 // custom-lower f64->f16 rounds via RVV's round-to-odd float 1072 // conversion instruction. 1073 MVT VT = Op.getSimpleValueType(); 1074 MVT SrcVT = Op.getOperand(0).getSimpleValueType(); 1075 // We only need to close the gap between vXf64<->vXf16. 1076 if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 || 1077 SrcVT.getVectorElementType() != MVT::f64) 1078 return Op; 1079 SDLoc DL(Op); 1080 MVT InterVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 1081 SDValue IntermediateRound = 1082 DAG.getNode(RISCVISD::VFNCVT_ROD, DL, InterVT, Op.getOperand(0)); 1083 return DAG.getFPExtendOrRound(IntermediateRound, DL, VT); 1084 } 1085 case ISD::FP_TO_SINT: 1086 case ISD::FP_TO_UINT: 1087 case ISD::SINT_TO_FP: 1088 case ISD::UINT_TO_FP: { 1089 // RVV can only do fp<->int conversions to types half/double the size as 1090 // the source. We custom-lower any conversions that do two hops into 1091 // sequences. 1092 MVT VT = Op.getSimpleValueType(); 1093 if (!VT.isVector()) 1094 return Op; 1095 SDLoc DL(Op); 1096 SDValue Src = Op.getOperand(0); 1097 MVT EltVT = VT.getVectorElementType(); 1098 MVT SrcEltVT = Src.getSimpleValueType().getVectorElementType(); 1099 unsigned EltSize = EltVT.getSizeInBits(); 1100 unsigned SrcEltSize = SrcEltVT.getSizeInBits(); 1101 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) && 1102 "Unexpected vector element types"); 1103 bool IsInt2FP = SrcEltVT.isInteger(); 1104 // Widening conversions 1105 if (EltSize > SrcEltSize && (EltSize / SrcEltSize >= 4)) { 1106 if (IsInt2FP) { 1107 // Do a regular integer sign/zero extension then convert to float. 1108 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltVT.getSizeInBits()), 1109 VT.getVectorElementCount()); 1110 unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP 1111 ? ISD::ZERO_EXTEND 1112 : ISD::SIGN_EXTEND; 1113 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src); 1114 return DAG.getNode(Op.getOpcode(), DL, VT, Ext); 1115 } 1116 // FP2Int 1117 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering"); 1118 // Do one doubling fp_extend then complete the operation by converting 1119 // to int. 1120 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 1121 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT); 1122 return DAG.getNode(Op.getOpcode(), DL, VT, FExt); 1123 } 1124 1125 // Narrowing conversions 1126 if (SrcEltSize > EltSize && (SrcEltSize / EltSize >= 4)) { 1127 if (IsInt2FP) { 1128 // One narrowing int_to_fp, then an fp_round. 
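        // For example, i64 -> f16 is done as a narrowing i64 -> f32
        // conversion followed by an f32 -> f16 round.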
1129 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering"); 1130 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 1131 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src); 1132 return DAG.getFPExtendOrRound(Int2FP, DL, VT); 1133 } 1134 // FP2Int 1135 // One narrowing fp_to_int, then truncate the integer. If the float isn't 1136 // representable by the integer, the result is poison. 1137 MVT IVecVT = 1138 MVT::getVectorVT(MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2), 1139 VT.getVectorElementCount()); 1140 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src); 1141 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int); 1142 } 1143 1144 return Op; 1145 } 1146 case ISD::VECREDUCE_ADD: 1147 case ISD::VECREDUCE_UMAX: 1148 case ISD::VECREDUCE_SMAX: 1149 case ISD::VECREDUCE_UMIN: 1150 case ISD::VECREDUCE_SMIN: 1151 case ISD::VECREDUCE_AND: 1152 case ISD::VECREDUCE_OR: 1153 case ISD::VECREDUCE_XOR: 1154 return lowerVECREDUCE(Op, DAG); 1155 case ISD::VECREDUCE_FADD: 1156 case ISD::VECREDUCE_SEQ_FADD: 1157 return lowerFPVECREDUCE(Op, DAG); 1158 case ISD::BUILD_VECTOR: 1159 return lowerBUILD_VECTOR(Op, DAG, Subtarget); 1160 case ISD::VECTOR_SHUFFLE: 1161 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget); 1162 case ISD::LOAD: 1163 return lowerFixedLengthVectorLoadToRVV(Op, DAG); 1164 case ISD::STORE: 1165 return lowerFixedLengthVectorStoreToRVV(Op, DAG); 1166 case ISD::ADD: 1167 return lowerToScalableOp(Op, DAG, RISCVISD::ADD_VL); 1168 case ISD::SUB: 1169 return lowerToScalableOp(Op, DAG, RISCVISD::SUB_VL); 1170 case ISD::MUL: 1171 return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL); 1172 case ISD::AND: 1173 return lowerToScalableOp(Op, DAG, RISCVISD::AND_VL); 1174 case ISD::OR: 1175 return lowerToScalableOp(Op, DAG, RISCVISD::OR_VL); 1176 case ISD::XOR: 1177 return lowerToScalableOp(Op, DAG, RISCVISD::XOR_VL); 1178 case ISD::SDIV: 1179 return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL); 1180 case ISD::SREM: 1181 return lowerToScalableOp(Op, DAG, RISCVISD::SREM_VL); 1182 case ISD::UDIV: 1183 return lowerToScalableOp(Op, DAG, RISCVISD::UDIV_VL); 1184 case ISD::UREM: 1185 return lowerToScalableOp(Op, DAG, RISCVISD::UREM_VL); 1186 case ISD::SHL: 1187 return lowerToScalableOp(Op, DAG, RISCVISD::SHL_VL); 1188 case ISD::SRA: 1189 return lowerToScalableOp(Op, DAG, RISCVISD::SRA_VL); 1190 case ISD::SRL: 1191 return lowerToScalableOp(Op, DAG, RISCVISD::SRL_VL); 1192 case ISD::FADD: 1193 return lowerToScalableOp(Op, DAG, RISCVISD::FADD_VL); 1194 case ISD::FSUB: 1195 return lowerToScalableOp(Op, DAG, RISCVISD::FSUB_VL); 1196 case ISD::FMUL: 1197 return lowerToScalableOp(Op, DAG, RISCVISD::FMUL_VL); 1198 case ISD::FDIV: 1199 return lowerToScalableOp(Op, DAG, RISCVISD::FDIV_VL); 1200 case ISD::FNEG: 1201 return lowerToScalableOp(Op, DAG, RISCVISD::FNEG_VL); 1202 case ISD::FMA: 1203 return lowerToScalableOp(Op, DAG, RISCVISD::FMA_VL); 1204 } 1205 } 1206 1207 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty, 1208 SelectionDAG &DAG, unsigned Flags) { 1209 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags); 1210 } 1211 1212 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty, 1213 SelectionDAG &DAG, unsigned Flags) { 1214 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(), 1215 Flags); 1216 } 1217 1218 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty, 1219 SelectionDAG &DAG, unsigned Flags) { 1220 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(), 1221 
N->getOffset(), Flags); 1222 } 1223 1224 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty, 1225 SelectionDAG &DAG, unsigned Flags) { 1226 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags); 1227 } 1228 1229 template <class NodeTy> 1230 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, 1231 bool IsLocal) const { 1232 SDLoc DL(N); 1233 EVT Ty = getPointerTy(DAG.getDataLayout()); 1234 1235 if (isPositionIndependent()) { 1236 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); 1237 if (IsLocal) 1238 // Use PC-relative addressing to access the symbol. This generates the 1239 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym)) 1240 // %pcrel_lo(auipc)). 1241 return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0); 1242 1243 // Use PC-relative addressing to access the GOT for this symbol, then load 1244 // the address from the GOT. This generates the pattern (PseudoLA sym), 1245 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))). 1246 return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0); 1247 } 1248 1249 switch (getTargetMachine().getCodeModel()) { 1250 default: 1251 report_fatal_error("Unsupported code model for lowering"); 1252 case CodeModel::Small: { 1253 // Generate a sequence for accessing addresses within the first 2 GiB of 1254 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)). 1255 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI); 1256 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO); 1257 SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0); 1258 return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0); 1259 } 1260 case CodeModel::Medium: { 1261 // Generate a sequence for accessing addresses within any 2GiB range within 1262 // the address space. This generates the pattern (PseudoLLA sym), which 1263 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)). 1264 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); 1265 return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0); 1266 } 1267 } 1268 } 1269 1270 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op, 1271 SelectionDAG &DAG) const { 1272 SDLoc DL(Op); 1273 EVT Ty = Op.getValueType(); 1274 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 1275 int64_t Offset = N->getOffset(); 1276 MVT XLenVT = Subtarget.getXLenVT(); 1277 1278 const GlobalValue *GV = N->getGlobal(); 1279 bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV); 1280 SDValue Addr = getAddr(N, DAG, IsLocal); 1281 1282 // In order to maximise the opportunity for common subexpression elimination, 1283 // emit a separate ADD node for the global address offset instead of folding 1284 // it in the global address node. Later peephole optimisations may choose to 1285 // fold it back in when profitable. 
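  // For example, a reference to (gv + 8) is emitted as (add (address of gv), 8)
  // rather than materialising the offset address directly.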
1286 if (Offset != 0) 1287 return DAG.getNode(ISD::ADD, DL, Ty, Addr, 1288 DAG.getConstant(Offset, DL, XLenVT)); 1289 return Addr; 1290 } 1291 1292 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op, 1293 SelectionDAG &DAG) const { 1294 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op); 1295 1296 return getAddr(N, DAG); 1297 } 1298 1299 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op, 1300 SelectionDAG &DAG) const { 1301 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); 1302 1303 return getAddr(N, DAG); 1304 } 1305 1306 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op, 1307 SelectionDAG &DAG) const { 1308 JumpTableSDNode *N = cast<JumpTableSDNode>(Op); 1309 1310 return getAddr(N, DAG); 1311 } 1312 1313 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, 1314 SelectionDAG &DAG, 1315 bool UseGOT) const { 1316 SDLoc DL(N); 1317 EVT Ty = getPointerTy(DAG.getDataLayout()); 1318 const GlobalValue *GV = N->getGlobal(); 1319 MVT XLenVT = Subtarget.getXLenVT(); 1320 1321 if (UseGOT) { 1322 // Use PC-relative addressing to access the GOT for this TLS symbol, then 1323 // load the address from the GOT and add the thread pointer. This generates 1324 // the pattern (PseudoLA_TLS_IE sym), which expands to 1325 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)). 1326 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); 1327 SDValue Load = 1328 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0); 1329 1330 // Add the thread pointer. 1331 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); 1332 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg); 1333 } 1334 1335 // Generate a sequence for accessing the address relative to the thread 1336 // pointer, with the appropriate adjustment for the thread pointer offset. 1337 // This generates the pattern 1338 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym)) 1339 SDValue AddrHi = 1340 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI); 1341 SDValue AddrAdd = 1342 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD); 1343 SDValue AddrLo = 1344 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO); 1345 1346 SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0); 1347 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); 1348 SDValue MNAdd = SDValue( 1349 DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd), 1350 0); 1351 return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0); 1352 } 1353 1354 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, 1355 SelectionDAG &DAG) const { 1356 SDLoc DL(N); 1357 EVT Ty = getPointerTy(DAG.getDataLayout()); 1358 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits()); 1359 const GlobalValue *GV = N->getGlobal(); 1360 1361 // Use a PC-relative addressing mode to access the global dynamic GOT address. 1362 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to 1363 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)). 1364 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); 1365 SDValue Load = 1366 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0); 1367 1368 // Prepare argument list to generate call. 1369 ArgListTy Args; 1370 ArgListEntry Entry; 1371 Entry.Node = Load; 1372 Entry.Ty = CallTy; 1373 Args.push_back(Entry); 1374 1375 // Setup call to __tls_get_addr. 
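  // The address computed above is passed as the only argument; __tls_get_addr
  // is called with the standard C calling convention and returns the address
  // of the thread-local variable.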
1376 TargetLowering::CallLoweringInfo CLI(DAG); 1377 CLI.setDebugLoc(DL) 1378 .setChain(DAG.getEntryNode()) 1379 .setLibCallee(CallingConv::C, CallTy, 1380 DAG.getExternalSymbol("__tls_get_addr", Ty), 1381 std::move(Args)); 1382 1383 return LowerCallTo(CLI).first; 1384 } 1385 1386 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op, 1387 SelectionDAG &DAG) const { 1388 SDLoc DL(Op); 1389 EVT Ty = Op.getValueType(); 1390 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 1391 int64_t Offset = N->getOffset(); 1392 MVT XLenVT = Subtarget.getXLenVT(); 1393 1394 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal()); 1395 1396 if (DAG.getMachineFunction().getFunction().getCallingConv() == 1397 CallingConv::GHC) 1398 report_fatal_error("In GHC calling convention TLS is not supported"); 1399 1400 SDValue Addr; 1401 switch (Model) { 1402 case TLSModel::LocalExec: 1403 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false); 1404 break; 1405 case TLSModel::InitialExec: 1406 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true); 1407 break; 1408 case TLSModel::LocalDynamic: 1409 case TLSModel::GeneralDynamic: 1410 Addr = getDynamicTLSAddr(N, DAG); 1411 break; 1412 } 1413 1414 // In order to maximise the opportunity for common subexpression elimination, 1415 // emit a separate ADD node for the global address offset instead of folding 1416 // it in the global address node. Later peephole optimisations may choose to 1417 // fold it back in when profitable. 1418 if (Offset != 0) 1419 return DAG.getNode(ISD::ADD, DL, Ty, Addr, 1420 DAG.getConstant(Offset, DL, XLenVT)); 1421 return Addr; 1422 } 1423 1424 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { 1425 SDValue CondV = Op.getOperand(0); 1426 SDValue TrueV = Op.getOperand(1); 1427 SDValue FalseV = Op.getOperand(2); 1428 SDLoc DL(Op); 1429 MVT XLenVT = Subtarget.getXLenVT(); 1430 1431 // If the result type is XLenVT and CondV is the output of a SETCC node 1432 // which also operated on XLenVT inputs, then merge the SETCC node into the 1433 // lowered RISCVISD::SELECT_CC to take advantage of the integer 1434 // compare+branch instructions. 
i.e.: 1435 // (select (setcc lhs, rhs, cc), truev, falsev) 1436 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev) 1437 if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC && 1438 CondV.getOperand(0).getSimpleValueType() == XLenVT) { 1439 SDValue LHS = CondV.getOperand(0); 1440 SDValue RHS = CondV.getOperand(1); 1441 auto CC = cast<CondCodeSDNode>(CondV.getOperand(2)); 1442 ISD::CondCode CCVal = CC->get(); 1443 1444 normaliseSetCC(LHS, RHS, CCVal); 1445 1446 SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT); 1447 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV}; 1448 return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops); 1449 } 1450 1451 // Otherwise: 1452 // (select condv, truev, falsev) 1453 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev) 1454 SDValue Zero = DAG.getConstant(0, DL, XLenVT); 1455 SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT); 1456 1457 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV}; 1458 1459 return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops); 1460 } 1461 1462 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { 1463 MachineFunction &MF = DAG.getMachineFunction(); 1464 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>(); 1465 1466 SDLoc DL(Op); 1467 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), 1468 getPointerTy(MF.getDataLayout())); 1469 1470 // vastart just stores the address of the VarArgsFrameIndex slot into the 1471 // memory location argument. 1472 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 1473 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1), 1474 MachinePointerInfo(SV)); 1475 } 1476 1477 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op, 1478 SelectionDAG &DAG) const { 1479 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 1480 MachineFunction &MF = DAG.getMachineFunction(); 1481 MachineFrameInfo &MFI = MF.getFrameInfo(); 1482 MFI.setFrameAddressIsTaken(true); 1483 Register FrameReg = RI.getFrameRegister(MF); 1484 int XLenInBytes = Subtarget.getXLen() / 8; 1485 1486 EVT VT = Op.getValueType(); 1487 SDLoc DL(Op); 1488 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT); 1489 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 1490 while (Depth--) { 1491 int Offset = -(XLenInBytes * 2); 1492 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr, 1493 DAG.getIntPtrConstant(Offset, DL)); 1494 FrameAddr = 1495 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo()); 1496 } 1497 return FrameAddr; 1498 } 1499 1500 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op, 1501 SelectionDAG &DAG) const { 1502 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 1503 MachineFunction &MF = DAG.getMachineFunction(); 1504 MachineFrameInfo &MFI = MF.getFrameInfo(); 1505 MFI.setReturnAddressIsTaken(true); 1506 MVT XLenVT = Subtarget.getXLenVT(); 1507 int XLenInBytes = Subtarget.getXLen() / 8; 1508 1509 if (verifyReturnAddressArgumentIsConstant(Op, DAG)) 1510 return SDValue(); 1511 1512 EVT VT = Op.getValueType(); 1513 SDLoc DL(Op); 1514 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 1515 if (Depth) { 1516 int Off = -XLenInBytes; 1517 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG); 1518 SDValue Offset = DAG.getConstant(Off, DL, VT); 1519 return DAG.getLoad(VT, DL, DAG.getEntryNode(), 1520 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), 1521 MachinePointerInfo()); 1522 } 1523 
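  // Note the Depth > 0 path above reloads the saved return address from
  // frame_address(Depth) - XLEN/8, relying on the standard frame layout in
  // which ra is spilled just below the frame pointer.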
1524 // Return the value of the return address register, marking it an implicit 1525 // live-in. 1526 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT)); 1527 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT); 1528 } 1529 1530 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op, 1531 SelectionDAG &DAG) const { 1532 SDLoc DL(Op); 1533 SDValue Lo = Op.getOperand(0); 1534 SDValue Hi = Op.getOperand(1); 1535 SDValue Shamt = Op.getOperand(2); 1536 EVT VT = Lo.getValueType(); 1537 1538 // if Shamt-XLEN < 0: // Shamt < XLEN 1539 // Lo = Lo << Shamt 1540 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt)) 1541 // else: 1542 // Lo = 0 1543 // Hi = Lo << (Shamt-XLEN) 1544 1545 SDValue Zero = DAG.getConstant(0, DL, VT); 1546 SDValue One = DAG.getConstant(1, DL, VT); 1547 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); 1548 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); 1549 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); 1550 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); 1551 1552 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); 1553 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One); 1554 SDValue ShiftRightLo = 1555 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt); 1556 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); 1557 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); 1558 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen); 1559 1560 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); 1561 1562 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero); 1563 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 1564 1565 SDValue Parts[2] = {Lo, Hi}; 1566 return DAG.getMergeValues(Parts, DL); 1567 } 1568 1569 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, 1570 bool IsSRA) const { 1571 SDLoc DL(Op); 1572 SDValue Lo = Op.getOperand(0); 1573 SDValue Hi = Op.getOperand(1); 1574 SDValue Shamt = Op.getOperand(2); 1575 EVT VT = Lo.getValueType(); 1576 1577 // SRA expansion: 1578 // if Shamt-XLEN < 0: // Shamt < XLEN 1579 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt)) 1580 // Hi = Hi >>s Shamt 1581 // else: 1582 // Lo = Hi >>s (Shamt-XLEN); 1583 // Hi = Hi >>s (XLEN-1) 1584 // 1585 // SRL expansion: 1586 // if Shamt-XLEN < 0: // Shamt < XLEN 1587 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt)) 1588 // Hi = Hi >>u Shamt 1589 // else: 1590 // Lo = Hi >>u (Shamt-XLEN); 1591 // Hi = 0; 1592 1593 unsigned ShiftRightOp = IsSRA ? 
ISD::SRA : ISD::SRL; 1594 1595 SDValue Zero = DAG.getConstant(0, DL, VT); 1596 SDValue One = DAG.getConstant(1, DL, VT); 1597 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); 1598 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); 1599 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); 1600 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); 1601 1602 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt); 1603 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One); 1604 SDValue ShiftLeftHi = 1605 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt); 1606 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi); 1607 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt); 1608 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen); 1609 SDValue HiFalse = 1610 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero; 1611 1612 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); 1613 1614 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse); 1615 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 1616 1617 SDValue Parts[2] = {Lo, Hi}; 1618 return DAG.getMergeValues(Parts, DL); 1619 } 1620 1621 // Custom-lower a SPLAT_VECTOR where XLEN<SEW, as the SEW element type is 1622 // illegal (currently only vXi64 RV32). 1623 // FIXME: We could also catch non-constant sign-extended i32 values and lower 1624 // them to SPLAT_VECTOR_I64 1625 SDValue RISCVTargetLowering::lowerSPLATVECTOR(SDValue Op, 1626 SelectionDAG &DAG) const { 1627 SDLoc DL(Op); 1628 EVT VecVT = Op.getValueType(); 1629 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 && 1630 "Unexpected SPLAT_VECTOR lowering"); 1631 SDValue SplatVal = Op.getOperand(0); 1632 1633 // If we can prove that the value is a sign-extended 32-bit value, lower this 1634 // as a custom node in order to try and match RVV vector/scalar instructions. 
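  // For example (illustrative): splatting the i64 constant -1 on RV32 passes
  // the isInt<32> check below and becomes a SPLAT_VECTOR_I64 of the i32
  // constant -1, which can then be matched to a single vector splat
  // instruction rather than the six-instruction sequence further down.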
1635 if (auto *CVal = dyn_cast<ConstantSDNode>(SplatVal)) { 1636 if (isInt<32>(CVal->getSExtValue())) 1637 return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, 1638 DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32)); 1639 } 1640 1641 if (SplatVal.getOpcode() == ISD::SIGN_EXTEND && 1642 SplatVal.getOperand(0).getValueType() == MVT::i32) { 1643 return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, 1644 SplatVal.getOperand(0)); 1645 } 1646 1647 // Else, on RV32 we lower an i64-element SPLAT_VECTOR thus, being careful not 1648 // to accidentally sign-extend the 32-bit halves to the e64 SEW: 1649 // vmv.v.x vX, hi 1650 // vsll.vx vX, vX, /*32*/ 1651 // vmv.v.x vY, lo 1652 // vsll.vx vY, vY, /*32*/ 1653 // vsrl.vx vY, vY, /*32*/ 1654 // vor.vv vX, vX, vY 1655 SDValue One = DAG.getConstant(1, DL, MVT::i32); 1656 SDValue Zero = DAG.getConstant(0, DL, MVT::i32); 1657 SDValue ThirtyTwoV = DAG.getConstant(32, DL, VecVT); 1658 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, SplatVal, Zero); 1659 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, SplatVal, One); 1660 1661 Lo = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo); 1662 Lo = DAG.getNode(ISD::SHL, DL, VecVT, Lo, ThirtyTwoV); 1663 Lo = DAG.getNode(ISD::SRL, DL, VecVT, Lo, ThirtyTwoV); 1664 1665 if (isNullConstant(Hi)) 1666 return Lo; 1667 1668 Hi = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Hi); 1669 Hi = DAG.getNode(ISD::SHL, DL, VecVT, Hi, ThirtyTwoV); 1670 1671 return DAG.getNode(ISD::OR, DL, VecVT, Lo, Hi); 1672 } 1673 1674 // Custom-lower extensions from mask vectors by using a vselect either with 1 1675 // for zero/any-extension or -1 for sign-extension: 1676 // (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0) 1677 // Note that any-extension is lowered identically to zero-extension. 1678 SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG, 1679 int64_t ExtTrueVal) const { 1680 SDLoc DL(Op); 1681 EVT VecVT = Op.getValueType(); 1682 SDValue Src = Op.getOperand(0); 1683 // Only custom-lower extensions from mask types 1684 if (!Src.getValueType().isVector() || 1685 Src.getValueType().getVectorElementType() != MVT::i1) 1686 return Op; 1687 1688 // Be careful not to introduce illegal scalar types at this stage, and be 1689 // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is 1690 // illegal and must be expanded. Since we know that the constants are 1691 // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly. 
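  // As a concrete illustration: sign-extending a mask to nxv2i64 on RV32 uses
  // ExtTrueVal == -1, and both 0 and -1 are themselves sign-extended 32-bit
  // values, so emitting them through SPLAT_VECTOR_I64 below stays clear of an
  // illegal i64 scalar constant.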
1692 bool IsRV32E64 = 1693 !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64; 1694 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); 1695 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, Subtarget.getXLenVT()); 1696 1697 if (!IsRV32E64) { 1698 SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero); 1699 SplatTrueVal = DAG.getSplatVector(VecVT, DL, SplatTrueVal); 1700 } else { 1701 SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero); 1702 SplatTrueVal = 1703 DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatTrueVal); 1704 } 1705 1706 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero); 1707 } 1708 1709 // Custom-lower truncations from vectors to mask vectors by using a mask and a 1710 // setcc operation: 1711 // (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne) 1712 SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op, 1713 SelectionDAG &DAG) const { 1714 SDLoc DL(Op); 1715 EVT MaskVT = Op.getValueType(); 1716 // Only expect to custom-lower truncations to mask types 1717 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 && 1718 "Unexpected type for vector mask lowering"); 1719 SDValue Src = Op.getOperand(0); 1720 EVT VecVT = Src.getValueType(); 1721 1722 // Be careful not to introduce illegal scalar types at this stage, and be 1723 // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is 1724 // illegal and must be expanded. Since we know that the constants are 1725 // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly. 1726 bool IsRV32E64 = 1727 !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64; 1728 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT()); 1729 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); 1730 1731 if (!IsRV32E64) { 1732 SplatOne = DAG.getSplatVector(VecVT, DL, SplatOne); 1733 SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero); 1734 } else { 1735 SplatOne = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatOne); 1736 SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero); 1737 } 1738 1739 SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne); 1740 1741 return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE); 1742 } 1743 1744 SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, 1745 SelectionDAG &DAG) const { 1746 SDLoc DL(Op); 1747 EVT VecVT = Op.getValueType(); 1748 SDValue Vec = Op.getOperand(0); 1749 SDValue Val = Op.getOperand(1); 1750 SDValue Idx = Op.getOperand(2); 1751 1752 // Custom-legalize INSERT_VECTOR_ELT where XLEN>=SEW, so that the vector is 1753 // first slid down into position, the value is inserted into the first 1754 // position, and the vector is slid back up. We do this to simplify patterns. 1755 // (slideup vec, (insertelt (slidedown impdef, vec, idx), val, 0), idx), 1756 if (Subtarget.is64Bit() || VecVT.getVectorElementType() != MVT::i64) { 1757 if (isNullConstant(Idx)) 1758 return Op; 1759 SDValue Slidedown = DAG.getNode(RISCVISD::VSLIDEDOWN, DL, VecVT, 1760 DAG.getUNDEF(VecVT), Vec, Idx); 1761 SDValue InsertElt0 = 1762 DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecVT, Slidedown, Val, 1763 DAG.getConstant(0, DL, Subtarget.getXLenVT())); 1764 1765 return DAG.getNode(RISCVISD::VSLIDEUP, DL, VecVT, Vec, InsertElt0, Idx); 1766 } 1767 1768 // Custom-legalize INSERT_VECTOR_ELT where XLEN<SEW, as the SEW element type 1769 // is illegal (currently only vXi64 RV32). 
  // Since there is no easy way of getting a single element into a vector when
  // XLEN<SEW, we lower the operation to the following sequence:
  //   splat      vVal, rVal
  //   vid.v      vVid
  //   vmseq.vx   mMask, vVid, rIdx
  //   vmerge.vvm vDest, vSrc, vVal, mMask
  // This essentially merges the original vector with the inserted element by
  // using a mask whose only set bit is the one corresponding to the insert
  // index.
  SDValue SplattedVal = DAG.getSplatVector(VecVT, DL, Val);
  SDValue SplattedIdx = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Idx);

  SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
  MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount());
  SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
  SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VecVT, Mask, VL);
  auto SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VecVT);
  SDValue SelectCond = DAG.getSetCC(DL, SetCCVT, VID, SplattedIdx, ISD::SETEQ);

  return DAG.getNode(ISD::VSELECT, DL, VecVT, SelectCond, SplattedVal, Vec);
}

// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
// types this is done using VMV_X_S to allow us to glean information about the
// sign bits of the result.
SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Idx = Op.getOperand(1);
  SDValue Vec = Op.getOperand(0);
  EVT EltVT = Op.getValueType();
  EVT VecVT = Vec.getValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  // If the index is 0, the vector is already in the right position.
  if (!isNullConstant(Idx)) {
    Vec = DAG.getNode(RISCVISD::VSLIDEDOWN, DL, VecVT, DAG.getUNDEF(VecVT), Vec,
                      Idx);
  }

  if (!EltVT.isInteger()) {
    // Floating-point extracts are handled in TableGen.
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
                       DAG.getConstant(0, DL, XLenVT));
  }

  SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
  return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
}

SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                     SelectionDAG &DAG) const {
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  SDLoc DL(Op);

  if (Subtarget.hasStdExtV()) {
    // Some RVV intrinsics may claim that they want an integer operand to be
    // extended.
    if (const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
            RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo)) {
      if (II->ExtendedOperand) {
        assert(II->ExtendedOperand < Op.getNumOperands());
        SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
        SDValue &ScalarOp = Operands[II->ExtendedOperand];
        EVT OpVT = ScalarOp.getValueType();
        if (OpVT == MVT::i8 || OpVT == MVT::i16 ||
            (OpVT == MVT::i32 && Subtarget.is64Bit())) {
          // If the operand is a constant, sign extend to increase our chances
          // of being able to use a .vi instruction. ANY_EXTEND would become a
          // zero extend and the simm5 check in isel would fail.
          // FIXME: Should we ignore the upper bits in isel instead?
          unsigned ExtOpc = isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND
                                                          : ISD::ANY_EXTEND;
          ScalarOp = DAG.getNode(ExtOpc, DL, Subtarget.getXLenVT(), ScalarOp);
          return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
                             Operands);
        }
      }
    }
  }

  switch (IntNo) {
  default:
    return SDValue(); // Don't custom lower most intrinsics.
  case Intrinsic::thread_pointer: {
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    return DAG.getRegister(RISCV::X4, PtrVT);
  }
  case Intrinsic::riscv_vmv_x_s:
    assert(Op.getValueType() == Subtarget.getXLenVT() && "Unexpected VT!");
    return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
                       Op.getOperand(1));
  case Intrinsic::riscv_vmv_v_x: {
    SDValue Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(),
                                 Op.getOperand(1));
    return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, Op.getValueType(),
                       Scalar, Op.getOperand(2));
  }
  case Intrinsic::riscv_vfmv_v_f:
    return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
  }
}

SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
                                                    SelectionDAG &DAG) const {
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
  SDLoc DL(Op);

  if (Subtarget.hasStdExtV()) {
    // Some RVV intrinsics may claim that they want an integer operand to be
    // extended.
    if (const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
            RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo)) {
      if (II->ExtendedOperand) {
        // The operands start from the second argument in INTRINSIC_W_CHAIN.
        unsigned ExtendOp = II->ExtendedOperand + 1;
        assert(ExtendOp < Op.getNumOperands());
        SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
        SDValue &ScalarOp = Operands[ExtendOp];
        EVT OpVT = ScalarOp.getValueType();
        if (OpVT == MVT::i8 || OpVT == MVT::i16 ||
            (OpVT == MVT::i32 && Subtarget.is64Bit())) {
          // If the operand is a constant, sign extend to increase our chances
          // of being able to use a .vi instruction. ANY_EXTEND would become a
          // zero extend and the simm5 check in isel would fail.
          // FIXME: Should we ignore the upper bits in isel instead?
          unsigned ExtOpc = isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND
                                                          : ISD::ANY_EXTEND;
          ScalarOp = DAG.getNode(ExtOpc, DL, Subtarget.getXLenVT(), ScalarOp);
          return DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, Op->getVTList(),
                             Operands);
        }
      }
    }
  }

  switch (IntNo) {
  default:
    return SDValue(); // Don't custom lower most intrinsics.
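  // Illustrative summary of the vleff cases below: a fault-only-first load may
  // update vl, so besides the loaded value and the chain, the intrinsic's
  // second result is produced by reading vl back via PseudoReadVL, glued to
  // the VLEFF/VLEFF_MASK node.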
1912 case Intrinsic::riscv_vleff: { 1913 SDLoc DL(Op); 1914 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other, MVT::Glue); 1915 SDValue Load = DAG.getNode(RISCVISD::VLEFF, DL, VTs, Op.getOperand(0), 1916 Op.getOperand(2), Op.getOperand(3)); 1917 SDValue ReadVL = 1918 SDValue(DAG.getMachineNode(RISCV::PseudoReadVL, DL, Op->getValueType(1), 1919 Load.getValue(2)), 1920 0); 1921 return DAG.getMergeValues({Load, ReadVL, Load.getValue(1)}, DL); 1922 } 1923 case Intrinsic::riscv_vleff_mask: { 1924 SDLoc DL(Op); 1925 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other, MVT::Glue); 1926 SDValue Load = DAG.getNode(RISCVISD::VLEFF_MASK, DL, VTs, Op.getOperand(0), 1927 Op.getOperand(2), Op.getOperand(3), 1928 Op.getOperand(4), Op.getOperand(5)); 1929 SDValue ReadVL = 1930 SDValue(DAG.getMachineNode(RISCV::PseudoReadVL, DL, Op->getValueType(1), 1931 Load.getValue(2)), 1932 0); 1933 return DAG.getMergeValues({Load, ReadVL, Load.getValue(1)}, DL); 1934 } 1935 } 1936 } 1937 1938 static std::pair<unsigned, uint64_t> 1939 getRVVReductionOpAndIdentityVal(unsigned ISDOpcode, unsigned EltSizeBits) { 1940 switch (ISDOpcode) { 1941 default: 1942 llvm_unreachable("Unhandled reduction"); 1943 case ISD::VECREDUCE_ADD: 1944 return {RISCVISD::VECREDUCE_ADD, 0}; 1945 case ISD::VECREDUCE_UMAX: 1946 return {RISCVISD::VECREDUCE_UMAX, 0}; 1947 case ISD::VECREDUCE_SMAX: 1948 return {RISCVISD::VECREDUCE_SMAX, minIntN(EltSizeBits)}; 1949 case ISD::VECREDUCE_UMIN: 1950 return {RISCVISD::VECREDUCE_UMIN, maxUIntN(EltSizeBits)}; 1951 case ISD::VECREDUCE_SMIN: 1952 return {RISCVISD::VECREDUCE_SMIN, maxIntN(EltSizeBits)}; 1953 case ISD::VECREDUCE_AND: 1954 return {RISCVISD::VECREDUCE_AND, -1}; 1955 case ISD::VECREDUCE_OR: 1956 return {RISCVISD::VECREDUCE_OR, 0}; 1957 case ISD::VECREDUCE_XOR: 1958 return {RISCVISD::VECREDUCE_XOR, 0}; 1959 } 1960 } 1961 1962 // Take a (supported) standard ISD reduction opcode and transform it to a RISCV 1963 // reduction opcode. Note that this returns a vector type, which must be 1964 // further processed to access the scalar result in element 0. 1965 SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op, 1966 SelectionDAG &DAG) const { 1967 SDLoc DL(Op); 1968 assert(Op.getValueType().isSimple() && 1969 Op.getOperand(0).getValueType().isSimple() && 1970 "Unexpected vector-reduce lowering"); 1971 MVT VecEltVT = Op.getOperand(0).getSimpleValueType().getVectorElementType(); 1972 unsigned RVVOpcode; 1973 uint64_t IdentityVal; 1974 std::tie(RVVOpcode, IdentityVal) = 1975 getRVVReductionOpAndIdentityVal(Op.getOpcode(), VecEltVT.getSizeInBits()); 1976 // We have to perform a bit of a dance to get from our vector type to the 1977 // correct LMUL=1 vector type. We divide our minimum VLEN (64) by the vector 1978 // element type to find the type which fills a single register. Be careful to 1979 // use the operand's vector element type rather than the reduction's value 1980 // type, as that has likely been extended to XLEN. 
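  // Worked example (illustrative): for a reduction over nxv8i16, VecEltVT is
  // i16, so NumElts below is 64 / 16 = 4 and the LMUL=1 type M1VT is nxv4i16.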
1981 unsigned NumElts = 64 / VecEltVT.getSizeInBits(); 1982 MVT M1VT = MVT::getScalableVectorVT(VecEltVT, NumElts); 1983 SDValue IdentitySplat = 1984 DAG.getSplatVector(M1VT, DL, DAG.getConstant(IdentityVal, DL, VecEltVT)); 1985 SDValue Reduction = 1986 DAG.getNode(RVVOpcode, DL, M1VT, Op.getOperand(0), IdentitySplat); 1987 SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction, 1988 DAG.getConstant(0, DL, Subtarget.getXLenVT())); 1989 return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType()); 1990 } 1991 1992 // Given a reduction op, this function returns the matching reduction opcode, 1993 // the vector SDValue and the scalar SDValue required to lower this to a 1994 // RISCVISD node. 1995 static std::tuple<unsigned, SDValue, SDValue> 1996 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) { 1997 SDLoc DL(Op); 1998 switch (Op.getOpcode()) { 1999 default: 2000 llvm_unreachable("Unhandled reduction"); 2001 case ISD::VECREDUCE_FADD: 2002 return std::make_tuple(RISCVISD::VECREDUCE_FADD, Op.getOperand(0), 2003 DAG.getConstantFP(0.0, DL, EltVT)); 2004 case ISD::VECREDUCE_SEQ_FADD: 2005 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD, Op.getOperand(1), 2006 Op.getOperand(0)); 2007 } 2008 } 2009 2010 SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op, 2011 SelectionDAG &DAG) const { 2012 SDLoc DL(Op); 2013 MVT VecEltVT = Op.getSimpleValueType(); 2014 // We have to perform a bit of a dance to get from our vector type to the 2015 // correct LMUL=1 vector type. See above for an explanation. 2016 unsigned NumElts = 64 / VecEltVT.getSizeInBits(); 2017 MVT M1VT = MVT::getScalableVectorVT(VecEltVT, NumElts); 2018 2019 unsigned RVVOpcode; 2020 SDValue VectorVal, ScalarVal; 2021 std::tie(RVVOpcode, VectorVal, ScalarVal) = 2022 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT); 2023 2024 SDValue ScalarSplat = DAG.getSplatVector(M1VT, DL, ScalarVal); 2025 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, VectorVal, ScalarSplat); 2026 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction, 2027 DAG.getConstant(0, DL, Subtarget.getXLenVT())); 2028 } 2029 2030 SDValue 2031 RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op, 2032 SelectionDAG &DAG) const { 2033 auto *Load = cast<LoadSDNode>(Op); 2034 2035 SDLoc DL(Op); 2036 MVT VT = Op.getSimpleValueType(); 2037 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 2038 2039 SDValue VL = 2040 DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT()); 2041 2042 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 2043 SDValue NewLoad = DAG.getMemIntrinsicNode( 2044 RISCVISD::VLE_VL, DL, VTs, {Load->getChain(), Load->getBasePtr(), VL}, 2045 Load->getMemoryVT(), Load->getMemOperand()); 2046 2047 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget); 2048 return DAG.getMergeValues({Result, Load->getChain()}, DL); 2049 } 2050 2051 SDValue 2052 RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op, 2053 SelectionDAG &DAG) const { 2054 auto *Store = cast<StoreSDNode>(Op); 2055 2056 SDLoc DL(Op); 2057 MVT VT = Store->getValue().getSimpleValueType(); 2058 2059 // FIXME: We probably need to zero any extra bits in a byte for mask stores. 2060 // This is tricky to do. 

  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);

  SDValue VL =
      DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());

  SDValue NewValue =
      convertToScalableVector(ContainerVT, Store->getValue(), DAG, Subtarget);
  return DAG.getMemIntrinsicNode(
      RISCVISD::VSE_VL, DL, DAG.getVTList(MVT::Other),
      {Store->getChain(), NewValue, Store->getBasePtr(), VL},
      Store->getMemoryVT(), Store->getMemOperand());
}

SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, SelectionDAG &DAG,
                                               unsigned NewOpc) const {
  MVT VT = Op.getSimpleValueType();
  assert(useRVVForFixedLengthVectorVT(VT) &&
         "Only expected to lower fixed length vector operation!");
  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);

  // Create list of operands by converting existing ones to scalable types.
  SmallVector<SDValue, 6> Ops;
  for (const SDValue &V : Op->op_values()) {
    assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");

    // Pass through non-vector operands.
    if (!V.getValueType().isVector()) {
      Ops.push_back(V);
      continue;
    }

    // "cast" fixed length vector to a scalable vector.
    assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
           "Only fixed length vectors are supported!");
    Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
  }

  SDLoc DL(Op);
  SDValue VL =
      DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
  MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
  SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
  Ops.push_back(Mask);
  Ops.push_back(VL);

  SDValue ScalableRes = DAG.getNode(NewOpc, DL, ContainerVT, Ops);
  return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
}

// Returns the opcode of the target-specific SDNode that implements the 32-bit
// form of the given Opcode.
static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode");
  case ISD::SHL:
    return RISCVISD::SLLW;
  case ISD::SRA:
    return RISCVISD::SRAW;
  case ISD::SRL:
    return RISCVISD::SRLW;
  case ISD::SDIV:
    return RISCVISD::DIVW;
  case ISD::UDIV:
    return RISCVISD::DIVUW;
  case ISD::UREM:
    return RISCVISD::REMUW;
  case ISD::ROTL:
    return RISCVISD::ROLW;
  case ISD::ROTR:
    return RISCVISD::RORW;
  case RISCVISD::GREVI:
    return RISCVISD::GREVIW;
  case RISCVISD::GORCI:
    return RISCVISD::GORCIW;
  }
}

// Converts the given 32-bit operation to a target-specific SelectionDAG node.
// Because i32 isn't a legal type for RV64, these operations would otherwise
// be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
// instructions later on, because the fact that the operation was originally of
// type i32 is lost.
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
                                   unsigned ExtOpc = ISD::ANY_EXTEND) {
  SDLoc DL(N);
  RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
  SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
  SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
  SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
  // ReplaceNodeResults requires that we maintain the same type for the return
  // value.
  return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
}

// Converts the given 32-bit operation to an i64 operation with sign extension
// semantics, to reduce the number of sign extension instructions.
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
  SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
  SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
  SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
                               DAG.getValueType(MVT::i32));
  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
}

void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
                                             SmallVectorImpl<SDValue> &Results,
                                             SelectionDAG &DAG) const {
  SDLoc DL(N);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Don't know how to custom type legalize this operation!");
  case ISD::STRICT_FP_TO_SINT:
  case ISD::STRICT_FP_TO_UINT:
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT: {
    bool IsStrict = N->isStrictFPOpcode();
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
    // If the FP type needs to be softened, emit a library call using the 'si'
    // version. If we left it to default legalization we'd end up with 'di'. If
    // the FP type doesn't need to be softened just let generic type
    // legalization promote the result type.
    if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
        TargetLowering::TypeSoftenFloat)
      return;
    RTLIB::Libcall LC;
    if (N->getOpcode() == ISD::FP_TO_SINT ||
        N->getOpcode() == ISD::STRICT_FP_TO_SINT)
      LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
    else
      LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
    MakeLibCallOptions CallOptions;
    EVT OpVT = Op0.getValueType();
    CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
    SDValue Chain = IsStrict ?
N->getOperand(0) : SDValue(); 2200 SDValue Result; 2201 std::tie(Result, Chain) = 2202 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain); 2203 Results.push_back(Result); 2204 if (IsStrict) 2205 Results.push_back(Chain); 2206 break; 2207 } 2208 case ISD::READCYCLECOUNTER: { 2209 assert(!Subtarget.is64Bit() && 2210 "READCYCLECOUNTER only has custom type legalization on riscv32"); 2211 2212 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); 2213 SDValue RCW = 2214 DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0)); 2215 2216 Results.push_back( 2217 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1))); 2218 Results.push_back(RCW.getValue(2)); 2219 break; 2220 } 2221 case ISD::ADD: 2222 case ISD::SUB: 2223 case ISD::MUL: 2224 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 2225 "Unexpected custom legalisation"); 2226 if (N->getOperand(1).getOpcode() == ISD::Constant) 2227 return; 2228 Results.push_back(customLegalizeToWOpWithSExt(N, DAG)); 2229 break; 2230 case ISD::SHL: 2231 case ISD::SRA: 2232 case ISD::SRL: 2233 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 2234 "Unexpected custom legalisation"); 2235 if (N->getOperand(1).getOpcode() == ISD::Constant) 2236 return; 2237 Results.push_back(customLegalizeToWOp(N, DAG)); 2238 break; 2239 case ISD::ROTL: 2240 case ISD::ROTR: 2241 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 2242 "Unexpected custom legalisation"); 2243 Results.push_back(customLegalizeToWOp(N, DAG)); 2244 break; 2245 case ISD::SDIV: 2246 case ISD::UDIV: 2247 case ISD::UREM: { 2248 MVT VT = N->getSimpleValueType(0); 2249 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) && 2250 Subtarget.is64Bit() && Subtarget.hasStdExtM() && 2251 "Unexpected custom legalisation"); 2252 if (N->getOperand(0).getOpcode() == ISD::Constant || 2253 N->getOperand(1).getOpcode() == ISD::Constant) 2254 return; 2255 2256 // If the input is i32, use ANY_EXTEND since the W instructions don't read 2257 // the upper 32 bits. For other types we need to sign or zero extend 2258 // based on the opcode. 2259 unsigned ExtOpc = ISD::ANY_EXTEND; 2260 if (VT != MVT::i32) 2261 ExtOpc = N->getOpcode() == ISD::SDIV ? 
ISD::SIGN_EXTEND 2262 : ISD::ZERO_EXTEND; 2263 2264 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc)); 2265 break; 2266 } 2267 case ISD::BITCAST: { 2268 assert(((N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 2269 Subtarget.hasStdExtF()) || 2270 (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh())) && 2271 "Unexpected custom legalisation"); 2272 SDValue Op0 = N->getOperand(0); 2273 if (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh()) { 2274 if (Op0.getValueType() != MVT::f16) 2275 return; 2276 SDValue FPConv = 2277 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Op0); 2278 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv)); 2279 } else if (N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 2280 Subtarget.hasStdExtF()) { 2281 if (Op0.getValueType() != MVT::f32) 2282 return; 2283 SDValue FPConv = 2284 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0); 2285 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv)); 2286 } 2287 break; 2288 } 2289 case RISCVISD::GREVI: 2290 case RISCVISD::GORCI: { 2291 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 2292 "Unexpected custom legalisation"); 2293 // This is similar to customLegalizeToWOp, except that we pass the second 2294 // operand (a TargetConstant) straight through: it is already of type 2295 // XLenVT. 2296 SDLoc DL(N); 2297 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode()); 2298 SDValue NewOp0 = 2299 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 2300 SDValue NewRes = 2301 DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, N->getOperand(1)); 2302 // ReplaceNodeResults requires we maintain the same type for the return 2303 // value. 2304 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes)); 2305 break; 2306 } 2307 case ISD::BSWAP: 2308 case ISD::BITREVERSE: { 2309 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 2310 Subtarget.hasStdExtZbp() && "Unexpected custom legalisation"); 2311 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, 2312 N->getOperand(0)); 2313 unsigned Imm = N->getOpcode() == ISD::BITREVERSE ? 31 : 24; 2314 SDValue GREVIW = DAG.getNode(RISCVISD::GREVIW, DL, MVT::i64, NewOp0, 2315 DAG.getTargetConstant(Imm, DL, 2316 Subtarget.getXLenVT())); 2317 // ReplaceNodeResults requires we maintain the same type for the return 2318 // value. 2319 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, GREVIW)); 2320 break; 2321 } 2322 case ISD::FSHL: 2323 case ISD::FSHR: { 2324 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 2325 Subtarget.hasStdExtZbt() && "Unexpected custom legalisation"); 2326 SDValue NewOp0 = 2327 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 2328 SDValue NewOp1 = 2329 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 2330 SDValue NewOp2 = 2331 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); 2332 // FSLW/FSRW take a 6 bit shift amount but i32 FSHL/FSHR only use 5 bits. 2333 // Mask the shift amount to 5 bits. 2334 NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2, 2335 DAG.getConstant(0x1f, DL, MVT::i64)); 2336 unsigned Opc = 2337 N->getOpcode() == ISD::FSHL ? 
RISCVISD::FSLW : RISCVISD::FSRW; 2338 SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewOp2); 2339 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp)); 2340 break; 2341 } 2342 case ISD::EXTRACT_VECTOR_ELT: { 2343 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element 2344 // type is illegal (currently only vXi64 RV32). 2345 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are 2346 // transferred to the destination register. We issue two of these from the 2347 // upper- and lower- halves of the SEW-bit vector element, slid down to the 2348 // first element. 2349 SDLoc DL(N); 2350 SDValue Vec = N->getOperand(0); 2351 SDValue Idx = N->getOperand(1); 2352 EVT VecVT = Vec.getValueType(); 2353 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 && 2354 VecVT.getVectorElementType() == MVT::i64 && 2355 "Unexpected EXTRACT_VECTOR_ELT legalization"); 2356 2357 SDValue Slidedown = Vec; 2358 // Unless the index is known to be 0, we must slide the vector down to get 2359 // the desired element into index 0. 2360 if (!isNullConstant(Idx)) 2361 Slidedown = DAG.getNode(RISCVISD::VSLIDEDOWN, DL, VecVT, 2362 DAG.getUNDEF(VecVT), Vec, Idx); 2363 2364 MVT XLenVT = Subtarget.getXLenVT(); 2365 // Extract the lower XLEN bits of the correct vector element. 2366 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Slidedown, Idx); 2367 2368 // To extract the upper XLEN bits of the vector element, shift the first 2369 // element right by 32 bits and re-extract the lower XLEN bits. 2370 SDValue ThirtyTwoV = 2371 DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, 2372 DAG.getConstant(32, DL, Subtarget.getXLenVT())); 2373 SDValue LShr32 = DAG.getNode(ISD::SRL, DL, VecVT, Slidedown, ThirtyTwoV); 2374 2375 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32, Idx); 2376 2377 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi)); 2378 break; 2379 } 2380 case ISD::INTRINSIC_WO_CHAIN: { 2381 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 2382 switch (IntNo) { 2383 default: 2384 llvm_unreachable( 2385 "Don't know how to custom type legalize this intrinsic!"); 2386 case Intrinsic::riscv_vmv_x_s: { 2387 EVT VT = N->getValueType(0); 2388 assert((VT == MVT::i8 || VT == MVT::i16 || 2389 (Subtarget.is64Bit() && VT == MVT::i32)) && 2390 "Unexpected custom legalisation!"); 2391 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL, 2392 Subtarget.getXLenVT(), N->getOperand(1)); 2393 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract)); 2394 break; 2395 } 2396 } 2397 break; 2398 } 2399 case ISD::VECREDUCE_ADD: 2400 case ISD::VECREDUCE_AND: 2401 case ISD::VECREDUCE_OR: 2402 case ISD::VECREDUCE_XOR: 2403 case ISD::VECREDUCE_SMAX: 2404 case ISD::VECREDUCE_UMAX: 2405 case ISD::VECREDUCE_SMIN: 2406 case ISD::VECREDUCE_UMIN: 2407 // The custom-lowering for these nodes returns a vector whose first element 2408 // is the result of the reduction. Extract its first element and let the 2409 // legalization for EXTRACT_VECTOR_ELT do the rest of the job. 2410 Results.push_back(lowerVECREDUCE(SDValue(N, 0), DAG)); 2411 break; 2412 } 2413 } 2414 2415 // A structure to hold one of the bit-manipulation patterns below. 
Together, a
// SHL and non-SHL pattern may form a bit-manipulation pair on a single source:
//   (or (and (shl x, 1), 0xAAAAAAAA),
//       (and (srl x, 1), 0x55555555))
struct RISCVBitmanipPat {
  SDValue Op;
  unsigned ShAmt;
  bool IsSHL;

  bool formsPairWith(const RISCVBitmanipPat &Other) const {
    return Op == Other.Op && ShAmt == Other.ShAmt && IsSHL != Other.IsSHL;
  }
};

// Matches any of the following bit-manipulation patterns:
//   (and (shl x, 1), (0x55555555 << 1))
//   (and (srl x, 1), 0x55555555)
//   (shl (and x, 0x55555555), 1)
//   (srl (and x, (0x55555555 << 1)), 1)
// where the shift amount and mask may vary thus:
//   [1]  = 0x55555555 / 0xAAAAAAAA
//   [2]  = 0x33333333 / 0xCCCCCCCC
//   [4]  = 0x0F0F0F0F / 0xF0F0F0F0
//   [8]  = 0x00FF00FF / 0xFF00FF00
//   [16] = 0x0000FFFF / 0xFFFF0000
//   [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64)
static Optional<RISCVBitmanipPat> matchRISCVBitmanipPat(SDValue Op) {
  Optional<uint64_t> Mask;
  // Optionally consume a mask around the shift operation.
  if (Op.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op.getOperand(1))) {
    Mask = Op.getConstantOperandVal(1);
    Op = Op.getOperand(0);
  }
  if (Op.getOpcode() != ISD::SHL && Op.getOpcode() != ISD::SRL)
    return None;
  bool IsSHL = Op.getOpcode() == ISD::SHL;

  if (!isa<ConstantSDNode>(Op.getOperand(1)))
    return None;
  auto ShAmt = Op.getConstantOperandVal(1);

  if (!isPowerOf2_64(ShAmt))
    return None;

  // These are the unshifted masks which we use to match bit-manipulation
  // patterns. They may be shifted left in certain circumstances.
  static const uint64_t BitmanipMasks[] = {
      0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
      0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL,
  };

  unsigned MaskIdx = Log2_64(ShAmt);
  if (MaskIdx >= array_lengthof(BitmanipMasks))
    return None;

  auto Src = Op.getOperand(0);

  unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
  auto ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);

  // The expected mask is shifted left when the AND is found around SHL
  // patterns.
  //   ((x >> 1) & 0x55555555)
  //   ((x << 1) & 0xAAAAAAAA)
  bool SHLExpMask = IsSHL;

  if (!Mask) {
    // Sometimes LLVM keeps the mask as an operand of the shift, typically when
    // the mask is all ones: consume that now.
    if (Src.getOpcode() == ISD::AND && isa<ConstantSDNode>(Src.getOperand(1))) {
      Mask = Src.getConstantOperandVal(1);
      Src = Src.getOperand(0);
      // The expected mask is now in fact shifted left for SRL, so reverse the
      // decision.
      //   ((x & 0xAAAAAAAA) >> 1)
      //   ((x & 0x55555555) << 1)
      SHLExpMask = !SHLExpMask;
    } else {
      // Use a default shifted mask of all-ones if there's no AND, truncated
      // down to the expected width. This simplifies the logic later on.
      Mask = maskTrailingOnes<uint64_t>(Width);
      *Mask &= (IsSHL ? *Mask << ShAmt : *Mask >> ShAmt);
    }
  }

  if (SHLExpMask)
    ExpMask <<= ShAmt;

  if (Mask != ExpMask)
    return None;

  return RISCVBitmanipPat{Src, (unsigned)ShAmt, IsSHL};
}
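
// Worked example (illustrative): in
//   (or (and (shl x, 1), 0xAAAAAAAA),
//       (and (srl x, 1), 0x55555555))
// both operands match above with ShAmt == 1 and opposite IsSHL flags, so
// formsPairWith succeeds and the combine below emits (GREVI x, 1), i.e. a swap
// of adjacent bits.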
// Match the following pattern as a GREVI(W) operation
//   (or (BITMANIP_SHL x), (BITMANIP_SRL x))
static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG,
                               const RISCVSubtarget &Subtarget) {
  EVT VT = Op.getValueType();

  if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
    auto LHS = matchRISCVBitmanipPat(Op.getOperand(0));
    auto RHS = matchRISCVBitmanipPat(Op.getOperand(1));
    if (LHS && RHS && LHS->formsPairWith(*RHS)) {
      SDLoc DL(Op);
      return DAG.getNode(
          RISCVISD::GREVI, DL, VT, LHS->Op,
          DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT()));
    }
  }
  return SDValue();
}

// Matches any of the following patterns as a GORCI(W) operation
// 1.  (or (GREVI x, shamt), x) if shamt is a power of 2
// 2.  (or x, (GREVI x, shamt)) if shamt is a power of 2
// 3.  (or (or (BITMANIP_SHL x), x), (BITMANIP_SRL x))
// Note that with the variant of 3.,
//     (or (or (BITMANIP_SHL x), (BITMANIP_SRL x)), x)
// the inner pattern will first be matched as GREVI and then the outer
// pattern will be matched to GORC via the first rule above.
// 4.  (or (rotl/rotr x, bitwidth/2), x)
static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG,
                               const RISCVSubtarget &Subtarget) {
  EVT VT = Op.getValueType();

  if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
    SDLoc DL(Op);
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    auto MatchOROfReverse = [&](SDValue Reverse, SDValue X) {
      if (Reverse.getOpcode() == RISCVISD::GREVI &&
          Reverse.getOperand(0) == X &&
          isPowerOf2_32(Reverse.getConstantOperandVal(1)))
        return DAG.getNode(RISCVISD::GORCI, DL, VT, X, Reverse.getOperand(1));
      // We can also form GORCI from ROTL/ROTR by half the bitwidth.
2551 if ((Reverse.getOpcode() == ISD::ROTL || 2552 Reverse.getOpcode() == ISD::ROTR) && 2553 Reverse.getOperand(0) == X && 2554 isa<ConstantSDNode>(Reverse.getOperand(1))) { 2555 uint64_t RotAmt = Reverse.getConstantOperandVal(1); 2556 if (RotAmt == (VT.getSizeInBits() / 2)) 2557 return DAG.getNode( 2558 RISCVISD::GORCI, DL, VT, X, 2559 DAG.getTargetConstant(RotAmt, DL, Subtarget.getXLenVT())); 2560 } 2561 return SDValue(); 2562 }; 2563 2564 // Check for either commutable permutation of (or (GREVI x, shamt), x) 2565 if (SDValue V = MatchOROfReverse(Op0, Op1)) 2566 return V; 2567 if (SDValue V = MatchOROfReverse(Op1, Op0)) 2568 return V; 2569 2570 // OR is commutable so canonicalize its OR operand to the left 2571 if (Op0.getOpcode() != ISD::OR && Op1.getOpcode() == ISD::OR) 2572 std::swap(Op0, Op1); 2573 if (Op0.getOpcode() != ISD::OR) 2574 return SDValue(); 2575 SDValue OrOp0 = Op0.getOperand(0); 2576 SDValue OrOp1 = Op0.getOperand(1); 2577 auto LHS = matchRISCVBitmanipPat(OrOp0); 2578 // OR is commutable so swap the operands and try again: x might have been 2579 // on the left 2580 if (!LHS) { 2581 std::swap(OrOp0, OrOp1); 2582 LHS = matchRISCVBitmanipPat(OrOp0); 2583 } 2584 auto RHS = matchRISCVBitmanipPat(Op1); 2585 if (LHS && RHS && LHS->formsPairWith(*RHS) && LHS->Op == OrOp1) { 2586 return DAG.getNode( 2587 RISCVISD::GORCI, DL, VT, LHS->Op, 2588 DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT())); 2589 } 2590 } 2591 return SDValue(); 2592 } 2593 2594 // Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is 2595 // non-zero, and to x when it is. Any repeated GREVI stage undoes itself. 2596 // Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). Repeated stage does 2597 // not undo itself, but they are redundant. 2598 static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) { 2599 unsigned ShAmt1 = N->getConstantOperandVal(1); 2600 SDValue Src = N->getOperand(0); 2601 2602 if (Src.getOpcode() != N->getOpcode()) 2603 return SDValue(); 2604 2605 unsigned ShAmt2 = Src.getConstantOperandVal(1); 2606 Src = Src.getOperand(0); 2607 2608 unsigned CombinedShAmt; 2609 if (N->getOpcode() == RISCVISD::GORCI || N->getOpcode() == RISCVISD::GORCIW) 2610 CombinedShAmt = ShAmt1 | ShAmt2; 2611 else 2612 CombinedShAmt = ShAmt1 ^ ShAmt2; 2613 2614 if (CombinedShAmt == 0) 2615 return Src; 2616 2617 SDLoc DL(N); 2618 return DAG.getNode(N->getOpcode(), DL, N->getValueType(0), Src, 2619 DAG.getTargetConstant(CombinedShAmt, DL, 2620 N->getOperand(1).getValueType())); 2621 } 2622 2623 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, 2624 DAGCombinerInfo &DCI) const { 2625 SelectionDAG &DAG = DCI.DAG; 2626 2627 switch (N->getOpcode()) { 2628 default: 2629 break; 2630 case RISCVISD::SplitF64: { 2631 SDValue Op0 = N->getOperand(0); 2632 // If the input to SplitF64 is just BuildPairF64 then the operation is 2633 // redundant. Instead, use BuildPairF64's operands directly. 2634 if (Op0->getOpcode() == RISCVISD::BuildPairF64) 2635 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1)); 2636 2637 SDLoc DL(N); 2638 2639 // It's cheaper to materialise two 32-bit integers than to load a double 2640 // from the constant pool and transfer it to integer registers through the 2641 // stack. 
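    // For example (illustrative): the double 1.0 bitcasts to
    // 0x3FF0000000000000, so the pair built below is Lo = 0x00000000 and
    // Hi = 0x3FF00000, each cheap to materialise in a GPR.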
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
      APInt V = C->getValueAPF().bitcastToAPInt();
      SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
      SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
      return DCI.CombineTo(N, Lo, Hi);
    }

    // This is a target-specific version of a DAGCombine performed in
    // DAGCombiner::visitBITCAST. It performs the equivalent of:
    // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
    // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
    if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
        !Op0.getNode()->hasOneUse())
      break;
    SDValue NewSplitF64 =
        DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
                    Op0.getOperand(0));
    SDValue Lo = NewSplitF64.getValue(0);
    SDValue Hi = NewSplitF64.getValue(1);
    APInt SignBit = APInt::getSignMask(32);
    if (Op0.getOpcode() == ISD::FNEG) {
      SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
                                  DAG.getConstant(SignBit, DL, MVT::i32));
      return DCI.CombineTo(N, Lo, NewHi);
    }
    assert(Op0.getOpcode() == ISD::FABS);
    SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
                                DAG.getConstant(~SignBit, DL, MVT::i32));
    return DCI.CombineTo(N, Lo, NewHi);
  }
  case RISCVISD::SLLW:
  case RISCVISD::SRAW:
  case RISCVISD::SRLW:
  case RISCVISD::ROLW:
  case RISCVISD::RORW: {
    // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);
    APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
    APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
    if (SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI) ||
        SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)) {
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }
    break;
  }
  case RISCVISD::FSL:
  case RISCVISD::FSR: {
    // Only the lower log2(Bitwidth)+1 bits of the shift amount are read.
    SDValue ShAmt = N->getOperand(2);
    unsigned BitWidth = ShAmt.getValueSizeInBits();
    assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
    APInt ShAmtMask(BitWidth, (BitWidth * 2) - 1);
    if (SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }
    break;
  }
  case RISCVISD::FSLW:
  case RISCVISD::FSRW: {
    // Only the lower 32 bits of the values and the lower 6 bits of the shift
    // amount are read.
2708 SDValue Op0 = N->getOperand(0); 2709 SDValue Op1 = N->getOperand(1); 2710 SDValue ShAmt = N->getOperand(2); 2711 APInt OpMask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32); 2712 APInt ShAmtMask = APInt::getLowBitsSet(ShAmt.getValueSizeInBits(), 6); 2713 if (SimplifyDemandedBits(Op0, OpMask, DCI) || 2714 SimplifyDemandedBits(Op1, OpMask, DCI) || 2715 SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) { 2716 if (N->getOpcode() != ISD::DELETED_NODE) 2717 DCI.AddToWorklist(N); 2718 return SDValue(N, 0); 2719 } 2720 break; 2721 } 2722 case RISCVISD::GREVIW: 2723 case RISCVISD::GORCIW: { 2724 // Only the lower 32 bits of the first operand are read 2725 SDValue Op0 = N->getOperand(0); 2726 APInt Mask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32); 2727 if (SimplifyDemandedBits(Op0, Mask, DCI)) { 2728 if (N->getOpcode() != ISD::DELETED_NODE) 2729 DCI.AddToWorklist(N); 2730 return SDValue(N, 0); 2731 } 2732 2733 return combineGREVI_GORCI(N, DCI.DAG); 2734 } 2735 case RISCVISD::FMV_X_ANYEXTW_RV64: { 2736 SDLoc DL(N); 2737 SDValue Op0 = N->getOperand(0); 2738 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the 2739 // conversion is unnecessary and can be replaced with an ANY_EXTEND 2740 // of the FMV_W_X_RV64 operand. 2741 if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) { 2742 assert(Op0.getOperand(0).getValueType() == MVT::i64 && 2743 "Unexpected value type!"); 2744 return Op0.getOperand(0); 2745 } 2746 2747 // This is a target-specific version of a DAGCombine performed in 2748 // DAGCombiner::visitBITCAST. It performs the equivalent of: 2749 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 2750 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 2751 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || 2752 !Op0.getNode()->hasOneUse()) 2753 break; 2754 SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, 2755 Op0.getOperand(0)); 2756 APInt SignBit = APInt::getSignMask(32).sext(64); 2757 if (Op0.getOpcode() == ISD::FNEG) 2758 return DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV, 2759 DAG.getConstant(SignBit, DL, MVT::i64)); 2760 2761 assert(Op0.getOpcode() == ISD::FABS); 2762 return DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV, 2763 DAG.getConstant(~SignBit, DL, MVT::i64)); 2764 } 2765 case RISCVISD::GREVI: 2766 case RISCVISD::GORCI: 2767 return combineGREVI_GORCI(N, DCI.DAG); 2768 case ISD::OR: 2769 if (auto GREV = combineORToGREV(SDValue(N, 0), DCI.DAG, Subtarget)) 2770 return GREV; 2771 if (auto GORC = combineORToGORC(SDValue(N, 0), DCI.DAG, Subtarget)) 2772 return GORC; 2773 break; 2774 case RISCVISD::SELECT_CC: { 2775 // Transform 2776 // (select_cc (xor X, 1), 0, setne, trueV, falseV) -> 2777 // (select_cc X, 0, seteq, trueV, falseV) if we can prove X is 0/1. 2778 // This can occur when legalizing some floating point comparisons. 
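    // For instance (illustrative), an i32 une comparison of f32 values may be
    // legalized as the inversion (xor with 1) of an feq-based setcc whose
    // result is known to be 0 or 1, producing exactly the shape handled here.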
2779 SDValue LHS = N->getOperand(0); 2780 SDValue RHS = N->getOperand(1); 2781 auto CCVal = static_cast<ISD::CondCode>(N->getConstantOperandVal(2)); 2782 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1); 2783 if (ISD::isIntEqualitySetCC(CCVal) && isNullConstant(RHS) && 2784 LHS.getOpcode() == ISD::XOR && isOneConstant(LHS.getOperand(1)) && 2785 DAG.MaskedValueIsZero(LHS.getOperand(0), Mask)) { 2786 SDLoc DL(N); 2787 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); 2788 SDValue TargetCC = DAG.getConstant(CCVal, DL, Subtarget.getXLenVT()); 2789 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0), 2790 {LHS.getOperand(0), RHS, TargetCC, N->getOperand(3), 2791 N->getOperand(4)}); 2792 } 2793 break; 2794 } 2795 case ISD::SETCC: { 2796 // (setcc X, 1, setne) -> (setcc X, 0, seteq) if we can prove X is 0/1. 2797 // Comparing with 0 may allow us to fold into bnez/beqz. 2798 SDValue LHS = N->getOperand(0); 2799 SDValue RHS = N->getOperand(1); 2800 if (LHS.getValueType().isScalableVector()) 2801 break; 2802 auto CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); 2803 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1); 2804 if (isOneConstant(RHS) && ISD::isIntEqualitySetCC(CC) && 2805 DAG.MaskedValueIsZero(LHS, Mask)) { 2806 SDLoc DL(N); 2807 SDValue Zero = DAG.getConstant(0, DL, LHS.getValueType()); 2808 CC = ISD::getSetCCInverse(CC, LHS.getValueType()); 2809 return DAG.getSetCC(DL, N->getValueType(0), LHS, Zero, CC); 2810 } 2811 break; 2812 } 2813 } 2814 2815 return SDValue(); 2816 } 2817 2818 bool RISCVTargetLowering::isDesirableToCommuteWithShift( 2819 const SDNode *N, CombineLevel Level) const { 2820 // The following folds are only desirable if `(OP _, c1 << c2)` can be 2821 // materialised in fewer instructions than `(OP _, c1)`: 2822 // 2823 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) 2824 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2) 2825 SDValue N0 = N->getOperand(0); 2826 EVT Ty = N0.getValueType(); 2827 if (Ty.isScalarInteger() && 2828 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) { 2829 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 2830 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)); 2831 if (C1 && C2) { 2832 const APInt &C1Int = C1->getAPIntValue(); 2833 APInt ShiftedC1Int = C1Int << C2->getAPIntValue(); 2834 2835 // We can materialise `c1 << c2` into an add immediate, so it's "free", 2836 // and the combine should happen, to potentially allow further combines 2837 // later. 2838 if (ShiftedC1Int.getMinSignedBits() <= 64 && 2839 isLegalAddImmediate(ShiftedC1Int.getSExtValue())) 2840 return true; 2841 2842 // We can materialise `c1` in an add immediate, so it's "free", and the 2843 // combine should be prevented. 2844 if (C1Int.getMinSignedBits() <= 64 && 2845 isLegalAddImmediate(C1Int.getSExtValue())) 2846 return false; 2847 2848 // Neither constant will fit into an immediate, so find materialisation 2849 // costs. 2850 int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), 2851 Subtarget.is64Bit()); 2852 int ShiftedC1Cost = RISCVMatInt::getIntMatCost( 2853 ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit()); 2854 2855 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the 2856 // combine should be prevented. 
2857 if (C1Cost < ShiftedC1Cost) 2858 return false; 2859 } 2860 } 2861 return true; 2862 } 2863 2864 bool RISCVTargetLowering::targetShrinkDemandedConstant( 2865 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, 2866 TargetLoweringOpt &TLO) const { 2867 // Delay this optimization as late as possible. 2868 if (!TLO.LegalOps) 2869 return false; 2870 2871 EVT VT = Op.getValueType(); 2872 if (VT.isVector()) 2873 return false; 2874 2875 // Only handle AND for now. 2876 if (Op.getOpcode() != ISD::AND) 2877 return false; 2878 2879 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); 2880 if (!C) 2881 return false; 2882 2883 const APInt &Mask = C->getAPIntValue(); 2884 2885 // Clear all non-demanded bits initially. 2886 APInt ShrunkMask = Mask & DemandedBits; 2887 2888 // If the shrunk mask fits in sign extended 12 bits, let the target 2889 // independent code apply it. 2890 if (ShrunkMask.isSignedIntN(12)) 2891 return false; 2892 2893 // Try to make a smaller immediate by setting undemanded bits. 2894 2895 // We need to be able to make a negative number through a combination of mask 2896 // and undemanded bits. 2897 APInt ExpandedMask = Mask | ~DemandedBits; 2898 if (!ExpandedMask.isNegative()) 2899 return false; 2900 2901 // What is the fewest number of bits we need to represent the negative number. 2902 unsigned MinSignedBits = ExpandedMask.getMinSignedBits(); 2903 2904 // Try to make a 12 bit negative immediate. If that fails try to make a 32 2905 // bit negative immediate unless the shrunk immediate already fits in 32 bits. 2906 APInt NewMask = ShrunkMask; 2907 if (MinSignedBits <= 12) 2908 NewMask.setBitsFrom(11); 2909 else if (MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32)) 2910 NewMask.setBitsFrom(31); 2911 else 2912 return false; 2913 2914 // Sanity check that our new mask is a subset of the demanded mask. 2915 assert(NewMask.isSubsetOf(ExpandedMask)); 2916 2917 // If we aren't changing the mask, just return true to keep it and prevent 2918 // the caller from optimizing. 2919 if (NewMask == Mask) 2920 return true; 2921 2922 // Replace the constant with the new mask. 2923 SDLoc DL(Op); 2924 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT); 2925 SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC); 2926 return TLO.CombineTo(Op, NewOp); 2927 } 2928 2929 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, 2930 KnownBits &Known, 2931 const APInt &DemandedElts, 2932 const SelectionDAG &DAG, 2933 unsigned Depth) const { 2934 unsigned BitWidth = Known.getBitWidth(); 2935 unsigned Opc = Op.getOpcode(); 2936 assert((Opc >= ISD::BUILTIN_OP_END || 2937 Opc == ISD::INTRINSIC_WO_CHAIN || 2938 Opc == ISD::INTRINSIC_W_CHAIN || 2939 Opc == ISD::INTRINSIC_VOID) && 2940 "Should use MaskedValueIsZero if you don't know whether Op" 2941 " is a target node!"); 2942 2943 Known.resetAll(); 2944 switch (Opc) { 2945 default: break; 2946 case RISCVISD::REMUW: { 2947 KnownBits Known2; 2948 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 2949 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 2950 // We only care about the lower 32 bits. 2951 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32)); 2952 // Restore the original width by sign extending. 
2953 Known = Known.sext(BitWidth); 2954 break; 2955 } 2956 case RISCVISD::DIVUW: { 2957 KnownBits Known2; 2958 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 2959 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 2960 // We only care about the lower 32 bits. 2961 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32)); 2962 // Restore the original width by sign extending. 2963 Known = Known.sext(BitWidth); 2964 break; 2965 } 2966 case RISCVISD::READ_VLENB: 2967 // We assume VLENB is at least 8 bytes. 2968 // FIXME: The 1.0 draft spec defines minimum VLEN as 128 bits. 2969 Known.Zero.setLowBits(3); 2970 break; 2971 } 2972 } 2973 2974 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( 2975 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, 2976 unsigned Depth) const { 2977 switch (Op.getOpcode()) { 2978 default: 2979 break; 2980 case RISCVISD::SLLW: 2981 case RISCVISD::SRAW: 2982 case RISCVISD::SRLW: 2983 case RISCVISD::DIVW: 2984 case RISCVISD::DIVUW: 2985 case RISCVISD::REMUW: 2986 case RISCVISD::ROLW: 2987 case RISCVISD::RORW: 2988 case RISCVISD::GREVIW: 2989 case RISCVISD::GORCIW: 2990 case RISCVISD::FSLW: 2991 case RISCVISD::FSRW: 2992 // TODO: As the result is sign-extended, this is conservatively correct. A 2993 // more precise answer could be calculated for SRAW depending on known 2994 // bits in the shift amount. 2995 return 33; 2996 case RISCVISD::VMV_X_S: 2997 // The number of sign bits of the scalar result is computed by obtaining the 2998 // element type of the input vector operand, subtracting its width from the 2999 // XLEN, and then adding one (sign bit within the element type). If the 3000 // element type is wider than XLen, the least-significant XLEN bits are 3001 // taken. 3002 if (Op.getOperand(0).getScalarValueSizeInBits() > Subtarget.getXLen()) 3003 return 1; 3004 return Subtarget.getXLen() - Op.getOperand(0).getScalarValueSizeInBits() + 1; 3005 } 3006 3007 return 1; 3008 } 3009 3010 static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI, 3011 MachineBasicBlock *BB) { 3012 assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction"); 3013 3014 // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves. 3015 // Should the count have wrapped while it was being read, we need to try 3016 // again. 3017 // ... 3018 // read: 3019 // rdcycleh x3 # load high word of cycle 3020 // rdcycle x2 # load low word of cycle 3021 // rdcycleh x4 # load high word of cycle 3022 // bne x3, x4, read # check if high word reads match, otherwise try again 3023 // ... 3024 3025 MachineFunction &MF = *BB->getParent(); 3026 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 3027 MachineFunction::iterator It = ++BB->getIterator(); 3028 3029 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB); 3030 MF.insert(It, LoopMBB); 3031 3032 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB); 3033 MF.insert(It, DoneMBB); 3034 3035 // Transfer the remainder of BB and its successor edges to DoneMBB. 
3036 DoneMBB->splice(DoneMBB->begin(), BB, 3037 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 3038 DoneMBB->transferSuccessorsAndUpdatePHIs(BB); 3039 3040 BB->addSuccessor(LoopMBB); 3041 3042 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 3043 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 3044 Register LoReg = MI.getOperand(0).getReg(); 3045 Register HiReg = MI.getOperand(1).getReg(); 3046 DebugLoc DL = MI.getDebugLoc(); 3047 3048 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); 3049 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg) 3050 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) 3051 .addReg(RISCV::X0); 3052 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg) 3053 .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding) 3054 .addReg(RISCV::X0); 3055 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg) 3056 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) 3057 .addReg(RISCV::X0); 3058 3059 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE)) 3060 .addReg(HiReg) 3061 .addReg(ReadAgainReg) 3062 .addMBB(LoopMBB); 3063 3064 LoopMBB->addSuccessor(LoopMBB); 3065 LoopMBB->addSuccessor(DoneMBB); 3066 3067 MI.eraseFromParent(); 3068 3069 return DoneMBB; 3070 } 3071 3072 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI, 3073 MachineBasicBlock *BB) { 3074 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction"); 3075 3076 MachineFunction &MF = *BB->getParent(); 3077 DebugLoc DL = MI.getDebugLoc(); 3078 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 3079 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 3080 Register LoReg = MI.getOperand(0).getReg(); 3081 Register HiReg = MI.getOperand(1).getReg(); 3082 Register SrcReg = MI.getOperand(2).getReg(); 3083 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass; 3084 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); 3085 3086 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC, 3087 RI); 3088 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 3089 MachineMemOperand *MMOLo = 3090 MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8)); 3091 MachineMemOperand *MMOHi = MF.getMachineMemOperand( 3092 MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8)); 3093 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg) 3094 .addFrameIndex(FI) 3095 .addImm(0) 3096 .addMemOperand(MMOLo); 3097 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg) 3098 .addFrameIndex(FI) 3099 .addImm(4) 3100 .addMemOperand(MMOHi); 3101 MI.eraseFromParent(); // The pseudo instruction is gone now. 
3102 return BB; 3103 } 3104 3105 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI, 3106 MachineBasicBlock *BB) { 3107 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo && 3108 "Unexpected instruction"); 3109 3110 MachineFunction &MF = *BB->getParent(); 3111 DebugLoc DL = MI.getDebugLoc(); 3112 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 3113 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 3114 Register DstReg = MI.getOperand(0).getReg(); 3115 Register LoReg = MI.getOperand(1).getReg(); 3116 Register HiReg = MI.getOperand(2).getReg(); 3117 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass; 3118 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); 3119 3120 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 3121 MachineMemOperand *MMOLo = 3122 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8)); 3123 MachineMemOperand *MMOHi = MF.getMachineMemOperand( 3124 MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8)); 3125 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 3126 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill())) 3127 .addFrameIndex(FI) 3128 .addImm(0) 3129 .addMemOperand(MMOLo); 3130 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 3131 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill())) 3132 .addFrameIndex(FI) 3133 .addImm(4) 3134 .addMemOperand(MMOHi); 3135 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI); 3136 MI.eraseFromParent(); // The pseudo instruction is gone now. 3137 return BB; 3138 } 3139 3140 static bool isSelectPseudo(MachineInstr &MI) { 3141 switch (MI.getOpcode()) { 3142 default: 3143 return false; 3144 case RISCV::Select_GPR_Using_CC_GPR: 3145 case RISCV::Select_FPR16_Using_CC_GPR: 3146 case RISCV::Select_FPR32_Using_CC_GPR: 3147 case RISCV::Select_FPR64_Using_CC_GPR: 3148 return true; 3149 } 3150 } 3151 3152 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI, 3153 MachineBasicBlock *BB) { 3154 // To "insert" Select_* instructions, we actually have to insert the triangle 3155 // control-flow pattern. The incoming instructions know the destination vreg 3156 // to set, the condition code register to branch on, the true/false values to 3157 // select between, and the condcode to use to select the appropriate branch. 3158 // 3159 // We produce the following control flow: 3160 // HeadMBB 3161 // | \ 3162 // | IfFalseMBB 3163 // | / 3164 // TailMBB 3165 // 3166 // When we find a sequence of selects we attempt to optimize their emission 3167 // by sharing the control flow. Currently we only handle cases where we have 3168 // multiple selects with the exact same condition (same LHS, RHS and CC). 3169 // The selects may be interleaved with other instructions if the other 3170 // instructions meet some requirements we deem safe: 3171 // - They are debug instructions. Otherwise, 3172 // - They do not have side-effects, do not access memory and their inputs do 3173 // not depend on the results of the select pseudo-instructions. 3174 // The TrueV/FalseV operands of the selects cannot depend on the result of 3175 // previous selects in the sequence. 3176 // These conditions could be further relaxed. See the X86 target for a 3177 // related approach and more information. 
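// As an illustrative sketch only (the virtual register names are hypothetical
// and the operand order matches the pseudo as used below: dst, lhs, rhs, cc,
// trueval, falseval), a sequence such as
//   %a = Select_GPR_Using_CC_GPR %x, %y, cc, %t1, %f1
//   %b = Select_GPR_Using_CC_GPR %x, %y, cc, %t2, %f2
// shares a single conditional branch in HeadMBB and becomes two PHIs in
// TailMBB, provided %t2 and %f2 do not depend on %a.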
3178 Register LHS = MI.getOperand(1).getReg(); 3179 Register RHS = MI.getOperand(2).getReg(); 3180 auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm()); 3181 3182 SmallVector<MachineInstr *, 4> SelectDebugValues; 3183 SmallSet<Register, 4> SelectDests; 3184 SelectDests.insert(MI.getOperand(0).getReg()); 3185 3186 MachineInstr *LastSelectPseudo = &MI; 3187 3188 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI); 3189 SequenceMBBI != E; ++SequenceMBBI) { 3190 if (SequenceMBBI->isDebugInstr()) 3191 continue; 3192 else if (isSelectPseudo(*SequenceMBBI)) { 3193 if (SequenceMBBI->getOperand(1).getReg() != LHS || 3194 SequenceMBBI->getOperand(2).getReg() != RHS || 3195 SequenceMBBI->getOperand(3).getImm() != CC || 3196 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) || 3197 SelectDests.count(SequenceMBBI->getOperand(5).getReg())) 3198 break; 3199 LastSelectPseudo = &*SequenceMBBI; 3200 SequenceMBBI->collectDebugValues(SelectDebugValues); 3201 SelectDests.insert(SequenceMBBI->getOperand(0).getReg()); 3202 } else { 3203 if (SequenceMBBI->hasUnmodeledSideEffects() || 3204 SequenceMBBI->mayLoadOrStore()) 3205 break; 3206 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) { 3207 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg()); 3208 })) 3209 break; 3210 } 3211 } 3212 3213 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); 3214 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 3215 DebugLoc DL = MI.getDebugLoc(); 3216 MachineFunction::iterator I = ++BB->getIterator(); 3217 3218 MachineBasicBlock *HeadMBB = BB; 3219 MachineFunction *F = BB->getParent(); 3220 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB); 3221 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB); 3222 3223 F->insert(I, IfFalseMBB); 3224 F->insert(I, TailMBB); 3225 3226 // Transfer debug instructions associated with the selects to TailMBB. 3227 for (MachineInstr *DebugInstr : SelectDebugValues) { 3228 TailMBB->push_back(DebugInstr->removeFromParent()); 3229 } 3230 3231 // Move all instructions after the sequence to TailMBB. 3232 TailMBB->splice(TailMBB->end(), HeadMBB, 3233 std::next(LastSelectPseudo->getIterator()), HeadMBB->end()); 3234 // Update machine-CFG edges by transferring all successors of the current 3235 // block to the new block which will contain the Phi nodes for the selects. 3236 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB); 3237 // Set the successors for HeadMBB. 3238 HeadMBB->addSuccessor(IfFalseMBB); 3239 HeadMBB->addSuccessor(TailMBB); 3240 3241 // Insert appropriate branch. 3242 unsigned Opcode = getBranchOpcodeForIntCondCode(CC); 3243 3244 BuildMI(HeadMBB, DL, TII.get(Opcode)) 3245 .addReg(LHS) 3246 .addReg(RHS) 3247 .addMBB(TailMBB); 3248 3249 // IfFalseMBB just falls through to TailMBB. 3250 IfFalseMBB->addSuccessor(TailMBB); 3251 3252 // Create PHIs for all of the select pseudo-instructions. 
3253 auto SelectMBBI = MI.getIterator(); 3254 auto SelectEnd = std::next(LastSelectPseudo->getIterator()); 3255 auto InsertionPoint = TailMBB->begin(); 3256 while (SelectMBBI != SelectEnd) { 3257 auto Next = std::next(SelectMBBI); 3258 if (isSelectPseudo(*SelectMBBI)) { 3259 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ] 3260 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(), 3261 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg()) 3262 .addReg(SelectMBBI->getOperand(4).getReg()) 3263 .addMBB(HeadMBB) 3264 .addReg(SelectMBBI->getOperand(5).getReg()) 3265 .addMBB(IfFalseMBB); 3266 SelectMBBI->eraseFromParent(); 3267 } 3268 SelectMBBI = Next; 3269 } 3270 3271 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs); 3272 return TailMBB; 3273 } 3274 3275 static MachineBasicBlock *addVSetVL(MachineInstr &MI, MachineBasicBlock *BB, 3276 int VLIndex, unsigned SEWIndex, 3277 RISCVVLMUL VLMul, bool ForceTailAgnostic) { 3278 MachineFunction &MF = *BB->getParent(); 3279 DebugLoc DL = MI.getDebugLoc(); 3280 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 3281 3282 unsigned SEW = MI.getOperand(SEWIndex).getImm(); 3283 assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW"); 3284 RISCVVSEW ElementWidth = static_cast<RISCVVSEW>(Log2_32(SEW / 8)); 3285 3286 MachineRegisterInfo &MRI = MF.getRegInfo(); 3287 3288 // VL and VTYPE are alive here. 3289 MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI)); 3290 3291 if (VLIndex >= 0) { 3292 // Set VL (rs1 != X0). 3293 Register DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); 3294 MIB.addReg(DestReg, RegState::Define | RegState::Dead) 3295 .addReg(MI.getOperand(VLIndex).getReg()); 3296 } else 3297 // With no VL operator in the pseudo, do not modify VL (rd = X0, rs1 = X0). 3298 MIB.addReg(RISCV::X0, RegState::Define | RegState::Dead) 3299 .addReg(RISCV::X0, RegState::Kill); 3300 3301 // Default to tail agnostic unless the destination is tied to a source. In 3302 // that case the user would have some control over the tail values. The tail 3303 // policy is also ignored on instructions that only update element 0 like 3304 // vmv.s.x or reductions so use agnostic there to match the common case. 3305 // FIXME: This is conservatively correct, but we might want to detect that 3306 // the input is undefined. 3307 bool TailAgnostic = true; 3308 unsigned UseOpIdx; 3309 if (!ForceTailAgnostic && MI.isRegTiedToUseOperand(0, &UseOpIdx)) { 3310 TailAgnostic = false; 3311 // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic. 3312 const MachineOperand &UseMO = MI.getOperand(UseOpIdx); 3313 MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg()); 3314 if (UseMI && UseMI->isImplicitDef()) 3315 TailAgnostic = true; 3316 } 3317 3318 // For simplicity we reuse the vtype representation here. 
3319 MIB.addImm(RISCVVType::encodeVTYPE(VLMul, ElementWidth, 3320 /*TailAgnostic*/ TailAgnostic, 3321 /*MaskAgnostic*/ false)); 3322 3323 // Remove (now) redundant operands from the pseudo. 3324 MI.getOperand(SEWIndex).setImm(-1); 3325 if (VLIndex >= 0) { 3326 MI.getOperand(VLIndex).setReg(RISCV::NoRegister); 3327 MI.getOperand(VLIndex).setIsKill(false); 3328 } 3329 3330 return BB; 3331 } 3332 3333 MachineBasicBlock * 3334 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, 3335 MachineBasicBlock *BB) const { 3336 uint64_t TSFlags = MI.getDesc().TSFlags; 3337 3338 if (TSFlags & RISCVII::HasSEWOpMask) { 3339 unsigned NumOperands = MI.getNumExplicitOperands(); 3340 int VLIndex = (TSFlags & RISCVII::HasVLOpMask) ? NumOperands - 2 : -1; 3341 unsigned SEWIndex = NumOperands - 1; 3342 bool ForceTailAgnostic = TSFlags & RISCVII::ForceTailAgnosticMask; 3343 3344 RISCVVLMUL VLMul = static_cast<RISCVVLMUL>((TSFlags & RISCVII::VLMulMask) >> 3345 RISCVII::VLMulShift); 3346 return addVSetVL(MI, BB, VLIndex, SEWIndex, VLMul, ForceTailAgnostic); 3347 } 3348 3349 switch (MI.getOpcode()) { 3350 default: 3351 llvm_unreachable("Unexpected instr type to insert"); 3352 case RISCV::ReadCycleWide: 3353 assert(!Subtarget.is64Bit() && 3354 "ReadCycleWide is only to be used on riscv32"); 3355 return emitReadCycleWidePseudo(MI, BB); 3356 case RISCV::Select_GPR_Using_CC_GPR: 3357 case RISCV::Select_FPR16_Using_CC_GPR: 3358 case RISCV::Select_FPR32_Using_CC_GPR: 3359 case RISCV::Select_FPR64_Using_CC_GPR: 3360 return emitSelectPseudo(MI, BB); 3361 case RISCV::BuildPairF64Pseudo: 3362 return emitBuildPairF64Pseudo(MI, BB); 3363 case RISCV::SplitF64Pseudo: 3364 return emitSplitF64Pseudo(MI, BB); 3365 } 3366 } 3367 3368 // Calling Convention Implementation. 3369 // The expectations for frontend ABI lowering vary from target to target. 3370 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI 3371 // details, but this is a longer-term goal. For now, we simply try to keep the 3372 // role of the frontend as simple and well-defined as possible. The rules can 3373 // be summarised as: 3374 // * Never split up large scalar arguments. We handle them here. 3375 // * If a hardfloat calling convention is being used, and the struct may be 3376 // passed in a pair of registers (fp+fp, int+fp), and both registers are 3377 // available, then pass as two separate arguments. If either the GPRs or FPRs 3378 // are exhausted, then pass according to the rule below. 3379 // * If a struct could never be passed in registers or directly in a stack 3380 // slot (as it is larger than 2*XLEN and the floating point rules don't 3381 // apply), then pass it using a pointer with the byval attribute. 3382 // * If a struct is 2*XLEN or smaller, then coerce to either a two-element 3383 // word-sized array or a 2*XLEN scalar (depending on alignment). 3384 // * The frontend can determine whether a struct is returned by reference or 3385 // not based on its size and fields. If it will be returned by reference, the 3386 // frontend must modify the prototype so a pointer with the sret annotation is 3387 // passed as the first argument. This is not necessary for large scalar 3388 // returns. 3389 // * Struct return values and varargs should be coerced to structs containing 3390 // register-size fields in the same situations they would be for fixed 3391 // arguments.
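// As a rough illustration of these rules (the struct names below are
// hypothetical), a frontend targeting RV32 would typically lower:
//   struct TwoInts { int32_t a; int32_t b; };  // 8 bytes, i.e. 2*XLEN
//     -> coerced to [2 x i32] and passed in up to two GPRs or on the stack.
//   struct FourInts { int32_t a[4]; };         // 16 bytes, larger than 2*XLEN
//     -> passed indirectly via a pointer with the byval attribute.
//   struct FltInt { float f; int32_t i; };     // ilp32f, registers available
//     -> passed as two separate arguments, one in an FPR and one in a GPR.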
3392 3393 static const MCPhysReg ArgGPRs[] = { 3394 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, 3395 RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17 3396 }; 3397 static const MCPhysReg ArgFPR16s[] = { 3398 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, 3399 RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H 3400 }; 3401 static const MCPhysReg ArgFPR32s[] = { 3402 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, 3403 RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F 3404 }; 3405 static const MCPhysReg ArgFPR64s[] = { 3406 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, 3407 RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D 3408 }; 3409 // This is an interim calling convention and it may be changed in the future. 3410 static const MCPhysReg ArgVRs[] = { 3411 RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13, 3412 RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19, 3413 RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23}; 3414 static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2, 3415 RISCV::V14M2, RISCV::V16M2, RISCV::V18M2, 3416 RISCV::V20M2, RISCV::V22M2}; 3417 static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4, 3418 RISCV::V20M4}; 3419 static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8}; 3420 3421 // Pass a 2*XLEN argument that has been split into two XLEN values through 3422 // registers or the stack as necessary. 3423 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, 3424 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, 3425 MVT ValVT2, MVT LocVT2, 3426 ISD::ArgFlagsTy ArgFlags2) { 3427 unsigned XLenInBytes = XLen / 8; 3428 if (Register Reg = State.AllocateReg(ArgGPRs)) { 3429 // At least one half can be passed via register. 3430 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg, 3431 VA1.getLocVT(), CCValAssign::Full)); 3432 } else { 3433 // Both halves must be passed on the stack, with proper alignment. 3434 Align StackAlign = 3435 std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign()); 3436 State.addLoc( 3437 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(), 3438 State.AllocateStack(XLenInBytes, StackAlign), 3439 VA1.getLocVT(), CCValAssign::Full)); 3440 State.addLoc(CCValAssign::getMem( 3441 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)), 3442 LocVT2, CCValAssign::Full)); 3443 return false; 3444 } 3445 3446 if (Register Reg = State.AllocateReg(ArgGPRs)) { 3447 // The second half can also be passed via register. 3448 State.addLoc( 3449 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full)); 3450 } else { 3451 // The second half is passed via the stack, without additional alignment. 3452 State.addLoc(CCValAssign::getMem( 3453 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)), 3454 LocVT2, CCValAssign::Full)); 3455 } 3456 3457 return false; 3458 } 3459 3460 // Implements the RISC-V calling convention. Returns true upon failure. 3461 static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, 3462 MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, 3463 ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, 3464 bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, 3465 Optional<unsigned> FirstMaskArgument) { 3466 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits(); 3467 assert(XLen == 32 || XLen == 64); 3468 MVT XLenVT = XLen == 32 ? 
MVT::i32 : MVT::i64; 3469 3470 // Any return value split in to more than two values can't be returned 3471 // directly. 3472 if (IsRet && ValNo > 1) 3473 return true; 3474 3475 // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a 3476 // variadic argument, or if no F16/F32 argument registers are available. 3477 bool UseGPRForF16_F32 = true; 3478 // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a 3479 // variadic argument, or if no F64 argument registers are available. 3480 bool UseGPRForF64 = true; 3481 3482 switch (ABI) { 3483 default: 3484 llvm_unreachable("Unexpected ABI"); 3485 case RISCVABI::ABI_ILP32: 3486 case RISCVABI::ABI_LP64: 3487 break; 3488 case RISCVABI::ABI_ILP32F: 3489 case RISCVABI::ABI_LP64F: 3490 UseGPRForF16_F32 = !IsFixed; 3491 break; 3492 case RISCVABI::ABI_ILP32D: 3493 case RISCVABI::ABI_LP64D: 3494 UseGPRForF16_F32 = !IsFixed; 3495 UseGPRForF64 = !IsFixed; 3496 break; 3497 } 3498 3499 // FPR16, FPR32, and FPR64 alias each other. 3500 if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) { 3501 UseGPRForF16_F32 = true; 3502 UseGPRForF64 = true; 3503 } 3504 3505 // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and 3506 // similar local variables rather than directly checking against the target 3507 // ABI. 3508 3509 if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::f32)) { 3510 LocVT = XLenVT; 3511 LocInfo = CCValAssign::BCvt; 3512 } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) { 3513 LocVT = MVT::i64; 3514 LocInfo = CCValAssign::BCvt; 3515 } 3516 3517 // If this is a variadic argument, the RISC-V calling convention requires 3518 // that it is assigned an 'even' or 'aligned' register if it has 8-byte 3519 // alignment (RV32) or 16-byte alignment (RV64). An aligned register should 3520 // be used regardless of whether the original argument was split during 3521 // legalisation or not. The argument will not be passed by registers if the 3522 // original type is larger than 2*XLEN, so the register alignment rule does 3523 // not apply. 3524 unsigned TwoXLenInBytes = (2 * XLen) / 8; 3525 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes && 3526 DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) { 3527 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs); 3528 // Skip 'odd' register if necessary. 3529 if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1) 3530 State.AllocateReg(ArgGPRs); 3531 } 3532 3533 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs(); 3534 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags = 3535 State.getPendingArgFlags(); 3536 3537 assert(PendingLocs.size() == PendingArgFlags.size() && 3538 "PendingLocs and PendingArgFlags out of sync"); 3539 3540 // Handle passing f64 on RV32D with a soft float ABI or when floating point 3541 // registers are exhausted. 3542 if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) { 3543 assert(!ArgFlags.isSplit() && PendingLocs.empty() && 3544 "Can't lower f64 if it is split"); 3545 // Depending on available argument GPRS, f64 may be passed in a pair of 3546 // GPRs, split between a GPR and the stack, or passed completely on the 3547 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these 3548 // cases. 
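// As a sketch of the possible outcomes for a single f64 argument at this
// point (the register names are only examples):
//   - two argument GPRs free      -> both halves in a GPR pair (e.g. a2/a3);
//   - only the last GPR (a7) free -> low half in a7, high half in a 4-byte
//                                    stack slot;
//   - no argument GPRs free       -> the whole f64 in an 8-byte, 8-aligned
//                                    stack slot.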
3549 Register Reg = State.AllocateReg(ArgGPRs); 3550 LocVT = MVT::i32; 3551 if (!Reg) { 3552 unsigned StackOffset = State.AllocateStack(8, Align(8)); 3553 State.addLoc( 3554 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 3555 return false; 3556 } 3557 if (!State.AllocateReg(ArgGPRs)) 3558 State.AllocateStack(4, Align(4)); 3559 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3560 return false; 3561 } 3562 3563 // Split arguments might be passed indirectly, so keep track of the pending 3564 // values. 3565 if (ArgFlags.isSplit() || !PendingLocs.empty()) { 3566 LocVT = XLenVT; 3567 LocInfo = CCValAssign::Indirect; 3568 PendingLocs.push_back( 3569 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); 3570 PendingArgFlags.push_back(ArgFlags); 3571 if (!ArgFlags.isSplitEnd()) { 3572 return false; 3573 } 3574 } 3575 3576 // If the split argument only had two elements, it should be passed directly 3577 // in registers or on the stack. 3578 if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) { 3579 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()"); 3580 // Apply the normal calling convention rules to the first half of the 3581 // split argument. 3582 CCValAssign VA = PendingLocs[0]; 3583 ISD::ArgFlagsTy AF = PendingArgFlags[0]; 3584 PendingLocs.clear(); 3585 PendingArgFlags.clear(); 3586 return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT, 3587 ArgFlags); 3588 } 3589 3590 // Allocate to a register if possible, or else a stack slot. 3591 Register Reg; 3592 if (ValVT == MVT::f16 && !UseGPRForF16_F32) 3593 Reg = State.AllocateReg(ArgFPR16s); 3594 else if (ValVT == MVT::f32 && !UseGPRForF16_F32) 3595 Reg = State.AllocateReg(ArgFPR32s); 3596 else if (ValVT == MVT::f64 && !UseGPRForF64) 3597 Reg = State.AllocateReg(ArgFPR64s); 3598 else if (ValVT.isScalableVector()) { 3599 const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT); 3600 if (RC == &RISCV::VRRegClass) { 3601 // Assign the first mask argument to V0. 3602 // This is an interim calling convention and it may be changed in the 3603 // future. 3604 if (FirstMaskArgument.hasValue() && 3605 ValNo == FirstMaskArgument.getValue()) { 3606 Reg = State.AllocateReg(RISCV::V0); 3607 } else { 3608 Reg = State.AllocateReg(ArgVRs); 3609 } 3610 } else if (RC == &RISCV::VRM2RegClass) { 3611 Reg = State.AllocateReg(ArgVRM2s); 3612 } else if (RC == &RISCV::VRM4RegClass) { 3613 Reg = State.AllocateReg(ArgVRM4s); 3614 } else if (RC == &RISCV::VRM8RegClass) { 3615 Reg = State.AllocateReg(ArgVRM8s); 3616 } else { 3617 llvm_unreachable("Unhandled class register for ValueType"); 3618 } 3619 if (!Reg) { 3620 LocInfo = CCValAssign::Indirect; 3621 // Try using a GPR to pass the address 3622 Reg = State.AllocateReg(ArgGPRs); 3623 LocVT = XLenVT; 3624 } 3625 } else 3626 Reg = State.AllocateReg(ArgGPRs); 3627 unsigned StackOffset = 3628 Reg ? 0 : State.AllocateStack(XLen / 8, Align(XLen / 8)); 3629 3630 // If we reach this point and PendingLocs is non-empty, we must be at the 3631 // end of a split argument that must be passed indirectly. 
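// For instance, an i128 argument on RV32 is legalised into four i32 parts;
// all of their pending locations are converted here to the single register
// (or stack slot) that carries the address of the indirectly passed value.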
3632 if (!PendingLocs.empty()) { 3633 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()"); 3634 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()"); 3635 3636 for (auto &It : PendingLocs) { 3637 if (Reg) 3638 It.convertToReg(Reg); 3639 else 3640 It.convertToMem(StackOffset); 3641 State.addLoc(It); 3642 } 3643 PendingLocs.clear(); 3644 PendingArgFlags.clear(); 3645 return false; 3646 } 3647 3648 assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT || 3649 (TLI.getSubtarget().hasStdExtV() && ValVT.isScalableVector())) && 3650 "Expected an XLenVT or scalable vector types at this stage"); 3651 3652 if (Reg) { 3653 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3654 return false; 3655 } 3656 3657 // When a floating-point value is passed on the stack, no bit-conversion is 3658 // needed. 3659 if (ValVT.isFloatingPoint()) { 3660 LocVT = ValVT; 3661 LocInfo = CCValAssign::Full; 3662 } 3663 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 3664 return false; 3665 } 3666 3667 template <typename ArgTy> 3668 static Optional<unsigned> preAssignMask(const ArgTy &Args) { 3669 for (const auto &ArgIdx : enumerate(Args)) { 3670 MVT ArgVT = ArgIdx.value().VT; 3671 if (ArgVT.isScalableVector() && 3672 ArgVT.getVectorElementType().SimpleTy == MVT::i1) 3673 return ArgIdx.index(); 3674 } 3675 return None; 3676 } 3677 3678 void RISCVTargetLowering::analyzeInputArgs( 3679 MachineFunction &MF, CCState &CCInfo, 3680 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const { 3681 unsigned NumArgs = Ins.size(); 3682 FunctionType *FType = MF.getFunction().getFunctionType(); 3683 3684 Optional<unsigned> FirstMaskArgument; 3685 if (Subtarget.hasStdExtV()) 3686 FirstMaskArgument = preAssignMask(Ins); 3687 3688 for (unsigned i = 0; i != NumArgs; ++i) { 3689 MVT ArgVT = Ins[i].VT; 3690 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; 3691 3692 Type *ArgTy = nullptr; 3693 if (IsRet) 3694 ArgTy = FType->getReturnType(); 3695 else if (Ins[i].isOrigArg()) 3696 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex()); 3697 3698 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 3699 if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, 3700 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this, 3701 FirstMaskArgument)) { 3702 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " 3703 << EVT(ArgVT).getEVTString() << '\n'); 3704 llvm_unreachable(nullptr); 3705 } 3706 } 3707 } 3708 3709 void RISCVTargetLowering::analyzeOutputArgs( 3710 MachineFunction &MF, CCState &CCInfo, 3711 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet, 3712 CallLoweringInfo *CLI) const { 3713 unsigned NumArgs = Outs.size(); 3714 3715 Optional<unsigned> FirstMaskArgument; 3716 if (Subtarget.hasStdExtV()) 3717 FirstMaskArgument = preAssignMask(Outs); 3718 3719 for (unsigned i = 0; i != NumArgs; i++) { 3720 MVT ArgVT = Outs[i].VT; 3721 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 3722 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr; 3723 3724 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 3725 if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, 3726 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this, 3727 FirstMaskArgument)) { 3728 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " 3729 << EVT(ArgVT).getEVTString() << "\n"); 3730 llvm_unreachable(nullptr); 3731 } 3732 } 3733 } 3734 3735 // Convert Val to a ValVT. 
Should not be called for CCValAssign::Indirect 3736 // values. 3737 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, 3738 const CCValAssign &VA, const SDLoc &DL) { 3739 switch (VA.getLocInfo()) { 3740 default: 3741 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 3742 case CCValAssign::Full: 3743 break; 3744 case CCValAssign::BCvt: 3745 if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16) 3746 Val = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, Val); 3747 else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 3748 Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val); 3749 else 3750 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); 3751 break; 3752 } 3753 return Val; 3754 } 3755 3756 // The caller is responsible for loading the full value if the argument is 3757 // passed with CCValAssign::Indirect. 3758 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, 3759 const CCValAssign &VA, const SDLoc &DL, 3760 const RISCVTargetLowering &TLI) { 3761 MachineFunction &MF = DAG.getMachineFunction(); 3762 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 3763 EVT LocVT = VA.getLocVT(); 3764 SDValue Val; 3765 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT()); 3766 Register VReg = RegInfo.createVirtualRegister(RC); 3767 RegInfo.addLiveIn(VA.getLocReg(), VReg); 3768 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); 3769 3770 if (VA.getLocInfo() == CCValAssign::Indirect) 3771 return Val; 3772 3773 return convertLocVTToValVT(DAG, Val, VA, DL); 3774 } 3775 3776 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, 3777 const CCValAssign &VA, const SDLoc &DL) { 3778 EVT LocVT = VA.getLocVT(); 3779 3780 switch (VA.getLocInfo()) { 3781 default: 3782 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 3783 case CCValAssign::Full: 3784 break; 3785 case CCValAssign::BCvt: 3786 if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16) 3787 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val); 3788 else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 3789 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val); 3790 else 3791 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val); 3792 break; 3793 } 3794 return Val; 3795 } 3796 3797 // The caller is responsible for loading the full value if the argument is 3798 // passed with CCValAssign::Indirect. 
3799 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, 3800 const CCValAssign &VA, const SDLoc &DL) { 3801 MachineFunction &MF = DAG.getMachineFunction(); 3802 MachineFrameInfo &MFI = MF.getFrameInfo(); 3803 EVT LocVT = VA.getLocVT(); 3804 EVT ValVT = VA.getValVT(); 3805 EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)); 3806 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8, 3807 VA.getLocMemOffset(), /*Immutable=*/true); 3808 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3809 SDValue Val; 3810 3811 ISD::LoadExtType ExtType; 3812 switch (VA.getLocInfo()) { 3813 default: 3814 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 3815 case CCValAssign::Full: 3816 case CCValAssign::Indirect: 3817 case CCValAssign::BCvt: 3818 ExtType = ISD::NON_EXTLOAD; 3819 break; 3820 } 3821 Val = DAG.getExtLoad( 3822 ExtType, DL, LocVT, Chain, FIN, 3823 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT); 3824 return Val; 3825 } 3826 3827 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, 3828 const CCValAssign &VA, const SDLoc &DL) { 3829 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 && 3830 "Unexpected VA"); 3831 MachineFunction &MF = DAG.getMachineFunction(); 3832 MachineFrameInfo &MFI = MF.getFrameInfo(); 3833 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 3834 3835 if (VA.isMemLoc()) { 3836 // f64 is passed on the stack. 3837 int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true); 3838 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); 3839 return DAG.getLoad(MVT::f64, DL, Chain, FIN, 3840 MachinePointerInfo::getFixedStack(MF, FI)); 3841 } 3842 3843 assert(VA.isRegLoc() && "Expected register VA assignment"); 3844 3845 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 3846 RegInfo.addLiveIn(VA.getLocReg(), LoVReg); 3847 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32); 3848 SDValue Hi; 3849 if (VA.getLocReg() == RISCV::X17) { 3850 // Second half of f64 is passed on the stack. 3851 int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true); 3852 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); 3853 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN, 3854 MachinePointerInfo::getFixedStack(MF, FI)); 3855 } else { 3856 // Second half of f64 is passed in another GPR. 3857 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 3858 RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg); 3859 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32); 3860 } 3861 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi); 3862 } 3863 3864 // FastCC provides less than a 1% performance improvement on some particular 3865 // benchmarks, but theoretically it may benefit other cases. 3866 static bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT, 3867 CCValAssign::LocInfo LocInfo, 3868 ISD::ArgFlagsTy ArgFlags, CCState &State) { 3869 3870 if (LocVT == MVT::i32 || LocVT == MVT::i64) { 3871 // X5 and X6 might be used for save-restore libcall.
3872 static const MCPhysReg GPRList[] = { 3873 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, 3874 RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28, 3875 RISCV::X29, RISCV::X30, RISCV::X31}; 3876 if (unsigned Reg = State.AllocateReg(GPRList)) { 3877 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3878 return false; 3879 } 3880 } 3881 3882 if (LocVT == MVT::f16) { 3883 static const MCPhysReg FPR16List[] = { 3884 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H, 3885 RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H, 3886 RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H, 3887 RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H}; 3888 if (unsigned Reg = State.AllocateReg(FPR16List)) { 3889 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3890 return false; 3891 } 3892 } 3893 3894 if (LocVT == MVT::f32) { 3895 static const MCPhysReg FPR32List[] = { 3896 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F, 3897 RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F, 3898 RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F, 3899 RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F}; 3900 if (unsigned Reg = State.AllocateReg(FPR32List)) { 3901 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3902 return false; 3903 } 3904 } 3905 3906 if (LocVT == MVT::f64) { 3907 static const MCPhysReg FPR64List[] = { 3908 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D, 3909 RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D, 3910 RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D, 3911 RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D}; 3912 if (unsigned Reg = State.AllocateReg(FPR64List)) { 3913 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3914 return false; 3915 } 3916 } 3917 3918 if (LocVT == MVT::i32 || LocVT == MVT::f32) { 3919 unsigned Offset4 = State.AllocateStack(4, Align(4)); 3920 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo)); 3921 return false; 3922 } 3923 3924 if (LocVT == MVT::i64 || LocVT == MVT::f64) { 3925 unsigned Offset5 = State.AllocateStack(8, Align(8)); 3926 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo)); 3927 return false; 3928 } 3929 3930 return true; // CC didn't match. 3931 } 3932 3933 static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, 3934 CCValAssign::LocInfo LocInfo, 3935 ISD::ArgFlagsTy ArgFlags, CCState &State) { 3936 3937 if (LocVT == MVT::i32 || LocVT == MVT::i64) { 3938 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim 3939 // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11 3940 static const MCPhysReg GPRList[] = { 3941 RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22, 3942 RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27}; 3943 if (unsigned Reg = State.AllocateReg(GPRList)) { 3944 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3945 return false; 3946 } 3947 } 3948 3949 if (LocVT == MVT::f32) { 3950 // Pass in STG registers: F1, ..., F6 3951 // fs0 ... 
fs5 3952 static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F, 3953 RISCV::F18_F, RISCV::F19_F, 3954 RISCV::F20_F, RISCV::F21_F}; 3955 if (unsigned Reg = State.AllocateReg(FPR32List)) { 3956 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3957 return false; 3958 } 3959 } 3960 3961 if (LocVT == MVT::f64) { 3962 // Pass in STG registers: D1, ..., D6 3963 // fs6 ... fs11 3964 static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D, 3965 RISCV::F24_D, RISCV::F25_D, 3966 RISCV::F26_D, RISCV::F27_D}; 3967 if (unsigned Reg = State.AllocateReg(FPR64List)) { 3968 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3969 return false; 3970 } 3971 } 3972 3973 report_fatal_error("No registers left in GHC calling convention"); 3974 return true; 3975 } 3976 3977 // Transform physical registers into virtual registers. 3978 SDValue RISCVTargetLowering::LowerFormalArguments( 3979 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, 3980 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, 3981 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 3982 3983 MachineFunction &MF = DAG.getMachineFunction(); 3984 3985 switch (CallConv) { 3986 default: 3987 report_fatal_error("Unsupported calling convention"); 3988 case CallingConv::C: 3989 case CallingConv::Fast: 3990 break; 3991 case CallingConv::GHC: 3992 if (!MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtF] || 3993 !MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtD]) 3994 report_fatal_error( 3995 "GHC calling convention requires the F and D instruction set extensions"); 3996 } 3997 3998 const Function &Func = MF.getFunction(); 3999 if (Func.hasFnAttribute("interrupt")) { 4000 if (!Func.arg_empty()) 4001 report_fatal_error( 4002 "Functions with the interrupt attribute cannot have arguments!"); 4003 4004 StringRef Kind = 4005 MF.getFunction().getFnAttribute("interrupt").getValueAsString(); 4006 4007 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine")) 4008 report_fatal_error( 4009 "Function interrupt attribute argument not supported!"); 4010 } 4011 4012 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 4013 MVT XLenVT = Subtarget.getXLenVT(); 4014 unsigned XLenInBytes = Subtarget.getXLen() / 8; 4015 // Used with varargs to accumulate store chains. 4016 std::vector<SDValue> OutChains; 4017 4018 // Assign locations to all of the incoming arguments. 4019 SmallVector<CCValAssign, 16> ArgLocs; 4020 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 4021 4022 if (CallConv == CallingConv::Fast) 4023 CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_FastCC); 4024 else if (CallConv == CallingConv::GHC) 4025 CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC); 4026 else 4027 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false); 4028 4029 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 4030 CCValAssign &VA = ArgLocs[i]; 4031 SDValue ArgValue; 4032 // Passing f64 on RV32D with a soft float ABI must be handled as a special 4033 // case. 4034 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) 4035 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL); 4036 else if (VA.isRegLoc()) 4037 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this); 4038 else 4039 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL); 4040 4041 if (VA.getLocInfo() == CCValAssign::Indirect) { 4042 // If the original argument was split and passed by reference (e.g. i128 4043 // on RV32), we need to load all parts of it here (using the same 4044 // address).
4045 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue, 4046 MachinePointerInfo())); 4047 unsigned ArgIndex = Ins[i].OrigArgIndex; 4048 assert(Ins[i].PartOffset == 0); 4049 while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) { 4050 CCValAssign &PartVA = ArgLocs[i + 1]; 4051 unsigned PartOffset = Ins[i + 1].PartOffset; 4052 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, 4053 DAG.getIntPtrConstant(PartOffset, DL)); 4054 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address, 4055 MachinePointerInfo())); 4056 ++i; 4057 } 4058 continue; 4059 } 4060 InVals.push_back(ArgValue); 4061 } 4062 4063 if (IsVarArg) { 4064 ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs); 4065 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs); 4066 const TargetRegisterClass *RC = &RISCV::GPRRegClass; 4067 MachineFrameInfo &MFI = MF.getFrameInfo(); 4068 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 4069 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); 4070 4071 // Offset of the first variable argument from the stack pointer, and size of 4072 // the vararg save area. For now, the varargs save area is either zero or 4073 // large enough to hold a0-a7. 4074 int VaArgOffset, VarArgsSaveSize; 4075 4076 // If all registers are allocated, then all varargs must be passed on the 4077 // stack and we don't need to save any argregs. 4078 if (ArgRegs.size() == Idx) { 4079 VaArgOffset = CCInfo.getNextStackOffset(); 4080 VarArgsSaveSize = 0; 4081 } else { 4082 VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx); 4083 VaArgOffset = -VarArgsSaveSize; 4084 } 4085 4086 // Record the frame index of the first variable argument, 4087 // which is a value necessary for VASTART. 4088 int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true); 4089 RVFI->setVarArgsFrameIndex(FI); 4090 4091 // If saving an odd number of registers, then create an extra stack slot to 4092 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures 4093 // offsets to even-numbered registers remain 2*XLEN-aligned. 4094 if (Idx % 2) { 4095 MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true); 4096 VarArgsSaveSize += XLenInBytes; 4097 } 4098 4099 // Copy the integer registers that may have been used for passing varargs 4100 // to the vararg save area. 4101 for (unsigned I = Idx; I < ArgRegs.size(); 4102 ++I, VaArgOffset += XLenInBytes) { 4103 const Register Reg = RegInfo.createVirtualRegister(RC); 4104 RegInfo.addLiveIn(ArgRegs[I], Reg); 4105 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT); 4106 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true); 4107 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 4108 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff, 4109 MachinePointerInfo::getFixedStack(MF, FI)); 4110 cast<StoreSDNode>(Store.getNode()) 4111 ->getMemOperand() 4112 ->setValue((Value *)nullptr); 4113 OutChains.push_back(Store); 4114 } 4115 RVFI->setVarArgsSaveSize(VarArgsSaveSize); 4116 } 4117 4118 // All stores are grouped in one node to allow the matching between 4119 // the size of Ins and InVals. This only happens for vararg functions. 4120 if (!OutChains.empty()) { 4121 OutChains.push_back(Chain); 4122 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains); 4123 } 4124 4125 return Chain; 4126 } 4127 4128 /// isEligibleForTailCallOptimization - Check whether the call is eligible 4129 /// for tail call optimization.
4130 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization. 4131 bool RISCVTargetLowering::isEligibleForTailCallOptimization( 4132 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, 4133 const SmallVector<CCValAssign, 16> &ArgLocs) const { 4134 4135 auto &Callee = CLI.Callee; 4136 auto CalleeCC = CLI.CallConv; 4137 auto &Outs = CLI.Outs; 4138 auto &Caller = MF.getFunction(); 4139 auto CallerCC = Caller.getCallingConv(); 4140 4141 // Exception-handling functions need a special set of instructions to 4142 // indicate a return to the hardware. Tail-calling another function would 4143 // probably break this. 4144 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This 4145 // should be expanded as new function attributes are introduced. 4146 if (Caller.hasFnAttribute("interrupt")) 4147 return false; 4148 4149 // Do not tail call opt if the stack is used to pass parameters. 4150 if (CCInfo.getNextStackOffset() != 0) 4151 return false; 4152 4153 // Do not tail call opt if any parameters need to be passed indirectly. 4154 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are 4155 // passed indirectly. So the address of the value will be passed in a 4156 // register, or if not available, then the address is put on the stack. In 4157 // order to pass indirectly, space on the stack often needs to be allocated 4158 // in order to store the value. In this case the CCInfo.getNextStackOffset() 4159 // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs 4160 // are passed CCValAssign::Indirect. 4161 for (auto &VA : ArgLocs) 4162 if (VA.getLocInfo() == CCValAssign::Indirect) 4163 return false; 4164 4165 // Do not tail call opt if either caller or callee uses struct return 4166 // semantics. 4167 auto IsCallerStructRet = Caller.hasStructRetAttr(); 4168 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); 4169 if (IsCallerStructRet || IsCalleeStructRet) 4170 return false; 4171 4172 // Externally-defined functions with weak linkage should not be 4173 // tail-called. The behaviour of branch instructions in this situation (as 4174 // used for tail calls) is implementation-defined, so we cannot rely on the 4175 // linker replacing the tail call with a return. 4176 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 4177 const GlobalValue *GV = G->getGlobal(); 4178 if (GV->hasExternalWeakLinkage()) 4179 return false; 4180 } 4181 4182 // The callee has to preserve all registers the caller needs to preserve. 4183 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); 4184 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); 4185 if (CalleeCC != CallerCC) { 4186 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); 4187 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) 4188 return false; 4189 } 4190 4191 // Byval parameters hand the function a pointer directly into the stack area 4192 // we want to reuse during a tail call. Working around this *is* possible 4193 // but less efficient and uglier in LowerCall. 4194 for (auto &Arg : Outs) 4195 if (Arg.Flags.isByVal()) 4196 return false; 4197 4198 return true; 4199 } 4200 4201 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input 4202 // and output parameter nodes. 
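// Schematically (a sketch only), a simple non-tail call becomes:
//   callseq_start
//     -> CopyToReg for each register argument (plus stores for any stack
//        arguments)
//     -> RISCVISD::CALL glued to those copies
//   callseq_end
//     -> CopyFromReg for each value returned in registers.
// A tail call ends in RISCVISD::TAIL instead and omits callseq_start/end.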
4203 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, 4204 SmallVectorImpl<SDValue> &InVals) const { 4205 SelectionDAG &DAG = CLI.DAG; 4206 SDLoc &DL = CLI.DL; 4207 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 4208 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 4209 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 4210 SDValue Chain = CLI.Chain; 4211 SDValue Callee = CLI.Callee; 4212 bool &IsTailCall = CLI.IsTailCall; 4213 CallingConv::ID CallConv = CLI.CallConv; 4214 bool IsVarArg = CLI.IsVarArg; 4215 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 4216 MVT XLenVT = Subtarget.getXLenVT(); 4217 4218 MachineFunction &MF = DAG.getMachineFunction(); 4219 4220 // Analyze the operands of the call, assigning locations to each operand. 4221 SmallVector<CCValAssign, 16> ArgLocs; 4222 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 4223 4224 if (CallConv == CallingConv::Fast) 4225 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC); 4226 else if (CallConv == CallingConv::GHC) 4227 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC); 4228 else 4229 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI); 4230 4231 // Check if it's really possible to do a tail call. 4232 if (IsTailCall) 4233 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs); 4234 4235 if (IsTailCall) 4236 ++NumTailCalls; 4237 else if (CLI.CB && CLI.CB->isMustTailCall()) 4238 report_fatal_error("failed to perform tail call elimination on a call " 4239 "site marked musttail"); 4240 4241 // Get a count of how many bytes are to be pushed on the stack. 4242 unsigned NumBytes = ArgCCInfo.getNextStackOffset(); 4243 4244 // Create local copies for byval args 4245 SmallVector<SDValue, 8> ByValArgs; 4246 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 4247 ISD::ArgFlagsTy Flags = Outs[i].Flags; 4248 if (!Flags.isByVal()) 4249 continue; 4250 4251 SDValue Arg = OutVals[i]; 4252 unsigned Size = Flags.getByValSize(); 4253 Align Alignment = Flags.getNonZeroByValAlign(); 4254 4255 int FI = 4256 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false); 4257 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 4258 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT); 4259 4260 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment, 4261 /*IsVolatile=*/false, 4262 /*AlwaysInline=*/false, IsTailCall, 4263 MachinePointerInfo(), MachinePointerInfo()); 4264 ByValArgs.push_back(FIPtr); 4265 } 4266 4267 if (!IsTailCall) 4268 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); 4269 4270 // Copy argument values to their designated locations. 4271 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass; 4272 SmallVector<SDValue, 8> MemOpChains; 4273 SDValue StackPtr; 4274 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) { 4275 CCValAssign &VA = ArgLocs[i]; 4276 SDValue ArgValue = OutVals[i]; 4277 ISD::ArgFlagsTy Flags = Outs[i].Flags; 4278 4279 // Handle passing f64 on RV32D with a soft float ABI as a special case. 4280 bool IsF64OnRV32DSoftABI = 4281 VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64; 4282 if (IsF64OnRV32DSoftABI && VA.isRegLoc()) { 4283 SDValue SplitF64 = DAG.getNode( 4284 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue); 4285 SDValue Lo = SplitF64.getValue(0); 4286 SDValue Hi = SplitF64.getValue(1); 4287 4288 Register RegLo = VA.getLocReg(); 4289 RegsToPass.push_back(std::make_pair(RegLo, Lo)); 4290 4291 if (RegLo == RISCV::X17) { 4292 // Second half of f64 is passed on the stack. 
4293 // Work out the address of the stack slot. 4294 if (!StackPtr.getNode()) 4295 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); 4296 // Emit the store. 4297 MemOpChains.push_back( 4298 DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo())); 4299 } else { 4300 // Second half of f64 is passed in another GPR. 4301 assert(RegLo < RISCV::X31 && "Invalid register pair"); 4302 Register RegHigh = RegLo + 1; 4303 RegsToPass.push_back(std::make_pair(RegHigh, Hi)); 4304 } 4305 continue; 4306 } 4307 4308 // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way 4309 // as any other MemLoc. 4310 4311 // Promote the value if needed. 4312 // For now, only handle fully promoted and indirect arguments. 4313 if (VA.getLocInfo() == CCValAssign::Indirect) { 4314 // Store the argument in a stack slot and pass its address. 4315 SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT); 4316 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); 4317 MemOpChains.push_back( 4318 DAG.getStore(Chain, DL, ArgValue, SpillSlot, 4319 MachinePointerInfo::getFixedStack(MF, FI))); 4320 // If the original argument was split (e.g. i128), we need 4321 // to store all parts of it here (and pass just one address). 4322 unsigned ArgIndex = Outs[i].OrigArgIndex; 4323 assert(Outs[i].PartOffset == 0); 4324 while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) { 4325 SDValue PartValue = OutVals[i + 1]; 4326 unsigned PartOffset = Outs[i + 1].PartOffset; 4327 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, 4328 DAG.getIntPtrConstant(PartOffset, DL)); 4329 MemOpChains.push_back( 4330 DAG.getStore(Chain, DL, PartValue, Address, 4331 MachinePointerInfo::getFixedStack(MF, FI))); 4332 ++i; 4333 } 4334 ArgValue = SpillSlot; 4335 } else { 4336 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL); 4337 } 4338 4339 // Use local copy if it is a byval arg. 4340 if (Flags.isByVal()) 4341 ArgValue = ByValArgs[j++]; 4342 4343 if (VA.isRegLoc()) { 4344 // Queue up the argument copies and emit them at the end. 4345 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); 4346 } else { 4347 assert(VA.isMemLoc() && "Argument not register or memory"); 4348 assert(!IsTailCall && "Tail call not allowed if stack is used " 4349 "for passing parameters"); 4350 4351 // Work out the address of the stack slot. 4352 if (!StackPtr.getNode()) 4353 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); 4354 SDValue Address = 4355 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, 4356 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL)); 4357 4358 // Emit the store. 4359 MemOpChains.push_back( 4360 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo())); 4361 } 4362 } 4363 4364 // Join the stores, which are independent of one another. 4365 if (!MemOpChains.empty()) 4366 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); 4367 4368 SDValue Glue; 4369 4370 // Build a sequence of copy-to-reg nodes, chained and glued together. 4371 for (auto &Reg : RegsToPass) { 4372 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue); 4373 Glue = Chain.getValue(1); 4374 } 4375 4376 // Validate that none of the argument registers have been marked as 4377 // reserved, if so report an error. Do the same for the return address if this 4378 // is not a tailcall. 
  validateCCReservedRegs(RegsToPass, MF);
  if (!IsTailCall &&
      MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
    MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
        MF.getFunction(),
        "Return address register required, but has been reserved."});

  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
  // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
  // split it, and so the direct call can be matched by PseudoCALL.
  if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = S->getGlobal();

    unsigned OpFlags = RISCVII::MO_CALL;
    if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
      OpFlags = RISCVII::MO_PLT;

    Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    unsigned OpFlags = RISCVII::MO_CALL;

    if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
                                                 nullptr))
      OpFlags = RISCVII::MO_PLT;

    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
  }

  // The first call operand is the chain and the second is the target address.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (auto &Reg : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));

  if (!IsTailCall) {
    // Add a register mask operand representing the call-preserved registers.
    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
    const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
    assert(Mask && "Missing call preserved mask for calling convention");
    Ops.push_back(DAG.getRegisterMask(Mask));
  }

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);

  if (IsTailCall) {
    MF.getFrameInfo().setHasTailCall();
    return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
  }

  Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
  Glue = Chain.getValue(1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumBytes, DL, PtrVT, true),
                             DAG.getConstant(0, DL, PtrVT, true),
                             Glue, DL);
  Glue = Chain.getValue(1);

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
  analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true);

  // Copy all of the result registers out of their specified physreg.
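  // (For example, on RV32 with a soft-float ABI an f64 result arrives split
  // across the a0/a1 pair; the loop below reassembles it with
  // RISCVISD::BuildPairF64.)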
  for (auto &VA : RVLocs) {
    // Copy the value out
    SDValue RetValue =
        DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
    // Glue the RetValue to the end of the call sequence
    Chain = RetValue.getValue(1);
    Glue = RetValue.getValue(2);

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
      SDValue RetValue2 =
          DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
      Chain = RetValue2.getValue(1);
      Glue = RetValue2.getValue(2);
      RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
                             RetValue2);
    }

    RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);

    InVals.push_back(RetValue);
  }

  return Chain;
}

bool RISCVTargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);

  Optional<unsigned> FirstMaskArgument;
  if (Subtarget.hasStdExtV())
    FirstMaskArgument = preAssignMask(Outs);

  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    MVT VT = Outs[i].VT;
    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
    RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
    if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
                 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr,
                 *this, FirstMaskArgument))
      return false;
  }
  return true;
}

SDValue
RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                 bool IsVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
                                 const SDLoc &DL, SelectionDAG &DAG) const {
  const MachineFunction &MF = DAG.getMachineFunction();
  const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();

  // Stores the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;

  // Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
                    nullptr);

  if (CallConv == CallingConv::GHC && !RVLocs.empty())
    report_fatal_error("GHC functions return void only");

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
    SDValue Val = OutVals[i];
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      // Handle returning f64 on RV32D with a soft float ABI.
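      // (The value is split with RISCVISD::SplitF64 and returned in the
      // GPR pair starting at the assigned register, mirroring the
      // argument-passing case handled in LowerCall above.)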
      assert(VA.isRegLoc() && "Expected return via registers");
      SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
                                     DAG.getVTList(MVT::i32, MVT::i32), Val);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);
      Register RegLo = VA.getLocReg();
      assert(RegLo < RISCV::X31 && "Invalid register pair");
      Register RegHi = RegLo + 1;

      if (STI.isRegisterReservedByUser(RegLo) ||
          STI.isRegisterReservedByUser(RegHi))
        MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
            MF.getFunction(),
            "Return value register required, but has been reserved."});

      Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
      Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
    } else {
      // Handle a 'normal' return.
      Val = convertValVTToLocVT(DAG, Val, VA, DL);
      Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);

      if (STI.isRegisterReservedByUser(VA.getLocReg()))
        MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
            MF.getFunction(),
            "Return value register required, but has been reserved."});

      // Guarantee that all emitted copies are stuck together.
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
    }
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue node if we have it.
  if (Glue.getNode()) {
    RetOps.push_back(Glue);
  }

  // Interrupt service routines use different return instructions.
  const Function &Func = DAG.getMachineFunction().getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.getReturnType()->isVoidTy())
      report_fatal_error(
          "Functions with the interrupt attribute must have void return type!");

    MachineFunction &MF = DAG.getMachineFunction();
    StringRef Kind =
        MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    unsigned RetOpc;
    if (Kind == "user")
      RetOpc = RISCVISD::URET_FLAG;
    else if (Kind == "supervisor")
      RetOpc = RISCVISD::SRET_FLAG;
    else
      RetOpc = RISCVISD::MRET_FLAG;

    return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
  }

  return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
}

void RISCVTargetLowering::validateCCReservedRegs(
    const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
    MachineFunction &MF) const {
  const Function &F = MF.getFunction();
  const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();

  if (llvm::any_of(Regs, [&STI](auto Reg) {
        return STI.isRegisterReservedByUser(Reg.first);
      }))
    F.getContext().diagnose(DiagnosticInfoUnsupported{
        F, "Argument register required, but has been reserved."});
}

bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  return CI->isTailCall();
}

const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
#define NODE_NAME_CASE(NODE)                                                   \
  case RISCVISD::NODE:                                                         \
    return "RISCVISD::" #NODE;
  // clang-format off
  switch ((RISCVISD::NodeType)Opcode) {
  case RISCVISD::FIRST_NUMBER:
    break;
  NODE_NAME_CASE(RET_FLAG)
  NODE_NAME_CASE(URET_FLAG)
  NODE_NAME_CASE(SRET_FLAG)
  NODE_NAME_CASE(MRET_FLAG)
  NODE_NAME_CASE(CALL)
  NODE_NAME_CASE(SELECT_CC)
  NODE_NAME_CASE(BuildPairF64)
  NODE_NAME_CASE(SplitF64)
  NODE_NAME_CASE(TAIL)
  NODE_NAME_CASE(SLLW)
  NODE_NAME_CASE(SRAW)
  NODE_NAME_CASE(SRLW)
  NODE_NAME_CASE(DIVW)
  NODE_NAME_CASE(DIVUW)
  NODE_NAME_CASE(REMUW)
  NODE_NAME_CASE(ROLW)
  NODE_NAME_CASE(RORW)
  NODE_NAME_CASE(FSLW)
  NODE_NAME_CASE(FSRW)
  NODE_NAME_CASE(FSL)
  NODE_NAME_CASE(FSR)
  NODE_NAME_CASE(FMV_H_X)
  NODE_NAME_CASE(FMV_X_ANYEXTH)
  NODE_NAME_CASE(FMV_W_X_RV64)
  NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
  NODE_NAME_CASE(READ_CYCLE_WIDE)
  NODE_NAME_CASE(GREVI)
  NODE_NAME_CASE(GREVIW)
  NODE_NAME_CASE(GORCI)
  NODE_NAME_CASE(GORCIW)
  NODE_NAME_CASE(VMV_V_X_VL)
  NODE_NAME_CASE(VFMV_V_F_VL)
  NODE_NAME_CASE(VMV_X_S)
  NODE_NAME_CASE(SPLAT_VECTOR_I64)
  NODE_NAME_CASE(READ_VLENB)
  NODE_NAME_CASE(TRUNCATE_VECTOR)
  NODE_NAME_CASE(VLEFF)
  NODE_NAME_CASE(VLEFF_MASK)
  NODE_NAME_CASE(VSLIDEUP)
  NODE_NAME_CASE(VSLIDEDOWN)
  NODE_NAME_CASE(VID_VL)
  NODE_NAME_CASE(VFNCVT_ROD)
  NODE_NAME_CASE(VECREDUCE_ADD)
  NODE_NAME_CASE(VECREDUCE_UMAX)
  NODE_NAME_CASE(VECREDUCE_SMAX)
  NODE_NAME_CASE(VECREDUCE_UMIN)
  NODE_NAME_CASE(VECREDUCE_SMIN)
  NODE_NAME_CASE(VECREDUCE_AND)
  NODE_NAME_CASE(VECREDUCE_OR)
  NODE_NAME_CASE(VECREDUCE_XOR)
  NODE_NAME_CASE(VECREDUCE_FADD)
  NODE_NAME_CASE(VECREDUCE_SEQ_FADD)
  NODE_NAME_CASE(ADD_VL)
  NODE_NAME_CASE(AND_VL)
  NODE_NAME_CASE(MUL_VL)
  NODE_NAME_CASE(OR_VL)
  NODE_NAME_CASE(SDIV_VL)
  NODE_NAME_CASE(SHL_VL)
  NODE_NAME_CASE(SREM_VL)
  NODE_NAME_CASE(SRA_VL)
  NODE_NAME_CASE(SRL_VL)
  NODE_NAME_CASE(SUB_VL)
  NODE_NAME_CASE(UDIV_VL)
  NODE_NAME_CASE(UREM_VL)
  NODE_NAME_CASE(XOR_VL)
  NODE_NAME_CASE(FADD_VL)
  NODE_NAME_CASE(FSUB_VL)
  NODE_NAME_CASE(FMUL_VL)
  NODE_NAME_CASE(FDIV_VL)
  NODE_NAME_CASE(FNEG_VL)
  NODE_NAME_CASE(FMA_VL)
  NODE_NAME_CASE(VMCLR_VL)
  NODE_NAME_CASE(VMSET_VL)
  NODE_NAME_CASE(VRGATHER_VX_VL)
  NODE_NAME_CASE(VLE_VL)
  NODE_NAME_CASE(VSE_VL)
  }
  // clang-format on
  return nullptr;
#undef NODE_NAME_CASE
}

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
RISCVTargetLowering::ConstraintType
RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:
      break;
    case 'f':
      return C_RegisterClass;
    case 'I':
    case 'J':
    case 'K':
      return C_Immediate;
    case 'A':
      return C_Memory;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

std::pair<unsigned, const TargetRegisterClass *>
RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                  StringRef Constraint,
                                                  MVT VT) const {
  // First, see if this is a constraint that directly corresponds to a
  // RISCV register class.
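  // For example (illustrative only), a GNU inline asm statement such as
  //   asm("fadd.s %0, %1, %2" : "=f"(d) : "f"(a), "f"(b));
  // reaches this point with Constraint == "f"; with VT == MVT::f32 and the F
  // extension available it is mapped to FPR32RegClass below.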
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      return std::make_pair(0U, &RISCV::GPRRegClass);
    case 'f':
      if (Subtarget.hasStdExtZfh() && VT == MVT::f16)
        return std::make_pair(0U, &RISCV::FPR16RegClass);
      if (Subtarget.hasStdExtF() && VT == MVT::f32)
        return std::make_pair(0U, &RISCV::FPR32RegClass);
      if (Subtarget.hasStdExtD() && VT == MVT::f64)
        return std::make_pair(0U, &RISCV::FPR64RegClass);
      break;
    default:
      break;
    }
  }

  // Clang will correctly decode the usage of register name aliases into their
  // official names. However, other frontends like `rustc` do not. This allows
  // users of these frontends to use the ABI names for registers in LLVM-style
  // register constraints.
  unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
                               .Case("{zero}", RISCV::X0)
                               .Case("{ra}", RISCV::X1)
                               .Case("{sp}", RISCV::X2)
                               .Case("{gp}", RISCV::X3)
                               .Case("{tp}", RISCV::X4)
                               .Case("{t0}", RISCV::X5)
                               .Case("{t1}", RISCV::X6)
                               .Case("{t2}", RISCV::X7)
                               .Cases("{s0}", "{fp}", RISCV::X8)
                               .Case("{s1}", RISCV::X9)
                               .Case("{a0}", RISCV::X10)
                               .Case("{a1}", RISCV::X11)
                               .Case("{a2}", RISCV::X12)
                               .Case("{a3}", RISCV::X13)
                               .Case("{a4}", RISCV::X14)
                               .Case("{a5}", RISCV::X15)
                               .Case("{a6}", RISCV::X16)
                               .Case("{a7}", RISCV::X17)
                               .Case("{s2}", RISCV::X18)
                               .Case("{s3}", RISCV::X19)
                               .Case("{s4}", RISCV::X20)
                               .Case("{s5}", RISCV::X21)
                               .Case("{s6}", RISCV::X22)
                               .Case("{s7}", RISCV::X23)
                               .Case("{s8}", RISCV::X24)
                               .Case("{s9}", RISCV::X25)
                               .Case("{s10}", RISCV::X26)
                               .Case("{s11}", RISCV::X27)
                               .Case("{t3}", RISCV::X28)
                               .Case("{t4}", RISCV::X29)
                               .Case("{t5}", RISCV::X30)
                               .Case("{t6}", RISCV::X31)
                               .Default(RISCV::NoRegister);
  if (XRegFromAlias != RISCV::NoRegister)
    return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);

  // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
  // TableGen record rather than the AsmName to choose registers for InlineAsm
  // constraints, and because we want to match those names to the widest
  // floating point register type available, manually select floating point
  // registers here.
  //
  // The second case is the ABI name of the register, so that frontends can
  // also use the ABI names in register constraint lists.
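  // For example, both the architectural name "{f10}" and the ABI name
  // "{fa0}" resolve to the same physical register below, widened to the
  // FPR64 register class when the D extension is present.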
  if (Subtarget.hasStdExtF()) {
    unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
                        .Cases("{f0}", "{ft0}", RISCV::F0_F)
                        .Cases("{f1}", "{ft1}", RISCV::F1_F)
                        .Cases("{f2}", "{ft2}", RISCV::F2_F)
                        .Cases("{f3}", "{ft3}", RISCV::F3_F)
                        .Cases("{f4}", "{ft4}", RISCV::F4_F)
                        .Cases("{f5}", "{ft5}", RISCV::F5_F)
                        .Cases("{f6}", "{ft6}", RISCV::F6_F)
                        .Cases("{f7}", "{ft7}", RISCV::F7_F)
                        .Cases("{f8}", "{fs0}", RISCV::F8_F)
                        .Cases("{f9}", "{fs1}", RISCV::F9_F)
                        .Cases("{f10}", "{fa0}", RISCV::F10_F)
                        .Cases("{f11}", "{fa1}", RISCV::F11_F)
                        .Cases("{f12}", "{fa2}", RISCV::F12_F)
                        .Cases("{f13}", "{fa3}", RISCV::F13_F)
                        .Cases("{f14}", "{fa4}", RISCV::F14_F)
                        .Cases("{f15}", "{fa5}", RISCV::F15_F)
                        .Cases("{f16}", "{fa6}", RISCV::F16_F)
                        .Cases("{f17}", "{fa7}", RISCV::F17_F)
                        .Cases("{f18}", "{fs2}", RISCV::F18_F)
                        .Cases("{f19}", "{fs3}", RISCV::F19_F)
                        .Cases("{f20}", "{fs4}", RISCV::F20_F)
                        .Cases("{f21}", "{fs5}", RISCV::F21_F)
                        .Cases("{f22}", "{fs6}", RISCV::F22_F)
                        .Cases("{f23}", "{fs7}", RISCV::F23_F)
                        .Cases("{f24}", "{fs8}", RISCV::F24_F)
                        .Cases("{f25}", "{fs9}", RISCV::F25_F)
                        .Cases("{f26}", "{fs10}", RISCV::F26_F)
                        .Cases("{f27}", "{fs11}", RISCV::F27_F)
                        .Cases("{f28}", "{ft8}", RISCV::F28_F)
                        .Cases("{f29}", "{ft9}", RISCV::F29_F)
                        .Cases("{f30}", "{ft10}", RISCV::F30_F)
                        .Cases("{f31}", "{ft11}", RISCV::F31_F)
                        .Default(RISCV::NoRegister);
    if (FReg != RISCV::NoRegister) {
      assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
      if (Subtarget.hasStdExtD()) {
        unsigned RegNo = FReg - RISCV::F0_F;
        unsigned DReg = RISCV::F0_D + RegNo;
        return std::make_pair(DReg, &RISCV::FPR64RegClass);
      }
      return std::make_pair(FReg, &RISCV::FPR32RegClass);
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

unsigned
RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
  // Currently only support length 1 constraints.
  if (ConstraintCode.size() == 1) {
    switch (ConstraintCode[0]) {
    case 'A':
      return InlineAsm::Constraint_A;
    default:
      break;
    }
  }

  return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}

void RISCVTargetLowering::LowerAsmOperandForConstraint(
    SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
    SelectionDAG &DAG) const {
  // Currently only support length 1 constraints.
  if (Constraint.length() == 1) {
    switch (Constraint[0]) {
    case 'I':
      // Validate & create a 12-bit signed immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getSExtValue();
        if (isInt<12>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
      }
      return;
    case 'J':
      // Validate & create an integer zero operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (C->getZExtValue() == 0)
          Ops.push_back(
              DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
      return;
    case 'K':
      // Validate & create a 5-bit unsigned immediate operand.
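      // ('K' corresponds to a uimm5 operand, e.g. the immediate of the CSR*I
      // instructions; only constants in the range [0, 31] are accepted here,
      // anything else falls through and is rejected.)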
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getZExtValue();
        if (isUInt<5>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
      }
      return;
    default:
      break;
    }
  }
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
                                                   Instruction *Inst,
                                                   AtomicOrdering Ord) const {
  if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
    return Builder.CreateFence(Ord);
  if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Release);
  return nullptr;
}

Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
                                                    Instruction *Inst,
                                                    AtomicOrdering Ord) const {
  if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Acquire);
  return nullptr;
}

TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
  // point operations can't be used in an lr/sc sequence without breaking the
  // forward-progress guarantee.
  if (AI->isFloatingPointOperation())
    return AtomicExpansionKind::CmpXChg;

  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

static Intrinsic::ID
getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
  if (XLen == 32) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i32;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i32;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i32;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i32;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i32;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i32;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i32;
    }
  }

  if (XLen == 64) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i64;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i64;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i64;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i64;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i64;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i64;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i64;
    }
  }

  llvm_unreachable("Unexpected XLen\n");
}

Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilder<> &Builder,
    AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  Value *Ordering =
      Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
  Type *Tys[] = {AlignedAddr->getType()};
  Function *LrwOpScwLoop = Intrinsic::getDeclaration(
      AI->getModule(),
      getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);

  if (XLen == 64) {
    Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
  }

  Value *Result;

  // Must pass the shift amount needed to sign extend the loaded value prior
  // to performing a signed comparison for min/max. ShiftAmt is the number of
  // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
  // is the number of bits to left+right shift the value in order to
  // sign-extend.
  if (AI->getOperation() == AtomicRMWInst::Min ||
      AI->getOperation() == AtomicRMWInst::Max) {
    const DataLayout &DL = AI->getModule()->getDataLayout();
    unsigned ValWidth =
        DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
    Value *SextShamt =
        Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
    Result = Builder.CreateCall(LrwOpScwLoop,
                                {AlignedAddr, Incr, Mask, SextShamt, Ordering});
  } else {
    Result =
        Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
  }

  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}

TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
    AtomicCmpXchgInst *CI) const {
  unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
    IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
  Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
  if (XLen == 64) {
    CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
    NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
  }
  Type *Tys[] = {AlignedAddr->getType()};
  Function *MaskedCmpXchg =
      Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
  Value *Result = Builder.CreateCall(
      MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}

bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                                     EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f16:
    return Subtarget.hasStdExtZfh();
  case MVT::f32:
    return Subtarget.hasStdExtF();
  case MVT::f64:
    return Subtarget.hasStdExtD();
  default:
    break;
  }

  return false;
}

Register RISCVTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X10;
}

Register RISCVTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X11;
}

bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
  // Return false to suppress the unnecessary extensions if the libcall
  // argument or return value is of f32 type under the LP64 ABI.
  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32))
    return false;

  return true;
}

bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type,
                                                        bool IsSigned) const {
  if (Subtarget.is64Bit() && Type == MVT::i32)
    return true;

  return IsSigned;
}

bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
                                                 SDValue C) const {
  // Check integral scalar types.
  if (VT.isScalarInteger()) {
    // Omit the optimization if the subtarget has the M extension and the data
    // size exceeds XLen.
    if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen())
      return false;
    if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
      // Break the MUL to a SLLI and an ADD/SUB.
      const APInt &Imm = ConstNode->getAPIntValue();
      if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
          (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
        return true;
      // Omit the following optimization if the subtarget has the M extension
      // and the data size >= XLen.
      if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen())
        return false;
      // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
      // a pair of LUI/ADDI.
      if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) {
        APInt ImmS = Imm.ashr(Imm.countTrailingZeros());
        if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
            (1 - ImmS).isPowerOf2())
          return true;
      }
    }
  }

  return false;
}

bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
  if (!Subtarget.useRVVForFixedLengthVectors())
    return false;

  if (!VT.isFixedLengthVector())
    return false;

  // Don't use RVV for vectors we cannot scalarize if required.
  switch (VT.getVectorElementType().SimpleTy) {
  default:
    return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
    break;
  case MVT::f16:
    if (!Subtarget.hasStdExtZfh())
      return false;
    break;
  case MVT::f32:
    if (!Subtarget.hasStdExtF())
      return false;
    break;
  case MVT::f64:
    if (!Subtarget.hasStdExtD())
      return false;
    break;
  }

  unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
  // Don't use RVV for types that don't fit.
  if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
    return false;

  // TODO: Perhaps an artificial restriction, but worth having whilst getting
  // the base fixed length RVV support in place.
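  // (For example, power-of-two types such as v4i32 or v8i16 can be lowered
  // via RVV here, whereas a type like v3i32 is rejected by the check below.)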
  if (!VT.isPow2VectorType())
    return false;

  return true;
}

bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
    bool *Fast) const {
  if (!VT.isScalableVector())
    return false;

  EVT ElemVT = VT.getVectorElementType();
  if (Alignment >= ElemVT.getStoreSize()) {
    if (Fast)
      *Fast = true;
    return true;
  }

  return false;
}

#define GET_REGISTER_MATCHER
#include "RISCVGenAsmMatcher.inc"

Register
RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                       const MachineFunction &MF) const {
  Register Reg = MatchRegisterAltName(RegName);
  if (Reg == RISCV::NoRegister)
    Reg = MatchRegisterName(RegName);
  if (Reg == RISCV::NoRegister)
    report_fatal_error(
        Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
  BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
  if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
    report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
                             StringRef(RegName) + "\"."));
  return Reg;
}

namespace llvm {
namespace RISCVVIntrinsicsTable {

#define GET_RISCVVIntrinsicsTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // namespace RISCVVIntrinsicsTable

namespace RISCVZvlssegTable {

#define GET_RISCVZvlssegTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // namespace RISCVZvlssegTable
} // namespace llvm