1 //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the interfaces that RISCV uses to lower LLVM code into a 10 // selection DAG. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "RISCVISelLowering.h" 15 #include "MCTargetDesc/RISCVMatInt.h" 16 #include "RISCV.h" 17 #include "RISCVMachineFunctionInfo.h" 18 #include "RISCVRegisterInfo.h" 19 #include "RISCVSubtarget.h" 20 #include "RISCVTargetMachine.h" 21 #include "llvm/ADT/SmallSet.h" 22 #include "llvm/ADT/Statistic.h" 23 #include "llvm/CodeGen/CallingConvLower.h" 24 #include "llvm/CodeGen/MachineFrameInfo.h" 25 #include "llvm/CodeGen/MachineFunction.h" 26 #include "llvm/CodeGen/MachineInstrBuilder.h" 27 #include "llvm/CodeGen/MachineRegisterInfo.h" 28 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 29 #include "llvm/CodeGen/ValueTypes.h" 30 #include "llvm/IR/DiagnosticInfo.h" 31 #include "llvm/IR/DiagnosticPrinter.h" 32 #include "llvm/IR/IntrinsicsRISCV.h" 33 #include "llvm/Support/Debug.h" 34 #include "llvm/Support/ErrorHandling.h" 35 #include "llvm/Support/KnownBits.h" 36 #include "llvm/Support/MathExtras.h" 37 #include "llvm/Support/raw_ostream.h" 38 39 using namespace llvm; 40 41 #define DEBUG_TYPE "riscv-lower" 42 43 STATISTIC(NumTailCalls, "Number of tail calls"); 44 45 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, 46 const RISCVSubtarget &STI) 47 : TargetLowering(TM), Subtarget(STI) { 48 49 if (Subtarget.isRV32E()) 50 report_fatal_error("Codegen not yet implemented for RV32E"); 51 52 RISCVABI::ABI ABI = Subtarget.getTargetABI(); 53 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI"); 54 55 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) && 56 !Subtarget.hasStdExtF()) { 57 errs() << "Hard-float 'f' ABI can't be used for a target that " 58 "doesn't support the F instruction set extension (ignoring " 59 "target-abi)\n"; 60 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32; 61 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) && 62 !Subtarget.hasStdExtD()) { 63 errs() << "Hard-float 'd' ABI can't be used for a target that " 64 "doesn't support the D instruction set extension (ignoring " 65 "target-abi)\n"; 66 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32; 67 } 68 69 switch (ABI) { 70 default: 71 report_fatal_error("Don't know how to lower this ABI"); 72 case RISCVABI::ABI_ILP32: 73 case RISCVABI::ABI_ILP32F: 74 case RISCVABI::ABI_ILP32D: 75 case RISCVABI::ABI_LP64: 76 case RISCVABI::ABI_LP64F: 77 case RISCVABI::ABI_LP64D: 78 break; 79 } 80 81 MVT XLenVT = Subtarget.getXLenVT(); 82 83 // Set up the register classes. 
84 addRegisterClass(XLenVT, &RISCV::GPRRegClass); 85 86 if (Subtarget.hasStdExtZfh()) 87 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass); 88 if (Subtarget.hasStdExtF()) 89 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass); 90 if (Subtarget.hasStdExtD()) 91 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass); 92 93 static const MVT::SimpleValueType BoolVecVTs[] = { 94 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1, 95 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1}; 96 static const MVT::SimpleValueType IntVecVTs[] = { 97 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8, 98 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16, 99 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32, 100 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64, 101 MVT::nxv4i64, MVT::nxv8i64}; 102 static const MVT::SimpleValueType F16VecVTs[] = { 103 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16, 104 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16}; 105 static const MVT::SimpleValueType F32VecVTs[] = { 106 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32}; 107 static const MVT::SimpleValueType F64VecVTs[] = { 108 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64}; 109 110 if (Subtarget.hasStdExtV()) { 111 auto addRegClassForRVV = [this](MVT VT) { 112 unsigned Size = VT.getSizeInBits().getKnownMinValue(); 113 assert(Size <= 512 && isPowerOf2_32(Size)); 114 const TargetRegisterClass *RC; 115 if (Size <= 64) 116 RC = &RISCV::VRRegClass; 117 else if (Size == 128) 118 RC = &RISCV::VRM2RegClass; 119 else if (Size == 256) 120 RC = &RISCV::VRM4RegClass; 121 else 122 RC = &RISCV::VRM8RegClass; 123 124 addRegisterClass(VT, RC); 125 }; 126 127 for (MVT VT : BoolVecVTs) 128 addRegClassForRVV(VT); 129 for (MVT VT : IntVecVTs) 130 addRegClassForRVV(VT); 131 132 if (Subtarget.hasStdExtZfh()) 133 for (MVT VT : F16VecVTs) 134 addRegClassForRVV(VT); 135 136 if (Subtarget.hasStdExtF()) 137 for (MVT VT : F32VecVTs) 138 addRegClassForRVV(VT); 139 140 if (Subtarget.hasStdExtD()) 141 for (MVT VT : F64VecVTs) 142 addRegClassForRVV(VT); 143 } 144 145 // Compute derived properties from the register classes. 146 computeRegisterProperties(STI.getRegisterInfo()); 147 148 setStackPointerRegisterToSaveRestore(RISCV::X2); 149 150 for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) 151 setLoadExtAction(N, XLenVT, MVT::i1, Promote); 152 153 // TODO: add all necessary setOperationAction calls. 
154 setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand); 155 156 setOperationAction(ISD::BR_JT, MVT::Other, Expand); 157 setOperationAction(ISD::BR_CC, XLenVT, Expand); 158 setOperationAction(ISD::SELECT_CC, XLenVT, Expand); 159 160 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); 161 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); 162 163 setOperationAction(ISD::VASTART, MVT::Other, Custom); 164 setOperationAction(ISD::VAARG, MVT::Other, Expand); 165 setOperationAction(ISD::VACOPY, MVT::Other, Expand); 166 setOperationAction(ISD::VAEND, MVT::Other, Expand); 167 168 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 169 if (!Subtarget.hasStdExtZbb()) { 170 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); 171 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); 172 } 173 174 if (Subtarget.is64Bit()) { 175 setOperationAction(ISD::ADD, MVT::i32, Custom); 176 setOperationAction(ISD::SUB, MVT::i32, Custom); 177 setOperationAction(ISD::SHL, MVT::i32, Custom); 178 setOperationAction(ISD::SRA, MVT::i32, Custom); 179 setOperationAction(ISD::SRL, MVT::i32, Custom); 180 } 181 182 if (!Subtarget.hasStdExtM()) { 183 setOperationAction(ISD::MUL, XLenVT, Expand); 184 setOperationAction(ISD::MULHS, XLenVT, Expand); 185 setOperationAction(ISD::MULHU, XLenVT, Expand); 186 setOperationAction(ISD::SDIV, XLenVT, Expand); 187 setOperationAction(ISD::UDIV, XLenVT, Expand); 188 setOperationAction(ISD::SREM, XLenVT, Expand); 189 setOperationAction(ISD::UREM, XLenVT, Expand); 190 } 191 192 if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) { 193 setOperationAction(ISD::MUL, MVT::i32, Custom); 194 195 setOperationAction(ISD::SDIV, MVT::i8, Custom); 196 setOperationAction(ISD::UDIV, MVT::i8, Custom); 197 setOperationAction(ISD::UREM, MVT::i8, Custom); 198 setOperationAction(ISD::SDIV, MVT::i16, Custom); 199 setOperationAction(ISD::UDIV, MVT::i16, Custom); 200 setOperationAction(ISD::UREM, MVT::i16, Custom); 201 setOperationAction(ISD::SDIV, MVT::i32, Custom); 202 setOperationAction(ISD::UDIV, MVT::i32, Custom); 203 setOperationAction(ISD::UREM, MVT::i32, Custom); 204 } 205 206 setOperationAction(ISD::SDIVREM, XLenVT, Expand); 207 setOperationAction(ISD::UDIVREM, XLenVT, Expand); 208 setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand); 209 setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand); 210 211 setOperationAction(ISD::SHL_PARTS, XLenVT, Custom); 212 setOperationAction(ISD::SRL_PARTS, XLenVT, Custom); 213 setOperationAction(ISD::SRA_PARTS, XLenVT, Custom); 214 215 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) { 216 if (Subtarget.is64Bit()) { 217 setOperationAction(ISD::ROTL, MVT::i32, Custom); 218 setOperationAction(ISD::ROTR, MVT::i32, Custom); 219 } 220 } else { 221 setOperationAction(ISD::ROTL, XLenVT, Expand); 222 setOperationAction(ISD::ROTR, XLenVT, Expand); 223 } 224 225 if (Subtarget.hasStdExtZbp()) { 226 // Custom lower bswap/bitreverse so we can convert them to GREVI to enable 227 // more combining. 228 setOperationAction(ISD::BITREVERSE, XLenVT, Custom); 229 setOperationAction(ISD::BSWAP, XLenVT, Custom); 230 231 if (Subtarget.is64Bit()) { 232 setOperationAction(ISD::BITREVERSE, MVT::i32, Custom); 233 setOperationAction(ISD::BSWAP, MVT::i32, Custom); 234 } 235 } else { 236 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll 237 // pattern match it directly in isel. 238 setOperationAction(ISD::BSWAP, XLenVT, 239 Subtarget.hasStdExtZbb() ? 
Legal : Expand); 240 } 241 242 if (Subtarget.hasStdExtZbb()) { 243 setOperationAction(ISD::SMIN, XLenVT, Legal); 244 setOperationAction(ISD::SMAX, XLenVT, Legal); 245 setOperationAction(ISD::UMIN, XLenVT, Legal); 246 setOperationAction(ISD::UMAX, XLenVT, Legal); 247 } else { 248 setOperationAction(ISD::CTTZ, XLenVT, Expand); 249 setOperationAction(ISD::CTLZ, XLenVT, Expand); 250 setOperationAction(ISD::CTPOP, XLenVT, Expand); 251 } 252 253 if (Subtarget.hasStdExtZbt()) { 254 setOperationAction(ISD::FSHL, XLenVT, Custom); 255 setOperationAction(ISD::FSHR, XLenVT, Custom); 256 setOperationAction(ISD::SELECT, XLenVT, Legal); 257 258 if (Subtarget.is64Bit()) { 259 setOperationAction(ISD::FSHL, MVT::i32, Custom); 260 setOperationAction(ISD::FSHR, MVT::i32, Custom); 261 } 262 } else { 263 setOperationAction(ISD::SELECT, XLenVT, Custom); 264 } 265 266 ISD::CondCode FPCCToExpand[] = { 267 ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT, 268 ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT, 269 ISD::SETGE, ISD::SETNE, ISD::SETO, ISD::SETUO}; 270 271 ISD::NodeType FPOpToExpand[] = { 272 ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FP16_TO_FP, 273 ISD::FP_TO_FP16}; 274 275 if (Subtarget.hasStdExtZfh()) 276 setOperationAction(ISD::BITCAST, MVT::i16, Custom); 277 278 if (Subtarget.hasStdExtZfh()) { 279 setOperationAction(ISD::FMINNUM, MVT::f16, Legal); 280 setOperationAction(ISD::FMAXNUM, MVT::f16, Legal); 281 for (auto CC : FPCCToExpand) 282 setCondCodeAction(CC, MVT::f16, Expand); 283 setOperationAction(ISD::SELECT_CC, MVT::f16, Expand); 284 setOperationAction(ISD::SELECT, MVT::f16, Custom); 285 setOperationAction(ISD::BR_CC, MVT::f16, Expand); 286 for (auto Op : FPOpToExpand) 287 setOperationAction(Op, MVT::f16, Expand); 288 } 289 290 if (Subtarget.hasStdExtF()) { 291 setOperationAction(ISD::FMINNUM, MVT::f32, Legal); 292 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); 293 for (auto CC : FPCCToExpand) 294 setCondCodeAction(CC, MVT::f32, Expand); 295 setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); 296 setOperationAction(ISD::SELECT, MVT::f32, Custom); 297 setOperationAction(ISD::BR_CC, MVT::f32, Expand); 298 for (auto Op : FPOpToExpand) 299 setOperationAction(Op, MVT::f32, Expand); 300 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); 301 setTruncStoreAction(MVT::f32, MVT::f16, Expand); 302 } 303 304 if (Subtarget.hasStdExtF() && Subtarget.is64Bit()) 305 setOperationAction(ISD::BITCAST, MVT::i32, Custom); 306 307 if (Subtarget.hasStdExtD()) { 308 setOperationAction(ISD::FMINNUM, MVT::f64, Legal); 309 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); 310 for (auto CC : FPCCToExpand) 311 setCondCodeAction(CC, MVT::f64, Expand); 312 setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); 313 setOperationAction(ISD::SELECT, MVT::f64, Custom); 314 setOperationAction(ISD::BR_CC, MVT::f64, Expand); 315 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); 316 setTruncStoreAction(MVT::f64, MVT::f32, Expand); 317 for (auto Op : FPOpToExpand) 318 setOperationAction(Op, MVT::f64, Expand); 319 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); 320 setTruncStoreAction(MVT::f64, MVT::f16, Expand); 321 } 322 323 if (Subtarget.is64Bit()) { 324 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); 325 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); 326 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom); 327 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom); 328 } 329 330 setOperationAction(ISD::GlobalAddress, 
XLenVT, Custom); 331 setOperationAction(ISD::BlockAddress, XLenVT, Custom); 332 setOperationAction(ISD::ConstantPool, XLenVT, Custom); 333 setOperationAction(ISD::JumpTable, XLenVT, Custom); 334 335 setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom); 336 337 // TODO: On M-mode only targets, the cycle[h] CSR may not be present. 338 // Unfortunately this can't be determined just from the ISA naming string. 339 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, 340 Subtarget.is64Bit() ? Legal : Custom); 341 342 setOperationAction(ISD::TRAP, MVT::Other, Legal); 343 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); 344 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 345 346 if (Subtarget.hasStdExtA()) { 347 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen()); 348 setMinCmpXchgSizeInBits(32); 349 } else { 350 setMaxAtomicSizeInBitsSupported(0); 351 } 352 353 setBooleanContents(ZeroOrOneBooleanContent); 354 355 if (Subtarget.hasStdExtV()) { 356 setBooleanVectorContents(ZeroOrOneBooleanContent); 357 358 setOperationAction(ISD::VSCALE, XLenVT, Custom); 359 360 // RVV intrinsics may have illegal operands. 361 // We also need to custom legalize vmv.x.s. 362 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom); 363 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom); 364 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom); 365 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom); 366 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom); 367 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom); 368 369 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); 370 371 if (Subtarget.is64Bit()) { 372 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); 373 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); 374 } else { 375 // We must custom-lower certain vXi64 operations on RV32 due to the vector 376 // element type being illegal. 377 setOperationAction(ISD::SPLAT_VECTOR, MVT::i64, Custom); 378 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i64, Custom); 379 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::i64, Custom); 380 381 setOperationAction(ISD::VECREDUCE_ADD, MVT::i64, Custom); 382 setOperationAction(ISD::VECREDUCE_AND, MVT::i64, Custom); 383 setOperationAction(ISD::VECREDUCE_OR, MVT::i64, Custom); 384 setOperationAction(ISD::VECREDUCE_XOR, MVT::i64, Custom); 385 setOperationAction(ISD::VECREDUCE_SMAX, MVT::i64, Custom); 386 setOperationAction(ISD::VECREDUCE_SMIN, MVT::i64, Custom); 387 setOperationAction(ISD::VECREDUCE_UMAX, MVT::i64, Custom); 388 setOperationAction(ISD::VECREDUCE_UMIN, MVT::i64, Custom); 389 } 390 391 for (MVT VT : BoolVecVTs) { 392 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); 393 394 // Mask VTs are custom-expanded into a series of standard nodes 395 setOperationAction(ISD::TRUNCATE, VT, Custom); 396 } 397 398 for (MVT VT : IntVecVTs) { 399 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); 400 401 setOperationAction(ISD::SMIN, VT, Legal); 402 setOperationAction(ISD::SMAX, VT, Legal); 403 setOperationAction(ISD::UMIN, VT, Legal); 404 setOperationAction(ISD::UMAX, VT, Legal); 405 406 setOperationAction(ISD::ROTL, VT, Expand); 407 setOperationAction(ISD::ROTR, VT, Expand); 408 409 // Custom-lower extensions and truncations from/to mask types. 
410 setOperationAction(ISD::ANY_EXTEND, VT, Custom); 411 setOperationAction(ISD::SIGN_EXTEND, VT, Custom); 412 setOperationAction(ISD::ZERO_EXTEND, VT, Custom); 413 414 // RVV has native int->float & float->int conversions where the 415 // element type sizes are within one power-of-two of each other. Any 416 // wider distances between type sizes have to be lowered as sequences 417 // which progressively narrow the gap in stages. 418 setOperationAction(ISD::SINT_TO_FP, VT, Custom); 419 setOperationAction(ISD::UINT_TO_FP, VT, Custom); 420 setOperationAction(ISD::FP_TO_SINT, VT, Custom); 421 setOperationAction(ISD::FP_TO_UINT, VT, Custom); 422 423 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR" 424 // nodes which truncate by one power of two at a time. 425 setOperationAction(ISD::TRUNCATE, VT, Custom); 426 427 // Custom-lower insert/extract operations to simplify patterns. 428 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); 429 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); 430 431 // Custom-lower reduction operations to set up the corresponding custom 432 // nodes' operands. 433 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); 434 setOperationAction(ISD::VECREDUCE_AND, VT, Custom); 435 setOperationAction(ISD::VECREDUCE_OR, VT, Custom); 436 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); 437 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom); 438 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom); 439 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom); 440 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom); 441 } 442 443 // Expand various CCs to best match the RVV ISA, which natively supports UNE 444 // but no other unordered comparisons, and supports all ordered comparisons 445 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization 446 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE), 447 // and we pattern-match those back to the "original", swapping operands once 448 // more. This way we catch both operations and both "vf" and "fv" forms with 449 // fewer patterns. 450 ISD::CondCode VFPCCToExpand[] = { 451 ISD::SETO, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT, 452 ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO, 453 ISD::SETGT, ISD::SETOGT, ISD::SETGE, ISD::SETOGE, 454 }; 455 456 // Sets common operation actions on RVV floating-point vector types. 457 const auto SetCommonVFPActions = [&](MVT VT) { 458 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); 459 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type 460 // sizes are within one power-of-two of each other. Therefore conversions 461 // between vXf16 and vXf64 must be lowered as sequences which convert via 462 // vXf32. 463 setOperationAction(ISD::FP_ROUND, VT, Custom); 464 setOperationAction(ISD::FP_EXTEND, VT, Custom); 465 // Custom-lower insert/extract operations to simplify patterns. 466 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); 467 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); 468 // Expand various condition codes (explained above). 469 for (auto CC : VFPCCToExpand) 470 setCondCodeAction(CC, VT, Expand); 471 }; 472 473 if (Subtarget.hasStdExtZfh()) 474 for (MVT VT : F16VecVTs) 475 SetCommonVFPActions(VT); 476 477 if (Subtarget.hasStdExtF()) 478 for (MVT VT : F32VecVTs) 479 SetCommonVFPActions(VT); 480 481 if (Subtarget.hasStdExtD()) 482 for (MVT VT : F64VecVTs) 483 SetCommonVFPActions(VT); 484 } 485 486 // Function alignments. 487 const Align FunctionAlignment(Subtarget.hasStdExtC() ? 
2 : 4); 488 setMinFunctionAlignment(FunctionAlignment); 489 setPrefFunctionAlignment(FunctionAlignment); 490 491 setMinimumJumpTableEntries(5); 492 493 // Jumps are expensive, compared to logic 494 setJumpIsExpensive(); 495 496 // We can use any register for comparisons 497 setHasMultipleConditionRegisters(); 498 499 setTargetDAGCombine(ISD::SETCC); 500 if (Subtarget.hasStdExtZbp()) { 501 setTargetDAGCombine(ISD::OR); 502 } 503 } 504 505 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, 506 LLVMContext &Context, 507 EVT VT) const { 508 if (!VT.isVector()) 509 return getPointerTy(DL); 510 if (Subtarget.hasStdExtV() && VT.isScalableVector()) 511 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount()); 512 return VT.changeVectorElementTypeToInteger(); 513 } 514 515 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, 516 const CallInst &I, 517 MachineFunction &MF, 518 unsigned Intrinsic) const { 519 switch (Intrinsic) { 520 default: 521 return false; 522 case Intrinsic::riscv_masked_atomicrmw_xchg_i32: 523 case Intrinsic::riscv_masked_atomicrmw_add_i32: 524 case Intrinsic::riscv_masked_atomicrmw_sub_i32: 525 case Intrinsic::riscv_masked_atomicrmw_nand_i32: 526 case Intrinsic::riscv_masked_atomicrmw_max_i32: 527 case Intrinsic::riscv_masked_atomicrmw_min_i32: 528 case Intrinsic::riscv_masked_atomicrmw_umax_i32: 529 case Intrinsic::riscv_masked_atomicrmw_umin_i32: 530 case Intrinsic::riscv_masked_cmpxchg_i32: 531 PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType()); 532 Info.opc = ISD::INTRINSIC_W_CHAIN; 533 Info.memVT = MVT::getVT(PtrTy->getElementType()); 534 Info.ptrVal = I.getArgOperand(0); 535 Info.offset = 0; 536 Info.align = Align(4); 537 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore | 538 MachineMemOperand::MOVolatile; 539 return true; 540 } 541 } 542 543 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL, 544 const AddrMode &AM, Type *Ty, 545 unsigned AS, 546 Instruction *I) const { 547 // No global is ever allowed as a base. 548 if (AM.BaseGV) 549 return false; 550 551 // Require a 12-bit signed offset. 552 if (!isInt<12>(AM.BaseOffs)) 553 return false; 554 555 switch (AM.Scale) { 556 case 0: // "r+i" or just "i", depending on HasBaseReg. 557 break; 558 case 1: 559 if (!AM.HasBaseReg) // allow "r+i". 560 break; 561 return false; // disallow "r+r" or "r+r+i". 562 default: 563 return false; 564 } 565 566 return true; 567 } 568 569 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 570 return isInt<12>(Imm); 571 } 572 573 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const { 574 return isInt<12>(Imm); 575 } 576 577 // On RV32, 64-bit integers are split into their high and low parts and held 578 // in two different registers, so the trunc is free since the low register can 579 // just be used. 
580 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const { 581 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy()) 582 return false; 583 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); 584 unsigned DestBits = DstTy->getPrimitiveSizeInBits(); 585 return (SrcBits == 64 && DestBits == 32); 586 } 587 588 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const { 589 if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() || 590 !SrcVT.isInteger() || !DstVT.isInteger()) 591 return false; 592 unsigned SrcBits = SrcVT.getSizeInBits(); 593 unsigned DestBits = DstVT.getSizeInBits(); 594 return (SrcBits == 64 && DestBits == 32); 595 } 596 597 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { 598 // Zexts are free if they can be combined with a load. 599 if (auto *LD = dyn_cast<LoadSDNode>(Val)) { 600 EVT MemVT = LD->getMemoryVT(); 601 if ((MemVT == MVT::i8 || MemVT == MVT::i16 || 602 (Subtarget.is64Bit() && MemVT == MVT::i32)) && 603 (LD->getExtensionType() == ISD::NON_EXTLOAD || 604 LD->getExtensionType() == ISD::ZEXTLOAD)) 605 return true; 606 } 607 608 return TargetLowering::isZExtFree(Val, VT2); 609 } 610 611 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const { 612 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64; 613 } 614 615 bool RISCVTargetLowering::isCheapToSpeculateCttz() const { 616 return Subtarget.hasStdExtZbb(); 617 } 618 619 bool RISCVTargetLowering::isCheapToSpeculateCtlz() const { 620 return Subtarget.hasStdExtZbb(); 621 } 622 623 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, 624 bool ForCodeSize) const { 625 if (VT == MVT::f16 && !Subtarget.hasStdExtZfh()) 626 return false; 627 if (VT == MVT::f32 && !Subtarget.hasStdExtF()) 628 return false; 629 if (VT == MVT::f64 && !Subtarget.hasStdExtD()) 630 return false; 631 if (Imm.isNegZero()) 632 return false; 633 return Imm.isZero(); 634 } 635 636 bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const { 637 return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) || 638 (VT == MVT::f32 && Subtarget.hasStdExtF()) || 639 (VT == MVT::f64 && Subtarget.hasStdExtD()); 640 } 641 642 // Changes the condition code and swaps operands if necessary, so the SetCC 643 // operation matches one of the comparisons supported directly in the RISC-V 644 // ISA. 645 static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) { 646 switch (CC) { 647 default: 648 break; 649 case ISD::SETGT: 650 case ISD::SETLE: 651 case ISD::SETUGT: 652 case ISD::SETULE: 653 CC = ISD::getSetCCSwappedOperands(CC); 654 std::swap(LHS, RHS); 655 break; 656 } 657 } 658 659 // Return the RISC-V branch opcode that matches the given DAG integer 660 // condition code. The CondCode must be one of those supported by the RISC-V 661 // ISA (see normaliseSetCC). 
662 static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) { 663 switch (CC) { 664 default: 665 llvm_unreachable("Unsupported CondCode"); 666 case ISD::SETEQ: 667 return RISCV::BEQ; 668 case ISD::SETNE: 669 return RISCV::BNE; 670 case ISD::SETLT: 671 return RISCV::BLT; 672 case ISD::SETGE: 673 return RISCV::BGE; 674 case ISD::SETULT: 675 return RISCV::BLTU; 676 case ISD::SETUGE: 677 return RISCV::BGEU; 678 } 679 } 680 681 SDValue RISCVTargetLowering::LowerOperation(SDValue Op, 682 SelectionDAG &DAG) const { 683 switch (Op.getOpcode()) { 684 default: 685 report_fatal_error("unimplemented operand"); 686 case ISD::GlobalAddress: 687 return lowerGlobalAddress(Op, DAG); 688 case ISD::BlockAddress: 689 return lowerBlockAddress(Op, DAG); 690 case ISD::ConstantPool: 691 return lowerConstantPool(Op, DAG); 692 case ISD::JumpTable: 693 return lowerJumpTable(Op, DAG); 694 case ISD::GlobalTLSAddress: 695 return lowerGlobalTLSAddress(Op, DAG); 696 case ISD::SELECT: 697 return lowerSELECT(Op, DAG); 698 case ISD::VASTART: 699 return lowerVASTART(Op, DAG); 700 case ISD::FRAMEADDR: 701 return lowerFRAMEADDR(Op, DAG); 702 case ISD::RETURNADDR: 703 return lowerRETURNADDR(Op, DAG); 704 case ISD::SHL_PARTS: 705 return lowerShiftLeftParts(Op, DAG); 706 case ISD::SRA_PARTS: 707 return lowerShiftRightParts(Op, DAG, true); 708 case ISD::SRL_PARTS: 709 return lowerShiftRightParts(Op, DAG, false); 710 case ISD::BITCAST: { 711 assert(((Subtarget.is64Bit() && Subtarget.hasStdExtF()) || 712 Subtarget.hasStdExtZfh()) && 713 "Unexpected custom legalisation"); 714 SDLoc DL(Op); 715 SDValue Op0 = Op.getOperand(0); 716 if (Op.getValueType() == MVT::f16 && Subtarget.hasStdExtZfh()) { 717 if (Op0.getValueType() != MVT::i16) 718 return SDValue(); 719 SDValue NewOp0 = 720 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Op0); 721 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0); 722 return FPConv; 723 } else if (Op.getValueType() == MVT::f32 && Subtarget.is64Bit() && 724 Subtarget.hasStdExtF()) { 725 if (Op0.getValueType() != MVT::i32) 726 return SDValue(); 727 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); 728 SDValue FPConv = 729 DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0); 730 return FPConv; 731 } 732 return SDValue(); 733 } 734 case ISD::INTRINSIC_WO_CHAIN: 735 return LowerINTRINSIC_WO_CHAIN(Op, DAG); 736 case ISD::INTRINSIC_W_CHAIN: 737 return LowerINTRINSIC_W_CHAIN(Op, DAG); 738 case ISD::BSWAP: 739 case ISD::BITREVERSE: { 740 // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combinining. 741 assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation"); 742 MVT VT = Op.getSimpleValueType(); 743 SDLoc DL(Op); 744 // Start with the maximum immediate value which is the bitwidth - 1. 745 unsigned Imm = VT.getSizeInBits() - 1; 746 // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits. 747 if (Op.getOpcode() == ISD::BSWAP) 748 Imm &= ~0x7U; 749 return DAG.getNode(RISCVISD::GREVI, DL, VT, Op.getOperand(0), 750 DAG.getTargetConstant(Imm, DL, Subtarget.getXLenVT())); 751 } 752 case ISD::FSHL: 753 case ISD::FSHR: { 754 MVT VT = Op.getSimpleValueType(); 755 assert(VT == Subtarget.getXLenVT() && "Unexpected custom legalization"); 756 SDLoc DL(Op); 757 // FSL/FSR take a log2(XLen)+1 bit shift amount but XLenVT FSHL/FSHR only 758 // use log(XLen) bits. Mask the shift amount accordingly. 
759 unsigned ShAmtWidth = Subtarget.getXLen() - 1; 760 SDValue ShAmt = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(2), 761 DAG.getConstant(ShAmtWidth, DL, VT)); 762 unsigned Opc = Op.getOpcode() == ISD::FSHL ? RISCVISD::FSL : RISCVISD::FSR; 763 return DAG.getNode(Opc, DL, VT, Op.getOperand(0), Op.getOperand(1), ShAmt); 764 } 765 case ISD::TRUNCATE: { 766 SDLoc DL(Op); 767 EVT VT = Op.getValueType(); 768 // Only custom-lower vector truncates 769 if (!VT.isVector()) 770 return Op; 771 772 // Truncates to mask types are handled differently 773 if (VT.getVectorElementType() == MVT::i1) 774 return lowerVectorMaskTrunc(Op, DAG); 775 776 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary 777 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR" nodes which 778 // truncate by one power of two at a time. 779 EVT DstEltVT = VT.getVectorElementType(); 780 781 SDValue Src = Op.getOperand(0); 782 EVT SrcVT = Src.getValueType(); 783 EVT SrcEltVT = SrcVT.getVectorElementType(); 784 785 assert(DstEltVT.bitsLT(SrcEltVT) && 786 isPowerOf2_64(DstEltVT.getSizeInBits()) && 787 isPowerOf2_64(SrcEltVT.getSizeInBits()) && 788 "Unexpected vector truncate lowering"); 789 790 SDValue Result = Src; 791 LLVMContext &Context = *DAG.getContext(); 792 const ElementCount Count = SrcVT.getVectorElementCount(); 793 do { 794 SrcEltVT = EVT::getIntegerVT(Context, SrcEltVT.getSizeInBits() / 2); 795 EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count); 796 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR, DL, ResultVT, Result); 797 } while (SrcEltVT != DstEltVT); 798 799 return Result; 800 } 801 case ISD::ANY_EXTEND: 802 case ISD::ZERO_EXTEND: 803 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1); 804 case ISD::SIGN_EXTEND: 805 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1); 806 case ISD::SPLAT_VECTOR: 807 return lowerSPLATVECTOR(Op, DAG); 808 case ISD::INSERT_VECTOR_ELT: 809 return lowerINSERT_VECTOR_ELT(Op, DAG); 810 case ISD::EXTRACT_VECTOR_ELT: 811 return lowerEXTRACT_VECTOR_ELT(Op, DAG); 812 case ISD::VSCALE: { 813 MVT VT = Op.getSimpleValueType(); 814 SDLoc DL(Op); 815 SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT); 816 // We define our scalable vector types for lmul=1 to use a 64 bit known 817 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate 818 // vscale as VLENB / 8. 819 SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB, 820 DAG.getConstant(3, DL, VT)); 821 return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0)); 822 } 823 case ISD::FP_EXTEND: { 824 // RVV can only do fp_extend to types double the size as the source. We 825 // custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going 826 // via f32. 827 MVT VT = Op.getSimpleValueType(); 828 MVT SrcVT = Op.getOperand(0).getSimpleValueType(); 829 // We only need to close the gap between vXf16->vXf64. 830 if (!VT.isVector() || VT.getVectorElementType() != MVT::f64 || 831 SrcVT.getVectorElementType() != MVT::f16) 832 return Op; 833 SDLoc DL(Op); 834 MVT InterVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 835 SDValue IntermediateRound = 836 DAG.getFPExtendOrRound(Op.getOperand(0), DL, InterVT); 837 return DAG.getFPExtendOrRound(IntermediateRound, DL, VT); 838 } 839 case ISD::FP_ROUND: { 840 // RVV can only do fp_round to types half the size as the source. We 841 // custom-lower f64->f16 rounds via RVV's round-to-odd float 842 // conversion instruction. 
843 MVT VT = Op.getSimpleValueType(); 844 MVT SrcVT = Op.getOperand(0).getSimpleValueType(); 845 // We only need to close the gap between vXf64<->vXf16. 846 if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 || 847 SrcVT.getVectorElementType() != MVT::f64) 848 return Op; 849 SDLoc DL(Op); 850 MVT InterVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 851 SDValue IntermediateRound = 852 DAG.getNode(RISCVISD::VFNCVT_ROD, DL, InterVT, Op.getOperand(0)); 853 return DAG.getFPExtendOrRound(IntermediateRound, DL, VT); 854 } 855 case ISD::FP_TO_SINT: 856 case ISD::FP_TO_UINT: 857 case ISD::SINT_TO_FP: 858 case ISD::UINT_TO_FP: { 859 // RVV can only do fp<->int conversions to types half/double the size as 860 // the source. We custom-lower any conversions that do two hops into 861 // sequences. 862 MVT VT = Op.getSimpleValueType(); 863 if (!VT.isVector()) 864 return Op; 865 SDLoc DL(Op); 866 SDValue Src = Op.getOperand(0); 867 MVT EltVT = VT.getVectorElementType(); 868 MVT SrcEltVT = Src.getSimpleValueType().getVectorElementType(); 869 unsigned EltSize = EltVT.getSizeInBits(); 870 unsigned SrcEltSize = SrcEltVT.getSizeInBits(); 871 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) && 872 "Unexpected vector element types"); 873 bool IsInt2FP = SrcEltVT.isInteger(); 874 // Widening conversions 875 if (EltSize > SrcEltSize && (EltSize / SrcEltSize >= 4)) { 876 if (IsInt2FP) { 877 // Do a regular integer sign/zero extension then convert to float. 878 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltVT.getSizeInBits()), 879 VT.getVectorElementCount()); 880 unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP 881 ? ISD::ZERO_EXTEND 882 : ISD::SIGN_EXTEND; 883 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src); 884 return DAG.getNode(Op.getOpcode(), DL, VT, Ext); 885 } 886 // FP2Int 887 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering"); 888 // Do one doubling fp_extend then complete the operation by converting 889 // to int. 890 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 891 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT); 892 return DAG.getNode(Op.getOpcode(), DL, VT, FExt); 893 } 894 895 // Narrowing conversions 896 if (SrcEltSize > EltSize && (SrcEltSize / EltSize >= 4)) { 897 if (IsInt2FP) { 898 // One narrowing int_to_fp, then an fp_round. 899 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering"); 900 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 901 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src); 902 return DAG.getFPExtendOrRound(Int2FP, DL, VT); 903 } 904 // FP2Int 905 // One narrowing fp_to_int, then truncate the integer. If the float isn't 906 // representable by the integer, the result is poison. 
907 MVT IVecVT = 908 MVT::getVectorVT(MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2), 909 VT.getVectorElementCount()); 910 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src); 911 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int); 912 } 913 914 return Op; 915 } 916 case ISD::VECREDUCE_ADD: 917 case ISD::VECREDUCE_UMAX: 918 case ISD::VECREDUCE_SMAX: 919 case ISD::VECREDUCE_UMIN: 920 case ISD::VECREDUCE_SMIN: 921 case ISD::VECREDUCE_AND: 922 case ISD::VECREDUCE_OR: 923 case ISD::VECREDUCE_XOR: 924 return lowerVECREDUCE(Op, DAG); 925 } 926 } 927 928 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty, 929 SelectionDAG &DAG, unsigned Flags) { 930 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags); 931 } 932 933 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty, 934 SelectionDAG &DAG, unsigned Flags) { 935 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(), 936 Flags); 937 } 938 939 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty, 940 SelectionDAG &DAG, unsigned Flags) { 941 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(), 942 N->getOffset(), Flags); 943 } 944 945 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty, 946 SelectionDAG &DAG, unsigned Flags) { 947 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags); 948 } 949 950 template <class NodeTy> 951 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, 952 bool IsLocal) const { 953 SDLoc DL(N); 954 EVT Ty = getPointerTy(DAG.getDataLayout()); 955 956 if (isPositionIndependent()) { 957 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); 958 if (IsLocal) 959 // Use PC-relative addressing to access the symbol. This generates the 960 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym)) 961 // %pcrel_lo(auipc)). 962 return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0); 963 964 // Use PC-relative addressing to access the GOT for this symbol, then load 965 // the address from the GOT. This generates the pattern (PseudoLA sym), 966 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))). 967 return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0); 968 } 969 970 switch (getTargetMachine().getCodeModel()) { 971 default: 972 report_fatal_error("Unsupported code model for lowering"); 973 case CodeModel::Small: { 974 // Generate a sequence for accessing addresses within the first 2 GiB of 975 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)). 976 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI); 977 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO); 978 SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0); 979 return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0); 980 } 981 case CodeModel::Medium: { 982 // Generate a sequence for accessing addresses within any 2GiB range within 983 // the address space. This generates the pattern (PseudoLLA sym), which 984 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)). 
985 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); 986 return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0); 987 } 988 } 989 } 990 991 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op, 992 SelectionDAG &DAG) const { 993 SDLoc DL(Op); 994 EVT Ty = Op.getValueType(); 995 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 996 int64_t Offset = N->getOffset(); 997 MVT XLenVT = Subtarget.getXLenVT(); 998 999 const GlobalValue *GV = N->getGlobal(); 1000 bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV); 1001 SDValue Addr = getAddr(N, DAG, IsLocal); 1002 1003 // In order to maximise the opportunity for common subexpression elimination, 1004 // emit a separate ADD node for the global address offset instead of folding 1005 // it in the global address node. Later peephole optimisations may choose to 1006 // fold it back in when profitable. 1007 if (Offset != 0) 1008 return DAG.getNode(ISD::ADD, DL, Ty, Addr, 1009 DAG.getConstant(Offset, DL, XLenVT)); 1010 return Addr; 1011 } 1012 1013 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op, 1014 SelectionDAG &DAG) const { 1015 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op); 1016 1017 return getAddr(N, DAG); 1018 } 1019 1020 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op, 1021 SelectionDAG &DAG) const { 1022 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); 1023 1024 return getAddr(N, DAG); 1025 } 1026 1027 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op, 1028 SelectionDAG &DAG) const { 1029 JumpTableSDNode *N = cast<JumpTableSDNode>(Op); 1030 1031 return getAddr(N, DAG); 1032 } 1033 1034 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, 1035 SelectionDAG &DAG, 1036 bool UseGOT) const { 1037 SDLoc DL(N); 1038 EVT Ty = getPointerTy(DAG.getDataLayout()); 1039 const GlobalValue *GV = N->getGlobal(); 1040 MVT XLenVT = Subtarget.getXLenVT(); 1041 1042 if (UseGOT) { 1043 // Use PC-relative addressing to access the GOT for this TLS symbol, then 1044 // load the address from the GOT and add the thread pointer. This generates 1045 // the pattern (PseudoLA_TLS_IE sym), which expands to 1046 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)). 1047 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); 1048 SDValue Load = 1049 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0); 1050 1051 // Add the thread pointer. 1052 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); 1053 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg); 1054 } 1055 1056 // Generate a sequence for accessing the address relative to the thread 1057 // pointer, with the appropriate adjustment for the thread pointer offset. 
1058 // This generates the pattern 1059 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym)) 1060 SDValue AddrHi = 1061 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI); 1062 SDValue AddrAdd = 1063 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD); 1064 SDValue AddrLo = 1065 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO); 1066 1067 SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0); 1068 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); 1069 SDValue MNAdd = SDValue( 1070 DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd), 1071 0); 1072 return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0); 1073 } 1074 1075 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, 1076 SelectionDAG &DAG) const { 1077 SDLoc DL(N); 1078 EVT Ty = getPointerTy(DAG.getDataLayout()); 1079 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits()); 1080 const GlobalValue *GV = N->getGlobal(); 1081 1082 // Use a PC-relative addressing mode to access the global dynamic GOT address. 1083 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to 1084 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)). 1085 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); 1086 SDValue Load = 1087 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0); 1088 1089 // Prepare argument list to generate call. 1090 ArgListTy Args; 1091 ArgListEntry Entry; 1092 Entry.Node = Load; 1093 Entry.Ty = CallTy; 1094 Args.push_back(Entry); 1095 1096 // Setup call to __tls_get_addr. 1097 TargetLowering::CallLoweringInfo CLI(DAG); 1098 CLI.setDebugLoc(DL) 1099 .setChain(DAG.getEntryNode()) 1100 .setLibCallee(CallingConv::C, CallTy, 1101 DAG.getExternalSymbol("__tls_get_addr", Ty), 1102 std::move(Args)); 1103 1104 return LowerCallTo(CLI).first; 1105 } 1106 1107 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op, 1108 SelectionDAG &DAG) const { 1109 SDLoc DL(Op); 1110 EVT Ty = Op.getValueType(); 1111 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 1112 int64_t Offset = N->getOffset(); 1113 MVT XLenVT = Subtarget.getXLenVT(); 1114 1115 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal()); 1116 1117 if (DAG.getMachineFunction().getFunction().getCallingConv() == 1118 CallingConv::GHC) 1119 report_fatal_error("In GHC calling convention TLS is not supported"); 1120 1121 SDValue Addr; 1122 switch (Model) { 1123 case TLSModel::LocalExec: 1124 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false); 1125 break; 1126 case TLSModel::InitialExec: 1127 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true); 1128 break; 1129 case TLSModel::LocalDynamic: 1130 case TLSModel::GeneralDynamic: 1131 Addr = getDynamicTLSAddr(N, DAG); 1132 break; 1133 } 1134 1135 // In order to maximise the opportunity for common subexpression elimination, 1136 // emit a separate ADD node for the global address offset instead of folding 1137 // it in the global address node. Later peephole optimisations may choose to 1138 // fold it back in when profitable. 
1139 if (Offset != 0) 1140 return DAG.getNode(ISD::ADD, DL, Ty, Addr, 1141 DAG.getConstant(Offset, DL, XLenVT)); 1142 return Addr; 1143 } 1144 1145 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { 1146 SDValue CondV = Op.getOperand(0); 1147 SDValue TrueV = Op.getOperand(1); 1148 SDValue FalseV = Op.getOperand(2); 1149 SDLoc DL(Op); 1150 MVT XLenVT = Subtarget.getXLenVT(); 1151 1152 // If the result type is XLenVT and CondV is the output of a SETCC node 1153 // which also operated on XLenVT inputs, then merge the SETCC node into the 1154 // lowered RISCVISD::SELECT_CC to take advantage of the integer 1155 // compare+branch instructions. i.e.: 1156 // (select (setcc lhs, rhs, cc), truev, falsev) 1157 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev) 1158 if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC && 1159 CondV.getOperand(0).getSimpleValueType() == XLenVT) { 1160 SDValue LHS = CondV.getOperand(0); 1161 SDValue RHS = CondV.getOperand(1); 1162 auto CC = cast<CondCodeSDNode>(CondV.getOperand(2)); 1163 ISD::CondCode CCVal = CC->get(); 1164 1165 normaliseSetCC(LHS, RHS, CCVal); 1166 1167 SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT); 1168 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV}; 1169 return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops); 1170 } 1171 1172 // Otherwise: 1173 // (select condv, truev, falsev) 1174 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev) 1175 SDValue Zero = DAG.getConstant(0, DL, XLenVT); 1176 SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT); 1177 1178 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV}; 1179 1180 return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops); 1181 } 1182 1183 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { 1184 MachineFunction &MF = DAG.getMachineFunction(); 1185 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>(); 1186 1187 SDLoc DL(Op); 1188 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), 1189 getPointerTy(MF.getDataLayout())); 1190 1191 // vastart just stores the address of the VarArgsFrameIndex slot into the 1192 // memory location argument. 
1193 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 1194 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1), 1195 MachinePointerInfo(SV)); 1196 } 1197 1198 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op, 1199 SelectionDAG &DAG) const { 1200 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 1201 MachineFunction &MF = DAG.getMachineFunction(); 1202 MachineFrameInfo &MFI = MF.getFrameInfo(); 1203 MFI.setFrameAddressIsTaken(true); 1204 Register FrameReg = RI.getFrameRegister(MF); 1205 int XLenInBytes = Subtarget.getXLen() / 8; 1206 1207 EVT VT = Op.getValueType(); 1208 SDLoc DL(Op); 1209 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT); 1210 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 1211 while (Depth--) { 1212 int Offset = -(XLenInBytes * 2); 1213 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr, 1214 DAG.getIntPtrConstant(Offset, DL)); 1215 FrameAddr = 1216 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo()); 1217 } 1218 return FrameAddr; 1219 } 1220 1221 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op, 1222 SelectionDAG &DAG) const { 1223 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 1224 MachineFunction &MF = DAG.getMachineFunction(); 1225 MachineFrameInfo &MFI = MF.getFrameInfo(); 1226 MFI.setReturnAddressIsTaken(true); 1227 MVT XLenVT = Subtarget.getXLenVT(); 1228 int XLenInBytes = Subtarget.getXLen() / 8; 1229 1230 if (verifyReturnAddressArgumentIsConstant(Op, DAG)) 1231 return SDValue(); 1232 1233 EVT VT = Op.getValueType(); 1234 SDLoc DL(Op); 1235 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 1236 if (Depth) { 1237 int Off = -XLenInBytes; 1238 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG); 1239 SDValue Offset = DAG.getConstant(Off, DL, VT); 1240 return DAG.getLoad(VT, DL, DAG.getEntryNode(), 1241 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), 1242 MachinePointerInfo()); 1243 } 1244 1245 // Return the value of the return address register, marking it an implicit 1246 // live-in. 
1247 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT)); 1248 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT); 1249 } 1250 1251 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op, 1252 SelectionDAG &DAG) const { 1253 SDLoc DL(Op); 1254 SDValue Lo = Op.getOperand(0); 1255 SDValue Hi = Op.getOperand(1); 1256 SDValue Shamt = Op.getOperand(2); 1257 EVT VT = Lo.getValueType(); 1258 1259 // if Shamt-XLEN < 0: // Shamt < XLEN 1260 // Lo = Lo << Shamt 1261 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt)) 1262 // else: 1263 // Lo = 0 1264 // Hi = Lo << (Shamt-XLEN) 1265 1266 SDValue Zero = DAG.getConstant(0, DL, VT); 1267 SDValue One = DAG.getConstant(1, DL, VT); 1268 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); 1269 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); 1270 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); 1271 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); 1272 1273 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); 1274 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One); 1275 SDValue ShiftRightLo = 1276 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt); 1277 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); 1278 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); 1279 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen); 1280 1281 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); 1282 1283 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero); 1284 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 1285 1286 SDValue Parts[2] = {Lo, Hi}; 1287 return DAG.getMergeValues(Parts, DL); 1288 } 1289 1290 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, 1291 bool IsSRA) const { 1292 SDLoc DL(Op); 1293 SDValue Lo = Op.getOperand(0); 1294 SDValue Hi = Op.getOperand(1); 1295 SDValue Shamt = Op.getOperand(2); 1296 EVT VT = Lo.getValueType(); 1297 1298 // SRA expansion: 1299 // if Shamt-XLEN < 0: // Shamt < XLEN 1300 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt)) 1301 // Hi = Hi >>s Shamt 1302 // else: 1303 // Lo = Hi >>s (Shamt-XLEN); 1304 // Hi = Hi >>s (XLEN-1) 1305 // 1306 // SRL expansion: 1307 // if Shamt-XLEN < 0: // Shamt < XLEN 1308 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt)) 1309 // Hi = Hi >>u Shamt 1310 // else: 1311 // Lo = Hi >>u (Shamt-XLEN); 1312 // Hi = 0; 1313 1314 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL; 1315 1316 SDValue Zero = DAG.getConstant(0, DL, VT); 1317 SDValue One = DAG.getConstant(1, DL, VT); 1318 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); 1319 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); 1320 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); 1321 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); 1322 1323 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt); 1324 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One); 1325 SDValue ShiftLeftHi = 1326 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt); 1327 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi); 1328 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt); 1329 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen); 1330 SDValue HiFalse = 1331 IsSRA ? 
DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero; 1332 1333 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); 1334 1335 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse); 1336 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 1337 1338 SDValue Parts[2] = {Lo, Hi}; 1339 return DAG.getMergeValues(Parts, DL); 1340 } 1341 1342 // Custom-lower a SPLAT_VECTOR where XLEN<SEW, as the SEW element type is 1343 // illegal (currently only vXi64 RV32). 1344 // FIXME: We could also catch non-constant sign-extended i32 values and lower 1345 // them to SPLAT_VECTOR_I64 1346 SDValue RISCVTargetLowering::lowerSPLATVECTOR(SDValue Op, 1347 SelectionDAG &DAG) const { 1348 SDLoc DL(Op); 1349 EVT VecVT = Op.getValueType(); 1350 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 && 1351 "Unexpected SPLAT_VECTOR lowering"); 1352 SDValue SplatVal = Op.getOperand(0); 1353 1354 // If we can prove that the value is a sign-extended 32-bit value, lower this 1355 // as a custom node in order to try and match RVV vector/scalar instructions. 1356 if (auto *CVal = dyn_cast<ConstantSDNode>(SplatVal)) { 1357 if (isInt<32>(CVal->getSExtValue())) 1358 return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, 1359 DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32)); 1360 } 1361 1362 if (SplatVal.getOpcode() == ISD::SIGN_EXTEND && 1363 SplatVal.getOperand(0).getValueType() == MVT::i32) { 1364 return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, 1365 SplatVal.getOperand(0)); 1366 } 1367 1368 // Else, on RV32 we lower an i64-element SPLAT_VECTOR thus, being careful not 1369 // to accidentally sign-extend the 32-bit halves to the e64 SEW: 1370 // vmv.v.x vX, hi 1371 // vsll.vx vX, vX, /*32*/ 1372 // vmv.v.x vY, lo 1373 // vsll.vx vY, vY, /*32*/ 1374 // vsrl.vx vY, vY, /*32*/ 1375 // vor.vv vX, vX, vY 1376 SDValue One = DAG.getConstant(1, DL, MVT::i32); 1377 SDValue Zero = DAG.getConstant(0, DL, MVT::i32); 1378 SDValue ThirtyTwoV = DAG.getConstant(32, DL, VecVT); 1379 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, SplatVal, Zero); 1380 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, SplatVal, One); 1381 1382 Lo = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo); 1383 Lo = DAG.getNode(ISD::SHL, DL, VecVT, Lo, ThirtyTwoV); 1384 Lo = DAG.getNode(ISD::SRL, DL, VecVT, Lo, ThirtyTwoV); 1385 1386 if (isNullConstant(Hi)) 1387 return Lo; 1388 1389 Hi = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Hi); 1390 Hi = DAG.getNode(ISD::SHL, DL, VecVT, Hi, ThirtyTwoV); 1391 1392 return DAG.getNode(ISD::OR, DL, VecVT, Lo, Hi); 1393 } 1394 1395 // Custom-lower extensions from mask vectors by using a vselect either with 1 1396 // for zero/any-extension or -1 for sign-extension: 1397 // (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0) 1398 // Note that any-extension is lowered identically to zero-extension. 1399 SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG, 1400 int64_t ExtTrueVal) const { 1401 SDLoc DL(Op); 1402 EVT VecVT = Op.getValueType(); 1403 SDValue Src = Op.getOperand(0); 1404 // Only custom-lower extensions from mask types 1405 if (!Src.getValueType().isVector() || 1406 Src.getValueType().getVectorElementType() != MVT::i1) 1407 return Op; 1408 1409 // Be careful not to introduce illegal scalar types at this stage, and be 1410 // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is 1411 // illegal and must be expanded. 
Since we know that the constants are 1412 // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly. 1413 bool IsRV32E64 = 1414 !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64; 1415 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); 1416 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, Subtarget.getXLenVT()); 1417 1418 if (!IsRV32E64) { 1419 SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero); 1420 SplatTrueVal = DAG.getSplatVector(VecVT, DL, SplatTrueVal); 1421 } else { 1422 SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero); 1423 SplatTrueVal = 1424 DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatTrueVal); 1425 } 1426 1427 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero); 1428 } 1429 1430 // Custom-lower truncations from vectors to mask vectors by using a mask and a 1431 // setcc operation: 1432 // (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne) 1433 SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op, 1434 SelectionDAG &DAG) const { 1435 SDLoc DL(Op); 1436 EVT MaskVT = Op.getValueType(); 1437 // Only expect to custom-lower truncations to mask types 1438 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 && 1439 "Unexpected type for vector mask lowering"); 1440 SDValue Src = Op.getOperand(0); 1441 EVT VecVT = Src.getValueType(); 1442 1443 // Be careful not to introduce illegal scalar types at this stage, and be 1444 // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is 1445 // illegal and must be expanded. Since we know that the constants are 1446 // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly. 1447 bool IsRV32E64 = 1448 !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64; 1449 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT()); 1450 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); 1451 1452 if (!IsRV32E64) { 1453 SplatOne = DAG.getSplatVector(VecVT, DL, SplatOne); 1454 SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero); 1455 } else { 1456 SplatOne = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatOne); 1457 SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero); 1458 } 1459 1460 SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne); 1461 1462 return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE); 1463 } 1464 1465 SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, 1466 SelectionDAG &DAG) const { 1467 SDLoc DL(Op); 1468 EVT VecVT = Op.getValueType(); 1469 SDValue Vec = Op.getOperand(0); 1470 SDValue Val = Op.getOperand(1); 1471 SDValue Idx = Op.getOperand(2); 1472 1473 // Custom-legalize INSERT_VECTOR_ELT where XLEN>=SEW, so that the vector is 1474 // first slid down into position, the value is inserted into the first 1475 // position, and the vector is slid back up. We do this to simplify patterns. 
1476 // (slideup vec, (insertelt (slidedown impdef, vec, idx), val, 0), idx), 1477 if (Subtarget.is64Bit() || VecVT.getVectorElementType() != MVT::i64) { 1478 if (isNullConstant(Idx)) 1479 return Op; 1480 SDValue Slidedown = DAG.getNode(RISCVISD::VSLIDEDOWN, DL, VecVT, 1481 DAG.getUNDEF(VecVT), Vec, Idx); 1482 SDValue InsertElt0 = 1483 DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecVT, Slidedown, Val, 1484 DAG.getConstant(0, DL, Subtarget.getXLenVT())); 1485 1486 return DAG.getNode(RISCVISD::VSLIDEUP, DL, VecVT, Vec, InsertElt0, Idx); 1487 } 1488 1489 // Custom-legalize INSERT_VECTOR_ELT where XLEN<SEW, as the SEW element type 1490 // is illegal (currently only vXi64 RV32). 1491 // Since there is no easy way of getting a single element into a vector when 1492 // XLEN<SEW, we lower the operation to the following sequence: 1493 // splat vVal, rVal 1494 // vid.v vVid 1495 // vmseq.vx mMask, vVid, rIdx 1496 // vmerge.vvm vDest, vSrc, vVal, mMask 1497 // This essentially merges the original vector with the inserted element by 1498 // using a mask whose only set bit is that corresponding to the insert 1499 // index. 1500 SDValue SplattedVal = DAG.getSplatVector(VecVT, DL, Val); 1501 SDValue SplattedIdx = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Idx); 1502 1503 SDValue VID = DAG.getNode(RISCVISD::VID, DL, VecVT); 1504 auto SetCCVT = 1505 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VecVT); 1506 SDValue Mask = DAG.getSetCC(DL, SetCCVT, VID, SplattedIdx, ISD::SETEQ); 1507 1508 return DAG.getNode(ISD::VSELECT, DL, VecVT, Mask, SplattedVal, Vec); 1509 } 1510 1511 // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then 1512 // extract the first element: (extractelt (slidedown vec, idx), 0). For integer 1513 // types this is done using VMV_X_S to allow us to glean information about the 1514 // sign bits of the result. 1515 SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, 1516 SelectionDAG &DAG) const { 1517 SDLoc DL(Op); 1518 SDValue Idx = Op.getOperand(1); 1519 SDValue Vec = Op.getOperand(0); 1520 EVT EltVT = Op.getValueType(); 1521 EVT VecVT = Vec.getValueType(); 1522 MVT XLenVT = Subtarget.getXLenVT(); 1523 1524 // If the index is 0, the vector is already in the right position. 1525 if (!isNullConstant(Idx)) { 1526 Vec = DAG.getNode(RISCVISD::VSLIDEDOWN, DL, VecVT, DAG.getUNDEF(VecVT), Vec, 1527 Idx); 1528 } 1529 1530 if (!EltVT.isInteger()) { 1531 // Floating-point extracts are handled in TableGen. 1532 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, 1533 DAG.getConstant(0, DL, XLenVT)); 1534 } 1535 1536 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec); 1537 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0); 1538 } 1539 1540 SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, 1541 SelectionDAG &DAG) const { 1542 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 1543 SDLoc DL(Op); 1544 1545 if (Subtarget.hasStdExtV()) { 1546 // Some RVV intrinsics may claim that they want an integer operand to be 1547 // extended. 
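    // Illustrative sketch (the intrinsic named here is just an example): for a
    // vadd.vx-style intrinsic carrying an i16 scalar operand on RV64, the code
    // below widens that scalar to XLenVT (i64), sign-extending it when it is a
    // constant, so that isel only ever sees XLEN-sized scalar operands and the
    // .vx/.vi instruction forms can still be matched.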
1548     if (const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
1549             RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo)) {
1550       if (II->ExtendedOperand) {
1551         assert(II->ExtendedOperand < Op.getNumOperands());
1552         SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
1553         SDValue &ScalarOp = Operands[II->ExtendedOperand];
1554         EVT OpVT = ScalarOp.getValueType();
1555         if (OpVT == MVT::i8 || OpVT == MVT::i16 ||
1556             (OpVT == MVT::i32 && Subtarget.is64Bit())) {
1557           // If the operand is a constant, sign extend to increase our chances
1558           // of being able to use a .vi instruction. ANY_EXTEND would become
1559           // a zero extend and the simm5 check in isel would fail.
1560           // FIXME: Should we ignore the upper bits in isel instead?
1561           unsigned ExtOpc = isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND
1562                                                           : ISD::ANY_EXTEND;
1563           ScalarOp = DAG.getNode(ExtOpc, DL, Subtarget.getXLenVT(), ScalarOp);
1564           return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
1565                              Operands);
1566         }
1567       }
1568     }
1569   }
1570
1571   switch (IntNo) {
1572   default:
1573     return SDValue(); // Don't custom lower most intrinsics.
1574   case Intrinsic::thread_pointer: {
1575     EVT PtrVT = getPointerTy(DAG.getDataLayout());
1576     return DAG.getRegister(RISCV::X4, PtrVT);
1577   }
1578   case Intrinsic::riscv_vmv_x_s:
1579     assert(Op.getValueType() == Subtarget.getXLenVT() && "Unexpected VT!");
1580     return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
1581                        Op.getOperand(1));
1582   }
1583 }
1584
1585 SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
1586                                                     SelectionDAG &DAG) const {
1587   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1588   SDLoc DL(Op);
1589
1590   if (Subtarget.hasStdExtV()) {
1591     // Some RVV intrinsics may claim that they want an integer operand to be
1592     // extended.
1593     if (const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
1594             RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo)) {
1595       if (II->ExtendedOperand) {
1596         // The operands start from the second argument in INTRINSIC_W_CHAIN.
1597         unsigned ExtendOp = II->ExtendedOperand + 1;
1598         assert(ExtendOp < Op.getNumOperands());
1599         SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
1600         SDValue &ScalarOp = Operands[ExtendOp];
1601         EVT OpVT = ScalarOp.getValueType();
1602         if (OpVT == MVT::i8 || OpVT == MVT::i16 ||
1603             (OpVT == MVT::i32 && Subtarget.is64Bit())) {
1604           // If the operand is a constant, sign extend to increase our chances
1605           // of being able to use a .vi instruction. ANY_EXTEND would become
1606           // a zero extend and the simm5 check in isel would fail.
1607           // FIXME: Should we ignore the upper bits in isel instead?
1608           unsigned ExtOpc = isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND
1609                                                           : ISD::ANY_EXTEND;
1610           ScalarOp = DAG.getNode(ExtOpc, DL, Subtarget.getXLenVT(), ScalarOp);
1611           return DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, Op->getVTList(),
1612                              Operands);
1613         }
1614       }
1615     }
1616   }
1617
1618   switch (IntNo) {
1619   default:
1620     return SDValue(); // Don't custom lower most intrinsics.
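  // For the fault-only-first loads handled below, the intrinsic produces both
  // the loaded vector and the new VL written by the vle*ff instruction; the
  // updated VL is recovered with a PseudoReadVL that is glued to the load so
  // nothing can be scheduled between the load and the read of VL.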
1621 case Intrinsic::riscv_vleff: { 1622 SDLoc DL(Op); 1623 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other, MVT::Glue); 1624 SDValue Load = DAG.getNode(RISCVISD::VLEFF, DL, VTs, Op.getOperand(0), 1625 Op.getOperand(2), Op.getOperand(3)); 1626 SDValue ReadVL = 1627 SDValue(DAG.getMachineNode(RISCV::PseudoReadVL, DL, Op->getValueType(1), 1628 Load.getValue(2)), 1629 0); 1630 return DAG.getMergeValues({Load, ReadVL, Load.getValue(1)}, DL); 1631 } 1632 case Intrinsic::riscv_vleff_mask: { 1633 SDLoc DL(Op); 1634 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other, MVT::Glue); 1635 SDValue Load = DAG.getNode(RISCVISD::VLEFF_MASK, DL, VTs, Op.getOperand(0), 1636 Op.getOperand(2), Op.getOperand(3), 1637 Op.getOperand(4), Op.getOperand(5)); 1638 SDValue ReadVL = 1639 SDValue(DAG.getMachineNode(RISCV::PseudoReadVL, DL, Op->getValueType(1), 1640 Load.getValue(2)), 1641 0); 1642 return DAG.getMergeValues({Load, ReadVL, Load.getValue(1)}, DL); 1643 } 1644 } 1645 } 1646 1647 static std::pair<unsigned, uint64_t> 1648 getRVVReductionOpAndIdentityVal(unsigned ISDOpcode, unsigned EltSizeBits) { 1649 switch (ISDOpcode) { 1650 default: 1651 llvm_unreachable("Unhandled reduction"); 1652 case ISD::VECREDUCE_ADD: 1653 return {RISCVISD::VECREDUCE_ADD, 0}; 1654 case ISD::VECREDUCE_UMAX: 1655 return {RISCVISD::VECREDUCE_UMAX, 0}; 1656 case ISD::VECREDUCE_SMAX: 1657 return {RISCVISD::VECREDUCE_SMAX, minIntN(EltSizeBits)}; 1658 case ISD::VECREDUCE_UMIN: 1659 return {RISCVISD::VECREDUCE_UMIN, maxUIntN(EltSizeBits)}; 1660 case ISD::VECREDUCE_SMIN: 1661 return {RISCVISD::VECREDUCE_SMIN, maxIntN(EltSizeBits)}; 1662 case ISD::VECREDUCE_AND: 1663 return {RISCVISD::VECREDUCE_AND, -1}; 1664 case ISD::VECREDUCE_OR: 1665 return {RISCVISD::VECREDUCE_OR, 0}; 1666 case ISD::VECREDUCE_XOR: 1667 return {RISCVISD::VECREDUCE_XOR, 0}; 1668 } 1669 } 1670 1671 // Take a (supported) standard ISD reduction opcode and transform it to a RISCV 1672 // reduction opcode. Note that this returns a vector type, which must be 1673 // further processed to access the scalar result in element 0. 1674 SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op, 1675 SelectionDAG &DAG) const { 1676 SDLoc DL(Op); 1677 assert(Op.getValueType().isSimple() && 1678 Op.getOperand(0).getValueType().isSimple() && 1679 "Unexpected vector-reduce lowering"); 1680 MVT VecEltVT = Op.getOperand(0).getSimpleValueType().getVectorElementType(); 1681 unsigned RVVOpcode; 1682 uint64_t IdentityVal; 1683 std::tie(RVVOpcode, IdentityVal) = 1684 getRVVReductionOpAndIdentityVal(Op.getOpcode(), VecEltVT.getSizeInBits()); 1685 // We have to perform a bit of a dance to get from our vector type to the 1686 // correct LMUL=1 vector type. We divide our minimum VLEN (64) by the vector 1687 // element type to find the type which fills a single register. Be careful to 1688 // use the operand's vector element type rather than the reduction's value 1689 // type, as that has likely been extended to XLEN. 
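  // Worked example (illustrative): for a reduction over nxv8i16, VecEltVT is
  // i16, so NumElts = 64 / 16 = 4 and M1VT is nxv4i16 -- the type whose
  // minimum size (4 x 16 = 64 bits) fills exactly one vector register.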
1690   unsigned NumElts = 64 / VecEltVT.getSizeInBits();
1691   MVT M1VT = MVT::getScalableVectorVT(VecEltVT, NumElts);
1692   SDValue IdentitySplat =
1693       DAG.getSplatVector(M1VT, DL, DAG.getConstant(IdentityVal, DL, VecEltVT));
1694   SDValue Reduction =
1695       DAG.getNode(RVVOpcode, DL, M1VT, Op.getOperand(0), IdentitySplat);
1696   SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
1697                              DAG.getConstant(0, DL, Subtarget.getXLenVT()));
1698   return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType());
1699 }
1700
1701 // Returns the opcode of the target-specific SDNode that implements the 32-bit
1702 // form of the given Opcode.
1703 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
1704   switch (Opcode) {
1705   default:
1706     llvm_unreachable("Unexpected opcode");
1707   case ISD::SHL:
1708     return RISCVISD::SLLW;
1709   case ISD::SRA:
1710     return RISCVISD::SRAW;
1711   case ISD::SRL:
1712     return RISCVISD::SRLW;
1713   case ISD::SDIV:
1714     return RISCVISD::DIVW;
1715   case ISD::UDIV:
1716     return RISCVISD::DIVUW;
1717   case ISD::UREM:
1718     return RISCVISD::REMUW;
1719   case ISD::ROTL:
1720     return RISCVISD::ROLW;
1721   case ISD::ROTR:
1722     return RISCVISD::RORW;
1723   case RISCVISD::GREVI:
1724     return RISCVISD::GREVIW;
1725   case RISCVISD::GORCI:
1726     return RISCVISD::GORCIW;
1727   }
1728 }
1729
1730 // Converts the given 32-bit operation to a target-specific SelectionDAG node.
1731 // Because i32 isn't a legal type for RV64, these operations would otherwise
1732 // be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
1733 // instructions later on, because the fact that the operation was originally
1734 // of type i32 is lost.
1735 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
1736                                    unsigned ExtOpc = ISD::ANY_EXTEND) {
1737   SDLoc DL(N);
1738   RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
1739   SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
1740   SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
1741   SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
1742   // ReplaceNodeResults requires we maintain the same type for the return value.
1743   return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
1744 }
1745
1746 // Converts the given 32-bit operation to an i64 operation with sign-extension
1747 // semantics in order to reduce the number of sign-extension instructions.
1748 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
1749   SDLoc DL(N);
1750   SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
1751   SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
1752   SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
1753   SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
1754                                DAG.getValueType(MVT::i32));
1755   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
1756 }
1757
1758 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
1759                                              SmallVectorImpl<SDValue> &Results,
1760                                              SelectionDAG &DAG) const {
1761   SDLoc DL(N);
1762   switch (N->getOpcode()) {
1763   default:
1764     llvm_unreachable("Don't know how to custom type legalize this operation!");
1765   case ISD::STRICT_FP_TO_SINT:
1766   case ISD::STRICT_FP_TO_UINT:
1767   case ISD::FP_TO_SINT:
1768   case ISD::FP_TO_UINT: {
1769     bool IsStrict = N->isStrictFPOpcode();
1770     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1771            "Unexpected custom legalisation");
1772     SDValue Op0 = IsStrict ?
N->getOperand(1) : N->getOperand(0); 1773 // If the FP type needs to be softened, emit a library call using the 'si' 1774 // version. If we left it to default legalization we'd end up with 'di'. If 1775 // the FP type doesn't need to be softened just let generic type 1776 // legalization promote the result type. 1777 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) != 1778 TargetLowering::TypeSoftenFloat) 1779 return; 1780 RTLIB::Libcall LC; 1781 if (N->getOpcode() == ISD::FP_TO_SINT || 1782 N->getOpcode() == ISD::STRICT_FP_TO_SINT) 1783 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0)); 1784 else 1785 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0)); 1786 MakeLibCallOptions CallOptions; 1787 EVT OpVT = Op0.getValueType(); 1788 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true); 1789 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); 1790 SDValue Result; 1791 std::tie(Result, Chain) = 1792 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain); 1793 Results.push_back(Result); 1794 if (IsStrict) 1795 Results.push_back(Chain); 1796 break; 1797 } 1798 case ISD::READCYCLECOUNTER: { 1799 assert(!Subtarget.is64Bit() && 1800 "READCYCLECOUNTER only has custom type legalization on riscv32"); 1801 1802 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); 1803 SDValue RCW = 1804 DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0)); 1805 1806 Results.push_back( 1807 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1))); 1808 Results.push_back(RCW.getValue(2)); 1809 break; 1810 } 1811 case ISD::ADD: 1812 case ISD::SUB: 1813 case ISD::MUL: 1814 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 1815 "Unexpected custom legalisation"); 1816 if (N->getOperand(1).getOpcode() == ISD::Constant) 1817 return; 1818 Results.push_back(customLegalizeToWOpWithSExt(N, DAG)); 1819 break; 1820 case ISD::SHL: 1821 case ISD::SRA: 1822 case ISD::SRL: 1823 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 1824 "Unexpected custom legalisation"); 1825 if (N->getOperand(1).getOpcode() == ISD::Constant) 1826 return; 1827 Results.push_back(customLegalizeToWOp(N, DAG)); 1828 break; 1829 case ISD::ROTL: 1830 case ISD::ROTR: 1831 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 1832 "Unexpected custom legalisation"); 1833 Results.push_back(customLegalizeToWOp(N, DAG)); 1834 break; 1835 case ISD::SDIV: 1836 case ISD::UDIV: 1837 case ISD::UREM: { 1838 MVT VT = N->getSimpleValueType(0); 1839 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) && 1840 Subtarget.is64Bit() && Subtarget.hasStdExtM() && 1841 "Unexpected custom legalisation"); 1842 if (N->getOperand(0).getOpcode() == ISD::Constant || 1843 N->getOperand(1).getOpcode() == ISD::Constant) 1844 return; 1845 1846 // If the input is i32, use ANY_EXTEND since the W instructions don't read 1847 // the upper 32 bits. For other types we need to sign or zero extend 1848 // based on the opcode. 1849 unsigned ExtOpc = ISD::ANY_EXTEND; 1850 if (VT != MVT::i32) 1851 ExtOpc = N->getOpcode() == ISD::SDIV ? 
ISD::SIGN_EXTEND 1852 : ISD::ZERO_EXTEND; 1853 1854 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc)); 1855 break; 1856 } 1857 case ISD::BITCAST: { 1858 assert(((N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 1859 Subtarget.hasStdExtF()) || 1860 (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh())) && 1861 "Unexpected custom legalisation"); 1862 SDValue Op0 = N->getOperand(0); 1863 if (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh()) { 1864 if (Op0.getValueType() != MVT::f16) 1865 return; 1866 SDValue FPConv = 1867 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Op0); 1868 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv)); 1869 } else if (N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 1870 Subtarget.hasStdExtF()) { 1871 if (Op0.getValueType() != MVT::f32) 1872 return; 1873 SDValue FPConv = 1874 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0); 1875 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv)); 1876 } 1877 break; 1878 } 1879 case RISCVISD::GREVI: 1880 case RISCVISD::GORCI: { 1881 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 1882 "Unexpected custom legalisation"); 1883 // This is similar to customLegalizeToWOp, except that we pass the second 1884 // operand (a TargetConstant) straight through: it is already of type 1885 // XLenVT. 1886 SDLoc DL(N); 1887 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode()); 1888 SDValue NewOp0 = 1889 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 1890 SDValue NewRes = 1891 DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, N->getOperand(1)); 1892 // ReplaceNodeResults requires we maintain the same type for the return 1893 // value. 1894 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes)); 1895 break; 1896 } 1897 case ISD::BSWAP: 1898 case ISD::BITREVERSE: { 1899 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 1900 Subtarget.hasStdExtZbp() && "Unexpected custom legalisation"); 1901 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, 1902 N->getOperand(0)); 1903 unsigned Imm = N->getOpcode() == ISD::BITREVERSE ? 31 : 24; 1904 SDValue GREVIW = DAG.getNode(RISCVISD::GREVIW, DL, MVT::i64, NewOp0, 1905 DAG.getTargetConstant(Imm, DL, 1906 Subtarget.getXLenVT())); 1907 // ReplaceNodeResults requires we maintain the same type for the return 1908 // value. 1909 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, GREVIW)); 1910 break; 1911 } 1912 case ISD::FSHL: 1913 case ISD::FSHR: { 1914 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 1915 Subtarget.hasStdExtZbt() && "Unexpected custom legalisation"); 1916 SDValue NewOp0 = 1917 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 1918 SDValue NewOp1 = 1919 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 1920 SDValue NewOp2 = 1921 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); 1922 // FSLW/FSRW take a 6 bit shift amount but i32 FSHL/FSHR only use 5 bits. 1923 // Mask the shift amount to 5 bits. 1924 NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2, 1925 DAG.getConstant(0x1f, DL, MVT::i64)); 1926 unsigned Opc = 1927 N->getOpcode() == ISD::FSHL ? 
RISCVISD::FSLW : RISCVISD::FSRW; 1928 SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewOp2); 1929 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp)); 1930 break; 1931 } 1932 case ISD::EXTRACT_VECTOR_ELT: { 1933 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element 1934 // type is illegal (currently only vXi64 RV32). 1935 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are 1936 // transferred to the destination register. We issue two of these from the 1937 // upper- and lower- halves of the SEW-bit vector element, slid down to the 1938 // first element. 1939 SDLoc DL(N); 1940 SDValue Vec = N->getOperand(0); 1941 SDValue Idx = N->getOperand(1); 1942 EVT VecVT = Vec.getValueType(); 1943 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 && 1944 VecVT.getVectorElementType() == MVT::i64 && 1945 "Unexpected EXTRACT_VECTOR_ELT legalization"); 1946 1947 SDValue Slidedown = Vec; 1948 // Unless the index is known to be 0, we must slide the vector down to get 1949 // the desired element into index 0. 1950 if (!isNullConstant(Idx)) 1951 Slidedown = DAG.getNode(RISCVISD::VSLIDEDOWN, DL, VecVT, 1952 DAG.getUNDEF(VecVT), Vec, Idx); 1953 1954 MVT XLenVT = Subtarget.getXLenVT(); 1955 // Extract the lower XLEN bits of the correct vector element. 1956 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Slidedown, Idx); 1957 1958 // To extract the upper XLEN bits of the vector element, shift the first 1959 // element right by 32 bits and re-extract the lower XLEN bits. 1960 SDValue ThirtyTwoV = 1961 DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, 1962 DAG.getConstant(32, DL, Subtarget.getXLenVT())); 1963 SDValue LShr32 = DAG.getNode(ISD::SRL, DL, VecVT, Slidedown, ThirtyTwoV); 1964 1965 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32, Idx); 1966 1967 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi)); 1968 break; 1969 } 1970 case ISD::INTRINSIC_WO_CHAIN: { 1971 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 1972 switch (IntNo) { 1973 default: 1974 llvm_unreachable( 1975 "Don't know how to custom type legalize this intrinsic!"); 1976 case Intrinsic::riscv_vmv_x_s: { 1977 EVT VT = N->getValueType(0); 1978 assert((VT == MVT::i8 || VT == MVT::i16 || 1979 (Subtarget.is64Bit() && VT == MVT::i32)) && 1980 "Unexpected custom legalisation!"); 1981 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL, 1982 Subtarget.getXLenVT(), N->getOperand(1)); 1983 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract)); 1984 break; 1985 } 1986 } 1987 break; 1988 } 1989 case ISD::VECREDUCE_ADD: 1990 case ISD::VECREDUCE_AND: 1991 case ISD::VECREDUCE_OR: 1992 case ISD::VECREDUCE_XOR: 1993 case ISD::VECREDUCE_SMAX: 1994 case ISD::VECREDUCE_UMAX: 1995 case ISD::VECREDUCE_SMIN: 1996 case ISD::VECREDUCE_UMIN: 1997 // The custom-lowering for these nodes returns a vector whose first element 1998 // is the result of the reduction. Extract its first element and let the 1999 // legalization for EXTRACT_VECTOR_ELT do the rest of the job. 2000 Results.push_back(lowerVECREDUCE(SDValue(N, 0), DAG)); 2001 break; 2002 } 2003 } 2004 2005 // A structure to hold one of the bit-manipulation patterns below. 
Together, a 2006 // SHL and non-SHL pattern may form a bit-manipulation pair on a single source: 2007 // (or (and (shl x, 1), 0xAAAAAAAA), 2008 // (and (srl x, 1), 0x55555555)) 2009 struct RISCVBitmanipPat { 2010 SDValue Op; 2011 unsigned ShAmt; 2012 bool IsSHL; 2013 2014 bool formsPairWith(const RISCVBitmanipPat &Other) const { 2015 return Op == Other.Op && ShAmt == Other.ShAmt && IsSHL != Other.IsSHL; 2016 } 2017 }; 2018 2019 // Matches any of the following bit-manipulation patterns: 2020 // (and (shl x, 1), (0x55555555 << 1)) 2021 // (and (srl x, 1), 0x55555555) 2022 // (shl (and x, 0x55555555), 1) 2023 // (srl (and x, (0x55555555 << 1)), 1) 2024 // where the shift amount and mask may vary thus: 2025 // [1] = 0x55555555 / 0xAAAAAAAA 2026 // [2] = 0x33333333 / 0xCCCCCCCC 2027 // [4] = 0x0F0F0F0F / 0xF0F0F0F0 2028 // [8] = 0x00FF00FF / 0xFF00FF00 2029 // [16] = 0x0000FFFF / 0xFFFFFFFF 2030 // [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64) 2031 static Optional<RISCVBitmanipPat> matchRISCVBitmanipPat(SDValue Op) { 2032 Optional<uint64_t> Mask; 2033 // Optionally consume a mask around the shift operation. 2034 if (Op.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op.getOperand(1))) { 2035 Mask = Op.getConstantOperandVal(1); 2036 Op = Op.getOperand(0); 2037 } 2038 if (Op.getOpcode() != ISD::SHL && Op.getOpcode() != ISD::SRL) 2039 return None; 2040 bool IsSHL = Op.getOpcode() == ISD::SHL; 2041 2042 if (!isa<ConstantSDNode>(Op.getOperand(1))) 2043 return None; 2044 auto ShAmt = Op.getConstantOperandVal(1); 2045 2046 if (!isPowerOf2_64(ShAmt)) 2047 return None; 2048 2049 // These are the unshifted masks which we use to match bit-manipulation 2050 // patterns. They may be shifted left in certain circumstances. 2051 static const uint64_t BitmanipMasks[] = { 2052 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL, 2053 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL, 2054 }; 2055 2056 unsigned MaskIdx = Log2_64(ShAmt); 2057 if (MaskIdx >= array_lengthof(BitmanipMasks)) 2058 return None; 2059 2060 auto Src = Op.getOperand(0); 2061 2062 unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32; 2063 auto ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width); 2064 2065 // The expected mask is shifted left when the AND is found around SHL 2066 // patterns. 2067 // ((x >> 1) & 0x55555555) 2068 // ((x << 1) & 0xAAAAAAAA) 2069 bool SHLExpMask = IsSHL; 2070 2071 if (!Mask) { 2072 // Sometimes LLVM keeps the mask as an operand of the shift, typically when 2073 // the mask is all ones: consume that now. 2074 if (Src.getOpcode() == ISD::AND && isa<ConstantSDNode>(Src.getOperand(1))) { 2075 Mask = Src.getConstantOperandVal(1); 2076 Src = Src.getOperand(0); 2077 // The expected mask is now in fact shifted left for SRL, so reverse the 2078 // decision. 2079 // ((x & 0xAAAAAAAA) >> 1) 2080 // ((x & 0x55555555) << 1) 2081 SHLExpMask = !SHLExpMask; 2082 } else { 2083 // Use a default shifted mask of all-ones if there's no AND, truncated 2084 // down to the expected width. This simplifies the logic later on. 2085 Mask = maskTrailingOnes<uint64_t>(Width); 2086 *Mask &= (IsSHL ? 
*Mask << ShAmt : *Mask >> ShAmt); 2087 } 2088 } 2089 2090 if (SHLExpMask) 2091 ExpMask <<= ShAmt; 2092 2093 if (Mask != ExpMask) 2094 return None; 2095 2096 return RISCVBitmanipPat{Src, (unsigned)ShAmt, IsSHL}; 2097 } 2098 2099 // Match the following pattern as a GREVI(W) operation 2100 // (or (BITMANIP_SHL x), (BITMANIP_SRL x)) 2101 static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG, 2102 const RISCVSubtarget &Subtarget) { 2103 EVT VT = Op.getValueType(); 2104 2105 if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) { 2106 auto LHS = matchRISCVBitmanipPat(Op.getOperand(0)); 2107 auto RHS = matchRISCVBitmanipPat(Op.getOperand(1)); 2108 if (LHS && RHS && LHS->formsPairWith(*RHS)) { 2109 SDLoc DL(Op); 2110 return DAG.getNode( 2111 RISCVISD::GREVI, DL, VT, LHS->Op, 2112 DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT())); 2113 } 2114 } 2115 return SDValue(); 2116 } 2117 2118 // Matches any the following pattern as a GORCI(W) operation 2119 // 1. (or (GREVI x, shamt), x) if shamt is a power of 2 2120 // 2. (or x, (GREVI x, shamt)) if shamt is a power of 2 2121 // 3. (or (or (BITMANIP_SHL x), x), (BITMANIP_SRL x)) 2122 // Note that with the variant of 3., 2123 // (or (or (BITMANIP_SHL x), (BITMANIP_SRL x)), x) 2124 // the inner pattern will first be matched as GREVI and then the outer 2125 // pattern will be matched to GORC via the first rule above. 2126 // 4. (or (rotl/rotr x, bitwidth/2), x) 2127 static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG, 2128 const RISCVSubtarget &Subtarget) { 2129 EVT VT = Op.getValueType(); 2130 2131 if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) { 2132 SDLoc DL(Op); 2133 SDValue Op0 = Op.getOperand(0); 2134 SDValue Op1 = Op.getOperand(1); 2135 2136 auto MatchOROfReverse = [&](SDValue Reverse, SDValue X) { 2137 if (Reverse.getOpcode() == RISCVISD::GREVI && Reverse.getOperand(0) == X && 2138 isPowerOf2_32(Reverse.getConstantOperandVal(1))) 2139 return DAG.getNode(RISCVISD::GORCI, DL, VT, X, Reverse.getOperand(1)); 2140 // We can also form GORCI from ROTL/ROTR by half the bitwidth. 
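      // Illustrative example for a 32-bit value: (or (rotl x, 16), x) reaches
      // this point with RotAmt == 16 == 32 / 2 and becomes (GORCI x, 16).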
2141 if ((Reverse.getOpcode() == ISD::ROTL || 2142 Reverse.getOpcode() == ISD::ROTR) && 2143 Reverse.getOperand(0) == X && 2144 isa<ConstantSDNode>(Reverse.getOperand(1))) { 2145 uint64_t RotAmt = Reverse.getConstantOperandVal(1); 2146 if (RotAmt == (VT.getSizeInBits() / 2)) 2147 return DAG.getNode( 2148 RISCVISD::GORCI, DL, VT, X, 2149 DAG.getTargetConstant(RotAmt, DL, Subtarget.getXLenVT())); 2150 } 2151 return SDValue(); 2152 }; 2153 2154 // Check for either commutable permutation of (or (GREVI x, shamt), x) 2155 if (SDValue V = MatchOROfReverse(Op0, Op1)) 2156 return V; 2157 if (SDValue V = MatchOROfReverse(Op1, Op0)) 2158 return V; 2159 2160 // OR is commutable so canonicalize its OR operand to the left 2161 if (Op0.getOpcode() != ISD::OR && Op1.getOpcode() == ISD::OR) 2162 std::swap(Op0, Op1); 2163 if (Op0.getOpcode() != ISD::OR) 2164 return SDValue(); 2165 SDValue OrOp0 = Op0.getOperand(0); 2166 SDValue OrOp1 = Op0.getOperand(1); 2167 auto LHS = matchRISCVBitmanipPat(OrOp0); 2168 // OR is commutable so swap the operands and try again: x might have been 2169 // on the left 2170 if (!LHS) { 2171 std::swap(OrOp0, OrOp1); 2172 LHS = matchRISCVBitmanipPat(OrOp0); 2173 } 2174 auto RHS = matchRISCVBitmanipPat(Op1); 2175 if (LHS && RHS && LHS->formsPairWith(*RHS) && LHS->Op == OrOp1) { 2176 return DAG.getNode( 2177 RISCVISD::GORCI, DL, VT, LHS->Op, 2178 DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT())); 2179 } 2180 } 2181 return SDValue(); 2182 } 2183 2184 // Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is 2185 // non-zero, and to x when it is. Any repeated GREVI stage undoes itself. 2186 // Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). Repeated stage does 2187 // not undo itself, but they are redundant. 2188 static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) { 2189 unsigned ShAmt1 = N->getConstantOperandVal(1); 2190 SDValue Src = N->getOperand(0); 2191 2192 if (Src.getOpcode() != N->getOpcode()) 2193 return SDValue(); 2194 2195 unsigned ShAmt2 = Src.getConstantOperandVal(1); 2196 Src = Src.getOperand(0); 2197 2198 unsigned CombinedShAmt; 2199 if (N->getOpcode() == RISCVISD::GORCI || N->getOpcode() == RISCVISD::GORCIW) 2200 CombinedShAmt = ShAmt1 | ShAmt2; 2201 else 2202 CombinedShAmt = ShAmt1 ^ ShAmt2; 2203 2204 if (CombinedShAmt == 0) 2205 return Src; 2206 2207 SDLoc DL(N); 2208 return DAG.getNode(N->getOpcode(), DL, N->getValueType(0), Src, 2209 DAG.getTargetConstant(CombinedShAmt, DL, 2210 N->getOperand(1).getValueType())); 2211 } 2212 2213 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, 2214 DAGCombinerInfo &DCI) const { 2215 SelectionDAG &DAG = DCI.DAG; 2216 2217 switch (N->getOpcode()) { 2218 default: 2219 break; 2220 case RISCVISD::SplitF64: { 2221 SDValue Op0 = N->getOperand(0); 2222 // If the input to SplitF64 is just BuildPairF64 then the operation is 2223 // redundant. Instead, use BuildPairF64's operands directly. 2224 if (Op0->getOpcode() == RISCVISD::BuildPairF64) 2225 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1)); 2226 2227 SDLoc DL(N); 2228 2229 // It's cheaper to materialise two 32-bit integers than to load a double 2230 // from the constant pool and transfer it to integer registers through the 2231 // stack. 
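    // Illustrative example: splitting the f64 constant 1.0 (bit pattern
    // 0x3FF0000000000000) yields Lo = 0x00000000 and Hi = 0x3FF00000, both of
    // which are cheap to materialise as integers.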
2232 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) { 2233 APInt V = C->getValueAPF().bitcastToAPInt(); 2234 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32); 2235 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32); 2236 return DCI.CombineTo(N, Lo, Hi); 2237 } 2238 2239 // This is a target-specific version of a DAGCombine performed in 2240 // DAGCombiner::visitBITCAST. It performs the equivalent of: 2241 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 2242 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 2243 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || 2244 !Op0.getNode()->hasOneUse()) 2245 break; 2246 SDValue NewSplitF64 = 2247 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), 2248 Op0.getOperand(0)); 2249 SDValue Lo = NewSplitF64.getValue(0); 2250 SDValue Hi = NewSplitF64.getValue(1); 2251 APInt SignBit = APInt::getSignMask(32); 2252 if (Op0.getOpcode() == ISD::FNEG) { 2253 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi, 2254 DAG.getConstant(SignBit, DL, MVT::i32)); 2255 return DCI.CombineTo(N, Lo, NewHi); 2256 } 2257 assert(Op0.getOpcode() == ISD::FABS); 2258 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi, 2259 DAG.getConstant(~SignBit, DL, MVT::i32)); 2260 return DCI.CombineTo(N, Lo, NewHi); 2261 } 2262 case RISCVISD::SLLW: 2263 case RISCVISD::SRAW: 2264 case RISCVISD::SRLW: 2265 case RISCVISD::ROLW: 2266 case RISCVISD::RORW: { 2267 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read. 2268 SDValue LHS = N->getOperand(0); 2269 SDValue RHS = N->getOperand(1); 2270 APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32); 2271 APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5); 2272 if (SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI) || 2273 SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)) { 2274 if (N->getOpcode() != ISD::DELETED_NODE) 2275 DCI.AddToWorklist(N); 2276 return SDValue(N, 0); 2277 } 2278 break; 2279 } 2280 case RISCVISD::FSL: 2281 case RISCVISD::FSR: { 2282 // Only the lower log2(Bitwidth)+1 bits of the the shift amount are read. 2283 SDValue ShAmt = N->getOperand(2); 2284 unsigned BitWidth = ShAmt.getValueSizeInBits(); 2285 assert(isPowerOf2_32(BitWidth) && "Unexpected bit width"); 2286 APInt ShAmtMask(BitWidth, (BitWidth * 2) - 1); 2287 if (SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) { 2288 if (N->getOpcode() != ISD::DELETED_NODE) 2289 DCI.AddToWorklist(N); 2290 return SDValue(N, 0); 2291 } 2292 break; 2293 } 2294 case RISCVISD::FSLW: 2295 case RISCVISD::FSRW: { 2296 // Only the lower 32 bits of Values and lower 6 bits of shift amount are 2297 // read. 
2298 SDValue Op0 = N->getOperand(0); 2299 SDValue Op1 = N->getOperand(1); 2300 SDValue ShAmt = N->getOperand(2); 2301 APInt OpMask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32); 2302 APInt ShAmtMask = APInt::getLowBitsSet(ShAmt.getValueSizeInBits(), 6); 2303 if (SimplifyDemandedBits(Op0, OpMask, DCI) || 2304 SimplifyDemandedBits(Op1, OpMask, DCI) || 2305 SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) { 2306 if (N->getOpcode() != ISD::DELETED_NODE) 2307 DCI.AddToWorklist(N); 2308 return SDValue(N, 0); 2309 } 2310 break; 2311 } 2312 case RISCVISD::GREVIW: 2313 case RISCVISD::GORCIW: { 2314 // Only the lower 32 bits of the first operand are read 2315 SDValue Op0 = N->getOperand(0); 2316 APInt Mask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32); 2317 if (SimplifyDemandedBits(Op0, Mask, DCI)) { 2318 if (N->getOpcode() != ISD::DELETED_NODE) 2319 DCI.AddToWorklist(N); 2320 return SDValue(N, 0); 2321 } 2322 2323 return combineGREVI_GORCI(N, DCI.DAG); 2324 } 2325 case RISCVISD::FMV_X_ANYEXTW_RV64: { 2326 SDLoc DL(N); 2327 SDValue Op0 = N->getOperand(0); 2328 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the 2329 // conversion is unnecessary and can be replaced with an ANY_EXTEND 2330 // of the FMV_W_X_RV64 operand. 2331 if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) { 2332 assert(Op0.getOperand(0).getValueType() == MVT::i64 && 2333 "Unexpected value type!"); 2334 return Op0.getOperand(0); 2335 } 2336 2337 // This is a target-specific version of a DAGCombine performed in 2338 // DAGCombiner::visitBITCAST. It performs the equivalent of: 2339 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 2340 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 2341 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || 2342 !Op0.getNode()->hasOneUse()) 2343 break; 2344 SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, 2345 Op0.getOperand(0)); 2346 APInt SignBit = APInt::getSignMask(32).sext(64); 2347 if (Op0.getOpcode() == ISD::FNEG) 2348 return DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV, 2349 DAG.getConstant(SignBit, DL, MVT::i64)); 2350 2351 assert(Op0.getOpcode() == ISD::FABS); 2352 return DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV, 2353 DAG.getConstant(~SignBit, DL, MVT::i64)); 2354 } 2355 case RISCVISD::GREVI: 2356 case RISCVISD::GORCI: 2357 return combineGREVI_GORCI(N, DCI.DAG); 2358 case ISD::OR: 2359 if (auto GREV = combineORToGREV(SDValue(N, 0), DCI.DAG, Subtarget)) 2360 return GREV; 2361 if (auto GORC = combineORToGORC(SDValue(N, 0), DCI.DAG, Subtarget)) 2362 return GORC; 2363 break; 2364 case RISCVISD::SELECT_CC: { 2365 // Transform 2366 // (select_cc (xor X, 1), 0, setne, trueV, falseV) -> 2367 // (select_cc X, 0, seteq, trueV, falseV) if we can prove X is 0/1. 2368 // This can occur when legalizing some floating point comparisons. 
2369 SDValue LHS = N->getOperand(0); 2370 SDValue RHS = N->getOperand(1); 2371 auto CCVal = static_cast<ISD::CondCode>(N->getConstantOperandVal(2)); 2372 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1); 2373 if (ISD::isIntEqualitySetCC(CCVal) && isNullConstant(RHS) && 2374 LHS.getOpcode() == ISD::XOR && isOneConstant(LHS.getOperand(1)) && 2375 DAG.MaskedValueIsZero(LHS.getOperand(0), Mask)) { 2376 SDLoc DL(N); 2377 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); 2378 SDValue TargetCC = DAG.getConstant(CCVal, DL, Subtarget.getXLenVT()); 2379 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0), 2380 {LHS.getOperand(0), RHS, TargetCC, N->getOperand(3), 2381 N->getOperand(4)}); 2382 } 2383 break; 2384 } 2385 case ISD::SETCC: { 2386 // (setcc X, 1, setne) -> (setcc X, 0, seteq) if we can prove X is 0/1. 2387 // Comparing with 0 may allow us to fold into bnez/beqz. 2388 SDValue LHS = N->getOperand(0); 2389 SDValue RHS = N->getOperand(1); 2390 if (LHS.getValueType().isScalableVector()) 2391 break; 2392 auto CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); 2393 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1); 2394 if (isOneConstant(RHS) && ISD::isIntEqualitySetCC(CC) && 2395 DAG.MaskedValueIsZero(LHS, Mask)) { 2396 SDLoc DL(N); 2397 SDValue Zero = DAG.getConstant(0, DL, LHS.getValueType()); 2398 CC = ISD::getSetCCInverse(CC, LHS.getValueType()); 2399 return DAG.getSetCC(DL, N->getValueType(0), LHS, Zero, CC); 2400 } 2401 break; 2402 } 2403 } 2404 2405 return SDValue(); 2406 } 2407 2408 bool RISCVTargetLowering::isDesirableToCommuteWithShift( 2409 const SDNode *N, CombineLevel Level) const { 2410 // The following folds are only desirable if `(OP _, c1 << c2)` can be 2411 // materialised in fewer instructions than `(OP _, c1)`: 2412 // 2413 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) 2414 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2) 2415 SDValue N0 = N->getOperand(0); 2416 EVT Ty = N0.getValueType(); 2417 if (Ty.isScalarInteger() && 2418 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) { 2419 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 2420 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)); 2421 if (C1 && C2) { 2422 const APInt &C1Int = C1->getAPIntValue(); 2423 APInt ShiftedC1Int = C1Int << C2->getAPIntValue(); 2424 2425 // We can materialise `c1 << c2` into an add immediate, so it's "free", 2426 // and the combine should happen, to potentially allow further combines 2427 // later. 2428 if (ShiftedC1Int.getMinSignedBits() <= 64 && 2429 isLegalAddImmediate(ShiftedC1Int.getSExtValue())) 2430 return true; 2431 2432 // We can materialise `c1` in an add immediate, so it's "free", and the 2433 // combine should be prevented. 2434 if (C1Int.getMinSignedBits() <= 64 && 2435 isLegalAddImmediate(C1Int.getSExtValue())) 2436 return false; 2437 2438 // Neither constant will fit into an immediate, so find materialisation 2439 // costs. 2440 int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), 2441 Subtarget.is64Bit()); 2442 int ShiftedC1Cost = RISCVMatInt::getIntMatCost( 2443 ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit()); 2444 2445 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the 2446 // combine should be prevented. 
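      // Illustrative summary of the immediate checks above: for
      // (shl (add x, 7), 2) the shifted constant 28 still fits an ADDI
      // immediate, so the combine is allowed; for (shl (add x, 2047), 4) the
      // shifted constant 32752 does not fit while 2047 itself does, so the
      // combine is rejected.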
2447 if (C1Cost < ShiftedC1Cost) 2448 return false; 2449 } 2450 } 2451 return true; 2452 } 2453 2454 bool RISCVTargetLowering::targetShrinkDemandedConstant( 2455 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, 2456 TargetLoweringOpt &TLO) const { 2457 // Delay this optimization as late as possible. 2458 if (!TLO.LegalOps) 2459 return false; 2460 2461 EVT VT = Op.getValueType(); 2462 if (VT.isVector()) 2463 return false; 2464 2465 // Only handle AND for now. 2466 if (Op.getOpcode() != ISD::AND) 2467 return false; 2468 2469 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); 2470 if (!C) 2471 return false; 2472 2473 const APInt &Mask = C->getAPIntValue(); 2474 2475 // Clear all non-demanded bits initially. 2476 APInt ShrunkMask = Mask & DemandedBits; 2477 2478 // If the shrunk mask fits in sign extended 12 bits, let the target 2479 // independent code apply it. 2480 if (ShrunkMask.isSignedIntN(12)) 2481 return false; 2482 2483 // Try to make a smaller immediate by setting undemanded bits. 2484 2485 // We need to be able to make a negative number through a combination of mask 2486 // and undemanded bits. 2487 APInt ExpandedMask = Mask | ~DemandedBits; 2488 if (!ExpandedMask.isNegative()) 2489 return false; 2490 2491 // What is the fewest number of bits we need to represent the negative number. 2492 unsigned MinSignedBits = ExpandedMask.getMinSignedBits(); 2493 2494 // Try to make a 12 bit negative immediate. If that fails try to make a 32 2495 // bit negative immediate unless the shrunk immediate already fits in 32 bits. 2496 APInt NewMask = ShrunkMask; 2497 if (MinSignedBits <= 12) 2498 NewMask.setBitsFrom(11); 2499 else if (MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32)) 2500 NewMask.setBitsFrom(31); 2501 else 2502 return false; 2503 2504 // Sanity check that our new mask is a subset of the demanded mask. 2505 assert(NewMask.isSubsetOf(ExpandedMask)); 2506 2507 // If we aren't changing the mask, just return true to keep it and prevent 2508 // the caller from optimizing. 2509 if (NewMask == Mask) 2510 return true; 2511 2512 // Replace the constant with the new mask. 2513 SDLoc DL(Op); 2514 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT); 2515 SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC); 2516 return TLO.CombineTo(Op, NewOp); 2517 } 2518 2519 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, 2520 KnownBits &Known, 2521 const APInt &DemandedElts, 2522 const SelectionDAG &DAG, 2523 unsigned Depth) const { 2524 unsigned BitWidth = Known.getBitWidth(); 2525 unsigned Opc = Op.getOpcode(); 2526 assert((Opc >= ISD::BUILTIN_OP_END || 2527 Opc == ISD::INTRINSIC_WO_CHAIN || 2528 Opc == ISD::INTRINSIC_W_CHAIN || 2529 Opc == ISD::INTRINSIC_VOID) && 2530 "Should use MaskedValueIsZero if you don't know whether Op" 2531 " is a target node!"); 2532 2533 Known.resetAll(); 2534 switch (Opc) { 2535 default: break; 2536 case RISCVISD::REMUW: { 2537 KnownBits Known2; 2538 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 2539 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 2540 // We only care about the lower 32 bits. 2541 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32)); 2542 // Restore the original width by sign extending. 
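    // (REMUW, like the other *W instructions, produces its 32-bit result
    // sign-extended to 64 bits, which is why sext rather than zext is the
    // correct way to restore the width of the known bits here.)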
2543 Known = Known.sext(BitWidth); 2544 break; 2545 } 2546 case RISCVISD::DIVUW: { 2547 KnownBits Known2; 2548 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 2549 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 2550 // We only care about the lower 32 bits. 2551 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32)); 2552 // Restore the original width by sign extending. 2553 Known = Known.sext(BitWidth); 2554 break; 2555 } 2556 case RISCVISD::READ_VLENB: 2557 // We assume VLENB is at least 8 bytes. 2558 // FIXME: The 1.0 draft spec defines minimum VLEN as 128 bits. 2559 Known.Zero.setLowBits(3); 2560 break; 2561 } 2562 } 2563 2564 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( 2565 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, 2566 unsigned Depth) const { 2567 switch (Op.getOpcode()) { 2568 default: 2569 break; 2570 case RISCVISD::SLLW: 2571 case RISCVISD::SRAW: 2572 case RISCVISD::SRLW: 2573 case RISCVISD::DIVW: 2574 case RISCVISD::DIVUW: 2575 case RISCVISD::REMUW: 2576 case RISCVISD::ROLW: 2577 case RISCVISD::RORW: 2578 case RISCVISD::GREVIW: 2579 case RISCVISD::GORCIW: 2580 case RISCVISD::FSLW: 2581 case RISCVISD::FSRW: 2582 // TODO: As the result is sign-extended, this is conservatively correct. A 2583 // more precise answer could be calculated for SRAW depending on known 2584 // bits in the shift amount. 2585 return 33; 2586 case RISCVISD::VMV_X_S: 2587 // The number of sign bits of the scalar result is computed by obtaining the 2588 // element type of the input vector operand, subtracting its width from the 2589 // XLEN, and then adding one (sign bit within the element type). If the 2590 // element type is wider than XLen, the least-significant XLEN bits are 2591 // taken. 2592 if (Op.getOperand(0).getScalarValueSizeInBits() > Subtarget.getXLen()) 2593 return 1; 2594 return Subtarget.getXLen() - Op.getOperand(0).getScalarValueSizeInBits() + 1; 2595 } 2596 2597 return 1; 2598 } 2599 2600 static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI, 2601 MachineBasicBlock *BB) { 2602 assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction"); 2603 2604 // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves. 2605 // Should the count have wrapped while it was being read, we need to try 2606 // again. 2607 // ... 2608 // read: 2609 // rdcycleh x3 # load high word of cycle 2610 // rdcycle x2 # load low word of cycle 2611 // rdcycleh x4 # load high word of cycle 2612 // bne x3, x4, read # check if high word reads match, otherwise try again 2613 // ... 2614 2615 MachineFunction &MF = *BB->getParent(); 2616 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 2617 MachineFunction::iterator It = ++BB->getIterator(); 2618 2619 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB); 2620 MF.insert(It, LoopMBB); 2621 2622 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB); 2623 MF.insert(It, DoneMBB); 2624 2625 // Transfer the remainder of BB and its successor edges to DoneMBB. 
2626 DoneMBB->splice(DoneMBB->begin(), BB, 2627 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 2628 DoneMBB->transferSuccessorsAndUpdatePHIs(BB); 2629 2630 BB->addSuccessor(LoopMBB); 2631 2632 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 2633 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 2634 Register LoReg = MI.getOperand(0).getReg(); 2635 Register HiReg = MI.getOperand(1).getReg(); 2636 DebugLoc DL = MI.getDebugLoc(); 2637 2638 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); 2639 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg) 2640 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) 2641 .addReg(RISCV::X0); 2642 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg) 2643 .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding) 2644 .addReg(RISCV::X0); 2645 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg) 2646 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) 2647 .addReg(RISCV::X0); 2648 2649 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE)) 2650 .addReg(HiReg) 2651 .addReg(ReadAgainReg) 2652 .addMBB(LoopMBB); 2653 2654 LoopMBB->addSuccessor(LoopMBB); 2655 LoopMBB->addSuccessor(DoneMBB); 2656 2657 MI.eraseFromParent(); 2658 2659 return DoneMBB; 2660 } 2661 2662 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI, 2663 MachineBasicBlock *BB) { 2664 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction"); 2665 2666 MachineFunction &MF = *BB->getParent(); 2667 DebugLoc DL = MI.getDebugLoc(); 2668 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 2669 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 2670 Register LoReg = MI.getOperand(0).getReg(); 2671 Register HiReg = MI.getOperand(1).getReg(); 2672 Register SrcReg = MI.getOperand(2).getReg(); 2673 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass; 2674 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); 2675 2676 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC, 2677 RI); 2678 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 2679 MachineMemOperand *MMOLo = 2680 MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8)); 2681 MachineMemOperand *MMOHi = MF.getMachineMemOperand( 2682 MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8)); 2683 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg) 2684 .addFrameIndex(FI) 2685 .addImm(0) 2686 .addMemOperand(MMOLo); 2687 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg) 2688 .addFrameIndex(FI) 2689 .addImm(4) 2690 .addMemOperand(MMOHi); 2691 MI.eraseFromParent(); // The pseudo instruction is gone now. 
2692 return BB; 2693 } 2694 2695 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI, 2696 MachineBasicBlock *BB) { 2697 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo && 2698 "Unexpected instruction"); 2699 2700 MachineFunction &MF = *BB->getParent(); 2701 DebugLoc DL = MI.getDebugLoc(); 2702 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 2703 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 2704 Register DstReg = MI.getOperand(0).getReg(); 2705 Register LoReg = MI.getOperand(1).getReg(); 2706 Register HiReg = MI.getOperand(2).getReg(); 2707 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass; 2708 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); 2709 2710 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 2711 MachineMemOperand *MMOLo = 2712 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8)); 2713 MachineMemOperand *MMOHi = MF.getMachineMemOperand( 2714 MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8)); 2715 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 2716 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill())) 2717 .addFrameIndex(FI) 2718 .addImm(0) 2719 .addMemOperand(MMOLo); 2720 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 2721 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill())) 2722 .addFrameIndex(FI) 2723 .addImm(4) 2724 .addMemOperand(MMOHi); 2725 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI); 2726 MI.eraseFromParent(); // The pseudo instruction is gone now. 2727 return BB; 2728 } 2729 2730 static bool isSelectPseudo(MachineInstr &MI) { 2731 switch (MI.getOpcode()) { 2732 default: 2733 return false; 2734 case RISCV::Select_GPR_Using_CC_GPR: 2735 case RISCV::Select_FPR16_Using_CC_GPR: 2736 case RISCV::Select_FPR32_Using_CC_GPR: 2737 case RISCV::Select_FPR64_Using_CC_GPR: 2738 return true; 2739 } 2740 } 2741 2742 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI, 2743 MachineBasicBlock *BB) { 2744 // To "insert" Select_* instructions, we actually have to insert the triangle 2745 // control-flow pattern. The incoming instructions know the destination vreg 2746 // to set, the condition code register to branch on, the true/false values to 2747 // select between, and the condcode to use to select the appropriate branch. 2748 // 2749 // We produce the following control flow: 2750 // HeadMBB 2751 // | \ 2752 // | IfFalseMBB 2753 // | / 2754 // TailMBB 2755 // 2756 // When we find a sequence of selects we attempt to optimize their emission 2757 // by sharing the control flow. Currently we only handle cases where we have 2758 // multiple selects with the exact same condition (same LHS, RHS and CC). 2759 // The selects may be interleaved with other instructions if the other 2760 // instructions meet some requirements we deem safe: 2761 // - They are debug instructions. Otherwise, 2762 // - They do not have side-effects, do not access memory and their inputs do 2763 // not depend on the results of the select pseudo-instructions. 2764 // The TrueV/FalseV operands of the selects cannot depend on the result of 2765 // previous selects in the sequence. 2766 // These conditions could be further relaxed. See the X86 target for a 2767 // related approach and more information. 
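  // Illustrative sketch (pseudo-MIR, operand order as read below): two selects
  // such as
  //   %a = Select_GPR_Using_CC_GPR %x, %y, cc, %t0, %f0
  //   %b = Select_GPR_Using_CC_GPR %x, %y, cc, %t1, %f1
  // share a single conditional branch, and both results become PHIs in the
  // tail block.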
2768 Register LHS = MI.getOperand(1).getReg(); 2769 Register RHS = MI.getOperand(2).getReg(); 2770 auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm()); 2771 2772 SmallVector<MachineInstr *, 4> SelectDebugValues; 2773 SmallSet<Register, 4> SelectDests; 2774 SelectDests.insert(MI.getOperand(0).getReg()); 2775 2776 MachineInstr *LastSelectPseudo = &MI; 2777 2778 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI); 2779 SequenceMBBI != E; ++SequenceMBBI) { 2780 if (SequenceMBBI->isDebugInstr()) 2781 continue; 2782 else if (isSelectPseudo(*SequenceMBBI)) { 2783 if (SequenceMBBI->getOperand(1).getReg() != LHS || 2784 SequenceMBBI->getOperand(2).getReg() != RHS || 2785 SequenceMBBI->getOperand(3).getImm() != CC || 2786 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) || 2787 SelectDests.count(SequenceMBBI->getOperand(5).getReg())) 2788 break; 2789 LastSelectPseudo = &*SequenceMBBI; 2790 SequenceMBBI->collectDebugValues(SelectDebugValues); 2791 SelectDests.insert(SequenceMBBI->getOperand(0).getReg()); 2792 } else { 2793 if (SequenceMBBI->hasUnmodeledSideEffects() || 2794 SequenceMBBI->mayLoadOrStore()) 2795 break; 2796 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) { 2797 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg()); 2798 })) 2799 break; 2800 } 2801 } 2802 2803 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); 2804 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 2805 DebugLoc DL = MI.getDebugLoc(); 2806 MachineFunction::iterator I = ++BB->getIterator(); 2807 2808 MachineBasicBlock *HeadMBB = BB; 2809 MachineFunction *F = BB->getParent(); 2810 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB); 2811 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB); 2812 2813 F->insert(I, IfFalseMBB); 2814 F->insert(I, TailMBB); 2815 2816 // Transfer debug instructions associated with the selects to TailMBB. 2817 for (MachineInstr *DebugInstr : SelectDebugValues) { 2818 TailMBB->push_back(DebugInstr->removeFromParent()); 2819 } 2820 2821 // Move all instructions after the sequence to TailMBB. 2822 TailMBB->splice(TailMBB->end(), HeadMBB, 2823 std::next(LastSelectPseudo->getIterator()), HeadMBB->end()); 2824 // Update machine-CFG edges by transferring all successors of the current 2825 // block to the new block which will contain the Phi nodes for the selects. 2826 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB); 2827 // Set the successors for HeadMBB. 2828 HeadMBB->addSuccessor(IfFalseMBB); 2829 HeadMBB->addSuccessor(TailMBB); 2830 2831 // Insert appropriate branch. 2832 unsigned Opcode = getBranchOpcodeForIntCondCode(CC); 2833 2834 BuildMI(HeadMBB, DL, TII.get(Opcode)) 2835 .addReg(LHS) 2836 .addReg(RHS) 2837 .addMBB(TailMBB); 2838 2839 // IfFalseMBB just falls through to TailMBB. 2840 IfFalseMBB->addSuccessor(TailMBB); 2841 2842 // Create PHIs for all of the select pseudo-instructions. 
2843 auto SelectMBBI = MI.getIterator(); 2844 auto SelectEnd = std::next(LastSelectPseudo->getIterator()); 2845 auto InsertionPoint = TailMBB->begin(); 2846 while (SelectMBBI != SelectEnd) { 2847 auto Next = std::next(SelectMBBI); 2848 if (isSelectPseudo(*SelectMBBI)) { 2849 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ] 2850 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(), 2851 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg()) 2852 .addReg(SelectMBBI->getOperand(4).getReg()) 2853 .addMBB(HeadMBB) 2854 .addReg(SelectMBBI->getOperand(5).getReg()) 2855 .addMBB(IfFalseMBB); 2856 SelectMBBI->eraseFromParent(); 2857 } 2858 SelectMBBI = Next; 2859 } 2860 2861 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs); 2862 return TailMBB; 2863 } 2864 2865 static MachineBasicBlock *addVSetVL(MachineInstr &MI, MachineBasicBlock *BB, 2866 int VLIndex, unsigned SEWIndex, 2867 RISCVVLMUL VLMul, bool WritesElement0) { 2868 MachineFunction &MF = *BB->getParent(); 2869 DebugLoc DL = MI.getDebugLoc(); 2870 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 2871 2872 unsigned SEW = MI.getOperand(SEWIndex).getImm(); 2873 assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW"); 2874 RISCVVSEW ElementWidth = static_cast<RISCVVSEW>(Log2_32(SEW / 8)); 2875 2876 MachineRegisterInfo &MRI = MF.getRegInfo(); 2877 2878 // VL and VTYPE are alive here. 2879 MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI)); 2880 2881 if (VLIndex >= 0) { 2882 // Set VL (rs1 != X0). 2883 Register DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); 2884 MIB.addReg(DestReg, RegState::Define | RegState::Dead) 2885 .addReg(MI.getOperand(VLIndex).getReg()); 2886 } else 2887 // With no VL operator in the pseudo, do not modify VL (rd = X0, rs1 = X0). 2888 MIB.addReg(RISCV::X0, RegState::Define | RegState::Dead) 2889 .addReg(RISCV::X0, RegState::Kill); 2890 2891 // Default to tail agnostic unless the destination is tied to a source. In 2892 // that case the user would have some control over the tail values. The tail 2893 // policy is also ignored on instructions that only update element 0 like 2894 // vmv.s.x or reductions so use agnostic there to match the common case. 2895 // FIXME: This is conservatively correct, but we might want to detect that 2896 // the input is undefined. 2897 bool TailAgnostic = true; 2898 unsigned UseOpIdx; 2899 if (MI.isRegTiedToUseOperand(0, &UseOpIdx) && !WritesElement0) { 2900 TailAgnostic = false; 2901 // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic. 2902 const MachineOperand &UseMO = MI.getOperand(UseOpIdx); 2903 MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg()); 2904 if (UseMI && UseMI->isImplicitDef()) 2905 TailAgnostic = true; 2906 } 2907 2908 // For simplicity we reuse the vtype representation here. 
2909 MIB.addImm(RISCVVType::encodeVTYPE(VLMul, ElementWidth, 2910 /*TailAgnostic*/ TailAgnostic, 2911 /*MaskAgnostic*/ false)); 2912 2913 // Remove (now) redundant operands from pseudo 2914 MI.getOperand(SEWIndex).setImm(-1); 2915 if (VLIndex >= 0) { 2916 MI.getOperand(VLIndex).setReg(RISCV::NoRegister); 2917 MI.getOperand(VLIndex).setIsKill(false); 2918 } 2919 2920 return BB; 2921 } 2922 2923 MachineBasicBlock * 2924 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, 2925 MachineBasicBlock *BB) const { 2926 uint64_t TSFlags = MI.getDesc().TSFlags; 2927 2928 if (TSFlags & RISCVII::HasSEWOpMask) { 2929 unsigned NumOperands = MI.getNumExplicitOperands(); 2930 int VLIndex = (TSFlags & RISCVII::HasVLOpMask) ? NumOperands - 2 : -1; 2931 unsigned SEWIndex = NumOperands - 1; 2932 bool WritesElement0 = TSFlags & RISCVII::WritesElement0Mask; 2933 2934 RISCVVLMUL VLMul = static_cast<RISCVVLMUL>((TSFlags & RISCVII::VLMulMask) >> 2935 RISCVII::VLMulShift); 2936 return addVSetVL(MI, BB, VLIndex, SEWIndex, VLMul, WritesElement0); 2937 } 2938 2939 switch (MI.getOpcode()) { 2940 default: 2941 llvm_unreachable("Unexpected instr type to insert"); 2942 case RISCV::ReadCycleWide: 2943 assert(!Subtarget.is64Bit() && 2944 "ReadCycleWrite is only to be used on riscv32"); 2945 return emitReadCycleWidePseudo(MI, BB); 2946 case RISCV::Select_GPR_Using_CC_GPR: 2947 case RISCV::Select_FPR16_Using_CC_GPR: 2948 case RISCV::Select_FPR32_Using_CC_GPR: 2949 case RISCV::Select_FPR64_Using_CC_GPR: 2950 return emitSelectPseudo(MI, BB); 2951 case RISCV::BuildPairF64Pseudo: 2952 return emitBuildPairF64Pseudo(MI, BB); 2953 case RISCV::SplitF64Pseudo: 2954 return emitSplitF64Pseudo(MI, BB); 2955 } 2956 } 2957 2958 // Calling Convention Implementation. 2959 // The expectations for frontend ABI lowering vary from target to target. 2960 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI 2961 // details, but this is a longer term goal. For now, we simply try to keep the 2962 // role of the frontend as simple and well-defined as possible. The rules can 2963 // be summarised as: 2964 // * Never split up large scalar arguments. We handle them here. 2965 // * If a hardfloat calling convention is being used, and the struct may be 2966 // passed in a pair of registers (fp+fp, int+fp), and both registers are 2967 // available, then pass as two separate arguments. If either the GPRs or FPRs 2968 // are exhausted, then pass according to the rule below. 2969 // * If a struct could never be passed in registers or directly in a stack 2970 // slot (as it is larger than 2*XLEN and the floating point rules don't 2971 // apply), then pass it using a pointer with the byval attribute. 2972 // * If a struct is less than 2*XLEN, then coerce to either a two-element 2973 // word-sized array or a 2*XLEN scalar (depending on alignment). 2974 // * The frontend can determine whether a struct is returned by reference or 2975 // not based on its size and fields. If it will be returned by reference, the 2976 // frontend must modify the prototype so a pointer with the sret annotation is 2977 // passed as the first argument. This is not necessary for large scalar 2978 // returns. 2979 // * Struct return values and varargs should be coerced to structs containing 2980 // register-size fields in the same situations they would be for fixed 2981 // arguments. 
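// As an illustrative sketch of the rules above (assuming the lp64d ABI; not
// normative): a C argument of type `struct { double a; double b; }` may be
// passed as two separate double arguments in two FPRs (e.g. fa0 and fa1)
// while two FPRs are still free; once the FPRs (or the GPRs for an int+fp
// pair) are exhausted, the struct falls back to the integer-argument rules
// described above.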
2982 2983 static const MCPhysReg ArgGPRs[] = { 2984 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, 2985 RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17 2986 }; 2987 static const MCPhysReg ArgFPR16s[] = { 2988 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, 2989 RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H 2990 }; 2991 static const MCPhysReg ArgFPR32s[] = { 2992 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, 2993 RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F 2994 }; 2995 static const MCPhysReg ArgFPR64s[] = { 2996 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, 2997 RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D 2998 }; 2999 // This is an interim calling convention and it may be changed in the future. 3000 static const MCPhysReg ArgVRs[] = { 3001 RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13, 3002 RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19, 3003 RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23}; 3004 static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2, 3005 RISCV::V14M2, RISCV::V16M2, RISCV::V18M2, 3006 RISCV::V20M2, RISCV::V22M2}; 3007 static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4, 3008 RISCV::V20M4}; 3009 static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8}; 3010 3011 // Pass a 2*XLEN argument that has been split into two XLEN values through 3012 // registers or the stack as necessary. 3013 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, 3014 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, 3015 MVT ValVT2, MVT LocVT2, 3016 ISD::ArgFlagsTy ArgFlags2) { 3017 unsigned XLenInBytes = XLen / 8; 3018 if (Register Reg = State.AllocateReg(ArgGPRs)) { 3019 // At least one half can be passed via register. 3020 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg, 3021 VA1.getLocVT(), CCValAssign::Full)); 3022 } else { 3023 // Both halves must be passed on the stack, with proper alignment. 3024 Align StackAlign = 3025 std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign()); 3026 State.addLoc( 3027 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(), 3028 State.AllocateStack(XLenInBytes, StackAlign), 3029 VA1.getLocVT(), CCValAssign::Full)); 3030 State.addLoc(CCValAssign::getMem( 3031 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)), 3032 LocVT2, CCValAssign::Full)); 3033 return false; 3034 } 3035 3036 if (Register Reg = State.AllocateReg(ArgGPRs)) { 3037 // The second half can also be passed via register. 3038 State.addLoc( 3039 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full)); 3040 } else { 3041 // The second half is passed via the stack, without additional alignment. 3042 State.addLoc(CCValAssign::getMem( 3043 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)), 3044 LocVT2, CCValAssign::Full)); 3045 } 3046 3047 return false; 3048 } 3049 3050 // Implements the RISC-V calling convention. Returns true upon failure. 3051 static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, 3052 MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, 3053 ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, 3054 bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, 3055 Optional<unsigned> FirstMaskArgument) { 3056 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits(); 3057 assert(XLen == 32 || XLen == 64); 3058 MVT XLenVT = XLen == 32 ? 
MVT::i32 : MVT::i64; 3059 3060 // Any return value split in to more than two values can't be returned 3061 // directly. 3062 if (IsRet && ValNo > 1) 3063 return true; 3064 3065 // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a 3066 // variadic argument, or if no F16/F32 argument registers are available. 3067 bool UseGPRForF16_F32 = true; 3068 // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a 3069 // variadic argument, or if no F64 argument registers are available. 3070 bool UseGPRForF64 = true; 3071 3072 switch (ABI) { 3073 default: 3074 llvm_unreachable("Unexpected ABI"); 3075 case RISCVABI::ABI_ILP32: 3076 case RISCVABI::ABI_LP64: 3077 break; 3078 case RISCVABI::ABI_ILP32F: 3079 case RISCVABI::ABI_LP64F: 3080 UseGPRForF16_F32 = !IsFixed; 3081 break; 3082 case RISCVABI::ABI_ILP32D: 3083 case RISCVABI::ABI_LP64D: 3084 UseGPRForF16_F32 = !IsFixed; 3085 UseGPRForF64 = !IsFixed; 3086 break; 3087 } 3088 3089 // FPR16, FPR32, and FPR64 alias each other. 3090 if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) { 3091 UseGPRForF16_F32 = true; 3092 UseGPRForF64 = true; 3093 } 3094 3095 // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and 3096 // similar local variables rather than directly checking against the target 3097 // ABI. 3098 3099 if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::f32)) { 3100 LocVT = XLenVT; 3101 LocInfo = CCValAssign::BCvt; 3102 } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) { 3103 LocVT = MVT::i64; 3104 LocInfo = CCValAssign::BCvt; 3105 } 3106 3107 // If this is a variadic argument, the RISC-V calling convention requires 3108 // that it is assigned an 'even' or 'aligned' register if it has 8-byte 3109 // alignment (RV32) or 16-byte alignment (RV64). An aligned register should 3110 // be used regardless of whether the original argument was split during 3111 // legalisation or not. The argument will not be passed by registers if the 3112 // original type is larger than 2*XLEN, so the register alignment rule does 3113 // not apply. 3114 unsigned TwoXLenInBytes = (2 * XLen) / 8; 3115 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes && 3116 DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) { 3117 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs); 3118 // Skip 'odd' register if necessary. 3119 if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1) 3120 State.AllocateReg(ArgGPRs); 3121 } 3122 3123 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs(); 3124 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags = 3125 State.getPendingArgFlags(); 3126 3127 assert(PendingLocs.size() == PendingArgFlags.size() && 3128 "PendingLocs and PendingArgFlags out of sync"); 3129 3130 // Handle passing f64 on RV32D with a soft float ABI or when floating point 3131 // registers are exhausted. 3132 if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) { 3133 assert(!ArgFlags.isSplit() && PendingLocs.empty() && 3134 "Can't lower f64 if it is split"); 3135 // Depending on available argument GPRS, f64 may be passed in a pair of 3136 // GPRs, split between a GPR and the stack, or passed completely on the 3137 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these 3138 // cases. 
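// For illustration (assuming the ilp32 ABI): with GPRs free, the f64 takes
// the next two argument registers (e.g. a0+a1); if only a7 remains, the low
// half goes in a7 and the high half in a 4-byte stack slot; with no GPRs
// left, the whole value is placed in an 8-byte aligned stack slot.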
3139 Register Reg = State.AllocateReg(ArgGPRs); 3140 LocVT = MVT::i32; 3141 if (!Reg) { 3142 unsigned StackOffset = State.AllocateStack(8, Align(8)); 3143 State.addLoc( 3144 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 3145 return false; 3146 } 3147 if (!State.AllocateReg(ArgGPRs)) 3148 State.AllocateStack(4, Align(4)); 3149 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3150 return false; 3151 } 3152 3153 // Split arguments might be passed indirectly, so keep track of the pending 3154 // values. 3155 if (ArgFlags.isSplit() || !PendingLocs.empty()) { 3156 LocVT = XLenVT; 3157 LocInfo = CCValAssign::Indirect; 3158 PendingLocs.push_back( 3159 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); 3160 PendingArgFlags.push_back(ArgFlags); 3161 if (!ArgFlags.isSplitEnd()) { 3162 return false; 3163 } 3164 } 3165 3166 // If the split argument only had two elements, it should be passed directly 3167 // in registers or on the stack. 3168 if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) { 3169 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()"); 3170 // Apply the normal calling convention rules to the first half of the 3171 // split argument. 3172 CCValAssign VA = PendingLocs[0]; 3173 ISD::ArgFlagsTy AF = PendingArgFlags[0]; 3174 PendingLocs.clear(); 3175 PendingArgFlags.clear(); 3176 return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT, 3177 ArgFlags); 3178 } 3179 3180 // Allocate to a register if possible, or else a stack slot. 3181 Register Reg; 3182 if (ValVT == MVT::f16 && !UseGPRForF16_F32) 3183 Reg = State.AllocateReg(ArgFPR16s); 3184 else if (ValVT == MVT::f32 && !UseGPRForF16_F32) 3185 Reg = State.AllocateReg(ArgFPR32s); 3186 else if (ValVT == MVT::f64 && !UseGPRForF64) 3187 Reg = State.AllocateReg(ArgFPR64s); 3188 else if (ValVT.isScalableVector()) { 3189 const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT); 3190 if (RC == &RISCV::VRRegClass) { 3191 // Assign the first mask argument to V0. 3192 // This is an interim calling convention and it may be changed in the 3193 // future. 3194 if (FirstMaskArgument.hasValue() && 3195 ValNo == FirstMaskArgument.getValue()) { 3196 Reg = State.AllocateReg(RISCV::V0); 3197 } else { 3198 Reg = State.AllocateReg(ArgVRs); 3199 } 3200 } else if (RC == &RISCV::VRM2RegClass) { 3201 Reg = State.AllocateReg(ArgVRM2s); 3202 } else if (RC == &RISCV::VRM4RegClass) { 3203 Reg = State.AllocateReg(ArgVRM4s); 3204 } else if (RC == &RISCV::VRM8RegClass) { 3205 Reg = State.AllocateReg(ArgVRM8s); 3206 } else { 3207 llvm_unreachable("Unhandled class register for ValueType"); 3208 } 3209 if (!Reg) { 3210 LocInfo = CCValAssign::Indirect; 3211 // Try using a GPR to pass the address 3212 Reg = State.AllocateReg(ArgGPRs); 3213 LocVT = XLenVT; 3214 } 3215 } else 3216 Reg = State.AllocateReg(ArgGPRs); 3217 unsigned StackOffset = 3218 Reg ? 0 : State.AllocateStack(XLen / 8, Align(XLen / 8)); 3219 3220 // If we reach this point and PendingLocs is non-empty, we must be at the 3221 // end of a split argument that must be passed indirectly. 
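// For example, an i128 argument on RV32 is legalised into four XLEN-sized
// parts, so every pending part is rewritten below to refer to the same
// location (a GPR holding the address if one was available, otherwise a
// single stack slot), and the actual parts are stored through that pointer
// by the caller.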
3222 if (!PendingLocs.empty()) { 3223 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()"); 3224 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()"); 3225 3226 for (auto &It : PendingLocs) { 3227 if (Reg) 3228 It.convertToReg(Reg); 3229 else 3230 It.convertToMem(StackOffset); 3231 State.addLoc(It); 3232 } 3233 PendingLocs.clear(); 3234 PendingArgFlags.clear(); 3235 return false; 3236 } 3237 3238 assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT || 3239 (TLI.getSubtarget().hasStdExtV() && ValVT.isScalableVector())) && 3240 "Expected an XLenVT or scalable vector types at this stage"); 3241 3242 if (Reg) { 3243 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3244 return false; 3245 } 3246 3247 // When a floating-point value is passed on the stack, no bit-conversion is 3248 // needed. 3249 if (ValVT.isFloatingPoint()) { 3250 LocVT = ValVT; 3251 LocInfo = CCValAssign::Full; 3252 } 3253 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 3254 return false; 3255 } 3256 3257 template <typename ArgTy> 3258 static Optional<unsigned> preAssignMask(const ArgTy &Args) { 3259 for (const auto &ArgIdx : enumerate(Args)) { 3260 MVT ArgVT = ArgIdx.value().VT; 3261 if (ArgVT.isScalableVector() && 3262 ArgVT.getVectorElementType().SimpleTy == MVT::i1) 3263 return ArgIdx.index(); 3264 } 3265 return None; 3266 } 3267 3268 void RISCVTargetLowering::analyzeInputArgs( 3269 MachineFunction &MF, CCState &CCInfo, 3270 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const { 3271 unsigned NumArgs = Ins.size(); 3272 FunctionType *FType = MF.getFunction().getFunctionType(); 3273 3274 Optional<unsigned> FirstMaskArgument; 3275 if (Subtarget.hasStdExtV()) 3276 FirstMaskArgument = preAssignMask(Ins); 3277 3278 for (unsigned i = 0; i != NumArgs; ++i) { 3279 MVT ArgVT = Ins[i].VT; 3280 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; 3281 3282 Type *ArgTy = nullptr; 3283 if (IsRet) 3284 ArgTy = FType->getReturnType(); 3285 else if (Ins[i].isOrigArg()) 3286 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex()); 3287 3288 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 3289 if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, 3290 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this, 3291 FirstMaskArgument)) { 3292 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " 3293 << EVT(ArgVT).getEVTString() << '\n'); 3294 llvm_unreachable(nullptr); 3295 } 3296 } 3297 } 3298 3299 void RISCVTargetLowering::analyzeOutputArgs( 3300 MachineFunction &MF, CCState &CCInfo, 3301 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet, 3302 CallLoweringInfo *CLI) const { 3303 unsigned NumArgs = Outs.size(); 3304 3305 Optional<unsigned> FirstMaskArgument; 3306 if (Subtarget.hasStdExtV()) 3307 FirstMaskArgument = preAssignMask(Outs); 3308 3309 for (unsigned i = 0; i != NumArgs; i++) { 3310 MVT ArgVT = Outs[i].VT; 3311 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 3312 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr; 3313 3314 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 3315 if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, 3316 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this, 3317 FirstMaskArgument)) { 3318 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " 3319 << EVT(ArgVT).getEVTString() << "\n"); 3320 llvm_unreachable(nullptr); 3321 } 3322 } 3323 } 3324 3325 // Convert Val to a ValVT. 
Should not be called for CCValAssign::Indirect 3326 // values. 3327 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, 3328 const CCValAssign &VA, const SDLoc &DL) { 3329 switch (VA.getLocInfo()) { 3330 default: 3331 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 3332 case CCValAssign::Full: 3333 break; 3334 case CCValAssign::BCvt: 3335 if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16) 3336 Val = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, Val); 3337 else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 3338 Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val); 3339 else 3340 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); 3341 break; 3342 } 3343 return Val; 3344 } 3345 3346 // The caller is responsible for loading the full value if the argument is 3347 // passed with CCValAssign::Indirect. 3348 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, 3349 const CCValAssign &VA, const SDLoc &DL, 3350 const RISCVTargetLowering &TLI) { 3351 MachineFunction &MF = DAG.getMachineFunction(); 3352 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 3353 EVT LocVT = VA.getLocVT(); 3354 SDValue Val; 3355 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT()); 3356 Register VReg = RegInfo.createVirtualRegister(RC); 3357 RegInfo.addLiveIn(VA.getLocReg(), VReg); 3358 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); 3359 3360 if (VA.getLocInfo() == CCValAssign::Indirect) 3361 return Val; 3362 3363 return convertLocVTToValVT(DAG, Val, VA, DL); 3364 } 3365 3366 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, 3367 const CCValAssign &VA, const SDLoc &DL) { 3368 EVT LocVT = VA.getLocVT(); 3369 3370 switch (VA.getLocInfo()) { 3371 default: 3372 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 3373 case CCValAssign::Full: 3374 break; 3375 case CCValAssign::BCvt: 3376 if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16) 3377 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val); 3378 else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 3379 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val); 3380 else 3381 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val); 3382 break; 3383 } 3384 return Val; 3385 } 3386 3387 // The caller is responsible for loading the full value if the argument is 3388 // passed with CCValAssign::Indirect. 
3389 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
3390 const CCValAssign &VA, const SDLoc &DL) {
3391 MachineFunction &MF = DAG.getMachineFunction();
3392 MachineFrameInfo &MFI = MF.getFrameInfo();
3393 EVT LocVT = VA.getLocVT();
3394 EVT ValVT = VA.getValVT();
3395 EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
3396 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
3397 VA.getLocMemOffset(), /*Immutable=*/true);
3398 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3399 SDValue Val;
3400
3401 ISD::LoadExtType ExtType;
3402 switch (VA.getLocInfo()) {
3403 default:
3404 llvm_unreachable("Unexpected CCValAssign::LocInfo");
3405 case CCValAssign::Full:
3406 case CCValAssign::Indirect:
3407 case CCValAssign::BCvt:
3408 ExtType = ISD::NON_EXTLOAD;
3409 break;
3410 }
3411 Val = DAG.getExtLoad(
3412 ExtType, DL, LocVT, Chain, FIN,
3413 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
3414 return Val;
3415 }
3416
3417 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
3418 const CCValAssign &VA, const SDLoc &DL) {
3419 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
3420 "Unexpected VA");
3421 MachineFunction &MF = DAG.getMachineFunction();
3422 MachineFrameInfo &MFI = MF.getFrameInfo();
3423 MachineRegisterInfo &RegInfo = MF.getRegInfo();
3424
3425 if (VA.isMemLoc()) {
3426 // f64 is passed on the stack.
3427 int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
3428 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
3429 return DAG.getLoad(MVT::f64, DL, Chain, FIN,
3430 MachinePointerInfo::getFixedStack(MF, FI));
3431 }
3432
3433 assert(VA.isRegLoc() && "Expected register VA assignment");
3434
3435 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
3436 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
3437 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
3438 SDValue Hi;
3439 if (VA.getLocReg() == RISCV::X17) {
3440 // Second half of f64 is passed on the stack.
3441 int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
3442 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
3443 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
3444 MachinePointerInfo::getFixedStack(MF, FI));
3445 } else {
3446 // Second half of f64 is passed in another GPR.
3447 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
3448 RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
3449 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
3450 }
3451 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
3452 }
3453
3454 // FastCC gives less than a 1% performance improvement on some particular
3455 // benchmarks, but in theory it may benefit some cases.
3456 static bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
3457 CCValAssign::LocInfo LocInfo,
3458 ISD::ArgFlagsTy ArgFlags, CCState &State) {
3459
3460 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
3461 // X5 and X6 might be used for save-restore libcalls.
3462 static const MCPhysReg GPRList[] = { 3463 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, 3464 RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28, 3465 RISCV::X29, RISCV::X30, RISCV::X31}; 3466 if (unsigned Reg = State.AllocateReg(GPRList)) { 3467 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3468 return false; 3469 } 3470 } 3471 3472 if (LocVT == MVT::f16) { 3473 static const MCPhysReg FPR16List[] = { 3474 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H, 3475 RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H, 3476 RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H, 3477 RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H}; 3478 if (unsigned Reg = State.AllocateReg(FPR16List)) { 3479 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3480 return false; 3481 } 3482 } 3483 3484 if (LocVT == MVT::f32) { 3485 static const MCPhysReg FPR32List[] = { 3486 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F, 3487 RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F, 3488 RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F, 3489 RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F}; 3490 if (unsigned Reg = State.AllocateReg(FPR32List)) { 3491 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3492 return false; 3493 } 3494 } 3495 3496 if (LocVT == MVT::f64) { 3497 static const MCPhysReg FPR64List[] = { 3498 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D, 3499 RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D, 3500 RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D, 3501 RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D}; 3502 if (unsigned Reg = State.AllocateReg(FPR64List)) { 3503 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3504 return false; 3505 } 3506 } 3507 3508 if (LocVT == MVT::i32 || LocVT == MVT::f32) { 3509 unsigned Offset4 = State.AllocateStack(4, Align(4)); 3510 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo)); 3511 return false; 3512 } 3513 3514 if (LocVT == MVT::i64 || LocVT == MVT::f64) { 3515 unsigned Offset5 = State.AllocateStack(8, Align(8)); 3516 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo)); 3517 return false; 3518 } 3519 3520 return true; // CC didn't match. 3521 } 3522 3523 static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, 3524 CCValAssign::LocInfo LocInfo, 3525 ISD::ArgFlagsTy ArgFlags, CCState &State) { 3526 3527 if (LocVT == MVT::i32 || LocVT == MVT::i64) { 3528 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim 3529 // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11 3530 static const MCPhysReg GPRList[] = { 3531 RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22, 3532 RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27}; 3533 if (unsigned Reg = State.AllocateReg(GPRList)) { 3534 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3535 return false; 3536 } 3537 } 3538 3539 if (LocVT == MVT::f32) { 3540 // Pass in STG registers: F1, ..., F6 3541 // fs0 ... 
fs5
3542 static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
3543 RISCV::F18_F, RISCV::F19_F,
3544 RISCV::F20_F, RISCV::F21_F};
3545 if (unsigned Reg = State.AllocateReg(FPR32List)) {
3546 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3547 return false;
3548 }
3549 }
3550
3551 if (LocVT == MVT::f64) {
3552 // Pass in STG registers: D1, ..., D6
3553 // fs6 ... fs11
3554 static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
3555 RISCV::F24_D, RISCV::F25_D,
3556 RISCV::F26_D, RISCV::F27_D};
3557 if (unsigned Reg = State.AllocateReg(FPR64List)) {
3558 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3559 return false;
3560 }
3561 }
3562
3563 report_fatal_error("No registers left in GHC calling convention");
3564 return true;
3565 }
3566
3567 // Transform physical registers into virtual registers.
3568 SDValue RISCVTargetLowering::LowerFormalArguments(
3569 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
3570 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
3571 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3572
3573 MachineFunction &MF = DAG.getMachineFunction();
3574
3575 switch (CallConv) {
3576 default:
3577 report_fatal_error("Unsupported calling convention");
3578 case CallingConv::C:
3579 case CallingConv::Fast:
3580 break;
3581 case CallingConv::GHC:
3582 if (!MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtF] ||
3583 !MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtD])
3584 report_fatal_error(
3585 "GHC calling convention requires the F and D instruction set extensions");
3586 }
3587
3588 const Function &Func = MF.getFunction();
3589 if (Func.hasFnAttribute("interrupt")) {
3590 if (!Func.arg_empty())
3591 report_fatal_error(
3592 "Functions with the interrupt attribute cannot have arguments!");
3593
3594 StringRef Kind =
3595 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
3596
3597 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
3598 report_fatal_error(
3599 "Function interrupt attribute argument not supported!");
3600 }
3601
3602 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3603 MVT XLenVT = Subtarget.getXLenVT();
3604 unsigned XLenInBytes = Subtarget.getXLen() / 8;
3605 // Used with varargs to accumulate store chains.
3606 std::vector<SDValue> OutChains;
3607
3608 // Assign locations to all of the incoming arguments.
3609 SmallVector<CCValAssign, 16> ArgLocs;
3610 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
3611
3612 if (CallConv == CallingConv::Fast)
3613 CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_FastCC);
3614 else if (CallConv == CallingConv::GHC)
3615 CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
3616 else
3617 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);
3618
3619 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3620 CCValAssign &VA = ArgLocs[i];
3621 SDValue ArgValue;
3622 // Passing f64 on RV32D with a soft float ABI must be handled as a special
3623 // case.
3624 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
3625 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
3626 else if (VA.isRegLoc())
3627 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
3628 else
3629 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
3630
3631 if (VA.getLocInfo() == CCValAssign::Indirect) {
3632 // If the original argument was split and passed by reference (e.g. i128
3633 // on RV32), we need to load all parts of it here (using the same
3634 // address).
3635 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
3636 MachinePointerInfo()));
3637 unsigned ArgIndex = Ins[i].OrigArgIndex;
3638 assert(Ins[i].PartOffset == 0);
3639 while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
3640 CCValAssign &PartVA = ArgLocs[i + 1];
3641 unsigned PartOffset = Ins[i + 1].PartOffset;
3642 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
3643 DAG.getIntPtrConstant(PartOffset, DL));
3644 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
3645 MachinePointerInfo()));
3646 ++i;
3647 }
3648 continue;
3649 }
3650 InVals.push_back(ArgValue);
3651 }
3652
3653 if (IsVarArg) {
3654 ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
3655 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
3656 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
3657 MachineFrameInfo &MFI = MF.getFrameInfo();
3658 MachineRegisterInfo &RegInfo = MF.getRegInfo();
3659 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
3660
3661 // Offset of the first variable argument from stack pointer, and size of
3662 // the vararg save area. For now, the varargs save area is either zero or
3663 // large enough to hold a0-a7.
3664 int VaArgOffset, VarArgsSaveSize;
3665
3666 // If all registers are allocated, then all varargs must be passed on the
3667 // stack and we don't need to save any argregs.
3668 if (ArgRegs.size() == Idx) {
3669 VaArgOffset = CCInfo.getNextStackOffset();
3670 VarArgsSaveSize = 0;
3671 } else {
3672 VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
3673 VaArgOffset = -VarArgsSaveSize;
3674 }
3675
3676 // Record the frame index of the first variable argument,
3677 // which is needed by VASTART.
3678 int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
3679 RVFI->setVarArgsFrameIndex(FI);
3680
3681 // If saving an odd number of registers then create an extra stack slot to
3682 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
3683 // offsets to even-numbered registers remain 2*XLEN-aligned.
3684 if (Idx % 2) {
3685 MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
3686 VarArgsSaveSize += XLenInBytes;
3687 }
3688
3689 // Copy the integer registers that may have been used for passing varargs
3690 // to the vararg save area.
3691 for (unsigned I = Idx; I < ArgRegs.size();
3692 ++I, VaArgOffset += XLenInBytes) {
3693 const Register Reg = RegInfo.createVirtualRegister(RC);
3694 RegInfo.addLiveIn(ArgRegs[I], Reg);
3695 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
3696 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
3697 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3698 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
3699 MachinePointerInfo::getFixedStack(MF, FI));
3700 cast<StoreSDNode>(Store.getNode())
3701 ->getMemOperand()
3702 ->setValue((Value *)nullptr);
3703 OutChains.push_back(Store);
3704 }
3705 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
3706 }
3707
3708 // All stores are grouped in one node to allow the matching between
3709 // the size of Ins and InVals. This only happens for vararg functions.
3710 if (!OutChains.empty()) {
3711 OutChains.push_back(Chain);
3712 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
3713 }
3714
3715 return Chain;
3716 }
3717
3718 /// isEligibleForTailCallOptimization - Check whether the call is eligible
3719 /// for tail call optimization.
3720 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization. 3721 bool RISCVTargetLowering::isEligibleForTailCallOptimization( 3722 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, 3723 const SmallVector<CCValAssign, 16> &ArgLocs) const { 3724 3725 auto &Callee = CLI.Callee; 3726 auto CalleeCC = CLI.CallConv; 3727 auto &Outs = CLI.Outs; 3728 auto &Caller = MF.getFunction(); 3729 auto CallerCC = Caller.getCallingConv(); 3730 3731 // Exception-handling functions need a special set of instructions to 3732 // indicate a return to the hardware. Tail-calling another function would 3733 // probably break this. 3734 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This 3735 // should be expanded as new function attributes are introduced. 3736 if (Caller.hasFnAttribute("interrupt")) 3737 return false; 3738 3739 // Do not tail call opt if the stack is used to pass parameters. 3740 if (CCInfo.getNextStackOffset() != 0) 3741 return false; 3742 3743 // Do not tail call opt if any parameters need to be passed indirectly. 3744 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are 3745 // passed indirectly. So the address of the value will be passed in a 3746 // register, or if not available, then the address is put on the stack. In 3747 // order to pass indirectly, space on the stack often needs to be allocated 3748 // in order to store the value. In this case the CCInfo.getNextStackOffset() 3749 // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs 3750 // are passed CCValAssign::Indirect. 3751 for (auto &VA : ArgLocs) 3752 if (VA.getLocInfo() == CCValAssign::Indirect) 3753 return false; 3754 3755 // Do not tail call opt if either caller or callee uses struct return 3756 // semantics. 3757 auto IsCallerStructRet = Caller.hasStructRetAttr(); 3758 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); 3759 if (IsCallerStructRet || IsCalleeStructRet) 3760 return false; 3761 3762 // Externally-defined functions with weak linkage should not be 3763 // tail-called. The behaviour of branch instructions in this situation (as 3764 // used for tail calls) is implementation-defined, so we cannot rely on the 3765 // linker replacing the tail call with a return. 3766 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 3767 const GlobalValue *GV = G->getGlobal(); 3768 if (GV->hasExternalWeakLinkage()) 3769 return false; 3770 } 3771 3772 // The callee has to preserve all registers the caller needs to preserve. 3773 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); 3774 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); 3775 if (CalleeCC != CallerCC) { 3776 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); 3777 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) 3778 return false; 3779 } 3780 3781 // Byval parameters hand the function a pointer directly into the stack area 3782 // we want to reuse during a tail call. Working around this *is* possible 3783 // but less efficient and uglier in LowerCall. 3784 for (auto &Arg : Outs) 3785 if (Arg.Flags.isByVal()) 3786 return false; 3787 3788 return true; 3789 } 3790 3791 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input 3792 // and output parameter nodes. 
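// Illustrative sketch of the chain built below for a simple non-tail call
// (operands elided; not an exact dump):
//   callseq_start -> CopyToReg a0, <arg> -> RISCVISD::CALL @callee
//     -> callseq_end -> CopyFromReg a0
// The argument-register copies are glued to the CALL node so that they
// remain live across it.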
3793 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, 3794 SmallVectorImpl<SDValue> &InVals) const { 3795 SelectionDAG &DAG = CLI.DAG; 3796 SDLoc &DL = CLI.DL; 3797 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 3798 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 3799 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 3800 SDValue Chain = CLI.Chain; 3801 SDValue Callee = CLI.Callee; 3802 bool &IsTailCall = CLI.IsTailCall; 3803 CallingConv::ID CallConv = CLI.CallConv; 3804 bool IsVarArg = CLI.IsVarArg; 3805 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3806 MVT XLenVT = Subtarget.getXLenVT(); 3807 3808 MachineFunction &MF = DAG.getMachineFunction(); 3809 3810 // Analyze the operands of the call, assigning locations to each operand. 3811 SmallVector<CCValAssign, 16> ArgLocs; 3812 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 3813 3814 if (CallConv == CallingConv::Fast) 3815 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC); 3816 else if (CallConv == CallingConv::GHC) 3817 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC); 3818 else 3819 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI); 3820 3821 // Check if it's really possible to do a tail call. 3822 if (IsTailCall) 3823 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs); 3824 3825 if (IsTailCall) 3826 ++NumTailCalls; 3827 else if (CLI.CB && CLI.CB->isMustTailCall()) 3828 report_fatal_error("failed to perform tail call elimination on a call " 3829 "site marked musttail"); 3830 3831 // Get a count of how many bytes are to be pushed on the stack. 3832 unsigned NumBytes = ArgCCInfo.getNextStackOffset(); 3833 3834 // Create local copies for byval args 3835 SmallVector<SDValue, 8> ByValArgs; 3836 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 3837 ISD::ArgFlagsTy Flags = Outs[i].Flags; 3838 if (!Flags.isByVal()) 3839 continue; 3840 3841 SDValue Arg = OutVals[i]; 3842 unsigned Size = Flags.getByValSize(); 3843 Align Alignment = Flags.getNonZeroByValAlign(); 3844 3845 int FI = 3846 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false); 3847 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 3848 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT); 3849 3850 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment, 3851 /*IsVolatile=*/false, 3852 /*AlwaysInline=*/false, IsTailCall, 3853 MachinePointerInfo(), MachinePointerInfo()); 3854 ByValArgs.push_back(FIPtr); 3855 } 3856 3857 if (!IsTailCall) 3858 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); 3859 3860 // Copy argument values to their designated locations. 3861 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass; 3862 SmallVector<SDValue, 8> MemOpChains; 3863 SDValue StackPtr; 3864 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) { 3865 CCValAssign &VA = ArgLocs[i]; 3866 SDValue ArgValue = OutVals[i]; 3867 ISD::ArgFlagsTy Flags = Outs[i].Flags; 3868 3869 // Handle passing f64 on RV32D with a soft float ABI as a special case. 3870 bool IsF64OnRV32DSoftABI = 3871 VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64; 3872 if (IsF64OnRV32DSoftABI && VA.isRegLoc()) { 3873 SDValue SplitF64 = DAG.getNode( 3874 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue); 3875 SDValue Lo = SplitF64.getValue(0); 3876 SDValue Hi = SplitF64.getValue(1); 3877 3878 Register RegLo = VA.getLocReg(); 3879 RegsToPass.push_back(std::make_pair(RegLo, Lo)); 3880 3881 if (RegLo == RISCV::X17) { 3882 // Second half of f64 is passed on the stack. 
3883 // Work out the address of the stack slot. 3884 if (!StackPtr.getNode()) 3885 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); 3886 // Emit the store. 3887 MemOpChains.push_back( 3888 DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo())); 3889 } else { 3890 // Second half of f64 is passed in another GPR. 3891 assert(RegLo < RISCV::X31 && "Invalid register pair"); 3892 Register RegHigh = RegLo + 1; 3893 RegsToPass.push_back(std::make_pair(RegHigh, Hi)); 3894 } 3895 continue; 3896 } 3897 3898 // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way 3899 // as any other MemLoc. 3900 3901 // Promote the value if needed. 3902 // For now, only handle fully promoted and indirect arguments. 3903 if (VA.getLocInfo() == CCValAssign::Indirect) { 3904 // Store the argument in a stack slot and pass its address. 3905 SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT); 3906 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); 3907 MemOpChains.push_back( 3908 DAG.getStore(Chain, DL, ArgValue, SpillSlot, 3909 MachinePointerInfo::getFixedStack(MF, FI))); 3910 // If the original argument was split (e.g. i128), we need 3911 // to store all parts of it here (and pass just one address). 3912 unsigned ArgIndex = Outs[i].OrigArgIndex; 3913 assert(Outs[i].PartOffset == 0); 3914 while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) { 3915 SDValue PartValue = OutVals[i + 1]; 3916 unsigned PartOffset = Outs[i + 1].PartOffset; 3917 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, 3918 DAG.getIntPtrConstant(PartOffset, DL)); 3919 MemOpChains.push_back( 3920 DAG.getStore(Chain, DL, PartValue, Address, 3921 MachinePointerInfo::getFixedStack(MF, FI))); 3922 ++i; 3923 } 3924 ArgValue = SpillSlot; 3925 } else { 3926 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL); 3927 } 3928 3929 // Use local copy if it is a byval arg. 3930 if (Flags.isByVal()) 3931 ArgValue = ByValArgs[j++]; 3932 3933 if (VA.isRegLoc()) { 3934 // Queue up the argument copies and emit them at the end. 3935 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); 3936 } else { 3937 assert(VA.isMemLoc() && "Argument not register or memory"); 3938 assert(!IsTailCall && "Tail call not allowed if stack is used " 3939 "for passing parameters"); 3940 3941 // Work out the address of the stack slot. 3942 if (!StackPtr.getNode()) 3943 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); 3944 SDValue Address = 3945 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, 3946 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL)); 3947 3948 // Emit the store. 3949 MemOpChains.push_back( 3950 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo())); 3951 } 3952 } 3953 3954 // Join the stores, which are independent of one another. 3955 if (!MemOpChains.empty()) 3956 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); 3957 3958 SDValue Glue; 3959 3960 // Build a sequence of copy-to-reg nodes, chained and glued together. 3961 for (auto &Reg : RegsToPass) { 3962 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue); 3963 Glue = Chain.getValue(1); 3964 } 3965 3966 // Validate that none of the argument registers have been marked as 3967 // reserved, if so report an error. Do the same for the return address if this 3968 // is not a tailcall. 
3969 validateCCReservedRegs(RegsToPass, MF); 3970 if (!IsTailCall && 3971 MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1)) 3972 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 3973 MF.getFunction(), 3974 "Return address register required, but has been reserved."}); 3975 3976 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a 3977 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't 3978 // split it and then direct call can be matched by PseudoCALL. 3979 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) { 3980 const GlobalValue *GV = S->getGlobal(); 3981 3982 unsigned OpFlags = RISCVII::MO_CALL; 3983 if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)) 3984 OpFlags = RISCVII::MO_PLT; 3985 3986 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags); 3987 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 3988 unsigned OpFlags = RISCVII::MO_CALL; 3989 3990 if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(), 3991 nullptr)) 3992 OpFlags = RISCVII::MO_PLT; 3993 3994 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags); 3995 } 3996 3997 // The first call operand is the chain and the second is the target address. 3998 SmallVector<SDValue, 8> Ops; 3999 Ops.push_back(Chain); 4000 Ops.push_back(Callee); 4001 4002 // Add argument registers to the end of the list so that they are 4003 // known live into the call. 4004 for (auto &Reg : RegsToPass) 4005 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType())); 4006 4007 if (!IsTailCall) { 4008 // Add a register mask operand representing the call-preserved registers. 4009 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 4010 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); 4011 assert(Mask && "Missing call preserved mask for calling convention"); 4012 Ops.push_back(DAG.getRegisterMask(Mask)); 4013 } 4014 4015 // Glue the call to the argument copies, if any. 4016 if (Glue.getNode()) 4017 Ops.push_back(Glue); 4018 4019 // Emit the call. 4020 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 4021 4022 if (IsTailCall) { 4023 MF.getFrameInfo().setHasTailCall(); 4024 return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops); 4025 } 4026 4027 Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops); 4028 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); 4029 Glue = Chain.getValue(1); 4030 4031 // Mark the end of the call, which is glued to the call itself. 4032 Chain = DAG.getCALLSEQ_END(Chain, 4033 DAG.getConstant(NumBytes, DL, PtrVT, true), 4034 DAG.getConstant(0, DL, PtrVT, true), 4035 Glue, DL); 4036 Glue = Chain.getValue(1); 4037 4038 // Assign locations to each value returned by this call. 4039 SmallVector<CCValAssign, 16> RVLocs; 4040 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); 4041 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true); 4042 4043 // Copy all of the result registers out of their specified physreg. 
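// For example, when an f64 is returned in GPRs on RV32 (no hard-float 'd'
// ABI), the value comes back split across a0 and a1; the loop below reads
// both registers and reassembles them with RISCVISD::BuildPairF64.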
4044 for (auto &VA : RVLocs) { 4045 // Copy the value out 4046 SDValue RetValue = 4047 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue); 4048 // Glue the RetValue to the end of the call sequence 4049 Chain = RetValue.getValue(1); 4050 Glue = RetValue.getValue(2); 4051 4052 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { 4053 assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment"); 4054 SDValue RetValue2 = 4055 DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue); 4056 Chain = RetValue2.getValue(1); 4057 Glue = RetValue2.getValue(2); 4058 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue, 4059 RetValue2); 4060 } 4061 4062 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL); 4063 4064 InVals.push_back(RetValue); 4065 } 4066 4067 return Chain; 4068 } 4069 4070 bool RISCVTargetLowering::CanLowerReturn( 4071 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, 4072 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { 4073 SmallVector<CCValAssign, 16> RVLocs; 4074 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); 4075 4076 Optional<unsigned> FirstMaskArgument; 4077 if (Subtarget.hasStdExtV()) 4078 FirstMaskArgument = preAssignMask(Outs); 4079 4080 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 4081 MVT VT = Outs[i].VT; 4082 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 4083 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 4084 if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full, 4085 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr, 4086 *this, FirstMaskArgument)) 4087 return false; 4088 } 4089 return true; 4090 } 4091 4092 SDValue 4093 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, 4094 bool IsVarArg, 4095 const SmallVectorImpl<ISD::OutputArg> &Outs, 4096 const SmallVectorImpl<SDValue> &OutVals, 4097 const SDLoc &DL, SelectionDAG &DAG) const { 4098 const MachineFunction &MF = DAG.getMachineFunction(); 4099 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>(); 4100 4101 // Stores the assignment of the return value to a location. 4102 SmallVector<CCValAssign, 16> RVLocs; 4103 4104 // Info about the registers and stack slot. 4105 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, 4106 *DAG.getContext()); 4107 4108 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true, 4109 nullptr); 4110 4111 if (CallConv == CallingConv::GHC && !RVLocs.empty()) 4112 report_fatal_error("GHC functions return void only"); 4113 4114 SDValue Glue; 4115 SmallVector<SDValue, 4> RetOps(1, Chain); 4116 4117 // Copy the result values into the output registers. 4118 for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) { 4119 SDValue Val = OutVals[i]; 4120 CCValAssign &VA = RVLocs[i]; 4121 assert(VA.isRegLoc() && "Can only return in registers!"); 4122 4123 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { 4124 // Handle returning f64 on RV32D with a soft float ABI. 
4125 assert(VA.isRegLoc() && "Expected return via registers"); 4126 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL, 4127 DAG.getVTList(MVT::i32, MVT::i32), Val); 4128 SDValue Lo = SplitF64.getValue(0); 4129 SDValue Hi = SplitF64.getValue(1); 4130 Register RegLo = VA.getLocReg(); 4131 assert(RegLo < RISCV::X31 && "Invalid register pair"); 4132 Register RegHi = RegLo + 1; 4133 4134 if (STI.isRegisterReservedByUser(RegLo) || 4135 STI.isRegisterReservedByUser(RegHi)) 4136 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 4137 MF.getFunction(), 4138 "Return value register required, but has been reserved."}); 4139 4140 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue); 4141 Glue = Chain.getValue(1); 4142 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32)); 4143 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue); 4144 Glue = Chain.getValue(1); 4145 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32)); 4146 } else { 4147 // Handle a 'normal' return. 4148 Val = convertValVTToLocVT(DAG, Val, VA, DL); 4149 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue); 4150 4151 if (STI.isRegisterReservedByUser(VA.getLocReg())) 4152 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 4153 MF.getFunction(), 4154 "Return value register required, but has been reserved."}); 4155 4156 // Guarantee that all emitted copies are stuck together. 4157 Glue = Chain.getValue(1); 4158 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 4159 } 4160 } 4161 4162 RetOps[0] = Chain; // Update chain. 4163 4164 // Add the glue node if we have it. 4165 if (Glue.getNode()) { 4166 RetOps.push_back(Glue); 4167 } 4168 4169 // Interrupt service routines use different return instructions. 4170 const Function &Func = DAG.getMachineFunction().getFunction(); 4171 if (Func.hasFnAttribute("interrupt")) { 4172 if (!Func.getReturnType()->isVoidTy()) 4173 report_fatal_error( 4174 "Functions with the interrupt attribute must have void return type!"); 4175 4176 MachineFunction &MF = DAG.getMachineFunction(); 4177 StringRef Kind = 4178 MF.getFunction().getFnAttribute("interrupt").getValueAsString(); 4179 4180 unsigned RetOpc; 4181 if (Kind == "user") 4182 RetOpc = RISCVISD::URET_FLAG; 4183 else if (Kind == "supervisor") 4184 RetOpc = RISCVISD::SRET_FLAG; 4185 else 4186 RetOpc = RISCVISD::MRET_FLAG; 4187 4188 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps); 4189 } 4190 4191 return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps); 4192 } 4193 4194 void RISCVTargetLowering::validateCCReservedRegs( 4195 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs, 4196 MachineFunction &MF) const { 4197 const Function &F = MF.getFunction(); 4198 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>(); 4199 4200 if (llvm::any_of(Regs, [&STI](auto Reg) { 4201 return STI.isRegisterReservedByUser(Reg.first); 4202 })) 4203 F.getContext().diagnose(DiagnosticInfoUnsupported{ 4204 F, "Argument register required, but has been reserved."}); 4205 } 4206 4207 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { 4208 return CI->isTailCall(); 4209 } 4210 4211 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { 4212 #define NODE_NAME_CASE(NODE) \ 4213 case RISCVISD::NODE: \ 4214 return "RISCVISD::" #NODE; 4215 // clang-format off 4216 switch ((RISCVISD::NodeType)Opcode) { 4217 case RISCVISD::FIRST_NUMBER: 4218 break; 4219 NODE_NAME_CASE(RET_FLAG) 4220 NODE_NAME_CASE(URET_FLAG) 4221 NODE_NAME_CASE(SRET_FLAG) 4222 
NODE_NAME_CASE(MRET_FLAG) 4223 NODE_NAME_CASE(CALL) 4224 NODE_NAME_CASE(SELECT_CC) 4225 NODE_NAME_CASE(BuildPairF64) 4226 NODE_NAME_CASE(SplitF64) 4227 NODE_NAME_CASE(TAIL) 4228 NODE_NAME_CASE(SLLW) 4229 NODE_NAME_CASE(SRAW) 4230 NODE_NAME_CASE(SRLW) 4231 NODE_NAME_CASE(DIVW) 4232 NODE_NAME_CASE(DIVUW) 4233 NODE_NAME_CASE(REMUW) 4234 NODE_NAME_CASE(ROLW) 4235 NODE_NAME_CASE(RORW) 4236 NODE_NAME_CASE(FSLW) 4237 NODE_NAME_CASE(FSRW) 4238 NODE_NAME_CASE(FSL) 4239 NODE_NAME_CASE(FSR) 4240 NODE_NAME_CASE(FMV_H_X) 4241 NODE_NAME_CASE(FMV_X_ANYEXTH) 4242 NODE_NAME_CASE(FMV_W_X_RV64) 4243 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64) 4244 NODE_NAME_CASE(READ_CYCLE_WIDE) 4245 NODE_NAME_CASE(GREVI) 4246 NODE_NAME_CASE(GREVIW) 4247 NODE_NAME_CASE(GORCI) 4248 NODE_NAME_CASE(GORCIW) 4249 NODE_NAME_CASE(VMV_X_S) 4250 NODE_NAME_CASE(SPLAT_VECTOR_I64) 4251 NODE_NAME_CASE(READ_VLENB) 4252 NODE_NAME_CASE(TRUNCATE_VECTOR) 4253 NODE_NAME_CASE(VLEFF) 4254 NODE_NAME_CASE(VLEFF_MASK) 4255 NODE_NAME_CASE(VSLIDEUP) 4256 NODE_NAME_CASE(VSLIDEDOWN) 4257 NODE_NAME_CASE(VID) 4258 NODE_NAME_CASE(VFNCVT_ROD) 4259 NODE_NAME_CASE(VECREDUCE_ADD) 4260 NODE_NAME_CASE(VECREDUCE_UMAX) 4261 NODE_NAME_CASE(VECREDUCE_SMAX) 4262 NODE_NAME_CASE(VECREDUCE_UMIN) 4263 NODE_NAME_CASE(VECREDUCE_SMIN) 4264 NODE_NAME_CASE(VECREDUCE_AND) 4265 NODE_NAME_CASE(VECREDUCE_OR) 4266 NODE_NAME_CASE(VECREDUCE_XOR) 4267 } 4268 // clang-format on 4269 return nullptr; 4270 #undef NODE_NAME_CASE 4271 } 4272 4273 /// getConstraintType - Given a constraint letter, return the type of 4274 /// constraint it is for this target. 4275 RISCVTargetLowering::ConstraintType 4276 RISCVTargetLowering::getConstraintType(StringRef Constraint) const { 4277 if (Constraint.size() == 1) { 4278 switch (Constraint[0]) { 4279 default: 4280 break; 4281 case 'f': 4282 return C_RegisterClass; 4283 case 'I': 4284 case 'J': 4285 case 'K': 4286 return C_Immediate; 4287 case 'A': 4288 return C_Memory; 4289 } 4290 } 4291 return TargetLowering::getConstraintType(Constraint); 4292 } 4293 4294 std::pair<unsigned, const TargetRegisterClass *> 4295 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, 4296 StringRef Constraint, 4297 MVT VT) const { 4298 // First, see if this is a constraint that directly corresponds to a 4299 // RISCV register class. 4300 if (Constraint.size() == 1) { 4301 switch (Constraint[0]) { 4302 case 'r': 4303 return std::make_pair(0U, &RISCV::GPRRegClass); 4304 case 'f': 4305 if (Subtarget.hasStdExtZfh() && VT == MVT::f16) 4306 return std::make_pair(0U, &RISCV::FPR16RegClass); 4307 if (Subtarget.hasStdExtF() && VT == MVT::f32) 4308 return std::make_pair(0U, &RISCV::FPR32RegClass); 4309 if (Subtarget.hasStdExtD() && VT == MVT::f64) 4310 return std::make_pair(0U, &RISCV::FPR64RegClass); 4311 break; 4312 default: 4313 break; 4314 } 4315 } 4316 4317 // Clang will correctly decode the usage of register name aliases into their 4318 // official names. However, other frontends like `rustc` do not. This allows 4319 // users of these frontends to use the ABI names for registers in LLVM-style 4320 // register constraints. 
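// For example (illustrative only), a frontend may emit the constraint
// "{a0}"; the switch below maps that ABI name to RISCV::X10 instead of
// rejecting it.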
4321 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower()) 4322 .Case("{zero}", RISCV::X0) 4323 .Case("{ra}", RISCV::X1) 4324 .Case("{sp}", RISCV::X2) 4325 .Case("{gp}", RISCV::X3) 4326 .Case("{tp}", RISCV::X4) 4327 .Case("{t0}", RISCV::X5) 4328 .Case("{t1}", RISCV::X6) 4329 .Case("{t2}", RISCV::X7) 4330 .Cases("{s0}", "{fp}", RISCV::X8) 4331 .Case("{s1}", RISCV::X9) 4332 .Case("{a0}", RISCV::X10) 4333 .Case("{a1}", RISCV::X11) 4334 .Case("{a2}", RISCV::X12) 4335 .Case("{a3}", RISCV::X13) 4336 .Case("{a4}", RISCV::X14) 4337 .Case("{a5}", RISCV::X15) 4338 .Case("{a6}", RISCV::X16) 4339 .Case("{a7}", RISCV::X17) 4340 .Case("{s2}", RISCV::X18) 4341 .Case("{s3}", RISCV::X19) 4342 .Case("{s4}", RISCV::X20) 4343 .Case("{s5}", RISCV::X21) 4344 .Case("{s6}", RISCV::X22) 4345 .Case("{s7}", RISCV::X23) 4346 .Case("{s8}", RISCV::X24) 4347 .Case("{s9}", RISCV::X25) 4348 .Case("{s10}", RISCV::X26) 4349 .Case("{s11}", RISCV::X27) 4350 .Case("{t3}", RISCV::X28) 4351 .Case("{t4}", RISCV::X29) 4352 .Case("{t5}", RISCV::X30) 4353 .Case("{t6}", RISCV::X31) 4354 .Default(RISCV::NoRegister); 4355 if (XRegFromAlias != RISCV::NoRegister) 4356 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass); 4357 4358 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the 4359 // TableGen record rather than the AsmName to choose registers for InlineAsm 4360 // constraints, plus we want to match those names to the widest floating point 4361 // register type available, manually select floating point registers here. 4362 // 4363 // The second case is the ABI name of the register, so that frontends can also 4364 // use the ABI names in register constraint lists. 4365 if (Subtarget.hasStdExtF()) { 4366 unsigned FReg = StringSwitch<unsigned>(Constraint.lower()) 4367 .Cases("{f0}", "{ft0}", RISCV::F0_F) 4368 .Cases("{f1}", "{ft1}", RISCV::F1_F) 4369 .Cases("{f2}", "{ft2}", RISCV::F2_F) 4370 .Cases("{f3}", "{ft3}", RISCV::F3_F) 4371 .Cases("{f4}", "{ft4}", RISCV::F4_F) 4372 .Cases("{f5}", "{ft5}", RISCV::F5_F) 4373 .Cases("{f6}", "{ft6}", RISCV::F6_F) 4374 .Cases("{f7}", "{ft7}", RISCV::F7_F) 4375 .Cases("{f8}", "{fs0}", RISCV::F8_F) 4376 .Cases("{f9}", "{fs1}", RISCV::F9_F) 4377 .Cases("{f10}", "{fa0}", RISCV::F10_F) 4378 .Cases("{f11}", "{fa1}", RISCV::F11_F) 4379 .Cases("{f12}", "{fa2}", RISCV::F12_F) 4380 .Cases("{f13}", "{fa3}", RISCV::F13_F) 4381 .Cases("{f14}", "{fa4}", RISCV::F14_F) 4382 .Cases("{f15}", "{fa5}", RISCV::F15_F) 4383 .Cases("{f16}", "{fa6}", RISCV::F16_F) 4384 .Cases("{f17}", "{fa7}", RISCV::F17_F) 4385 .Cases("{f18}", "{fs2}", RISCV::F18_F) 4386 .Cases("{f19}", "{fs3}", RISCV::F19_F) 4387 .Cases("{f20}", "{fs4}", RISCV::F20_F) 4388 .Cases("{f21}", "{fs5}", RISCV::F21_F) 4389 .Cases("{f22}", "{fs6}", RISCV::F22_F) 4390 .Cases("{f23}", "{fs7}", RISCV::F23_F) 4391 .Cases("{f24}", "{fs8}", RISCV::F24_F) 4392 .Cases("{f25}", "{fs9}", RISCV::F25_F) 4393 .Cases("{f26}", "{fs10}", RISCV::F26_F) 4394 .Cases("{f27}", "{fs11}", RISCV::F27_F) 4395 .Cases("{f28}", "{ft8}", RISCV::F28_F) 4396 .Cases("{f29}", "{ft9}", RISCV::F29_F) 4397 .Cases("{f30}", "{ft10}", RISCV::F30_F) 4398 .Cases("{f31}", "{ft11}", RISCV::F31_F) 4399 .Default(RISCV::NoRegister); 4400 if (FReg != RISCV::NoRegister) { 4401 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg"); 4402 if (Subtarget.hasStdExtD()) { 4403 unsigned RegNo = FReg - RISCV::F0_F; 4404 unsigned DReg = RISCV::F0_D + RegNo; 4405 return std::make_pair(DReg, &RISCV::FPR64RegClass); 4406 } 4407 return std::make_pair(FReg, 
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

unsigned
RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
  // Currently only support length 1 constraints.
  if (ConstraintCode.size() == 1) {
    switch (ConstraintCode[0]) {
    case 'A':
      return InlineAsm::Constraint_A;
    default:
      break;
    }
  }

  return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}

void RISCVTargetLowering::LowerAsmOperandForConstraint(
    SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
    SelectionDAG &DAG) const {
  // Currently only support length 1 constraints.
  if (Constraint.length() == 1) {
    switch (Constraint[0]) {
    case 'I':
      // Validate & create a 12-bit signed immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getSExtValue();
        if (isInt<12>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
      }
      return;
    case 'J':
      // Validate & create an integer zero operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (C->getZExtValue() == 0)
          Ops.push_back(
              DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
      return;
    case 'K':
      // Validate & create a 5-bit unsigned immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getZExtValue();
        if (isUInt<5>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
      }
      return;
    default:
      break;
    }
  }
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
                                                   Instruction *Inst,
                                                   AtomicOrdering Ord) const {
  if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
    return Builder.CreateFence(Ord);
  if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Release);
  return nullptr;
}

Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
                                                    Instruction *Inst,
                                                    AtomicOrdering Ord) const {
  if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Acquire);
  return nullptr;
}

TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as
  // floating point operations can't be used in an lr/sc sequence without
  // breaking the forward-progress guarantee.
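  //
  // Illustrative IR (a hypothetical example, not taken from a test):
  // requesting CmpXChg expansion here means AtomicExpandPass rewrites
  //
  //   %old = atomicrmw fadd float* %p, float %v seq_cst
  //
  // into a load / fadd / cmpxchg retry loop rather than into an LR/SC loop
  // that would contain the floating-point add.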
  if (AI->isFloatingPointOperation())
    return AtomicExpansionKind::CmpXChg;

  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

static Intrinsic::ID
getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
  if (XLen == 32) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i32;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i32;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i32;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i32;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i32;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i32;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i32;
    }
  }

  if (XLen == 64) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i64;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i64;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i64;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i64;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i64;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i64;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i64;
    }
  }

  llvm_unreachable("Unexpected XLen\n");
}

Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  Value *Ordering =
      Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
  Type *Tys[] = {AlignedAddr->getType()};
  Function *LrwOpScwLoop = Intrinsic::getDeclaration(
      AI->getModule(),
      getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);

  if (XLen == 64) {
    Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
  }

  Value *Result;

  // Must pass the shift amount needed to sign extend the loaded value prior
  // to performing a signed comparison for min/max. ShiftAmt is the number of
  // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
  // is the number of bits to left+right shift the value in order to
  // sign-extend.
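  //
  // Worked example (hypothetical values): with XLen == 32, an i8 operand at
  // byte offset 2 of its aligned word has ShiftAmt == 16 and ValWidth == 8,
  // so SextShamt == 32 - 16 - 8 == 8. The emitted LR/SC loop can then shift
  // the masked field left by 8 and arithmetic-shift it right by 8 to
  // propagate the field's sign bit before the signed comparison.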
  if (AI->getOperation() == AtomicRMWInst::Min ||
      AI->getOperation() == AtomicRMWInst::Max) {
    const DataLayout &DL = AI->getModule()->getDataLayout();
    unsigned ValWidth =
        DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
    Value *SextShamt =
        Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
    Result = Builder.CreateCall(LrwOpScwLoop,
                                {AlignedAddr, Incr, Mask, SextShamt, Ordering});
  } else {
    Result =
        Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
  }

  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}

TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
    AtomicCmpXchgInst *CI) const {
  unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
    IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
  Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
  if (XLen == 64) {
    CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
    NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
  }
  Type *Tys[] = {AlignedAddr->getType()};
  Function *MaskedCmpXchg =
      Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
  Value *Result = Builder.CreateCall(
      MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}

bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                                     EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f16:
    return Subtarget.hasStdExtZfh();
  case MVT::f32:
    return Subtarget.hasStdExtF();
  case MVT::f64:
    return Subtarget.hasStdExtD();
  default:
    break;
  }

  return false;
}

Register RISCVTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X10;
}

Register RISCVTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X11;
}

bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
  // Return false to suppress the unnecessary extension when a libcall
  // argument or return value is of f32 type under the LP64 ABI.
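  //
  // For example (illustrative only): under the plain LP64 ABI, an f32 value
  // handed to a soft-float libcall such as __addsf3 or __gtsf2 is left
  // unextended here; all other cases keep the default behaviour of
  // extending.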
  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32))
    return false;

  return true;
}

bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type,
                                                        bool IsSigned) const {
  if (Subtarget.is64Bit() && Type == MVT::i32)
    return true;

  return IsSigned;
}

bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
                                                 SDValue C) const {
  // Check integral scalar types.
  if (VT.isScalarInteger()) {
    // Omit the optimization if the subtarget has the M extension and the data
    // size exceeds XLen.
    if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen())
      return false;
    if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
      // Break the MUL into a single SLLI and an ADD/SUB, e.g. x*9 becomes
      // (x << 3) + x and x*7 becomes (x << 3) - x.
      const APInt &Imm = ConstNode->getAPIntValue();
      if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
          (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
        return true;
      // Omit the following optimization if the subtarget has the M extension
      // and the data size >= XLen.
      if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen())
        return false;
      // Break the MUL into two SLLI instructions and an ADD/SUB, if Imm needs
      // a pair of LUI/ADDI.
      if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) {
        APInt ImmS = Imm.ashr(Imm.countTrailingZeros());
        if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
            (1 - ImmS).isPowerOf2())
          return true;
      }
    }
  }

  return false;
}

#define GET_REGISTER_MATCHER
#include "RISCVGenAsmMatcher.inc"

Register
RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                       const MachineFunction &MF) const {
  Register Reg = MatchRegisterAltName(RegName);
  if (Reg == RISCV::NoRegister)
    Reg = MatchRegisterName(RegName);
  if (Reg == RISCV::NoRegister)
    report_fatal_error(
        Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
  BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
  if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
    report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
                             StringRef(RegName) + "\"."));
  return Reg;
}

namespace llvm {
namespace RISCVVIntrinsicsTable {

#define GET_RISCVVIntrinsicsTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // namespace RISCVVIntrinsicsTable

namespace RISCVZvlssegTable {

#define GET_RISCVZvlssegTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // namespace RISCVZvlssegTable
} // namespace llvm