//===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISCV uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "RISCVISelLowering.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVRegisterInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-lower"

STATISTIC(NumTailCalls, "Number of tail calls");

RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                         const RISCVSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  if (Subtarget.isRV32E())
    report_fatal_error("Codegen not yet implemented for RV32E");

  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");

  if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
      !Subtarget.hasStdExtF()) {
    errs() << "Hard-float 'f' ABI can't be used for a target that "
              "doesn't support the F instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
             !Subtarget.hasStdExtD()) {
    errs() << "Hard-float 'd' ABI can't be used for a target that "
              "doesn't support the D instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  }

  switch (ABI) {
  default:
    report_fatal_error("Don't know how to lower this ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64:
  case RISCVABI::ABI_LP64F:
  case RISCVABI::ABI_LP64D:
    break;
  }

  MVT XLenVT = Subtarget.getXLenVT();

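  // Note: XLenVT is MVT::i32 on RV32 and MVT::i64 on RV64, so everything
  // keyed off XLenVT below adapts automatically to the base ISA width.
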
  // Set up the register classes.
  addRegisterClass(XLenVT, &RISCV::GPRRegClass);

  if (Subtarget.hasStdExtZfh())
    addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
  if (Subtarget.hasStdExtF())
    addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
  if (Subtarget.hasStdExtD())
    addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);

  static const MVT::SimpleValueType BoolVecVTs[] = {
      MVT::nxv1i1,  MVT::nxv2i1,  MVT::nxv4i1, MVT::nxv8i1,
      MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
  static const MVT::SimpleValueType IntVecVTs[] = {
      MVT::nxv1i8,  MVT::nxv2i8,   MVT::nxv4i8,   MVT::nxv8i8,  MVT::nxv16i8,
      MVT::nxv32i8, MVT::nxv64i8,  MVT::nxv1i16,  MVT::nxv2i16, MVT::nxv4i16,
      MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
      MVT::nxv4i32, MVT::nxv8i32,  MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
      MVT::nxv4i64, MVT::nxv8i64};
  static const MVT::SimpleValueType F16VecVTs[] = {
      MVT::nxv1f16, MVT::nxv2f16,  MVT::nxv4f16,
      MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
  static const MVT::SimpleValueType F32VecVTs[] = {
      MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
  static const MVT::SimpleValueType F64VecVTs[] = {
      MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};

  if (Subtarget.hasStdExtV()) {
    auto addRegClassForRVV = [this](MVT VT) {
      unsigned Size = VT.getSizeInBits().getKnownMinValue();
      assert(Size <= 512 && isPowerOf2_32(Size));
      const TargetRegisterClass *RC;
      if (Size <= 64)
        RC = &RISCV::VRRegClass;
      else if (Size == 128)
        RC = &RISCV::VRM2RegClass;
      else if (Size == 256)
        RC = &RISCV::VRM4RegClass;
      else
        RC = &RISCV::VRM8RegClass;

      addRegisterClass(VT, RC);
    };

    for (MVT VT : BoolVecVTs)
      addRegClassForRVV(VT);
    for (MVT VT : IntVecVTs)
      addRegClassForRVV(VT);

    if (Subtarget.hasStdExtZfh())
      for (MVT VT : F16VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasStdExtF())
      for (MVT VT : F32VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasStdExtD())
      for (MVT VT : F64VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.useRVVForFixedLengthVectors()) {
      auto addRegClassForFixedVectors = [this](MVT VT) {
        unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
        const TargetRegisterClass *RC;
        if (LMul == 1)
          RC = &RISCV::VRRegClass;
        else if (LMul == 2)
          RC = &RISCV::VRM2RegClass;
        else if (LMul == 4)
          RC = &RISCV::VRM4RegClass;
        else if (LMul == 8)
          RC = &RISCV::VRM8RegClass;
        else
          llvm_unreachable("Unexpected LMul!");

        addRegisterClass(VT, RC);
      };
      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);

      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);
    }
  }

  // Compute derived properties from the register classes.
  computeRegisterProperties(STI.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(RISCV::X2);

  for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
    setLoadExtAction(N, XLenVT, MVT::i1, Promote);

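  // The loop above promotes any/sign/zero-extending i1 loads, so an i1 value
  // loaded from memory is always widened to XLenVT before it is used.
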
  // TODO: add all necessary setOperationAction calls.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, XLenVT, Expand);
  setOperationAction(ISD::SELECT_CC, XLenVT, Expand);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  if (!Subtarget.hasStdExtZbb()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  }

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::ADD, MVT::i32, Custom);
    setOperationAction(ISD::SUB, MVT::i32, Custom);
    setOperationAction(ISD::SHL, MVT::i32, Custom);
    setOperationAction(ISD::SRA, MVT::i32, Custom);
    setOperationAction(ISD::SRL, MVT::i32, Custom);
  }

  if (!Subtarget.hasStdExtM()) {
    setOperationAction(ISD::MUL, XLenVT, Expand);
    setOperationAction(ISD::MULHS, XLenVT, Expand);
    setOperationAction(ISD::MULHU, XLenVT, Expand);
    setOperationAction(ISD::SDIV, XLenVT, Expand);
    setOperationAction(ISD::UDIV, XLenVT, Expand);
    setOperationAction(ISD::SREM, XLenVT, Expand);
    setOperationAction(ISD::UREM, XLenVT, Expand);
  }

  if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) {
    setOperationAction(ISD::MUL, MVT::i32, Custom);

    setOperationAction(ISD::SDIV, MVT::i8, Custom);
    setOperationAction(ISD::UDIV, MVT::i8, Custom);
    setOperationAction(ISD::UREM, MVT::i8, Custom);
    setOperationAction(ISD::SDIV, MVT::i16, Custom);
    setOperationAction(ISD::UDIV, MVT::i16, Custom);
    setOperationAction(ISD::UREM, MVT::i16, Custom);
    setOperationAction(ISD::SDIV, MVT::i32, Custom);
    setOperationAction(ISD::UDIV, MVT::i32, Custom);
    setOperationAction(ISD::UREM, MVT::i32, Custom);
  }

  setOperationAction(ISD::SDIVREM, XLenVT, Expand);
  setOperationAction(ISD::UDIVREM, XLenVT, Expand);
  setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
  setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);

  setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
  setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
  setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);

  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::ROTL, MVT::i32, Custom);
      setOperationAction(ISD::ROTR, MVT::i32, Custom);
    }
  } else {
    setOperationAction(ISD::ROTL, XLenVT, Expand);
    setOperationAction(ISD::ROTR, XLenVT, Expand);
  }

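  // With Zbb/Zbp on RV64 the i32 rotates above are custom-lowered so they can
  // be selected as the 32-bit *W rotate forms; without either extension,
  // rotates are expanded to shift-and-or sequences.
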
  if (Subtarget.hasStdExtZbp()) {
    // Custom lower bswap/bitreverse so we can convert them to GREVI to enable
    // more combining.
    setOperationAction(ISD::BITREVERSE, XLenVT, Custom);
    setOperationAction(ISD::BSWAP, XLenVT, Custom);

    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
      setOperationAction(ISD::BSWAP, MVT::i32, Custom);
    }
  } else {
    // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
    // pattern match it directly in isel.
    setOperationAction(ISD::BSWAP, XLenVT,
                       Subtarget.hasStdExtZbb() ? Legal : Expand);
  }

  if (Subtarget.hasStdExtZbb()) {
    setOperationAction(ISD::SMIN, XLenVT, Legal);
    setOperationAction(ISD::SMAX, XLenVT, Legal);
    setOperationAction(ISD::UMIN, XLenVT, Legal);
    setOperationAction(ISD::UMAX, XLenVT, Legal);
  } else {
    setOperationAction(ISD::CTTZ, XLenVT, Expand);
    setOperationAction(ISD::CTLZ, XLenVT, Expand);
    setOperationAction(ISD::CTPOP, XLenVT, Expand);
  }

  if (Subtarget.hasStdExtZbt()) {
    setOperationAction(ISD::FSHL, XLenVT, Custom);
    setOperationAction(ISD::FSHR, XLenVT, Custom);
    setOperationAction(ISD::SELECT, XLenVT, Legal);

    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::FSHL, MVT::i32, Custom);
      setOperationAction(ISD::FSHR, MVT::i32, Custom);
    }
  } else {
    setOperationAction(ISD::SELECT, XLenVT, Custom);
  }

  ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
      ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
      ISD::SETGE,  ISD::SETNE,  ISD::SETO,   ISD::SETUO};

  ISD::NodeType FPOpToExpand[] = {
      ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM,
      ISD::FP16_TO_FP, ISD::FP_TO_FP16};

  if (Subtarget.hasStdExtZfh())
    setOperationAction(ISD::BITCAST, MVT::i16, Custom);

  if (Subtarget.hasStdExtZfh()) {
    setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f16, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
    setOperationAction(ISD::SELECT, MVT::f16, Custom);
    setOperationAction(ISD::BR_CC, MVT::f16, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f16, Expand);
  }

  if (Subtarget.hasStdExtF()) {
    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Custom);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  }

  if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);

  if (Subtarget.hasStdExtD()) {
    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Custom);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
  }

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
  }

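  // The address nodes below (GlobalAddress, BlockAddress, ConstantPool,
  // JumpTable and GlobalTLSAddress) are custom-lowered so they can be
  // materialised with the appropriate %hi/%lo, %pcrel_hi/%pcrel_lo or TLS
  // relocation sequences.
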
  setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
  setOperationAction(ISD::BlockAddress, XLenVT, Custom);
  setOperationAction(ISD::ConstantPool, XLenVT, Custom);
  setOperationAction(ISD::JumpTable, XLenVT, Custom);

  setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);

  // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
  // Unfortunately this can't be determined just from the ISA naming string.
  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
                     Subtarget.is64Bit() ? Legal : Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);
  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  if (Subtarget.hasStdExtA()) {
    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
    setMinCmpXchgSizeInBits(32);
  } else {
    setMaxAtomicSizeInBitsSupported(0);
  }

  setBooleanContents(ZeroOrOneBooleanContent);

  if (Subtarget.hasStdExtV()) {
    setBooleanVectorContents(ZeroOrOneBooleanContent);

    setOperationAction(ISD::VSCALE, XLenVT, Custom);

    // RVV intrinsics may have illegal operands.
    // We also need to custom legalize vmv.x.s.
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);

    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);

    if (!Subtarget.is64Bit()) {
      // We must custom-lower certain vXi64 operations on RV32 due to the
      // vector element type being illegal.
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i64, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::i64, Custom);

      setOperationAction(ISD::VECREDUCE_ADD, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_AND, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_OR, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_XOR, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_SMAX, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_SMIN, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_UMAX, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_UMIN, MVT::i64, Custom);
    }

    for (MVT VT : BoolVecVTs) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);

      // Mask VTs are custom-expanded into a series of standard nodes.
      setOperationAction(ISD::TRUNCATE, VT, Custom);
      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
    }

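    // Mask (i1) vector types only need the handful of actions above; the
    // integer element types handled next additionally get min/max,
    // conversion, insert/extract and reduction lowerings.
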
    for (MVT VT : IntVecVTs) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
      setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);

      setOperationAction(ISD::SMIN, VT, Legal);
      setOperationAction(ISD::SMAX, VT, Legal);
      setOperationAction(ISD::UMIN, VT, Legal);
      setOperationAction(ISD::UMAX, VT, Legal);

      setOperationAction(ISD::ROTL, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      // Custom-lower extensions and truncations from/to mask types.
      setOperationAction(ISD::ANY_EXTEND, VT, Custom);
      setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
      setOperationAction(ISD::ZERO_EXTEND, VT, Custom);

      // RVV has native int->float & float->int conversions where the
      // element type sizes are within one power-of-two of each other. Any
      // wider distances between type sizes have to be lowered as sequences
      // which progressively narrow the gap in stages.
      setOperationAction(ISD::SINT_TO_FP, VT, Custom);
      setOperationAction(ISD::UINT_TO_FP, VT, Custom);
      setOperationAction(ISD::FP_TO_SINT, VT, Custom);
      setOperationAction(ISD::FP_TO_UINT, VT, Custom);

      // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
      // nodes which truncate by one power of two at a time.
      setOperationAction(ISD::TRUNCATE, VT, Custom);

      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);

      // Custom-lower reduction operations to set up the corresponding custom
      // nodes' operands.
      setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
      setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
      setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
      setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
      setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
      setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
      setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
      setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);

      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

      setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
    }

    // Expand various CCs to best match the RVV ISA, which natively supports
    // UNE but no other unordered comparisons, and supports all ordered
    // comparisons except ONE. Additionally, we expand GT,OGT,GE,OGE for
    // optimization purposes; they are expanded to their swapped-operand CCs
    // (LT,OLT,LE,OLE), and we pattern-match those back to the "original",
    // swapping operands once more. This way we catch both operations and both
    // "vf" and "fv" forms with fewer patterns.
    ISD::CondCode VFPCCToExpand[] = {
        ISD::SETO,   ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
        ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
        ISD::SETGT,  ISD::SETOGT, ISD::SETGE,  ISD::SETOGE,
    };

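    // For example, a SETGT comparison is expanded to a swapped-operand SETLT;
    // the isel patterns then swap the operands back, so a single set of
    // patterns covers both the vector-scalar and scalar-vector forms.
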
    // Sets common operation actions on RVV floating-point vector types.
    const auto SetCommonVFPActions = [&](MVT VT) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
      // RVV has native FP_ROUND & FP_EXTEND conversions where the element
      // type sizes are within one power-of-two of each other. Therefore
      // conversions between vXf16 and vXf64 must be lowered as sequences
      // which convert via vXf32.
      setOperationAction(ISD::FP_ROUND, VT, Custom);
      setOperationAction(ISD::FP_EXTEND, VT, Custom);
      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
      // Expand various condition codes (explained above).
      for (auto CC : VFPCCToExpand)
        setCondCodeAction(CC, VT, Expand);

      setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
      setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
      setOperationAction(ISD::FCOPYSIGN, VT, Legal);

      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

      setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
    };

    if (Subtarget.hasStdExtZfh())
      for (MVT VT : F16VecVTs)
        SetCommonVFPActions(VT);

    if (Subtarget.hasStdExtF())
      for (MVT VT : F32VecVTs)
        SetCommonVFPActions(VT);

    if (Subtarget.hasStdExtD())
      for (MVT VT : F64VecVTs)
        SetCommonVFPActions(VT);

    if (Subtarget.useRVVForFixedLengthVectors()) {
      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
        if (!useRVVForFixedLengthVectorVT(VT))
          continue;

        // By default everything must be expanded.
        for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
          setOperationAction(Op, VT, Expand);
        for (MVT OtherVT : MVT::fixedlen_vector_valuetypes())
          setTruncStoreAction(VT, OtherVT, Expand);

        // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
        setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
        setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

        setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
        setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);

        setOperationAction(ISD::LOAD, VT, Custom);
        setOperationAction(ISD::STORE, VT, Custom);

        setOperationAction(ISD::SETCC, VT, Custom);

        setOperationAction(ISD::TRUNCATE, VT, Custom);

        // The operations below differ between mask vectors and other vectors.
        if (VT.getVectorElementType() == MVT::i1) {
          setOperationAction(ISD::AND, VT, Custom);
          setOperationAction(ISD::OR, VT, Custom);
          setOperationAction(ISD::XOR, VT, Custom);
          continue;
        }

        setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
        setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);

        setOperationAction(ISD::ADD, VT, Custom);
        setOperationAction(ISD::MUL, VT, Custom);
        setOperationAction(ISD::SUB, VT, Custom);
        setOperationAction(ISD::AND, VT, Custom);
        setOperationAction(ISD::OR, VT, Custom);
        setOperationAction(ISD::XOR, VT, Custom);
        setOperationAction(ISD::SDIV, VT, Custom);
        setOperationAction(ISD::SREM, VT, Custom);
        setOperationAction(ISD::UDIV, VT, Custom);
        setOperationAction(ISD::UREM, VT, Custom);
        setOperationAction(ISD::SHL, VT, Custom);
        setOperationAction(ISD::SRA, VT, Custom);
        setOperationAction(ISD::SRL, VT, Custom);

        setOperationAction(ISD::SMIN, VT, Custom);
        setOperationAction(ISD::SMAX, VT, Custom);
        setOperationAction(ISD::UMIN, VT, Custom);
        setOperationAction(ISD::UMAX, VT, Custom);
        setOperationAction(ISD::ABS, VT, Custom);

        setOperationAction(ISD::MULHS, VT, Custom);
        setOperationAction(ISD::MULHU, VT, Custom);

        setOperationAction(ISD::SINT_TO_FP, VT, Custom);
        setOperationAction(ISD::UINT_TO_FP, VT, Custom);
        setOperationAction(ISD::FP_TO_SINT, VT, Custom);
        setOperationAction(ISD::FP_TO_UINT, VT, Custom);

        setOperationAction(ISD::VSELECT, VT, Custom);

        setOperationAction(ISD::ANY_EXTEND, VT, Custom);
        setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
        setOperationAction(ISD::ZERO_EXTEND, VT, Custom);

        setOperationAction(ISD::BITCAST, VT, Custom);

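        // Fixed-length vector bitcasts are custom so that a bitcast between
        // two fixed-length vector types can simply be left for isel to handle
        // as a no-op replacement (see the BITCAST case in LowerOperation).
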
        // Custom-lower reduction operations to set up the corresponding
        // custom nodes' operands.
        setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
        setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
        setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
        setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
        setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
        setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
        setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
        setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
      }

      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
        if (!useRVVForFixedLengthVectorVT(VT))
          continue;

        // By default everything must be expanded.
        for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
          setOperationAction(Op, VT, Expand);
        for (MVT OtherVT : MVT::fp_fixedlen_vector_valuetypes()) {
          setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
          setTruncStoreAction(VT, OtherVT, Expand);
        }

        // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
        setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
        setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

        setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
        setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
        setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);

        setOperationAction(ISD::LOAD, VT, Custom);
        setOperationAction(ISD::STORE, VT, Custom);
        setOperationAction(ISD::FADD, VT, Custom);
        setOperationAction(ISD::FSUB, VT, Custom);
        setOperationAction(ISD::FMUL, VT, Custom);
        setOperationAction(ISD::FDIV, VT, Custom);
        setOperationAction(ISD::FNEG, VT, Custom);
        setOperationAction(ISD::FABS, VT, Custom);
        setOperationAction(ISD::FSQRT, VT, Custom);
        setOperationAction(ISD::FMA, VT, Custom);

        setOperationAction(ISD::FP_ROUND, VT, Custom);
        setOperationAction(ISD::FP_EXTEND, VT, Custom);

        for (auto CC : VFPCCToExpand)
          setCondCodeAction(CC, VT, Expand);

        setOperationAction(ISD::VSELECT, VT, Custom);

        setOperationAction(ISD::BITCAST, VT, Custom);

        setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
        setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
      }
    }
  }

  // Function alignments.
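  // With the compressed (C) extension, instructions only need 16-bit
  // alignment, so functions can be 2-byte aligned; otherwise the base ISA
  // requires 4-byte alignment.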
  const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
  setMinFunctionAlignment(FunctionAlignment);
  setPrefFunctionAlignment(FunctionAlignment);

  setMinimumJumpTableEntries(5);

  // Jumps are expensive, compared to logic.
  setJumpIsExpensive();

  // We can use any register for comparisons.
  setHasMultipleConditionRegisters();

  setTargetDAGCombine(ISD::SETCC);
  if (Subtarget.hasStdExtZbp()) {
    setTargetDAGCombine(ISD::OR);
  }
  if (Subtarget.hasStdExtV())
    setTargetDAGCombine(ISD::FCOPYSIGN);
}

EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
                                            LLVMContext &Context,
                                            EVT VT) const {
  if (!VT.isVector())
    return getPointerTy(DL);
  if (Subtarget.hasStdExtV() &&
      (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
    return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
  return VT.changeVectorElementTypeToInteger();
}

bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                             const CallInst &I,
                                             MachineFunction &MF,
                                             unsigned Intrinsic) const {
  switch (Intrinsic) {
  default:
    return false;
  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
  case Intrinsic::riscv_masked_atomicrmw_add_i32:
  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
  case Intrinsic::riscv_masked_atomicrmw_max_i32:
  case Intrinsic::riscv_masked_atomicrmw_min_i32:
  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
  case Intrinsic::riscv_masked_cmpxchg_i32:
    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::getVT(PtrTy->getElementType());
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    return true;
  }
}

bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                const AddrMode &AM, Type *Ty,
                                                unsigned AS,
                                                Instruction *I) const {
  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // Require a 12-bit signed offset.
  if (!isInt<12>(AM.BaseOffs))
    return false;

  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (!AM.HasBaseReg) // allow "r+i".
      break;
    return false; // disallow "r+r" or "r+r+i".
  default:
    return false;
  }

  return true;
}

bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

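// Both hooks above correspond to the 12-bit signed immediate field used by
// I-type instructions such as ADDI/SLTI, i.e. values in [-2048, 2047].
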
// On RV32, 64-bit integers are split into their high and low parts and held
// in two different registers, so the trunc is free since the low register can
// just be used.
bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
  if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
    return false;
  unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
  unsigned DestBits = DstTy->getPrimitiveSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
  if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
      !SrcVT.isInteger() || !DstVT.isInteger())
    return false;
  unsigned SrcBits = SrcVT.getSizeInBits();
  unsigned DestBits = DstVT.getSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Zexts are free if they can be combined with a load.
  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
         (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  return TargetLowering::isZExtFree(Val, VT2);
}

bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}

bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
  return Subtarget.hasStdExtZbb();
}

bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
  return Subtarget.hasStdExtZbb();
}

bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                       bool ForCodeSize) const {
  if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
    return false;
  if (VT == MVT::f32 && !Subtarget.hasStdExtF())
    return false;
  if (VT == MVT::f64 && !Subtarget.hasStdExtD())
    return false;
  if (Imm.isNegZero())
    return false;
  return Imm.isZero();
}

bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
  return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) ||
         (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
         (VT == MVT::f64 && Subtarget.hasStdExtD());
}

// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly in the RISC-V
// ISA.
static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
  switch (CC) {
  default:
    break;
  case ISD::SETGT:
  case ISD::SETLE:
  case ISD::SETUGT:
  case ISD::SETULE:
    CC = ISD::getSetCCSwappedOperands(CC);
    std::swap(LHS, RHS);
    break;
  }
}

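// For example, normaliseSetCC turns (setgt a, b) into (setlt b, a), which
// maps directly onto the SLT/BLT family of instructions.
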
// Return the RISC-V branch opcode that matches the given DAG integer
// condition code. The CondCode must be one of those supported by the RISC-V
// ISA (see normaliseSetCC).
static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported CondCode");
  case ISD::SETEQ:
    return RISCV::BEQ;
  case ISD::SETNE:
    return RISCV::BNE;
  case ISD::SETLT:
    return RISCV::BLT;
  case ISD::SETGE:
    return RISCV::BGE;
  case ISD::SETULT:
    return RISCV::BLTU;
  case ISD::SETUGE:
    return RISCV::BGEU;
  }
}

RISCVVLMUL RISCVTargetLowering::getLMUL(MVT VT) {
  assert(VT.isScalableVector() && "Expecting a scalable vector type");
  unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
  if (VT.getVectorElementType() == MVT::i1)
    KnownSize *= 8;

  switch (KnownSize) {
  default:
    llvm_unreachable("Invalid LMUL.");
  case 8:
    return RISCVVLMUL::LMUL_F8;
  case 16:
    return RISCVVLMUL::LMUL_F4;
  case 32:
    return RISCVVLMUL::LMUL_F2;
  case 64:
    return RISCVVLMUL::LMUL_1;
  case 128:
    return RISCVVLMUL::LMUL_2;
  case 256:
    return RISCVVLMUL::LMUL_4;
  case 512:
    return RISCVVLMUL::LMUL_8;
  }
}

unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVVLMUL LMul) {
  switch (LMul) {
  default:
    llvm_unreachable("Invalid LMUL.");
  case RISCVVLMUL::LMUL_F8:
  case RISCVVLMUL::LMUL_F4:
  case RISCVVLMUL::LMUL_F2:
  case RISCVVLMUL::LMUL_1:
    return RISCV::VRRegClassID;
  case RISCVVLMUL::LMUL_2:
    return RISCV::VRM2RegClassID;
  case RISCVVLMUL::LMUL_4:
    return RISCV::VRM4RegClassID;
  case RISCVVLMUL::LMUL_8:
    return RISCV::VRM8RegClassID;
  }
}

unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
  RISCVVLMUL LMUL = getLMUL(VT);
  if (LMUL == RISCVVLMUL::LMUL_F8 || LMUL == RISCVVLMUL::LMUL_F4 ||
      LMUL == RISCVVLMUL::LMUL_F2 || LMUL == RISCVVLMUL::LMUL_1) {
    static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm1_0 + Index;
  }
  if (LMUL == RISCVVLMUL::LMUL_2) {
    static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm2_0 + Index;
  }
  if (LMUL == RISCVVLMUL::LMUL_4) {
    static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm4_0 + Index;
  }
  llvm_unreachable("Invalid vector type.");
}

unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
  if (VT.getVectorElementType() == MVT::i1)
    return RISCV::VRRegClassID;
  return getRegClassIDForLMUL(getLMUL(VT));
}

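// As an illustration of getLMUL: nxv1i8 has a known minimum size of 8 bits
// and maps to LMUL_F8, while nxv8i64 (512 bits minimum) maps to LMUL_8.
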
// Attempt to decompose a subvector insert/extract between VecVT and
// SubVecVT via subregister indices. Returns the subregister index that
// can perform the subvector insert/extract with the given element index, as
// well as the index corresponding to any leftover subvectors that must be
// further inserted/extracted within the register class for SubVecVT.
std::pair<unsigned, unsigned>
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
    MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
    const RISCVRegisterInfo *TRI) {
  static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
                 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
                 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
                "Register classes not ordered");
  unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
  unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
  // Try to compose a subregister index that takes us from the incoming
  // LMUL>1 register class down to the outgoing one. At each step we halve
  // the LMUL:
  //   nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
  // Note that this is not guaranteed to find a subregister index, such as
  // when we are extracting from one VR type to another.
  unsigned SubRegIdx = RISCV::NoSubRegister;
  for (const unsigned RCID :
       {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
    if (VecRegClassID > RCID && SubRegClassID <= RCID) {
      VecVT = VecVT.getHalfNumVectorElementsVT();
      bool IsHi =
          InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
      SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
                                            getSubregIndexByMVT(VecVT, IsHi));
      if (IsHi)
        InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
    }
  return {SubRegIdx, InsertExtractIdx};
}

// Return the largest legal scalable vector type that matches VT's element
// type.
MVT RISCVTargetLowering::getContainerForFixedLengthVector(
    const TargetLowering &TLI, MVT VT, const RISCVSubtarget &Subtarget) {
  assert(VT.isFixedLengthVector() && TLI.isTypeLegal(VT) &&
         "Expected legal fixed length vector!");

  unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
  assert(LMul <= 8 && isPowerOf2_32(LMul) && "Unexpected LMUL!");

  MVT EltVT = VT.getVectorElementType();
  switch (EltVT.SimpleTy) {
  default:
    llvm_unreachable("unexpected element type for RVV container");
  case MVT::i1: {
    // Masks are calculated assuming 8-bit elements since that's when we need
    // the most elements.
    unsigned EltsPerBlock = RISCV::RVVBitsPerBlock / 8;
    return MVT::getScalableVectorVT(MVT::i1, LMul * EltsPerBlock);
  }
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
  case MVT::f16:
  case MVT::f32:
  case MVT::f64: {
    unsigned EltsPerBlock = RISCV::RVVBitsPerBlock / EltVT.getSizeInBits();
    return MVT::getScalableVectorVT(EltVT, LMul * EltsPerBlock);
  }
  }
}

MVT RISCVTargetLowering::getContainerForFixedLengthVector(
    SelectionDAG &DAG, MVT VT, const RISCVSubtarget &Subtarget) {
  return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
                                          Subtarget);
}

MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
  return getContainerForFixedLengthVector(*this, VT, getSubtarget());
}

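// For example, with RVVBitsPerBlock == 64, an LMUL=1 container for i32
// elements is nxv2i32; the LMUL itself is chosen by
// getLMULForFixedLengthVector based on the configured vector register size.
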
// Grow V to consume an entire RVV register.
static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
                                       const RISCVSubtarget &Subtarget) {
  assert(VT.isScalableVector() &&
         "Expected to convert into a scalable vector!");
  assert(V.getValueType().isFixedLengthVector() &&
         "Expected a fixed length vector operand!");
  SDLoc DL(V);
  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
}

// Shrink V so it's just big enough to maintain a VT's worth of data.
static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
                                         const RISCVSubtarget &Subtarget) {
  assert(VT.isFixedLengthVector() &&
         "Expected to convert into a fixed length vector!");
  assert(V.getValueType().isScalableVector() &&
         "Expected a scalable vector operand!");
  SDLoc DL(V);
  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
}

// Gets the two common "VL" operands: an all-ones mask and the vector length.
// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
// the vector type that it is contained in.
static std::pair<SDValue, SDValue>
getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
                const RISCVSubtarget &Subtarget) {
  assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
  MVT XLenVT = Subtarget.getXLenVT();
  SDValue VL = VecVT.isFixedLengthVector()
                   ? DAG.getConstant(VecVT.getVectorNumElements(), DL, XLenVT)
                   : DAG.getRegister(RISCV::X0, XLenVT);
  MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
  SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
  return {Mask, VL};
}

// As above but assuming the given type is a scalable vector type.
static std::pair<SDValue, SDValue>
getDefaultScalableVLOps(MVT VecVT, SDLoc DL, SelectionDAG &DAG,
                        const RISCVSubtarget &Subtarget) {
  assert(VecVT.isScalableVector() && "Expecting a scalable vector");
  return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
}

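// Note that passing X0 as the VL operand above is RVV's idiom for "VLMAX",
// i.e. the operation covers every element of the scalable type, whereas
// fixed-length types use their exact element count as the VL.
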
// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very
// little of either is (currently) supported. This can get us into an infinite
// loop where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a
// BUILD_VECTOR as a ..., etc.
// Until either (or both) of these can reliably lower any node, reporting that
// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
// which is not desirable.
bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
    EVT VT, unsigned DefinedValues) const {
  return false;
}

static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  MVT VT = Op.getSimpleValueType();
  assert(VT.isFixedLengthVector() && "Unexpected vector!");

  MVT ContainerVT = RISCVTargetLowering::getContainerForFixedLengthVector(
      DAG, VT, Subtarget);

  SDLoc DL(Op);
  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  if (VT.getVectorElementType() == MVT::i1) {
    if (ISD::isBuildVectorAllZeros(Op.getNode())) {
      SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
      return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
    }

    if (ISD::isBuildVectorAllOnes(Op.getNode())) {
      SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
      return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
    }

    return SDValue();
  }

  if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
    unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
                                        : RISCVISD::VMV_V_X_VL;
    Splat = DAG.getNode(Opc, DL, ContainerVT, Splat, VL);
    return convertFromScalableVector(VT, Splat, DAG, Subtarget);
  }

  // Try and match an index sequence, which we can lower directly to the vid
  // instruction. An all-undef vector is matched by getSplatValue, above.
  if (VT.isInteger()) {
    bool IsVID = true;
    for (unsigned i = 0, e = Op.getNumOperands(); i < e && IsVID; i++)
      IsVID &= Op.getOperand(i).isUndef() ||
               (isa<ConstantSDNode>(Op.getOperand(i)) &&
                Op.getConstantOperandVal(i) == i);

    if (IsVID) {
      SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
      return convertFromScalableVector(VT, VID, DAG, Subtarget);
    }
  }

  return SDValue();
}

static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
                                   const RISCVSubtarget &Subtarget) {
  SDValue V1 = Op.getOperand(0);
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());

  if (SVN->isSplat()) {
    int Lane = SVN->getSplatIndex();
    if (Lane >= 0) {
      MVT ContainerVT = RISCVTargetLowering::getContainerForFixedLengthVector(
          DAG, VT, Subtarget);

      V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
      assert(Lane < (int)VT.getVectorNumElements() && "Unexpected lane!");

      SDValue Mask, VL;
      std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
      MVT XLenVT = Subtarget.getXLenVT();
      SDValue Gather =
          DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, V1,
                      DAG.getConstant(Lane, DL, XLenVT), Mask, VL);
      return convertFromScalableVector(VT, Gather, DAG, Subtarget);
    }
  }

  return SDValue();
}

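// The splat shuffle above is lowered to RISCVISD::VRGATHER_VX_VL, i.e. a
// vrgather.vx whose scalar index operand is the splat lane; all other
// shuffle masks currently fall back to the default expansion.
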
static SDValue getRVVFPExtendOrRound(SDValue Op, MVT VT, MVT ContainerVT,
                                     SDLoc DL, SelectionDAG &DAG,
                                     const RISCVSubtarget &Subtarget) {
  if (VT.isScalableVector())
    return DAG.getFPExtendOrRound(Op, DL, VT);
  assert(VT.isFixedLengthVector() &&
         "Unexpected value type for RVV FP extend/round lowering");
  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
  unsigned RVVOpc = ContainerVT.bitsGT(Op.getSimpleValueType())
                        ? RISCVISD::FP_EXTEND_VL
                        : RISCVISD::FP_ROUND_VL;
  return DAG.getNode(RVVOpc, DL, ContainerVT, Op, Mask, VL);
}

SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
                                            SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default:
    report_fatal_error("unimplemented operand");
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::JumpTable:
    return lowerJumpTable(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::SELECT:
    return lowerSELECT(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, true);
  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, false);
  case ISD::BITCAST: {
    SDValue Op0 = Op.getOperand(0);
    // We can handle fixed length vector bitcasts with a simple replacement
    // in isel.
    if (Op.getValueType().isFixedLengthVector()) {
      if (Op0.getValueType().isFixedLengthVector())
        return Op;
      return SDValue();
    }
    assert(((Subtarget.is64Bit() && Subtarget.hasStdExtF()) ||
            Subtarget.hasStdExtZfh()) &&
           "Unexpected custom legalisation");
    SDLoc DL(Op);
    if (Op.getValueType() == MVT::f16 && Subtarget.hasStdExtZfh()) {
      if (Op0.getValueType() != MVT::i16)
        return SDValue();
      SDValue NewOp0 =
          DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Op0);
      SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
      return FPConv;
    } else if (Op.getValueType() == MVT::f32 && Subtarget.is64Bit() &&
               Subtarget.hasStdExtF()) {
      if (Op0.getValueType() != MVT::i32)
        return SDValue();
      SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
      SDValue FPConv =
          DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
      return FPConv;
    }
    return SDValue();
  }
  case ISD::INTRINSIC_WO_CHAIN:
    return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return LowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::BSWAP:
  case ISD::BITREVERSE: {
    // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
    assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
    MVT VT = Op.getSimpleValueType();
    SDLoc DL(Op);
    // Start with the maximum immediate value which is the bitwidth - 1.
    unsigned Imm = VT.getSizeInBits() - 1;
    // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
    if (Op.getOpcode() == ISD::BSWAP)
      Imm &= ~0x7U;
    return DAG.getNode(RISCVISD::GREVI, DL, VT, Op.getOperand(0),
                       DAG.getTargetConstant(Imm, DL, Subtarget.getXLenVT()));
  }
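  // For example, in the BSWAP/BITREVERSE case above, an i64 BSWAP becomes
  // GREVI with immediate 63 & ~0x7 == 56 (the rev8 encoding), while
  // BITREVERSE keeps the full immediate 63.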
  case ISD::FSHL:
  case ISD::FSHR: {
    MVT VT = Op.getSimpleValueType();
    assert(VT == Subtarget.getXLenVT() && "Unexpected custom legalization");
    SDLoc DL(Op);
    // FSL/FSR take a log2(XLen)+1 bit shift amount but XLenVT FSHL/FSHR only
    // use log2(XLen) bits. Mask the shift amount accordingly.
    unsigned ShAmtWidth = Subtarget.getXLen() - 1;
    SDValue ShAmt = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(2),
                                DAG.getConstant(ShAmtWidth, DL, VT));
    unsigned Opc = Op.getOpcode() == ISD::FSHL ? RISCVISD::FSL : RISCVISD::FSR;
    return DAG.getNode(Opc, DL, VT, Op.getOperand(0), Op.getOperand(1), ShAmt);
  }
  case ISD::TRUNCATE: {
    SDLoc DL(Op);
    MVT VT = Op.getSimpleValueType();
    // Only custom-lower vector truncates.
    if (!VT.isVector())
      return Op;

    // Truncates to mask types are handled differently.
    if (VT.getVectorElementType() == MVT::i1)
      return lowerVectorMaskTrunc(Op, DAG);

    // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
    // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
    // truncate by one power of two at a time.
    MVT DstEltVT = VT.getVectorElementType();

    SDValue Src = Op.getOperand(0);
    MVT SrcVT = Src.getSimpleValueType();
    MVT SrcEltVT = SrcVT.getVectorElementType();

    assert(DstEltVT.bitsLT(SrcEltVT) &&
           isPowerOf2_64(DstEltVT.getSizeInBits()) &&
           isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
           "Unexpected vector truncate lowering");

    MVT ContainerVT = SrcVT;
    if (SrcVT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(SrcVT);
      Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
    }

    SDValue Result = Src;
    SDValue Mask, VL;
    std::tie(Mask, VL) =
        getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
    LLVMContext &Context = *DAG.getContext();
    const ElementCount Count = ContainerVT.getVectorElementCount();
    do {
      SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
      EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
      Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
                           Mask, VL);
    } while (SrcEltVT != DstEltVT);

    if (SrcVT.isFixedLengthVector())
      Result = convertFromScalableVector(VT, Result, DAG, Subtarget);

    return Result;
  }
  case ISD::ANY_EXTEND:
  case ISD::ZERO_EXTEND:
    if (Op.getOperand(0).getValueType().isVector() &&
        Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
      return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
    return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
  case ISD::SIGN_EXTEND:
    if (Op.getOperand(0).getValueType().isVector() &&
        Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
      return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
    return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
  case ISD::SPLAT_VECTOR_PARTS:
    return lowerSPLAT_VECTOR_PARTS(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
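  // In the VSCALE lowering below: e.g. with VLEN=128, VLENB is 16 and vscale
  // is therefore 16 >> 3 == 2, so <vscale x 2 x i32> holds 4 x i32.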
  case ISD::VSCALE: {
    MVT VT = Op.getSimpleValueType();
    SDLoc DL(Op);
    SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
    // We define our scalable vector types for lmul=1 to use a 64 bit known
    // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
    // vscale as VLENB / 8.
    SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
                                 DAG.getConstant(3, DL, VT));
    return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
  }
  case ISD::FP_EXTEND: {
    // RVV can only do fp_extend to types double the size of the source. We
    // custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going
    // via f32.
    SDLoc DL(Op);
    MVT VT = Op.getSimpleValueType();
    SDValue Src = Op.getOperand(0);
    MVT SrcVT = Src.getSimpleValueType();

    // Prepare any fixed-length vector operands.
    MVT ContainerVT = VT;
    if (SrcVT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VT);
      MVT SrcContainerVT =
          ContainerVT.changeVectorElementType(SrcVT.getVectorElementType());
      Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
    }

    if (!VT.isVector() || VT.getVectorElementType() != MVT::f64 ||
        SrcVT.getVectorElementType() != MVT::f16) {
      // For scalable vectors, we only need to close the gap between
      // vXf16->vXf64.
      if (!VT.isFixedLengthVector())
        return Op;
      // For fixed-length vectors, lower the FP_EXTEND to a custom "VL"
      // version.
      Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
      return convertFromScalableVector(VT, Src, DAG, Subtarget);
    }

    MVT InterVT = VT.changeVectorElementType(MVT::f32);
    MVT InterContainerVT = ContainerVT.changeVectorElementType(MVT::f32);
    SDValue IntermediateExtend = getRVVFPExtendOrRound(
        Src, InterVT, InterContainerVT, DL, DAG, Subtarget);

    SDValue Extend = getRVVFPExtendOrRound(IntermediateExtend, VT, ContainerVT,
                                           DL, DAG, Subtarget);
    if (VT.isFixedLengthVector())
      return convertFromScalableVector(VT, Extend, DAG, Subtarget);
    return Extend;
  }
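  // Concretely, the FP_EXTEND above lowers vXf16->vXf64 as
  // vXf16->vXf32->vXf64; the FP_ROUND below goes the other way, using the
  // round-to-odd narrowing (vfncvt.rod) for the f64->f32 hop.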
  case ISD::FP_ROUND: {
    // RVV can only do fp_round to types half the size of the source. We
    // custom-lower f64->f16 rounds via RVV's round-to-odd float
    // conversion instruction.
    SDLoc DL(Op);
    MVT VT = Op.getSimpleValueType();
    SDValue Src = Op.getOperand(0);
    MVT SrcVT = Src.getSimpleValueType();

    // Prepare any fixed-length vector operands.
    MVT ContainerVT = VT;
    if (VT.isFixedLengthVector()) {
      MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
      ContainerVT =
          SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
      Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
    }

    if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 ||
        SrcVT.getVectorElementType() != MVT::f64) {
      // For scalable vectors, we only need to close the gap between
      // vXf64<->vXf16.
      if (!VT.isFixedLengthVector())
        return Op;
      // For fixed-length vectors, lower the FP_ROUND to a custom "VL" version.
      Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
      return convertFromScalableVector(VT, Src, DAG, Subtarget);
    }

    SDValue Mask, VL;
    std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

    MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
    SDValue IntermediateRound =
        DAG.getNode(RISCVISD::VFNCVT_ROD_VL, DL, InterVT, Src, Mask, VL);
    SDValue Round = getRVVFPExtendOrRound(IntermediateRound, VT, ContainerVT,
                                          DL, DAG, Subtarget);

    if (VT.isFixedLengthVector())
      return convertFromScalableVector(VT, Round, DAG, Subtarget);
    return Round;
  }
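  // In the conversion cases below, a widening i8->f32 conversion, for
  // example, is lowered as an i8->i32 sign/zero-extend followed by a single
  // int->float conversion, and a narrowing f64->i8 as f64->i32 followed by a
  // truncate.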
1479 unsigned RVVOpc = 0; 1480 switch (Op.getOpcode()) { 1481 default: 1482 llvm_unreachable("Impossible opcode"); 1483 case ISD::FP_TO_SINT: 1484 RVVOpc = RISCVISD::FP_TO_SINT_VL; 1485 break; 1486 case ISD::FP_TO_UINT: 1487 RVVOpc = RISCVISD::FP_TO_UINT_VL; 1488 break; 1489 case ISD::SINT_TO_FP: 1490 RVVOpc = RISCVISD::SINT_TO_FP_VL; 1491 break; 1492 case ISD::UINT_TO_FP: 1493 RVVOpc = RISCVISD::UINT_TO_FP_VL; 1494 break; 1495 } 1496 1497 MVT ContainerVT, SrcContainerVT; 1498 // Derive the reference container type from the larger vector type. 1499 if (SrcEltSize > EltSize) { 1500 SrcContainerVT = getContainerForFixedLengthVector(SrcVT); 1501 ContainerVT = 1502 SrcContainerVT.changeVectorElementType(VT.getVectorElementType()); 1503 } else { 1504 ContainerVT = getContainerForFixedLengthVector(VT); 1505 SrcContainerVT = ContainerVT.changeVectorElementType(SrcEltVT); 1506 } 1507 1508 SDValue Mask, VL; 1509 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 1510 1511 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); 1512 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL); 1513 return convertFromScalableVector(VT, Src, DAG, Subtarget); 1514 } 1515 case ISD::VECREDUCE_ADD: 1516 case ISD::VECREDUCE_UMAX: 1517 case ISD::VECREDUCE_SMAX: 1518 case ISD::VECREDUCE_UMIN: 1519 case ISD::VECREDUCE_SMIN: 1520 case ISD::VECREDUCE_AND: 1521 case ISD::VECREDUCE_OR: 1522 case ISD::VECREDUCE_XOR: 1523 return lowerVECREDUCE(Op, DAG); 1524 case ISD::VECREDUCE_FADD: 1525 case ISD::VECREDUCE_SEQ_FADD: 1526 return lowerFPVECREDUCE(Op, DAG); 1527 case ISD::INSERT_SUBVECTOR: 1528 return lowerINSERT_SUBVECTOR(Op, DAG); 1529 case ISD::EXTRACT_SUBVECTOR: 1530 return lowerEXTRACT_SUBVECTOR(Op, DAG); 1531 case ISD::VECTOR_REVERSE: 1532 return lowerVECTOR_REVERSE(Op, DAG); 1533 case ISD::BUILD_VECTOR: 1534 return lowerBUILD_VECTOR(Op, DAG, Subtarget); 1535 case ISD::VECTOR_SHUFFLE: 1536 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget); 1537 case ISD::CONCAT_VECTORS: { 1538 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is 1539 // better than going through the stack, as the default expansion does. 
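// For illustration (sketch): concatenating two v4i32 operands into a v8i32
// result becomes roughly
//   (insert_subvector (insert_subvector undef:v8i32, X, 0), Y, 4)
// and each INSERT_SUBVECTOR is then lowered in turn (typically to a slideup)
// instead of spilling the operands to the stack.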
1540 SDLoc DL(Op); 1541 MVT VT = Op.getSimpleValueType(); 1542 assert(VT.isFixedLengthVector() && "Unexpected CONCAT_VECTORS lowering"); 1543 unsigned NumOpElts = 1544 Op.getOperand(0).getSimpleValueType().getVectorNumElements(); 1545 SDValue Vec = DAG.getUNDEF(VT); 1546 for (const auto &OpIdx : enumerate(Op->ops())) 1547 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, OpIdx.value(), 1548 DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL)); 1549 return Vec; 1550 } 1551 case ISD::LOAD: 1552 return lowerFixedLengthVectorLoadToRVV(Op, DAG); 1553 case ISD::STORE: 1554 return lowerFixedLengthVectorStoreToRVV(Op, DAG); 1555 case ISD::SETCC: 1556 return lowerFixedLengthVectorSetccToRVV(Op, DAG); 1557 case ISD::ADD: 1558 return lowerToScalableOp(Op, DAG, RISCVISD::ADD_VL); 1559 case ISD::SUB: 1560 return lowerToScalableOp(Op, DAG, RISCVISD::SUB_VL); 1561 case ISD::MUL: 1562 return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL); 1563 case ISD::MULHS: 1564 return lowerToScalableOp(Op, DAG, RISCVISD::MULHS_VL); 1565 case ISD::MULHU: 1566 return lowerToScalableOp(Op, DAG, RISCVISD::MULHU_VL); 1567 case ISD::AND: 1568 return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMAND_VL, 1569 RISCVISD::AND_VL); 1570 case ISD::OR: 1571 return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMOR_VL, 1572 RISCVISD::OR_VL); 1573 case ISD::XOR: 1574 return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMXOR_VL, 1575 RISCVISD::XOR_VL); 1576 case ISD::SDIV: 1577 return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL); 1578 case ISD::SREM: 1579 return lowerToScalableOp(Op, DAG, RISCVISD::SREM_VL); 1580 case ISD::UDIV: 1581 return lowerToScalableOp(Op, DAG, RISCVISD::UDIV_VL); 1582 case ISD::UREM: 1583 return lowerToScalableOp(Op, DAG, RISCVISD::UREM_VL); 1584 case ISD::SHL: 1585 return lowerToScalableOp(Op, DAG, RISCVISD::SHL_VL); 1586 case ISD::SRA: 1587 return lowerToScalableOp(Op, DAG, RISCVISD::SRA_VL); 1588 case ISD::SRL: 1589 return lowerToScalableOp(Op, DAG, RISCVISD::SRL_VL); 1590 case ISD::FADD: 1591 return lowerToScalableOp(Op, DAG, RISCVISD::FADD_VL); 1592 case ISD::FSUB: 1593 return lowerToScalableOp(Op, DAG, RISCVISD::FSUB_VL); 1594 case ISD::FMUL: 1595 return lowerToScalableOp(Op, DAG, RISCVISD::FMUL_VL); 1596 case ISD::FDIV: 1597 return lowerToScalableOp(Op, DAG, RISCVISD::FDIV_VL); 1598 case ISD::FNEG: 1599 return lowerToScalableOp(Op, DAG, RISCVISD::FNEG_VL); 1600 case ISD::FABS: 1601 return lowerToScalableOp(Op, DAG, RISCVISD::FABS_VL); 1602 case ISD::FSQRT: 1603 return lowerToScalableOp(Op, DAG, RISCVISD::FSQRT_VL); 1604 case ISD::FMA: 1605 return lowerToScalableOp(Op, DAG, RISCVISD::FMA_VL); 1606 case ISD::SMIN: 1607 return lowerToScalableOp(Op, DAG, RISCVISD::SMIN_VL); 1608 case ISD::SMAX: 1609 return lowerToScalableOp(Op, DAG, RISCVISD::SMAX_VL); 1610 case ISD::UMIN: 1611 return lowerToScalableOp(Op, DAG, RISCVISD::UMIN_VL); 1612 case ISD::UMAX: 1613 return lowerToScalableOp(Op, DAG, RISCVISD::UMAX_VL); 1614 case ISD::ABS: 1615 return lowerABS(Op, DAG); 1616 case ISD::VSELECT: 1617 return lowerFixedLengthVectorSelectToRVV(Op, DAG); 1618 } 1619 } 1620 1621 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty, 1622 SelectionDAG &DAG, unsigned Flags) { 1623 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags); 1624 } 1625 1626 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty, 1627 SelectionDAG &DAG, unsigned Flags) { 1628 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(), 1629 Flags); 1630 } 1631 
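// Note: getTargetNode is overloaded per node kind so that the templated
// getAddr below can reuse the same addressing-sequence logic for globals,
// block addresses, constant pools and jump tables.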
1632 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty, 1633 SelectionDAG &DAG, unsigned Flags) { 1634 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(), 1635 N->getOffset(), Flags); 1636 } 1637 1638 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty, 1639 SelectionDAG &DAG, unsigned Flags) { 1640 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags); 1641 } 1642 1643 template <class NodeTy> 1644 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, 1645 bool IsLocal) const { 1646 SDLoc DL(N); 1647 EVT Ty = getPointerTy(DAG.getDataLayout()); 1648 1649 if (isPositionIndependent()) { 1650 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); 1651 if (IsLocal) 1652 // Use PC-relative addressing to access the symbol. This generates the 1653 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym)) 1654 // %pcrel_lo(auipc)). 1655 return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0); 1656 1657 // Use PC-relative addressing to access the GOT for this symbol, then load 1658 // the address from the GOT. This generates the pattern (PseudoLA sym), 1659 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))). 1660 return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0); 1661 } 1662 1663 switch (getTargetMachine().getCodeModel()) { 1664 default: 1665 report_fatal_error("Unsupported code model for lowering"); 1666 case CodeModel::Small: { 1667 // Generate a sequence for accessing addresses within the first 2 GiB of 1668 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)). 1669 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI); 1670 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO); 1671 SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0); 1672 return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0); 1673 } 1674 case CodeModel::Medium: { 1675 // Generate a sequence for accessing addresses within any 2GiB range within 1676 // the address space. This generates the pattern (PseudoLLA sym), which 1677 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)). 1678 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); 1679 return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0); 1680 } 1681 } 1682 } 1683 1684 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op, 1685 SelectionDAG &DAG) const { 1686 SDLoc DL(Op); 1687 EVT Ty = Op.getValueType(); 1688 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 1689 int64_t Offset = N->getOffset(); 1690 MVT XLenVT = Subtarget.getXLenVT(); 1691 1692 const GlobalValue *GV = N->getGlobal(); 1693 bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV); 1694 SDValue Addr = getAddr(N, DAG, IsLocal); 1695 1696 // In order to maximise the opportunity for common subexpression elimination, 1697 // emit a separate ADD node for the global address offset instead of folding 1698 // it in the global address node. Later peephole optimisations may choose to 1699 // fold it back in when profitable. 
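// For illustration (approximate assembly), with the small code model a
// reference to @g + 8 becomes roughly:
//   lui a0, %hi(g)
//   addi a0, a0, %lo(g)
//   addi a0, a0, 8
// keeping the plain address of @g available for reuse.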
1700 if (Offset != 0) 1701 return DAG.getNode(ISD::ADD, DL, Ty, Addr, 1702 DAG.getConstant(Offset, DL, XLenVT)); 1703 return Addr; 1704 } 1705 1706 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op, 1707 SelectionDAG &DAG) const { 1708 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op); 1709 1710 return getAddr(N, DAG); 1711 } 1712 1713 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op, 1714 SelectionDAG &DAG) const { 1715 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); 1716 1717 return getAddr(N, DAG); 1718 } 1719 1720 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op, 1721 SelectionDAG &DAG) const { 1722 JumpTableSDNode *N = cast<JumpTableSDNode>(Op); 1723 1724 return getAddr(N, DAG); 1725 } 1726 1727 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, 1728 SelectionDAG &DAG, 1729 bool UseGOT) const { 1730 SDLoc DL(N); 1731 EVT Ty = getPointerTy(DAG.getDataLayout()); 1732 const GlobalValue *GV = N->getGlobal(); 1733 MVT XLenVT = Subtarget.getXLenVT(); 1734 1735 if (UseGOT) { 1736 // Use PC-relative addressing to access the GOT for this TLS symbol, then 1737 // load the address from the GOT and add the thread pointer. This generates 1738 // the pattern (PseudoLA_TLS_IE sym), which expands to 1739 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)). 1740 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); 1741 SDValue Load = 1742 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0); 1743 1744 // Add the thread pointer. 1745 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); 1746 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg); 1747 } 1748 1749 // Generate a sequence for accessing the address relative to the thread 1750 // pointer, with the appropriate adjustment for the thread pointer offset. 1751 // This generates the pattern 1752 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym)) 1753 SDValue AddrHi = 1754 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI); 1755 SDValue AddrAdd = 1756 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD); 1757 SDValue AddrLo = 1758 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO); 1759 1760 SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0); 1761 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); 1762 SDValue MNAdd = SDValue( 1763 DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd), 1764 0); 1765 return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0); 1766 } 1767 1768 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, 1769 SelectionDAG &DAG) const { 1770 SDLoc DL(N); 1771 EVT Ty = getPointerTy(DAG.getDataLayout()); 1772 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits()); 1773 const GlobalValue *GV = N->getGlobal(); 1774 1775 // Use a PC-relative addressing mode to access the global dynamic GOT address. 1776 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to 1777 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)). 1778 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); 1779 SDValue Load = 1780 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0); 1781 1782 // Prepare argument list to generate call. 1783 ArgListTy Args; 1784 ArgListEntry Entry; 1785 Entry.Node = Load; 1786 Entry.Ty = CallTy; 1787 Args.push_back(Entry); 1788 1789 // Setup call to __tls_get_addr. 
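// For illustration, the final general-dynamic sequence is roughly:
//   la.tls.gd a0, sym      (PseudoLA_TLS_GD: auipc + addi)
//   call __tls_get_addr
// with the address of the TLS object returned in a0.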
1790 TargetLowering::CallLoweringInfo CLI(DAG); 1791 CLI.setDebugLoc(DL) 1792 .setChain(DAG.getEntryNode()) 1793 .setLibCallee(CallingConv::C, CallTy, 1794 DAG.getExternalSymbol("__tls_get_addr", Ty), 1795 std::move(Args)); 1796 1797 return LowerCallTo(CLI).first; 1798 } 1799 1800 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op, 1801 SelectionDAG &DAG) const { 1802 SDLoc DL(Op); 1803 EVT Ty = Op.getValueType(); 1804 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 1805 int64_t Offset = N->getOffset(); 1806 MVT XLenVT = Subtarget.getXLenVT(); 1807 1808 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal()); 1809 1810 if (DAG.getMachineFunction().getFunction().getCallingConv() == 1811 CallingConv::GHC) 1812 report_fatal_error("In GHC calling convention TLS is not supported"); 1813 1814 SDValue Addr; 1815 switch (Model) { 1816 case TLSModel::LocalExec: 1817 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false); 1818 break; 1819 case TLSModel::InitialExec: 1820 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true); 1821 break; 1822 case TLSModel::LocalDynamic: 1823 case TLSModel::GeneralDynamic: 1824 Addr = getDynamicTLSAddr(N, DAG); 1825 break; 1826 } 1827 1828 // In order to maximise the opportunity for common subexpression elimination, 1829 // emit a separate ADD node for the global address offset instead of folding 1830 // it in the global address node. Later peephole optimisations may choose to 1831 // fold it back in when profitable. 1832 if (Offset != 0) 1833 return DAG.getNode(ISD::ADD, DL, Ty, Addr, 1834 DAG.getConstant(Offset, DL, XLenVT)); 1835 return Addr; 1836 } 1837 1838 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { 1839 SDValue CondV = Op.getOperand(0); 1840 SDValue TrueV = Op.getOperand(1); 1841 SDValue FalseV = Op.getOperand(2); 1842 SDLoc DL(Op); 1843 MVT XLenVT = Subtarget.getXLenVT(); 1844 1845 // If the result type is XLenVT and CondV is the output of a SETCC node 1846 // which also operated on XLenVT inputs, then merge the SETCC node into the 1847 // lowered RISCVISD::SELECT_CC to take advantage of the integer 1848 // compare+branch instructions. 
i.e.: 1849 // (select (setcc lhs, rhs, cc), truev, falsev) 1850 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev) 1851 if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC && 1852 CondV.getOperand(0).getSimpleValueType() == XLenVT) { 1853 SDValue LHS = CondV.getOperand(0); 1854 SDValue RHS = CondV.getOperand(1); 1855 auto CC = cast<CondCodeSDNode>(CondV.getOperand(2)); 1856 ISD::CondCode CCVal = CC->get(); 1857 1858 normaliseSetCC(LHS, RHS, CCVal); 1859 1860 SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT); 1861 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV}; 1862 return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops); 1863 } 1864 1865 // Otherwise: 1866 // (select condv, truev, falsev) 1867 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev) 1868 SDValue Zero = DAG.getConstant(0, DL, XLenVT); 1869 SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT); 1870 1871 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV}; 1872 1873 return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops); 1874 } 1875 1876 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { 1877 MachineFunction &MF = DAG.getMachineFunction(); 1878 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>(); 1879 1880 SDLoc DL(Op); 1881 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), 1882 getPointerTy(MF.getDataLayout())); 1883 1884 // vastart just stores the address of the VarArgsFrameIndex slot into the 1885 // memory location argument. 1886 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 1887 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1), 1888 MachinePointerInfo(SV)); 1889 } 1890 1891 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op, 1892 SelectionDAG &DAG) const { 1893 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 1894 MachineFunction &MF = DAG.getMachineFunction(); 1895 MachineFrameInfo &MFI = MF.getFrameInfo(); 1896 MFI.setFrameAddressIsTaken(true); 1897 Register FrameReg = RI.getFrameRegister(MF); 1898 int XLenInBytes = Subtarget.getXLen() / 8; 1899 1900 EVT VT = Op.getValueType(); 1901 SDLoc DL(Op); 1902 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT); 1903 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 1904 while (Depth--) { 1905 int Offset = -(XLenInBytes * 2); 1906 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr, 1907 DAG.getIntPtrConstant(Offset, DL)); 1908 FrameAddr = 1909 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo()); 1910 } 1911 return FrameAddr; 1912 } 1913 1914 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op, 1915 SelectionDAG &DAG) const { 1916 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 1917 MachineFunction &MF = DAG.getMachineFunction(); 1918 MachineFrameInfo &MFI = MF.getFrameInfo(); 1919 MFI.setReturnAddressIsTaken(true); 1920 MVT XLenVT = Subtarget.getXLenVT(); 1921 int XLenInBytes = Subtarget.getXLen() / 8; 1922 1923 if (verifyReturnAddressArgumentIsConstant(Op, DAG)) 1924 return SDValue(); 1925 1926 EVT VT = Op.getValueType(); 1927 SDLoc DL(Op); 1928 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 1929 if (Depth) { 1930 int Off = -XLenInBytes; 1931 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG); 1932 SDValue Offset = DAG.getConstant(Off, DL, VT); 1933 return DAG.getLoad(VT, DL, DAG.getEntryNode(), 1934 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), 1935 MachinePointerInfo()); 1936 } 1937 
1938 // Return the value of the return address register, marking it an implicit 1939 // live-in. 1940 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT)); 1941 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT); 1942 } 1943 1944 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op, 1945 SelectionDAG &DAG) const { 1946 SDLoc DL(Op); 1947 SDValue Lo = Op.getOperand(0); 1948 SDValue Hi = Op.getOperand(1); 1949 SDValue Shamt = Op.getOperand(2); 1950 EVT VT = Lo.getValueType(); 1951 1952 // if Shamt-XLEN < 0: // Shamt < XLEN 1953 // Lo = Lo << Shamt 1954 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt)) 1955 // else: 1956 // Lo = 0 1957 // Hi = Lo << (Shamt-XLEN) 1958 1959 SDValue Zero = DAG.getConstant(0, DL, VT); 1960 SDValue One = DAG.getConstant(1, DL, VT); 1961 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); 1962 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); 1963 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); 1964 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); 1965 1966 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); 1967 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One); 1968 SDValue ShiftRightLo = 1969 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt); 1970 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); 1971 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); 1972 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen); 1973 1974 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); 1975 1976 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero); 1977 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 1978 1979 SDValue Parts[2] = {Lo, Hi}; 1980 return DAG.getMergeValues(Parts, DL); 1981 } 1982 1983 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, 1984 bool IsSRA) const { 1985 SDLoc DL(Op); 1986 SDValue Lo = Op.getOperand(0); 1987 SDValue Hi = Op.getOperand(1); 1988 SDValue Shamt = Op.getOperand(2); 1989 EVT VT = Lo.getValueType(); 1990 1991 // SRA expansion: 1992 // if Shamt-XLEN < 0: // Shamt < XLEN 1993 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt)) 1994 // Hi = Hi >>s Shamt 1995 // else: 1996 // Lo = Hi >>s (Shamt-XLEN); 1997 // Hi = Hi >>s (XLEN-1) 1998 // 1999 // SRL expansion: 2000 // if Shamt-XLEN < 0: // Shamt < XLEN 2001 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt)) 2002 // Hi = Hi >>u Shamt 2003 // else: 2004 // Lo = Hi >>u (Shamt-XLEN); 2005 // Hi = 0; 2006 2007 unsigned ShiftRightOp = IsSRA ? 
ISD::SRA : ISD::SRL; 2008 2009 SDValue Zero = DAG.getConstant(0, DL, VT); 2010 SDValue One = DAG.getConstant(1, DL, VT); 2011 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); 2012 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); 2013 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); 2014 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); 2015 2016 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt); 2017 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One); 2018 SDValue ShiftLeftHi = 2019 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt); 2020 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi); 2021 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt); 2022 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen); 2023 SDValue HiFalse = 2024 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero; 2025 2026 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); 2027 2028 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse); 2029 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 2030 2031 SDValue Parts[2] = {Lo, Hi}; 2032 return DAG.getMergeValues(Parts, DL); 2033 } 2034 2035 // Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is 2036 // illegal (currently only vXi64 RV32). 2037 // FIXME: We could also catch non-constant sign-extended i32 values and lower 2038 // them to SPLAT_VECTOR_I64 2039 SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op, 2040 SelectionDAG &DAG) const { 2041 SDLoc DL(Op); 2042 EVT VecVT = Op.getValueType(); 2043 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 && 2044 "Unexpected SPLAT_VECTOR_PARTS lowering"); 2045 2046 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!"); 2047 SDValue Lo = Op.getOperand(0); 2048 SDValue Hi = Op.getOperand(1); 2049 2050 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) { 2051 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue(); 2052 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue(); 2053 // If Hi constant is all the same sign bit as Lo, lower this as a custom 2054 // node in order to try and match RVV vector/scalar instructions. 2055 if ((LoC >> 31) == HiC) 2056 return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo); 2057 } 2058 2059 // Else, on RV32 we lower an i64-element SPLAT_VECTOR thus, being careful not 2060 // to accidentally sign-extend the 32-bit halves to the e64 SEW: 2061 // vmv.v.x vX, hi 2062 // vsll.vx vX, vX, /*32*/ 2063 // vmv.v.x vY, lo 2064 // vsll.vx vY, vY, /*32*/ 2065 // vsrl.vx vY, vY, /*32*/ 2066 // vor.vv vX, vX, vY 2067 SDValue ThirtyTwoV = DAG.getConstant(32, DL, VecVT); 2068 2069 Lo = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo); 2070 Lo = DAG.getNode(ISD::SHL, DL, VecVT, Lo, ThirtyTwoV); 2071 Lo = DAG.getNode(ISD::SRL, DL, VecVT, Lo, ThirtyTwoV); 2072 2073 if (isNullConstant(Hi)) 2074 return Lo; 2075 2076 Hi = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Hi); 2077 Hi = DAG.getNode(ISD::SHL, DL, VecVT, Hi, ThirtyTwoV); 2078 2079 return DAG.getNode(ISD::OR, DL, VecVT, Lo, Hi); 2080 } 2081 2082 // Custom-lower extensions from mask vectors by using a vselect either with 1 2083 // for zero/any-extension or -1 for sign-extension: 2084 // (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0) 2085 // Note that any-extension is lowered identically to zero-extension. 
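// For example (sketch):
//   nxv2i32 = sext nxv2i1 %m -> vselect %m, splat(-1), splat(0)
//   nxv2i32 = zext nxv2i1 %m -> vselect %m, splat( 1), splat(0)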
2086 SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
2087 int64_t ExtTrueVal) const {
2088 SDLoc DL(Op);
2089 MVT VecVT = Op.getSimpleValueType();
2090 SDValue Src = Op.getOperand(0);
2091 // Only custom-lower extensions from mask types
2092 assert(Src.getValueType().isVector() &&
2093 Src.getValueType().getVectorElementType() == MVT::i1);
2094
2095 MVT XLenVT = Subtarget.getXLenVT();
2096 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
2097 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);
2098
2099 if (VecVT.isScalableVector()) {
2100 // Be careful not to introduce illegal scalar types at this stage, and be
2101 // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is
2102 // illegal and must be expanded. Since we know that the constants are
2103 // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly.
2104 bool IsRV32E64 =
2105 !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64;
2106
2107 if (!IsRV32E64) {
2108 SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero);
2109 SplatTrueVal = DAG.getSplatVector(VecVT, DL, SplatTrueVal);
2110 } else {
2111 SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero);
2112 SplatTrueVal =
2113 DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatTrueVal);
2114 }
2115
2116 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
2117 }
2118
2119 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
2120 MVT I1ContainerVT =
2121 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
2122
2123 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
2124
2125 SDValue Mask, VL;
2126 std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
2127
2128 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero, VL);
2129 SplatTrueVal =
2130 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatTrueVal, VL);
2131 SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC,
2132 SplatTrueVal, SplatZero, VL);
2133
2134 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
2135 }
2136
2137 SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
2138 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
2139 MVT ExtVT = Op.getSimpleValueType();
2140 // Only custom-lower extensions from fixed-length vector types.
2141 if (!ExtVT.isFixedLengthVector())
2142 return Op;
2143 MVT VT = Op.getOperand(0).getSimpleValueType();
2144 // Grab the canonical container type for the extended type. Infer the smaller
2145 // type from that to ensure the same number of vector elements, as we know
2146 // the LMUL will be sufficient to hold the smaller type.
2147 MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
2148 // Derive the source container type manually from the extended one, so that
2149 // source and destination keep the same number of vector elements.
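// For illustration: with a fixed-length v8i32 = sext v8i16, ContainerExtVT is
// the scalable container chosen for v8i32, and ContainerVT below is the i16
// vector with that same element count, keeping source and destination
// element-for-element aligned.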
2150 MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(), 2151 ContainerExtVT.getVectorElementCount()); 2152 2153 SDValue Op1 = 2154 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget); 2155 2156 SDLoc DL(Op); 2157 SDValue Mask, VL; 2158 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 2159 2160 SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL); 2161 2162 return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget); 2163 } 2164 2165 // Custom-lower truncations from vectors to mask vectors by using a mask and a 2166 // setcc operation: 2167 // (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne) 2168 SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op, 2169 SelectionDAG &DAG) const { 2170 SDLoc DL(Op); 2171 EVT MaskVT = Op.getValueType(); 2172 // Only expect to custom-lower truncations to mask types 2173 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 && 2174 "Unexpected type for vector mask lowering"); 2175 SDValue Src = Op.getOperand(0); 2176 MVT VecVT = Src.getSimpleValueType(); 2177 2178 // If this is a fixed vector, we need to convert it to a scalable vector. 2179 MVT ContainerVT = VecVT; 2180 if (VecVT.isFixedLengthVector()) { 2181 ContainerVT = getContainerForFixedLengthVector(VecVT); 2182 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); 2183 } 2184 2185 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT()); 2186 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); 2187 2188 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatOne); 2189 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero); 2190 2191 if (VecVT.isScalableVector()) { 2192 SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne); 2193 return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE); 2194 } 2195 2196 SDValue Mask, VL; 2197 std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 2198 2199 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1); 2200 SDValue Trunc = 2201 DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne, Mask, VL); 2202 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, Trunc, SplatZero, 2203 DAG.getCondCode(ISD::SETNE), Mask, VL); 2204 return convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget); 2205 } 2206 2207 SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, 2208 SelectionDAG &DAG) const { 2209 SDLoc DL(Op); 2210 MVT VecVT = Op.getSimpleValueType(); 2211 SDValue Vec = Op.getOperand(0); 2212 SDValue Val = Op.getOperand(1); 2213 SDValue Idx = Op.getOperand(2); 2214 2215 MVT ContainerVT = VecVT; 2216 // If the operand is a fixed-length vector, convert to a scalable one. 2217 if (VecVT.isFixedLengthVector()) { 2218 ContainerVT = getContainerForFixedLengthVector(VecVT); 2219 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 2220 } 2221 2222 SDValue Mask, VL; 2223 std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 2224 2225 // Custom-legalize INSERT_VECTOR_ELT where XLEN>=SEW, so that the vector is 2226 // first slid down into position, the value is inserted into the first 2227 // position, and the vector is slid back up. We do this to simplify patterns. 
2228 // (slideup vec, (insertelt (slidedown impdef, vec, idx), val, 0), idx), 2229 if (Subtarget.is64Bit() || Val.getValueType() != MVT::i64) { 2230 if (isNullConstant(Idx)) 2231 return DAG.getNode(RISCVISD::VMV_S_XF_VL, DL, ContainerVT, Vec, Val, VL); 2232 SDValue Slidedown = 2233 DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT, 2234 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL); 2235 SDValue InsertElt0 = 2236 DAG.getNode(RISCVISD::VMV_S_XF_VL, DL, ContainerVT, Slidedown, Val, VL); 2237 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec, InsertElt0, 2238 Idx, Mask, VL); 2239 } 2240 2241 // Custom-legalize INSERT_VECTOR_ELT where XLEN<SEW, as the SEW element type 2242 // is illegal (currently only vXi64 RV32). 2243 // Since there is no easy way of getting a single element into a vector when 2244 // XLEN<SEW, we lower the operation to the following sequence: 2245 // splat vVal, rVal 2246 // vid.v vVid 2247 // vmseq.vx mMask, vVid, rIdx 2248 // vmerge.vvm vDest, vSrc, vVal, mMask 2249 // This essentially merges the original vector with the inserted element by 2250 // using a mask whose only set bit is that corresponding to the insert 2251 // index. 2252 SDValue SplattedVal = DAG.getSplatVector(ContainerVT, DL, Val); 2253 SDValue SplattedIdx = 2254 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, Idx, VL); 2255 2256 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL); 2257 auto SetCCVT = 2258 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ContainerVT); 2259 SDValue SelectCond = 2260 DAG.getNode(RISCVISD::SETCC_VL, DL, SetCCVT, VID, SplattedIdx, 2261 DAG.getCondCode(ISD::SETEQ), Mask, VL); 2262 SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, 2263 SelectCond, SplattedVal, Vec, VL); 2264 if (!VecVT.isFixedLengthVector()) 2265 return Select; 2266 return convertFromScalableVector(VecVT, Select, DAG, Subtarget); 2267 } 2268 2269 // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then 2270 // extract the first element: (extractelt (slidedown vec, idx), 0). For integer 2271 // types this is done using VMV_X_S to allow us to glean information about the 2272 // sign bits of the result. 2273 SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, 2274 SelectionDAG &DAG) const { 2275 SDLoc DL(Op); 2276 SDValue Idx = Op.getOperand(1); 2277 SDValue Vec = Op.getOperand(0); 2278 EVT EltVT = Op.getValueType(); 2279 MVT VecVT = Vec.getSimpleValueType(); 2280 MVT XLenVT = Subtarget.getXLenVT(); 2281 2282 // If this is a fixed vector, we need to convert it to a scalable vector. 2283 MVT ContainerVT = VecVT; 2284 if (VecVT.isFixedLengthVector()) { 2285 ContainerVT = getContainerForFixedLengthVector(VecVT); 2286 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 2287 } 2288 2289 // If the index is 0, the vector is already in the right position. 2290 if (!isNullConstant(Idx)) { 2291 // Use a VL of 1 to avoid processing more elements than we need. 2292 SDValue VL = DAG.getConstant(1, DL, XLenVT); 2293 MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 2294 SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); 2295 Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT, 2296 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL); 2297 } 2298 2299 if (!EltVT.isInteger()) { 2300 // Floating-point extracts are handled in TableGen. 
2301 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, 2302 DAG.getConstant(0, DL, XLenVT)); 2303 } 2304 2305 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec); 2306 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0); 2307 } 2308 2309 // Called by type legalization to handle splat of i64 on RV32. 2310 // FIXME: We can optimize this when the type has sign or zero bits in one 2311 // of the halves. 2312 static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Scalar, 2313 SDValue VL, SelectionDAG &DAG) { 2314 SDValue ThirtyTwoV = DAG.getConstant(32, DL, VT); 2315 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar, 2316 DAG.getConstant(0, DL, MVT::i32)); 2317 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar, 2318 DAG.getConstant(1, DL, MVT::i32)); 2319 2320 // vmv.v.x vX, hi 2321 // vsll.vx vX, vX, /*32*/ 2322 // vmv.v.x vY, lo 2323 // vsll.vx vY, vY, /*32*/ 2324 // vsrl.vx vY, vY, /*32*/ 2325 // vor.vv vX, vX, vY 2326 MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount()); 2327 SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); 2328 Lo = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Lo, VL); 2329 Lo = DAG.getNode(RISCVISD::SHL_VL, DL, VT, Lo, ThirtyTwoV, Mask, VL); 2330 Lo = DAG.getNode(RISCVISD::SRL_VL, DL, VT, Lo, ThirtyTwoV, Mask, VL); 2331 2332 Hi = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Hi, VL); 2333 Hi = DAG.getNode(RISCVISD::SHL_VL, DL, VT, Hi, ThirtyTwoV, Mask, VL); 2334 2335 return DAG.getNode(RISCVISD::OR_VL, DL, VT, Lo, Hi, Mask, VL); 2336 } 2337 2338 // Some RVV intrinsics may claim that they want an integer operand to be 2339 // promoted or expanded. 2340 static SDValue lowerVectorIntrinsicSplats(SDValue Op, SelectionDAG &DAG, 2341 const RISCVSubtarget &Subtarget) { 2342 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || 2343 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) && 2344 "Unexpected opcode"); 2345 2346 if (!Subtarget.hasStdExtV()) 2347 return SDValue(); 2348 2349 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN; 2350 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0); 2351 SDLoc DL(Op); 2352 2353 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II = 2354 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo); 2355 if (!II || !II->SplatOperand) 2356 return SDValue(); 2357 2358 unsigned SplatOp = II->SplatOperand + HasChain; 2359 assert(SplatOp < Op.getNumOperands()); 2360 2361 SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end()); 2362 SDValue &ScalarOp = Operands[SplatOp]; 2363 MVT OpVT = ScalarOp.getSimpleValueType(); 2364 MVT VT = Op.getSimpleValueType(); 2365 MVT XLenVT = Subtarget.getXLenVT(); 2366 2367 // If this isn't a scalar, or its type is XLenVT we're done. 2368 if (!OpVT.isScalarInteger() || OpVT == XLenVT) 2369 return SDValue(); 2370 2371 // Simplest case is that the operand needs to be promoted to XLenVT. 2372 if (OpVT.bitsLT(XLenVT)) { 2373 // If the operand is a constant, sign extend to increase our chances 2374 // of being able to use a .vi instruction. ANY_EXTEND would become a 2375 // a zero extend and the simm5 check in isel would fail. 2376 // FIXME: Should we ignore the upper bits in isel instead? 2377 unsigned ExtOpc = 2378 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND; 2379 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp); 2380 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands); 2381 } 2382 2383 // The more complex case is when the scalar is larger than XLenVT. 
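// For illustration: on RV32 this means an i64 scalar operand. A sign-extended
// 32-bit constant is simply truncated (the instruction re-extends it since
// SEW > XLEN); anything else is rebuilt as a vector splat from its two 32-bit
// halves via splatSplitI64WithVL below.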
2384 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 && 2385 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!"); 2386 2387 // If this is a sign-extended 32-bit constant, we can truncate it and rely 2388 // on the instruction to sign-extend since SEW>XLEN. 2389 if (auto *CVal = dyn_cast<ConstantSDNode>(ScalarOp)) { 2390 if (isInt<32>(CVal->getSExtValue())) { 2391 ScalarOp = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32); 2392 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands); 2393 } 2394 } 2395 2396 // We need to convert the scalar to a splat vector. 2397 // FIXME: Can we implicitly truncate the scalar if it is known to 2398 // be sign extended? 2399 // VL should be the last operand. 2400 SDValue VL = Op.getOperand(Op.getNumOperands() - 1); 2401 assert(VL.getValueType() == XLenVT); 2402 ScalarOp = splatSplitI64WithVL(DL, VT, ScalarOp, VL, DAG); 2403 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands); 2404 } 2405 2406 SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, 2407 SelectionDAG &DAG) const { 2408 unsigned IntNo = Op.getConstantOperandVal(0); 2409 SDLoc DL(Op); 2410 MVT XLenVT = Subtarget.getXLenVT(); 2411 2412 switch (IntNo) { 2413 default: 2414 break; // Don't custom lower most intrinsics. 2415 case Intrinsic::thread_pointer: { 2416 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2417 return DAG.getRegister(RISCV::X4, PtrVT); 2418 } 2419 case Intrinsic::riscv_vmv_x_s: 2420 assert(Op.getValueType() == XLenVT && "Unexpected VT!"); 2421 return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(), 2422 Op.getOperand(1)); 2423 case Intrinsic::riscv_vmv_v_x: { 2424 SDValue Scalar = Op.getOperand(1); 2425 if (Scalar.getValueType().bitsLE(XLenVT)) { 2426 unsigned ExtOpc = 2427 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND; 2428 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar); 2429 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, Op.getValueType(), Scalar, 2430 Op.getOperand(2)); 2431 } 2432 2433 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!"); 2434 2435 // If this is a sign-extended 32-bit constant, we can truncate it and rely 2436 // on the instruction to sign-extend since SEW>XLEN. 2437 if (auto *CVal = dyn_cast<ConstantSDNode>(Scalar)) { 2438 if (isInt<32>(CVal->getSExtValue())) 2439 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, Op.getValueType(), 2440 DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32), 2441 Op.getOperand(2)); 2442 } 2443 2444 // Otherwise use the more complicated splatting algorithm. 2445 return splatSplitI64WithVL(DL, Op.getSimpleValueType(), Scalar, 2446 Op.getOperand(2), DAG); 2447 } 2448 case Intrinsic::riscv_vfmv_v_f: 2449 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(), 2450 Op.getOperand(1), Op.getOperand(2)); 2451 case Intrinsic::riscv_vmv_s_x: { 2452 SDValue Scalar = Op.getOperand(2); 2453 2454 if (Scalar.getValueType().bitsLE(XLenVT)) { 2455 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar); 2456 return DAG.getNode(RISCVISD::VMV_S_XF_VL, DL, Op.getValueType(), 2457 Op.getOperand(1), Scalar, Op.getOperand(3)); 2458 } 2459 2460 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!"); 2461 2462 // This is an i64 value that lives in two scalar registers. We have to 2463 // insert this in a convoluted way. First we build vXi64 splat containing 2464 // the/ two values that we assemble using some bit math. Next we'll use 2465 // vid.v and vmseq to build a mask with bit 0 set. 
Then we'll use that mask 2466 // to merge element 0 from our splat into the source vector. 2467 // FIXME: This is probably not the best way to do this, but it is 2468 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting 2469 // point. 2470 // vmv.v.x vX, hi 2471 // vsll.vx vX, vX, /*32*/ 2472 // vmv.v.x vY, lo 2473 // vsll.vx vY, vY, /*32*/ 2474 // vsrl.vx vY, vY, /*32*/ 2475 // vor.vv vX, vX, vY 2476 // 2477 // vid.v vVid 2478 // vmseq.vx mMask, vVid, 0 2479 // vmerge.vvm vDest, vSrc, vVal, mMask 2480 MVT VT = Op.getSimpleValueType(); 2481 SDValue Vec = Op.getOperand(1); 2482 SDValue VL = Op.getOperand(3); 2483 2484 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, Scalar, VL, DAG); 2485 SDValue SplattedIdx = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, 2486 DAG.getConstant(0, DL, MVT::i32), VL); 2487 2488 MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount()); 2489 SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); 2490 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL); 2491 SDValue SelectCond = 2492 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, VID, SplattedIdx, 2493 DAG.getCondCode(ISD::SETEQ), Mask, VL); 2494 return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal, 2495 Vec, VL); 2496 } 2497 } 2498 2499 return lowerVectorIntrinsicSplats(Op, DAG, Subtarget); 2500 } 2501 2502 SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, 2503 SelectionDAG &DAG) const { 2504 return lowerVectorIntrinsicSplats(Op, DAG, Subtarget); 2505 } 2506 2507 static MVT getLMUL1VT(MVT VT) { 2508 assert(VT.getVectorElementType().getSizeInBits() <= 64 && 2509 "Unexpected vector MVT"); 2510 return MVT::getScalableVectorVT( 2511 VT.getVectorElementType(), 2512 RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits()); 2513 } 2514 2515 static unsigned getRVVReductionOp(unsigned ISDOpcode) { 2516 switch (ISDOpcode) { 2517 default: 2518 llvm_unreachable("Unhandled reduction"); 2519 case ISD::VECREDUCE_ADD: 2520 return RISCVISD::VECREDUCE_ADD_VL; 2521 case ISD::VECREDUCE_UMAX: 2522 return RISCVISD::VECREDUCE_UMAX_VL; 2523 case ISD::VECREDUCE_SMAX: 2524 return RISCVISD::VECREDUCE_SMAX_VL; 2525 case ISD::VECREDUCE_UMIN: 2526 return RISCVISD::VECREDUCE_UMIN_VL; 2527 case ISD::VECREDUCE_SMIN: 2528 return RISCVISD::VECREDUCE_SMIN_VL; 2529 case ISD::VECREDUCE_AND: 2530 return RISCVISD::VECREDUCE_AND_VL; 2531 case ISD::VECREDUCE_OR: 2532 return RISCVISD::VECREDUCE_OR_VL; 2533 case ISD::VECREDUCE_XOR: 2534 return RISCVISD::VECREDUCE_XOR_VL; 2535 } 2536 } 2537 2538 // Take a (supported) standard ISD reduction opcode and transform it to a RISCV 2539 // reduction opcode. Note that this returns a vector type, which must be 2540 // further processed to access the scalar result in element 0. 2541 SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op, 2542 SelectionDAG &DAG) const { 2543 SDLoc DL(Op); 2544 SDValue Vec = Op.getOperand(0); 2545 EVT VecEVT = Vec.getValueType(); 2546 2547 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode()); 2548 2549 // Due to ordering in legalize types we may have a vector type that needs to 2550 // be split. Do that manually so we can get down to a legal type. 2551 while (getTypeAction(*DAG.getContext(), VecEVT) == 2552 TargetLowering::TypeSplitVector) { 2553 SDValue Lo, Hi; 2554 std::tie(Lo, Hi) = DAG.SplitVector(Vec, DL); 2555 VecEVT = Lo.getValueType(); 2556 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi); 2557 } 2558 2559 // TODO: The type may need to be widened rather than split. 
Or widened before 2560 // it can be split. 2561 if (!isTypeLegal(VecEVT)) 2562 return SDValue(); 2563 2564 MVT VecVT = VecEVT.getSimpleVT(); 2565 MVT VecEltVT = VecVT.getVectorElementType(); 2566 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode()); 2567 2568 MVT ContainerVT = VecVT; 2569 if (VecVT.isFixedLengthVector()) { 2570 ContainerVT = getContainerForFixedLengthVector(VecVT); 2571 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 2572 } 2573 2574 MVT M1VT = getLMUL1VT(ContainerVT); 2575 2576 SDValue Mask, VL; 2577 std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 2578 2579 // FIXME: This is a VLMAX splat which might be too large and can prevent 2580 // vsetvli removal. 2581 SDValue NeutralElem = 2582 DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags()); 2583 SDValue IdentitySplat = DAG.getSplatVector(M1VT, DL, NeutralElem); 2584 SDValue Reduction = 2585 DAG.getNode(RVVOpcode, DL, M1VT, Vec, IdentitySplat, Mask, VL); 2586 SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction, 2587 DAG.getConstant(0, DL, Subtarget.getXLenVT())); 2588 return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType()); 2589 } 2590 2591 // Given a reduction op, this function returns the matching reduction opcode, 2592 // the vector SDValue and the scalar SDValue required to lower this to a 2593 // RISCVISD node. 2594 static std::tuple<unsigned, SDValue, SDValue> 2595 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) { 2596 SDLoc DL(Op); 2597 switch (Op.getOpcode()) { 2598 default: 2599 llvm_unreachable("Unhandled reduction"); 2600 case ISD::VECREDUCE_FADD: 2601 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), 2602 DAG.getConstantFP(0.0, DL, EltVT)); 2603 case ISD::VECREDUCE_SEQ_FADD: 2604 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1), 2605 Op.getOperand(0)); 2606 } 2607 } 2608 2609 SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op, 2610 SelectionDAG &DAG) const { 2611 SDLoc DL(Op); 2612 MVT VecEltVT = Op.getSimpleValueType(); 2613 2614 unsigned RVVOpcode; 2615 SDValue VectorVal, ScalarVal; 2616 std::tie(RVVOpcode, VectorVal, ScalarVal) = 2617 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT); 2618 MVT VecVT = VectorVal.getSimpleValueType(); 2619 2620 MVT ContainerVT = VecVT; 2621 if (VecVT.isFixedLengthVector()) { 2622 ContainerVT = getContainerForFixedLengthVector(VecVT); 2623 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget); 2624 } 2625 2626 MVT M1VT = getLMUL1VT(VectorVal.getSimpleValueType()); 2627 2628 SDValue Mask, VL; 2629 std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 2630 2631 // FIXME: This is a VLMAX splat which might be too large and can prevent 2632 // vsetvli removal. 
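// For illustration: f32 = vecreduce_fadd nxv4f32 %v becomes roughly: splat the
// 0.0 start value into an LMUL=1 vector, perform VECREDUCE_FADD_VL over the
// source, then extract element 0 of the result.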
2633 SDValue ScalarSplat = DAG.getSplatVector(M1VT, DL, ScalarVal); 2634 SDValue Reduction = 2635 DAG.getNode(RVVOpcode, DL, M1VT, VectorVal, ScalarSplat, Mask, VL); 2636 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction, 2637 DAG.getConstant(0, DL, Subtarget.getXLenVT())); 2638 } 2639 2640 SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op, 2641 SelectionDAG &DAG) const { 2642 SDValue Vec = Op.getOperand(0); 2643 SDValue SubVec = Op.getOperand(1); 2644 MVT VecVT = Vec.getSimpleValueType(); 2645 MVT SubVecVT = SubVec.getSimpleValueType(); 2646 2647 SDLoc DL(Op); 2648 MVT XLenVT = Subtarget.getXLenVT(); 2649 unsigned OrigIdx = Op.getConstantOperandVal(2); 2650 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); 2651 2652 // We don't have the ability to slide mask vectors up indexed by their i1 2653 // elements; the smallest we can do is i8. Often we are able to bitcast to 2654 // equivalent i8 vectors. Note that when inserting a fixed-length vector 2655 // into a scalable one, we might not necessarily have enough scalable 2656 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid. 2657 if (SubVecVT.getVectorElementType() == MVT::i1 && 2658 (OrigIdx != 0 || !Vec.isUndef())) { 2659 if (VecVT.getVectorMinNumElements() >= 8 && 2660 SubVecVT.getVectorMinNumElements() >= 8) { 2661 assert(OrigIdx % 8 == 0 && "Invalid index"); 2662 assert(VecVT.getVectorMinNumElements() % 8 == 0 && 2663 SubVecVT.getVectorMinNumElements() % 8 == 0 && 2664 "Unexpected mask vector lowering"); 2665 OrigIdx /= 8; 2666 SubVecVT = 2667 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8, 2668 SubVecVT.isScalableVector()); 2669 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8, 2670 VecVT.isScalableVector()); 2671 Vec = DAG.getBitcast(VecVT, Vec); 2672 SubVec = DAG.getBitcast(SubVecVT, SubVec); 2673 } else { 2674 // We can't slide this mask vector up indexed by its i1 elements. 2675 // This poses a problem when we wish to insert a scalable vector which 2676 // can't be re-expressed as a larger type. Just choose the slow path and 2677 // extend to a larger type, then truncate back down. 2678 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8); 2679 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8); 2680 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec); 2681 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec); 2682 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec, 2683 Op.getOperand(2)); 2684 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT); 2685 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE); 2686 } 2687 } 2688 2689 // If the subvector vector is a fixed-length type, we cannot use subregister 2690 // manipulation to simplify the codegen; we don't know which register of a 2691 // LMUL group contains the specific subvector as we only know the minimum 2692 // register size. Therefore we must slide the vector group up the full 2693 // amount. 
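// For illustration: inserting a v2i32 subvector at index 2 of a v8i32 vector
// becomes roughly a VSLIDEUP_VL by 2 with VL = 4 (offset + subvector length),
// so only the element positions being written are disturbed.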
2694 if (SubVecVT.isFixedLengthVector()) { 2695 if (OrigIdx == 0 && Vec.isUndef()) 2696 return Op; 2697 MVT ContainerVT = VecVT; 2698 if (VecVT.isFixedLengthVector()) { 2699 ContainerVT = getContainerForFixedLengthVector(VecVT); 2700 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 2701 } 2702 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, 2703 DAG.getUNDEF(ContainerVT), SubVec, 2704 DAG.getConstant(0, DL, XLenVT)); 2705 SDValue Mask = 2706 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first; 2707 // Set the vector length to only the number of elements we care about. Note 2708 // that for slideup this includes the offset. 2709 SDValue VL = 2710 DAG.getConstant(OrigIdx + SubVecVT.getVectorNumElements(), DL, XLenVT); 2711 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT); 2712 SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec, 2713 SubVec, SlideupAmt, Mask, VL); 2714 if (!VecVT.isFixedLengthVector()) 2715 return Slideup; 2716 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget); 2717 } 2718 2719 unsigned SubRegIdx, RemIdx; 2720 std::tie(SubRegIdx, RemIdx) = 2721 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 2722 VecVT, SubVecVT, OrigIdx, TRI); 2723 2724 RISCVVLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT); 2725 bool IsSubVecPartReg = SubVecLMUL == RISCVVLMUL::LMUL_F2 || 2726 SubVecLMUL == RISCVVLMUL::LMUL_F4 || 2727 SubVecLMUL == RISCVVLMUL::LMUL_F8; 2728 2729 // 1. If the Idx has been completely eliminated and this subvector's size is 2730 // a vector register or a multiple thereof, or the surrounding elements are 2731 // undef, then this is a subvector insert which naturally aligns to a vector 2732 // register. These can easily be handled using subregister manipulation. 2733 // 2. If the subvector is smaller than a vector register, then the insertion 2734 // must preserve the undisturbed elements of the register. We do this by 2735 // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type 2736 // (which resolves to a subregister copy), performing a VSLIDEUP to place the 2737 // subvector within the vector register, and an INSERT_SUBVECTOR of that 2738 // LMUL=1 type back into the larger vector (resolving to another subregister 2739 // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type 2740 // to avoid allocating a large register group to hold our subvector. 2741 if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef())) 2742 return Op; 2743 2744 // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements 2745 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy 2746 // (in our case undisturbed). This means we can set up a subvector insertion 2747 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the 2748 // size of the subvector. 2749 MVT InterSubVT = VecVT; 2750 SDValue AlignedExtract = Vec; 2751 unsigned AlignedIdx = OrigIdx - RemIdx; 2752 if (VecVT.bitsGT(getLMUL1VT(VecVT))) { 2753 InterSubVT = getLMUL1VT(VecVT); 2754 // Extract a subvector equal to the nearest full vector register type. This 2755 // should resolve to a EXTRACT_SUBREG instruction. 2756 AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec, 2757 DAG.getConstant(AlignedIdx, DL, XLenVT)); 2758 } 2759 2760 SDValue SlideupAmt = DAG.getConstant(RemIdx, DL, XLenVT); 2761 // For scalable vectors this must be further multiplied by vscale. 
2762 SlideupAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlideupAmt); 2763 2764 SDValue Mask, VL; 2765 std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget); 2766 2767 // Construct the vector length corresponding to RemIdx + length(SubVecVT). 2768 VL = DAG.getConstant(SubVecVT.getVectorMinNumElements(), DL, XLenVT); 2769 VL = DAG.getNode(ISD::VSCALE, DL, XLenVT, VL); 2770 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL); 2771 2772 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT, 2773 DAG.getUNDEF(InterSubVT), SubVec, 2774 DAG.getConstant(0, DL, XLenVT)); 2775 2776 SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, InterSubVT, 2777 AlignedExtract, SubVec, SlideupAmt, Mask, VL); 2778 2779 // If required, insert this subvector back into the correct vector register. 2780 // This should resolve to an INSERT_SUBREG instruction. 2781 if (VecVT.bitsGT(InterSubVT)) 2782 Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, Slideup, 2783 DAG.getConstant(AlignedIdx, DL, XLenVT)); 2784 2785 // We might have bitcast from a mask type: cast back to the original type if 2786 // required. 2787 return DAG.getBitcast(Op.getSimpleValueType(), Slideup); 2788 } 2789 2790 SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op, 2791 SelectionDAG &DAG) const { 2792 SDValue Vec = Op.getOperand(0); 2793 MVT SubVecVT = Op.getSimpleValueType(); 2794 MVT VecVT = Vec.getSimpleValueType(); 2795 2796 SDLoc DL(Op); 2797 MVT XLenVT = Subtarget.getXLenVT(); 2798 unsigned OrigIdx = Op.getConstantOperandVal(1); 2799 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); 2800 2801 // We don't have the ability to slide mask vectors down indexed by their i1 2802 // elements; the smallest we can do is i8. Often we are able to bitcast to 2803 // equivalent i8 vectors. Note that when extracting a fixed-length vector 2804 // from a scalable one, we might not necessarily have enough scalable 2805 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid. 2806 if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) { 2807 if (VecVT.getVectorMinNumElements() >= 8 && 2808 SubVecVT.getVectorMinNumElements() >= 8) { 2809 assert(OrigIdx % 8 == 0 && "Invalid index"); 2810 assert(VecVT.getVectorMinNumElements() % 8 == 0 && 2811 SubVecVT.getVectorMinNumElements() % 8 == 0 && 2812 "Unexpected mask vector lowering"); 2813 OrigIdx /= 8; 2814 SubVecVT = 2815 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8, 2816 SubVecVT.isScalableVector()); 2817 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8, 2818 VecVT.isScalableVector()); 2819 Vec = DAG.getBitcast(VecVT, Vec); 2820 } else { 2821 // We can't slide this mask vector down, indexed by its i1 elements. 2822 // This poses a problem when we wish to extract a scalable vector which 2823 // can't be re-expressed as a larger type. Just choose the slow path and 2824 // extend to a larger type, then truncate back down. 2825 // TODO: We could probably improve this when extracting certain fixed 2826 // from fixed, where we can extract as i8 and shift the correct element 2827 // right to reach the desired subvector? 
2828 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8); 2829 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8); 2830 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec); 2831 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec, 2832 Op.getOperand(1)); 2833 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT); 2834 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE); 2835 } 2836 } 2837 2838 // If the subvector vector is a fixed-length type, we cannot use subregister 2839 // manipulation to simplify the codegen; we don't know which register of a 2840 // LMUL group contains the specific subvector as we only know the minimum 2841 // register size. Therefore we must slide the vector group down the full 2842 // amount. 2843 if (SubVecVT.isFixedLengthVector()) { 2844 // With an index of 0 this is a cast-like subvector, which can be performed 2845 // with subregister operations. 2846 if (OrigIdx == 0) 2847 return Op; 2848 MVT ContainerVT = VecVT; 2849 if (VecVT.isFixedLengthVector()) { 2850 ContainerVT = getContainerForFixedLengthVector(VecVT); 2851 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 2852 } 2853 SDValue Mask = 2854 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first; 2855 // Set the vector length to only the number of elements we care about. This 2856 // avoids sliding down elements we're going to discard straight away. 2857 SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT); 2858 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT); 2859 SDValue Slidedown = 2860 DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT, 2861 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL); 2862 // Now we can use a cast-like subvector extract to get the result. 2863 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown, 2864 DAG.getConstant(0, DL, XLenVT)); 2865 } 2866 2867 unsigned SubRegIdx, RemIdx; 2868 std::tie(SubRegIdx, RemIdx) = 2869 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 2870 VecVT, SubVecVT, OrigIdx, TRI); 2871 2872 // If the Idx has been completely eliminated then this is a subvector extract 2873 // which naturally aligns to a vector register. These can easily be handled 2874 // using subregister manipulation. 2875 if (RemIdx == 0) 2876 return Op; 2877 2878 // Else we must shift our vector register directly to extract the subvector. 2879 // Do this using VSLIDEDOWN. 2880 2881 // If the vector type is an LMUL-group type, extract a subvector equal to the 2882 // nearest full vector register type. This should resolve to a EXTRACT_SUBREG 2883 // instruction. 2884 MVT InterSubVT = VecVT; 2885 if (VecVT.bitsGT(getLMUL1VT(VecVT))) { 2886 InterSubVT = getLMUL1VT(VecVT); 2887 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec, 2888 DAG.getConstant(OrigIdx - RemIdx, DL, XLenVT)); 2889 } 2890 2891 // Slide this vector register down by the desired number of elements in order 2892 // to place the desired subvector starting at element 0. 2893 SDValue SlidedownAmt = DAG.getConstant(RemIdx, DL, XLenVT); 2894 // For scalable vectors this must be further multiplied by vscale. 
2895 SlidedownAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlidedownAmt);
2896
2897 SDValue Mask, VL;
2898 std::tie(Mask, VL) = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
2899 SDValue Slidedown =
2900 DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, InterSubVT,
2901 DAG.getUNDEF(InterSubVT), Vec, SlidedownAmt, Mask, VL);
2902
2903 // Now the vector is in the right position, extract our final subvector. This
2904 // should resolve to a COPY.
2905 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
2906 DAG.getConstant(0, DL, XLenVT));
2907
2908 // We might have bitcast from a mask type: cast back to the original type if
2909 // required.
2910 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
2911 }
2912
2913 // Implement vector_reverse using vrgather.vv with indices determined by
2914 // subtracting the id of each element from (VLMAX-1). This will convert
2915 // the indices like so:
2916 // (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
2917 // TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
2918 SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
2919 SelectionDAG &DAG) const {
2920 SDLoc DL(Op);
2921 MVT VecVT = Op.getSimpleValueType();
2922 unsigned EltSize = VecVT.getScalarSizeInBits();
2923 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2924
2925 unsigned MaxVLMAX = 0;
2926 unsigned VectorBitsMax = Subtarget.getMaxRVVVectorSizeInBits();
2927 if (VectorBitsMax != 0)
2928 MaxVLMAX = ((VectorBitsMax / EltSize) * MinSize) / RISCV::RVVBitsPerBlock;
2929
2930 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
2931 MVT IntVT = VecVT.changeVectorElementTypeToInteger();
2932
2933 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
2934 // to use vrgatherei16.vv.
2935 // TODO: It's also possible to use vrgatherei16.vv for other types to
2936 // decrease register width for the index calculation.
2937 if ((MaxVLMAX == 0 || MaxVLMAX > 256) && EltSize == 8) {
2938 // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
2939 // Reverse each half, then reassemble them in reverse order.
2940 // NOTE: It's also possible that, after splitting, VLMAX no longer
2941 // requires vrgatherei16.vv.
2942 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
2943 SDValue Lo, Hi;
2944 std::tie(Lo, Hi) = DAG.SplitVectorOperand(Op.getNode(), 0);
2945 EVT LoVT, HiVT;
2946 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT);
2947 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
2948 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
2949 // Reassemble the low and high pieces reversed.
2950 // FIXME: This is a CONCAT_VECTORS.
2951 SDValue Res =
2952 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
2953 DAG.getIntPtrConstant(0, DL));
2954 return DAG.getNode(
2955 ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
2956 DAG.getIntPtrConstant(LoVT.getVectorMinNumElements(), DL));
2957 }
2958
2959 // Just promote the int type to i16 which will double the LMUL.
2960 IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount());
2961 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
2962 }
2963
2964 MVT XLenVT = Subtarget.getXLenVT();
2965 SDValue Mask, VL;
2966 std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
2967
2968 // Calculate VLMAX-1 for the desired SEW.
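// VLMAX for a scalable type is vscale * (minimum element count), so the code
// below computes (MinElts * vscale) - 1.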
2969 unsigned MinElts = VecVT.getVectorMinNumElements(); 2970 SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT, 2971 DAG.getConstant(MinElts, DL, XLenVT)); 2972 SDValue VLMinus1 = 2973 DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DAG.getConstant(1, DL, XLenVT)); 2974 2975 // Splat VLMAX-1 taking care to handle SEW==64 on RV32. 2976 bool IsRV32E64 = 2977 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64; 2978 SDValue SplatVL; 2979 if (!IsRV32E64) 2980 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1); 2981 else 2982 SplatVL = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, IntVT, VLMinus1); 2983 2984 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL); 2985 SDValue Indices = 2986 DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID, Mask, VL); 2987 2988 return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices, Mask, VL); 2989 } 2990 2991 SDValue 2992 RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op, 2993 SelectionDAG &DAG) const { 2994 auto *Load = cast<LoadSDNode>(Op); 2995 2996 SDLoc DL(Op); 2997 MVT VT = Op.getSimpleValueType(); 2998 MVT ContainerVT = getContainerForFixedLengthVector(VT); 2999 3000 SDValue VL = 3001 DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT()); 3002 3003 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 3004 SDValue NewLoad = DAG.getMemIntrinsicNode( 3005 RISCVISD::VLE_VL, DL, VTs, {Load->getChain(), Load->getBasePtr(), VL}, 3006 Load->getMemoryVT(), Load->getMemOperand()); 3007 3008 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget); 3009 return DAG.getMergeValues({Result, Load->getChain()}, DL); 3010 } 3011 3012 SDValue 3013 RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op, 3014 SelectionDAG &DAG) const { 3015 auto *Store = cast<StoreSDNode>(Op); 3016 3017 SDLoc DL(Op); 3018 MVT VT = Store->getValue().getSimpleValueType(); 3019 3020 // FIXME: We probably need to zero any extra bits in a byte for mask stores. 3021 // This is tricky to do. 
3022 3023 MVT ContainerVT = getContainerForFixedLengthVector(VT); 3024 3025 SDValue VL = 3026 DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT()); 3027 3028 SDValue NewValue = 3029 convertToScalableVector(ContainerVT, Store->getValue(), DAG, Subtarget); 3030 return DAG.getMemIntrinsicNode( 3031 RISCVISD::VSE_VL, DL, DAG.getVTList(MVT::Other), 3032 {Store->getChain(), NewValue, Store->getBasePtr(), VL}, 3033 Store->getMemoryVT(), Store->getMemOperand()); 3034 } 3035 3036 SDValue 3037 RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op, 3038 SelectionDAG &DAG) const { 3039 MVT InVT = Op.getOperand(0).getSimpleValueType(); 3040 MVT ContainerVT = getContainerForFixedLengthVector(InVT); 3041 3042 MVT VT = Op.getSimpleValueType(); 3043 3044 SDValue Op1 = 3045 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget); 3046 SDValue Op2 = 3047 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget); 3048 3049 SDLoc DL(Op); 3050 SDValue VL = 3051 DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT()); 3052 3053 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); 3054 3055 bool Invert = false; 3056 Optional<unsigned> LogicOpc; 3057 if (ContainerVT.isFloatingPoint()) { 3058 bool Swap = false; 3059 switch (CC) { 3060 default: 3061 break; 3062 case ISD::SETULE: 3063 case ISD::SETULT: 3064 Swap = true; 3065 LLVM_FALLTHROUGH; 3066 case ISD::SETUGE: 3067 case ISD::SETUGT: 3068 CC = getSetCCInverse(CC, ContainerVT); 3069 Invert = true; 3070 break; 3071 case ISD::SETOGE: 3072 case ISD::SETOGT: 3073 case ISD::SETGE: 3074 case ISD::SETGT: 3075 Swap = true; 3076 break; 3077 case ISD::SETUEQ: 3078 // Use !((OLT Op1, Op2) || (OLT Op2, Op1)) 3079 Invert = true; 3080 LogicOpc = RISCVISD::VMOR_VL; 3081 CC = ISD::SETOLT; 3082 break; 3083 case ISD::SETONE: 3084 // Use ((OLT Op1, Op2) || (OLT Op2, Op1)) 3085 LogicOpc = RISCVISD::VMOR_VL; 3086 CC = ISD::SETOLT; 3087 break; 3088 case ISD::SETO: 3089 // Use (OEQ Op1, Op1) && (OEQ Op2, Op2) 3090 LogicOpc = RISCVISD::VMAND_VL; 3091 CC = ISD::SETOEQ; 3092 break; 3093 case ISD::SETUO: 3094 // Use (UNE Op1, Op1) || (UNE Op2, Op2) 3095 LogicOpc = RISCVISD::VMOR_VL; 3096 CC = ISD::SETUNE; 3097 break; 3098 } 3099 3100 if (Swap) { 3101 CC = getSetCCSwappedOperands(CC); 3102 std::swap(Op1, Op2); 3103 } 3104 } 3105 3106 MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 3107 SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); 3108 3109 // There are 3 cases we need to emit. 3110 // 1. For (OEQ Op1, Op1) && (OEQ Op2, Op2) or (UNE Op1, Op1) || (UNE Op2, Op2) 3111 // we need to compare each operand with itself. 3112 // 2. For (OLT Op1, Op2) || (OLT Op2, Op1) we need to compare Op1 and Op2 in 3113 // both orders. 3114 // 3. For any other case we just need one compare with Op1 and Op2. 
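// For example, case 2 covers SETONE, which is emitted as
// (VMOR_VL (SETCC_VL Op1, Op2, olt), (SETCC_VL Op2, Op1, olt)).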
3115 SDValue Cmp; 3116 if (LogicOpc && (CC == ISD::SETOEQ || CC == ISD::SETUNE)) { 3117 Cmp = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op1, Op1, 3118 DAG.getCondCode(CC), Mask, VL); 3119 SDValue Cmp2 = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op2, Op2, 3120 DAG.getCondCode(CC), Mask, VL); 3121 Cmp = DAG.getNode(*LogicOpc, DL, MaskVT, Cmp, Cmp2, VL); 3122 } else { 3123 Cmp = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op1, Op2, 3124 DAG.getCondCode(CC), Mask, VL); 3125 if (LogicOpc) { 3126 SDValue Cmp2 = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op2, Op1, 3127 DAG.getCondCode(CC), Mask, VL); 3128 Cmp = DAG.getNode(*LogicOpc, DL, MaskVT, Cmp, Cmp2, VL); 3129 } 3130 } 3131 3132 if (Invert) { 3133 SDValue AllOnes = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); 3134 Cmp = DAG.getNode(RISCVISD::VMXOR_VL, DL, MaskVT, Cmp, AllOnes, VL); 3135 } 3136 3137 return convertFromScalableVector(VT, Cmp, DAG, Subtarget); 3138 } 3139 3140 SDValue RISCVTargetLowering::lowerFixedLengthVectorLogicOpToRVV( 3141 SDValue Op, SelectionDAG &DAG, unsigned MaskOpc, unsigned VecOpc) const { 3142 MVT VT = Op.getSimpleValueType(); 3143 3144 if (VT.getVectorElementType() == MVT::i1) 3145 return lowerToScalableOp(Op, DAG, MaskOpc, /*HasMask*/ false); 3146 3147 return lowerToScalableOp(Op, DAG, VecOpc, /*HasMask*/ true); 3148 } 3149 3150 // Lower vector ABS to smax(X, sub(0, X)). 3151 SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const { 3152 SDLoc DL(Op); 3153 MVT VT = Op.getSimpleValueType(); 3154 SDValue X = Op.getOperand(0); 3155 3156 assert(VT.isFixedLengthVector() && "Unexpected type"); 3157 3158 MVT ContainerVT = 3159 RISCVTargetLowering::getContainerForFixedLengthVector(DAG, VT, Subtarget); 3160 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget); 3161 3162 SDValue Mask, VL; 3163 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 3164 3165 SDValue SplatZero = 3166 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 3167 DAG.getConstant(0, DL, Subtarget.getXLenVT())); 3168 SDValue NegX = 3169 DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X, Mask, VL); 3170 SDValue Max = 3171 DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX, Mask, VL); 3172 3173 return convertFromScalableVector(VT, Max, DAG, Subtarget); 3174 } 3175 3176 SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV( 3177 SDValue Op, SelectionDAG &DAG) const { 3178 MVT VT = Op.getSimpleValueType(); 3179 MVT ContainerVT = getContainerForFixedLengthVector(VT); 3180 3181 MVT I1ContainerVT = 3182 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 3183 3184 SDValue CC = 3185 convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget); 3186 SDValue Op1 = 3187 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget); 3188 SDValue Op2 = 3189 convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget); 3190 3191 SDLoc DL(Op); 3192 SDValue Mask, VL; 3193 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 3194 3195 SDValue Select = 3196 DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, Op1, Op2, VL); 3197 3198 return convertFromScalableVector(VT, Select, DAG, Subtarget); 3199 } 3200 3201 SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, SelectionDAG &DAG, 3202 unsigned NewOpc, 3203 bool HasMask) const { 3204 MVT VT = Op.getSimpleValueType(); 3205 assert(useRVVForFixedLengthVectorVT(VT) && 3206 "Only expected to lower fixed length vector operation!"); 3207 MVT ContainerVT = 
getContainerForFixedLengthVector(VT);
3208
3209 // Create list of operands by converting existing ones to scalable types.
3210 SmallVector<SDValue, 6> Ops;
3211 for (const SDValue &V : Op->op_values()) {
3212 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
3213
3214 // Pass through non-vector operands.
3215 if (!V.getValueType().isVector()) {
3216 Ops.push_back(V);
3217 continue;
3218 }
3219
3220 // "cast" fixed length vector to a scalable vector.
3221 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
3222 "Only fixed length vectors are supported!");
3223 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
3224 }
3225
3226 SDLoc DL(Op);
3227 SDValue Mask, VL;
3228 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3229 if (HasMask)
3230 Ops.push_back(Mask);
3231 Ops.push_back(VL);
3232
3233 SDValue ScalableRes = DAG.getNode(NewOpc, DL, ContainerVT, Ops);
3234 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
3235 }
3236
3237 // Returns the opcode of the target-specific SDNode that implements the 32-bit
3238 // form of the given Opcode.
3239 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
3240 switch (Opcode) {
3241 default:
3242 llvm_unreachable("Unexpected opcode");
3243 case ISD::SHL:
3244 return RISCVISD::SLLW;
3245 case ISD::SRA:
3246 return RISCVISD::SRAW;
3247 case ISD::SRL:
3248 return RISCVISD::SRLW;
3249 case ISD::SDIV:
3250 return RISCVISD::DIVW;
3251 case ISD::UDIV:
3252 return RISCVISD::DIVUW;
3253 case ISD::UREM:
3254 return RISCVISD::REMUW;
3255 case ISD::ROTL:
3256 return RISCVISD::ROLW;
3257 case ISD::ROTR:
3258 return RISCVISD::RORW;
3259 case RISCVISD::GREVI:
3260 return RISCVISD::GREVIW;
3261 case RISCVISD::GORCI:
3262 return RISCVISD::GORCIW;
3263 }
3264 }
3265
3266 // Converts the given 32-bit operation to a target-specific SelectionDAG node.
3267 // Because i32 isn't a legal type for RV64, these operations would otherwise
3268 // be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
3269 // later on because the fact that the operation was originally of type i32 is
3270 // lost.
3271 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
3272 unsigned ExtOpc = ISD::ANY_EXTEND) {
3273 SDLoc DL(N);
3274 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
3275 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
3276 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
3277 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
3278 // ReplaceNodeResults requires we maintain the same type for the return value.
3279 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
3280 }
3281
3282 // Converts the given 32-bit operation to an i64 operation with sign-extension
3283 // semantics so that fewer sign-extension instructions are needed.
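// For example, on RV64 an (i32 add x, y) becomes
// (trunc (sext_inreg (add (anyext x), (anyext y)), i32)), which can then
// typically be selected as a single ADDW.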
3284 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) { 3285 SDLoc DL(N); 3286 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 3287 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 3288 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1); 3289 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp, 3290 DAG.getValueType(MVT::i32)); 3291 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes); 3292 } 3293 3294 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, 3295 SmallVectorImpl<SDValue> &Results, 3296 SelectionDAG &DAG) const { 3297 SDLoc DL(N); 3298 switch (N->getOpcode()) { 3299 default: 3300 llvm_unreachable("Don't know how to custom type legalize this operation!"); 3301 case ISD::STRICT_FP_TO_SINT: 3302 case ISD::STRICT_FP_TO_UINT: 3303 case ISD::FP_TO_SINT: 3304 case ISD::FP_TO_UINT: { 3305 bool IsStrict = N->isStrictFPOpcode(); 3306 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 3307 "Unexpected custom legalisation"); 3308 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0); 3309 // If the FP type needs to be softened, emit a library call using the 'si' 3310 // version. If we left it to default legalization we'd end up with 'di'. If 3311 // the FP type doesn't need to be softened just let generic type 3312 // legalization promote the result type. 3313 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) != 3314 TargetLowering::TypeSoftenFloat) 3315 return; 3316 RTLIB::Libcall LC; 3317 if (N->getOpcode() == ISD::FP_TO_SINT || 3318 N->getOpcode() == ISD::STRICT_FP_TO_SINT) 3319 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0)); 3320 else 3321 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0)); 3322 MakeLibCallOptions CallOptions; 3323 EVT OpVT = Op0.getValueType(); 3324 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true); 3325 SDValue Chain = IsStrict ? 
N->getOperand(0) : SDValue(); 3326 SDValue Result; 3327 std::tie(Result, Chain) = 3328 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain); 3329 Results.push_back(Result); 3330 if (IsStrict) 3331 Results.push_back(Chain); 3332 break; 3333 } 3334 case ISD::READCYCLECOUNTER: { 3335 assert(!Subtarget.is64Bit() && 3336 "READCYCLECOUNTER only has custom type legalization on riscv32"); 3337 3338 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); 3339 SDValue RCW = 3340 DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0)); 3341 3342 Results.push_back( 3343 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1))); 3344 Results.push_back(RCW.getValue(2)); 3345 break; 3346 } 3347 case ISD::ADD: 3348 case ISD::SUB: 3349 case ISD::MUL: 3350 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 3351 "Unexpected custom legalisation"); 3352 if (N->getOperand(1).getOpcode() == ISD::Constant) 3353 return; 3354 Results.push_back(customLegalizeToWOpWithSExt(N, DAG)); 3355 break; 3356 case ISD::SHL: 3357 case ISD::SRA: 3358 case ISD::SRL: 3359 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 3360 "Unexpected custom legalisation"); 3361 if (N->getOperand(1).getOpcode() == ISD::Constant) 3362 return; 3363 Results.push_back(customLegalizeToWOp(N, DAG)); 3364 break; 3365 case ISD::ROTL: 3366 case ISD::ROTR: 3367 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 3368 "Unexpected custom legalisation"); 3369 Results.push_back(customLegalizeToWOp(N, DAG)); 3370 break; 3371 case ISD::SDIV: 3372 case ISD::UDIV: 3373 case ISD::UREM: { 3374 MVT VT = N->getSimpleValueType(0); 3375 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) && 3376 Subtarget.is64Bit() && Subtarget.hasStdExtM() && 3377 "Unexpected custom legalisation"); 3378 if (N->getOperand(0).getOpcode() == ISD::Constant || 3379 N->getOperand(1).getOpcode() == ISD::Constant) 3380 return; 3381 3382 // If the input is i32, use ANY_EXTEND since the W instructions don't read 3383 // the upper 32 bits. For other types we need to sign or zero extend 3384 // based on the opcode. 3385 unsigned ExtOpc = ISD::ANY_EXTEND; 3386 if (VT != MVT::i32) 3387 ExtOpc = N->getOpcode() == ISD::SDIV ? 
ISD::SIGN_EXTEND 3388 : ISD::ZERO_EXTEND; 3389 3390 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc)); 3391 break; 3392 } 3393 case ISD::BITCAST: { 3394 assert(((N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 3395 Subtarget.hasStdExtF()) || 3396 (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh())) && 3397 "Unexpected custom legalisation"); 3398 SDValue Op0 = N->getOperand(0); 3399 if (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh()) { 3400 if (Op0.getValueType() != MVT::f16) 3401 return; 3402 SDValue FPConv = 3403 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Op0); 3404 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv)); 3405 } else if (N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 3406 Subtarget.hasStdExtF()) { 3407 if (Op0.getValueType() != MVT::f32) 3408 return; 3409 SDValue FPConv = 3410 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0); 3411 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv)); 3412 } 3413 break; 3414 } 3415 case RISCVISD::GREVI: 3416 case RISCVISD::GORCI: { 3417 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 3418 "Unexpected custom legalisation"); 3419 // This is similar to customLegalizeToWOp, except that we pass the second 3420 // operand (a TargetConstant) straight through: it is already of type 3421 // XLenVT. 3422 SDLoc DL(N); 3423 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode()); 3424 SDValue NewOp0 = 3425 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 3426 SDValue NewRes = 3427 DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, N->getOperand(1)); 3428 // ReplaceNodeResults requires we maintain the same type for the return 3429 // value. 3430 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes)); 3431 break; 3432 } 3433 case RISCVISD::SHFLI: { 3434 // There is no SHFLIW instruction, but we can just promote the operation. 3435 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 3436 "Unexpected custom legalisation"); 3437 SDLoc DL(N); 3438 SDValue NewOp0 = 3439 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 3440 SDValue NewRes = 3441 DAG.getNode(RISCVISD::SHFLI, DL, MVT::i64, NewOp0, N->getOperand(1)); 3442 // ReplaceNodeResults requires we maintain the same type for the return 3443 // value. 3444 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes)); 3445 break; 3446 } 3447 case ISD::BSWAP: 3448 case ISD::BITREVERSE: { 3449 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 3450 Subtarget.hasStdExtZbp() && "Unexpected custom legalisation"); 3451 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, 3452 N->getOperand(0)); 3453 unsigned Imm = N->getOpcode() == ISD::BITREVERSE ? 31 : 24; 3454 SDValue GREVIW = DAG.getNode(RISCVISD::GREVIW, DL, MVT::i64, NewOp0, 3455 DAG.getTargetConstant(Imm, DL, 3456 Subtarget.getXLenVT())); 3457 // ReplaceNodeResults requires we maintain the same type for the return 3458 // value. 
3459 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, GREVIW)); 3460 break; 3461 } 3462 case ISD::FSHL: 3463 case ISD::FSHR: { 3464 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 3465 Subtarget.hasStdExtZbt() && "Unexpected custom legalisation"); 3466 SDValue NewOp0 = 3467 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 3468 SDValue NewOp1 = 3469 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 3470 SDValue NewOp2 = 3471 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); 3472 // FSLW/FSRW take a 6 bit shift amount but i32 FSHL/FSHR only use 5 bits. 3473 // Mask the shift amount to 5 bits. 3474 NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2, 3475 DAG.getConstant(0x1f, DL, MVT::i64)); 3476 unsigned Opc = 3477 N->getOpcode() == ISD::FSHL ? RISCVISD::FSLW : RISCVISD::FSRW; 3478 SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewOp2); 3479 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp)); 3480 break; 3481 } 3482 case ISD::EXTRACT_VECTOR_ELT: { 3483 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element 3484 // type is illegal (currently only vXi64 RV32). 3485 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are 3486 // transferred to the destination register. We issue two of these from the 3487 // upper- and lower- halves of the SEW-bit vector element, slid down to the 3488 // first element. 3489 SDLoc DL(N); 3490 SDValue Vec = N->getOperand(0); 3491 SDValue Idx = N->getOperand(1); 3492 3493 // The vector type hasn't been legalized yet so we can't issue target 3494 // specific nodes if it needs legalization. 3495 // FIXME: We would manually legalize if it's important. 3496 if (!isTypeLegal(Vec.getValueType())) 3497 return; 3498 3499 MVT VecVT = Vec.getSimpleValueType(); 3500 3501 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 && 3502 VecVT.getVectorElementType() == MVT::i64 && 3503 "Unexpected EXTRACT_VECTOR_ELT legalization"); 3504 3505 // If this is a fixed vector, we need to convert it to a scalable vector. 3506 MVT ContainerVT = VecVT; 3507 if (VecVT.isFixedLengthVector()) { 3508 ContainerVT = getContainerForFixedLengthVector(VecVT); 3509 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 3510 } 3511 3512 MVT XLenVT = Subtarget.getXLenVT(); 3513 3514 // Use a VL of 1 to avoid processing more elements than we need. 3515 MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount()); 3516 SDValue VL = DAG.getConstant(1, DL, XLenVT); 3517 SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); 3518 3519 // Unless the index is known to be 0, we must slide the vector down to get 3520 // the desired element into index 0. 3521 if (!isNullConstant(Idx)) { 3522 Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT, 3523 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL); 3524 } 3525 3526 // Extract the lower XLEN bits of the correct vector element. 3527 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec); 3528 3529 // To extract the upper XLEN bits of the vector element, shift the first 3530 // element right by 32 bits and re-extract the lower XLEN bits. 
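// (The constant 32 is splatted into a vector first because SRL_VL takes a
// vector shift-amount operand.)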
3531 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 3532 DAG.getConstant(32, DL, XLenVT), VL); 3533 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, 3534 ThirtyTwoV, Mask, VL); 3535 3536 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32); 3537 3538 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi)); 3539 break; 3540 } 3541 case ISD::INTRINSIC_WO_CHAIN: { 3542 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 3543 switch (IntNo) { 3544 default: 3545 llvm_unreachable( 3546 "Don't know how to custom type legalize this intrinsic!"); 3547 case Intrinsic::riscv_vmv_x_s: { 3548 EVT VT = N->getValueType(0); 3549 assert((VT == MVT::i8 || VT == MVT::i16 || 3550 (Subtarget.is64Bit() && VT == MVT::i32)) && 3551 "Unexpected custom legalisation!"); 3552 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL, 3553 Subtarget.getXLenVT(), N->getOperand(1)); 3554 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract)); 3555 break; 3556 } 3557 } 3558 break; 3559 } 3560 case ISD::VECREDUCE_ADD: 3561 case ISD::VECREDUCE_AND: 3562 case ISD::VECREDUCE_OR: 3563 case ISD::VECREDUCE_XOR: 3564 case ISD::VECREDUCE_SMAX: 3565 case ISD::VECREDUCE_UMAX: 3566 case ISD::VECREDUCE_SMIN: 3567 case ISD::VECREDUCE_UMIN: 3568 // The custom-lowering for these nodes returns a vector whose first element 3569 // is the result of the reduction. Extract its first element and let the 3570 // legalization for EXTRACT_VECTOR_ELT do the rest of the job. 3571 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG)) 3572 Results.push_back(V); 3573 break; 3574 } 3575 } 3576 3577 // A structure to hold one of the bit-manipulation patterns below. Together, a 3578 // SHL and non-SHL pattern may form a bit-manipulation pair on a single source: 3579 // (or (and (shl x, 1), 0xAAAAAAAA), 3580 // (and (srl x, 1), 0x55555555)) 3581 struct RISCVBitmanipPat { 3582 SDValue Op; 3583 unsigned ShAmt; 3584 bool IsSHL; 3585 3586 bool formsPairWith(const RISCVBitmanipPat &Other) const { 3587 return Op == Other.Op && ShAmt == Other.ShAmt && IsSHL != Other.IsSHL; 3588 } 3589 }; 3590 3591 // Matches patterns of the form 3592 // (and (shl x, C2), (C1 << C2)) 3593 // (and (srl x, C2), C1) 3594 // (shl (and x, C1), C2) 3595 // (srl (and x, (C1 << C2)), C2) 3596 // Where C2 is a power of 2 and C1 has at least that many leading zeroes. 3597 // The expected masks for each shift amount are specified in BitmanipMasks where 3598 // BitmanipMasks[log2(C2)] specifies the expected C1 value. 3599 // The max allowed shift amount is either XLen/2 or XLen/4 determined by whether 3600 // BitmanipMasks contains 6 or 5 entries assuming that the maximum possible 3601 // XLen is 64. 3602 static Optional<RISCVBitmanipPat> 3603 matchRISCVBitmanipPat(SDValue Op, ArrayRef<uint64_t> BitmanipMasks) { 3604 assert((BitmanipMasks.size() == 5 || BitmanipMasks.size() == 6) && 3605 "Unexpected number of masks"); 3606 Optional<uint64_t> Mask; 3607 // Optionally consume a mask around the shift operation. 3608 if (Op.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op.getOperand(1))) { 3609 Mask = Op.getConstantOperandVal(1); 3610 Op = Op.getOperand(0); 3611 } 3612 if (Op.getOpcode() != ISD::SHL && Op.getOpcode() != ISD::SRL) 3613 return None; 3614 bool IsSHL = Op.getOpcode() == ISD::SHL; 3615 3616 if (!isa<ConstantSDNode>(Op.getOperand(1))) 3617 return None; 3618 uint64_t ShAmt = Op.getConstantOperandVal(1); 3619 3620 unsigned Width = Op.getValueType() == MVT::i64 ? 
64 : 32;
3621 if (ShAmt >= Width || !isPowerOf2_64(ShAmt))
3622 return None;
3623 // If we don't have enough masks for 64 bit, then we must be trying to
3624 // match SHFL so we're only allowed to shift 1/4 of the width.
3625 if (BitmanipMasks.size() == 5 && ShAmt >= (Width / 2))
3626 return None;
3627
3628 SDValue Src = Op.getOperand(0);
3629
3630 // The expected mask is shifted left when the AND is found around SHL
3631 // patterns.
3632 // ((x >> 1) & 0x55555555)
3633 // ((x << 1) & 0xAAAAAAAA)
3634 bool SHLExpMask = IsSHL;
3635
3636 if (!Mask) {
3637 // Sometimes LLVM keeps the mask as an operand of the shift, typically when
3638 // the mask is all ones: consume that now.
3639 if (Src.getOpcode() == ISD::AND && isa<ConstantSDNode>(Src.getOperand(1))) {
3640 Mask = Src.getConstantOperandVal(1);
3641 Src = Src.getOperand(0);
3642 // The expected mask is now in fact shifted left for SRL, so reverse the
3643 // decision.
3644 // ((x & 0xAAAAAAAA) >> 1)
3645 // ((x & 0x55555555) << 1)
3646 SHLExpMask = !SHLExpMask;
3647 } else {
3648 // Use a default shifted mask of all-ones if there's no AND, truncated
3649 // down to the expected width. This simplifies the logic later on.
3650 Mask = maskTrailingOnes<uint64_t>(Width);
3651 *Mask &= (IsSHL ? *Mask << ShAmt : *Mask >> ShAmt);
3652 }
3653 }
3654
3655 unsigned MaskIdx = Log2_32(ShAmt);
3656 uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);
3657
3658 if (SHLExpMask)
3659 ExpMask <<= ShAmt;
3660
3661 if (Mask != ExpMask)
3662 return None;
3663
3664 return RISCVBitmanipPat{Src, (unsigned)ShAmt, IsSHL};
3665 }
3666
3667 // Matches any of the following bit-manipulation patterns:
3668 // (and (shl x, 1), (0x55555555 << 1))
3669 // (and (srl x, 1), 0x55555555)
3670 // (shl (and x, 0x55555555), 1)
3671 // (srl (and x, (0x55555555 << 1)), 1)
3672 // where the shift amount and mask may vary thus:
3673 // [1] = 0x55555555 / 0xAAAAAAAA
3674 // [2] = 0x33333333 / 0xCCCCCCCC
3675 // [4] = 0x0F0F0F0F / 0xF0F0F0F0
3676 // [8] = 0x00FF00FF / 0xFF00FF00
3677 // [16] = 0x0000FFFF / 0xFFFF0000
3678 // [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64)
3679 static Optional<RISCVBitmanipPat> matchGREVIPat(SDValue Op) {
3680 // These are the unshifted masks which we use to match bit-manipulation
3681 // patterns. They may be shifted left in certain circumstances.
3682 static const uint64_t BitmanipMasks[] = {
3683 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
3684 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
3685
3686 return matchRISCVBitmanipPat(Op, BitmanipMasks);
3687 }
3688
3689 // Match the following pattern as a GREVI(W) operation
3690 // (or (BITMANIP_SHL x), (BITMANIP_SRL x))
3691 static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG,
3692 const RISCVSubtarget &Subtarget) {
3693 assert(Subtarget.hasStdExtZbp() && "Expected Zbp extension");
3694 EVT VT = Op.getValueType();
3695
3696 if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
3697 auto LHS = matchGREVIPat(Op.getOperand(0));
3698 auto RHS = matchGREVIPat(Op.getOperand(1));
3699 if (LHS && RHS && LHS->formsPairWith(*RHS)) {
3700 SDLoc DL(Op);
3701 return DAG.getNode(
3702 RISCVISD::GREVI, DL, VT, LHS->Op,
3703 DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT()));
3704 }
3705 }
3706 return SDValue();
3707 }
3708
3709 // Matches any of the following patterns as a GORCI(W) operation
3710 // 1. (or (GREVI x, shamt), x) if shamt is a power of 2
3711 // 2.
(or x, (GREVI x, shamt)) if shamt is a power of 2 3712 // 3. (or (or (BITMANIP_SHL x), x), (BITMANIP_SRL x)) 3713 // Note that with the variant of 3., 3714 // (or (or (BITMANIP_SHL x), (BITMANIP_SRL x)), x) 3715 // the inner pattern will first be matched as GREVI and then the outer 3716 // pattern will be matched to GORC via the first rule above. 3717 // 4. (or (rotl/rotr x, bitwidth/2), x) 3718 static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG, 3719 const RISCVSubtarget &Subtarget) { 3720 assert(Subtarget.hasStdExtZbp() && "Expected Zbp extenson"); 3721 EVT VT = Op.getValueType(); 3722 3723 if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) { 3724 SDLoc DL(Op); 3725 SDValue Op0 = Op.getOperand(0); 3726 SDValue Op1 = Op.getOperand(1); 3727 3728 auto MatchOROfReverse = [&](SDValue Reverse, SDValue X) { 3729 if (Reverse.getOpcode() == RISCVISD::GREVI && Reverse.getOperand(0) == X && 3730 isPowerOf2_32(Reverse.getConstantOperandVal(1))) 3731 return DAG.getNode(RISCVISD::GORCI, DL, VT, X, Reverse.getOperand(1)); 3732 // We can also form GORCI from ROTL/ROTR by half the bitwidth. 3733 if ((Reverse.getOpcode() == ISD::ROTL || 3734 Reverse.getOpcode() == ISD::ROTR) && 3735 Reverse.getOperand(0) == X && 3736 isa<ConstantSDNode>(Reverse.getOperand(1))) { 3737 uint64_t RotAmt = Reverse.getConstantOperandVal(1); 3738 if (RotAmt == (VT.getSizeInBits() / 2)) 3739 return DAG.getNode( 3740 RISCVISD::GORCI, DL, VT, X, 3741 DAG.getTargetConstant(RotAmt, DL, Subtarget.getXLenVT())); 3742 } 3743 return SDValue(); 3744 }; 3745 3746 // Check for either commutable permutation of (or (GREVI x, shamt), x) 3747 if (SDValue V = MatchOROfReverse(Op0, Op1)) 3748 return V; 3749 if (SDValue V = MatchOROfReverse(Op1, Op0)) 3750 return V; 3751 3752 // OR is commutable so canonicalize its OR operand to the left 3753 if (Op0.getOpcode() != ISD::OR && Op1.getOpcode() == ISD::OR) 3754 std::swap(Op0, Op1); 3755 if (Op0.getOpcode() != ISD::OR) 3756 return SDValue(); 3757 SDValue OrOp0 = Op0.getOperand(0); 3758 SDValue OrOp1 = Op0.getOperand(1); 3759 auto LHS = matchGREVIPat(OrOp0); 3760 // OR is commutable so swap the operands and try again: x might have been 3761 // on the left 3762 if (!LHS) { 3763 std::swap(OrOp0, OrOp1); 3764 LHS = matchGREVIPat(OrOp0); 3765 } 3766 auto RHS = matchGREVIPat(Op1); 3767 if (LHS && RHS && LHS->formsPairWith(*RHS) && LHS->Op == OrOp1) { 3768 return DAG.getNode( 3769 RISCVISD::GORCI, DL, VT, LHS->Op, 3770 DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT())); 3771 } 3772 } 3773 return SDValue(); 3774 } 3775 3776 // Matches any of the following bit-manipulation patterns: 3777 // (and (shl x, 1), (0x22222222 << 1)) 3778 // (and (srl x, 1), 0x22222222) 3779 // (shl (and x, 0x22222222), 1) 3780 // (srl (and x, (0x22222222 << 1)), 1) 3781 // where the shift amount and mask may vary thus: 3782 // [1] = 0x22222222 / 0x44444444 3783 // [2] = 0x0C0C0C0C / 0x3C3C3C3C 3784 // [4] = 0x00F000F0 / 0x0F000F00 3785 // [8] = 0x0000FF00 / 0x00FF0000 3786 // [16] = 0x00000000FFFF0000 / 0x0000FFFF00000000 (for RV64) 3787 static Optional<RISCVBitmanipPat> matchSHFLPat(SDValue Op) { 3788 // These are the unshifted masks which we use to match bit-manipulation 3789 // patterns. They may be shifted left in certain circumstances. 
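// BitmanipMasks[i] here is the expected mask for a shift amount of (1 << i),
// matching the table in the function comment above.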
3790 static const uint64_t BitmanipMasks[] = {
3791 0x2222222222222222ULL, 0x0C0C0C0C0C0C0C0CULL, 0x00F000F000F000F0ULL,
3792 0x0000FF000000FF00ULL, 0x00000000FFFF0000ULL};
3793
3794 return matchRISCVBitmanipPat(Op, BitmanipMasks);
3795 }
3796
3797 // Match (or (or (SHFL_SHL x), (SHFL_SHR x)), (SHFL_AND x))
3798 static SDValue combineORToSHFL(SDValue Op, SelectionDAG &DAG,
3799 const RISCVSubtarget &Subtarget) {
3800 assert(Subtarget.hasStdExtZbp() && "Expected Zbp extension");
3801 EVT VT = Op.getValueType();
3802
3803 if (VT != MVT::i32 && VT != Subtarget.getXLenVT())
3804 return SDValue();
3805
3806 SDValue Op0 = Op.getOperand(0);
3807 SDValue Op1 = Op.getOperand(1);
3808
3809 // Or is commutable so canonicalize the second OR to the LHS.
3810 if (Op0.getOpcode() != ISD::OR)
3811 std::swap(Op0, Op1);
3812 if (Op0.getOpcode() != ISD::OR)
3813 return SDValue();
3814
3815 // We found an inner OR, so our operands are the operands of the inner OR
3816 // and the other operand of the outer OR.
3817 SDValue A = Op0.getOperand(0);
3818 SDValue B = Op0.getOperand(1);
3819 SDValue C = Op1;
3820
3821 auto Match1 = matchSHFLPat(A);
3822 auto Match2 = matchSHFLPat(B);
3823
3824 // If neither matched, we failed.
3825 if (!Match1 && !Match2)
3826 return SDValue();
3827
3828 // We had at least one match. If one failed, try the remaining C operand.
3829 if (!Match1) {
3830 std::swap(A, C);
3831 Match1 = matchSHFLPat(A);
3832 if (!Match1)
3833 return SDValue();
3834 } else if (!Match2) {
3835 std::swap(B, C);
3836 Match2 = matchSHFLPat(B);
3837 if (!Match2)
3838 return SDValue();
3839 }
3840 assert(Match1 && Match2);
3841
3842 // Make sure our matches pair up.
3843 if (!Match1->formsPairWith(*Match2))
3844 return SDValue();
3845
3846 // All that remains is to make sure C is an AND with the same input, that
3847 // masks out the bits that are being shuffled.
3848 if (C.getOpcode() != ISD::AND || !isa<ConstantSDNode>(C.getOperand(1)) ||
3849 C.getOperand(0) != Match1->Op)
3850 return SDValue();
3851
3852 uint64_t Mask = C.getConstantOperandVal(1);
3853
3854 static const uint64_t BitmanipMasks[] = {
3855 0x9999999999999999ULL, 0xC3C3C3C3C3C3C3C3ULL, 0xF00FF00FF00FF00FULL,
3856 0xFF0000FFFF0000FFULL, 0xFFFF00000000FFFFULL,
3857 };
3858
3859 unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
3860 unsigned MaskIdx = Log2_32(Match1->ShAmt);
3861 uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);
3862
3863 if (Mask != ExpMask)
3864 return SDValue();
3865
3866 SDLoc DL(Op);
3867 return DAG.getNode(
3868 RISCVISD::SHFLI, DL, VT, Match1->Op,
3869 DAG.getTargetConstant(Match1->ShAmt, DL, Subtarget.getXLenVT()));
3870 }
3871
3872 // Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is
3873 // non-zero, and to x when it is zero. Any repeated GREVI stage undoes itself.
3874 // Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). A repeated stage does
3875 // not undo itself, but it is redundant.
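// For example, (GREVI (GREVI x, 2), 3) folds to (GREVI x, 1), and
// (GREVI (GREVI x, 1), 1) folds all the way back to x.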
3876 static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) { 3877 unsigned ShAmt1 = N->getConstantOperandVal(1); 3878 SDValue Src = N->getOperand(0); 3879 3880 if (Src.getOpcode() != N->getOpcode()) 3881 return SDValue(); 3882 3883 unsigned ShAmt2 = Src.getConstantOperandVal(1); 3884 Src = Src.getOperand(0); 3885 3886 unsigned CombinedShAmt; 3887 if (N->getOpcode() == RISCVISD::GORCI || N->getOpcode() == RISCVISD::GORCIW) 3888 CombinedShAmt = ShAmt1 | ShAmt2; 3889 else 3890 CombinedShAmt = ShAmt1 ^ ShAmt2; 3891 3892 if (CombinedShAmt == 0) 3893 return Src; 3894 3895 SDLoc DL(N); 3896 return DAG.getNode(N->getOpcode(), DL, N->getValueType(0), Src, 3897 DAG.getTargetConstant(CombinedShAmt, DL, 3898 N->getOperand(1).getValueType())); 3899 } 3900 3901 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, 3902 DAGCombinerInfo &DCI) const { 3903 SelectionDAG &DAG = DCI.DAG; 3904 3905 switch (N->getOpcode()) { 3906 default: 3907 break; 3908 case RISCVISD::SplitF64: { 3909 SDValue Op0 = N->getOperand(0); 3910 // If the input to SplitF64 is just BuildPairF64 then the operation is 3911 // redundant. Instead, use BuildPairF64's operands directly. 3912 if (Op0->getOpcode() == RISCVISD::BuildPairF64) 3913 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1)); 3914 3915 SDLoc DL(N); 3916 3917 // It's cheaper to materialise two 32-bit integers than to load a double 3918 // from the constant pool and transfer it to integer registers through the 3919 // stack. 3920 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) { 3921 APInt V = C->getValueAPF().bitcastToAPInt(); 3922 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32); 3923 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32); 3924 return DCI.CombineTo(N, Lo, Hi); 3925 } 3926 3927 // This is a target-specific version of a DAGCombine performed in 3928 // DAGCombiner::visitBITCAST. It performs the equivalent of: 3929 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 3930 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 3931 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || 3932 !Op0.getNode()->hasOneUse()) 3933 break; 3934 SDValue NewSplitF64 = 3935 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), 3936 Op0.getOperand(0)); 3937 SDValue Lo = NewSplitF64.getValue(0); 3938 SDValue Hi = NewSplitF64.getValue(1); 3939 APInt SignBit = APInt::getSignMask(32); 3940 if (Op0.getOpcode() == ISD::FNEG) { 3941 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi, 3942 DAG.getConstant(SignBit, DL, MVT::i32)); 3943 return DCI.CombineTo(N, Lo, NewHi); 3944 } 3945 assert(Op0.getOpcode() == ISD::FABS); 3946 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi, 3947 DAG.getConstant(~SignBit, DL, MVT::i32)); 3948 return DCI.CombineTo(N, Lo, NewHi); 3949 } 3950 case RISCVISD::SLLW: 3951 case RISCVISD::SRAW: 3952 case RISCVISD::SRLW: 3953 case RISCVISD::ROLW: 3954 case RISCVISD::RORW: { 3955 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read. 
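// This lets SimplifyDemandedBits strip a redundant mask on either operand,
// e.g. an explicit (and RHS, 31) on the shift amount.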
3956 SDValue LHS = N->getOperand(0); 3957 SDValue RHS = N->getOperand(1); 3958 APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32); 3959 APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5); 3960 if (SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI) || 3961 SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)) { 3962 if (N->getOpcode() != ISD::DELETED_NODE) 3963 DCI.AddToWorklist(N); 3964 return SDValue(N, 0); 3965 } 3966 break; 3967 } 3968 case RISCVISD::FSL: 3969 case RISCVISD::FSR: { 3970 // Only the lower log2(Bitwidth)+1 bits of the the shift amount are read. 3971 SDValue ShAmt = N->getOperand(2); 3972 unsigned BitWidth = ShAmt.getValueSizeInBits(); 3973 assert(isPowerOf2_32(BitWidth) && "Unexpected bit width"); 3974 APInt ShAmtMask(BitWidth, (BitWidth * 2) - 1); 3975 if (SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) { 3976 if (N->getOpcode() != ISD::DELETED_NODE) 3977 DCI.AddToWorklist(N); 3978 return SDValue(N, 0); 3979 } 3980 break; 3981 } 3982 case RISCVISD::FSLW: 3983 case RISCVISD::FSRW: { 3984 // Only the lower 32 bits of Values and lower 6 bits of shift amount are 3985 // read. 3986 SDValue Op0 = N->getOperand(0); 3987 SDValue Op1 = N->getOperand(1); 3988 SDValue ShAmt = N->getOperand(2); 3989 APInt OpMask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32); 3990 APInt ShAmtMask = APInt::getLowBitsSet(ShAmt.getValueSizeInBits(), 6); 3991 if (SimplifyDemandedBits(Op0, OpMask, DCI) || 3992 SimplifyDemandedBits(Op1, OpMask, DCI) || 3993 SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) { 3994 if (N->getOpcode() != ISD::DELETED_NODE) 3995 DCI.AddToWorklist(N); 3996 return SDValue(N, 0); 3997 } 3998 break; 3999 } 4000 case RISCVISD::GREVIW: 4001 case RISCVISD::GORCIW: { 4002 // Only the lower 32 bits of the first operand are read 4003 SDValue Op0 = N->getOperand(0); 4004 APInt Mask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32); 4005 if (SimplifyDemandedBits(Op0, Mask, DCI)) { 4006 if (N->getOpcode() != ISD::DELETED_NODE) 4007 DCI.AddToWorklist(N); 4008 return SDValue(N, 0); 4009 } 4010 4011 return combineGREVI_GORCI(N, DCI.DAG); 4012 } 4013 case RISCVISD::FMV_X_ANYEXTW_RV64: { 4014 SDLoc DL(N); 4015 SDValue Op0 = N->getOperand(0); 4016 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the 4017 // conversion is unnecessary and can be replaced with an ANY_EXTEND 4018 // of the FMV_W_X_RV64 operand. 4019 if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) { 4020 assert(Op0.getOperand(0).getValueType() == MVT::i64 && 4021 "Unexpected value type!"); 4022 return Op0.getOperand(0); 4023 } 4024 4025 // This is a target-specific version of a DAGCombine performed in 4026 // DAGCombiner::visitBITCAST. 
It performs the equivalent of: 4027 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 4028 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 4029 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || 4030 !Op0.getNode()->hasOneUse()) 4031 break; 4032 SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, 4033 Op0.getOperand(0)); 4034 APInt SignBit = APInt::getSignMask(32).sext(64); 4035 if (Op0.getOpcode() == ISD::FNEG) 4036 return DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV, 4037 DAG.getConstant(SignBit, DL, MVT::i64)); 4038 4039 assert(Op0.getOpcode() == ISD::FABS); 4040 return DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV, 4041 DAG.getConstant(~SignBit, DL, MVT::i64)); 4042 } 4043 case RISCVISD::GREVI: 4044 case RISCVISD::GORCI: 4045 return combineGREVI_GORCI(N, DCI.DAG); 4046 case ISD::OR: 4047 if (auto GREV = combineORToGREV(SDValue(N, 0), DCI.DAG, Subtarget)) 4048 return GREV; 4049 if (auto GORC = combineORToGORC(SDValue(N, 0), DCI.DAG, Subtarget)) 4050 return GORC; 4051 if (auto SHFL = combineORToSHFL(SDValue(N, 0), DCI.DAG, Subtarget)) 4052 return SHFL; 4053 break; 4054 case RISCVISD::SELECT_CC: { 4055 // Transform 4056 SDValue LHS = N->getOperand(0); 4057 SDValue RHS = N->getOperand(1); 4058 auto CCVal = static_cast<ISD::CondCode>(N->getConstantOperandVal(2)); 4059 if (!ISD::isIntEqualitySetCC(CCVal)) 4060 break; 4061 4062 // Fold (select_cc (setlt X, Y), 0, ne, trueV, falseV) -> 4063 // (select_cc X, Y, lt, trueV, falseV) 4064 // Sometimes the setcc is introduced after select_cc has been formed. 4065 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) && 4066 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) { 4067 // If we're looking for eq 0 instead of ne 0, we need to invert the 4068 // condition. 4069 bool Invert = CCVal == ISD::SETEQ; 4070 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get(); 4071 if (Invert) 4072 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); 4073 4074 RHS = LHS.getOperand(1); 4075 LHS = LHS.getOperand(0); 4076 normaliseSetCC(LHS, RHS, CCVal); 4077 4078 SDLoc DL(N); 4079 SDValue TargetCC = DAG.getConstant(CCVal, DL, Subtarget.getXLenVT()); 4080 return DAG.getNode( 4081 RISCVISD::SELECT_CC, DL, N->getValueType(0), 4082 {LHS, RHS, TargetCC, N->getOperand(3), N->getOperand(4)}); 4083 } 4084 4085 // Fold (select_cc (xor X, Y), 0, eq/ne, trueV, falseV) -> 4086 // (select_cc X, Y, eq/ne, trueV, falseV) 4087 if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) 4088 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), N->getValueType(0), 4089 {LHS.getOperand(0), LHS.getOperand(1), 4090 N->getOperand(2), N->getOperand(3), 4091 N->getOperand(4)}); 4092 // (select_cc X, 1, setne, trueV, falseV) -> 4093 // (select_cc X, 0, seteq, trueV, falseV) if we can prove X is 0/1. 4094 // This can occur when legalizing some floating point comparisons. 4095 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1); 4096 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) { 4097 SDLoc DL(N); 4098 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); 4099 SDValue TargetCC = DAG.getConstant(CCVal, DL, Subtarget.getXLenVT()); 4100 RHS = DAG.getConstant(0, DL, LHS.getValueType()); 4101 return DAG.getNode( 4102 RISCVISD::SELECT_CC, DL, N->getValueType(0), 4103 {LHS, RHS, TargetCC, N->getOperand(3), N->getOperand(4)}); 4104 } 4105 4106 break; 4107 } 4108 case ISD::SETCC: { 4109 // (setcc X, 1, setne) -> (setcc X, 0, seteq) if we can prove X is 0/1. 
4110 // Comparing with 0 may allow us to fold into bnez/beqz. 4111 SDValue LHS = N->getOperand(0); 4112 SDValue RHS = N->getOperand(1); 4113 if (LHS.getValueType().isScalableVector()) 4114 break; 4115 auto CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); 4116 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1); 4117 if (isOneConstant(RHS) && ISD::isIntEqualitySetCC(CC) && 4118 DAG.MaskedValueIsZero(LHS, Mask)) { 4119 SDLoc DL(N); 4120 SDValue Zero = DAG.getConstant(0, DL, LHS.getValueType()); 4121 CC = ISD::getSetCCInverse(CC, LHS.getValueType()); 4122 return DAG.getSetCC(DL, N->getValueType(0), LHS, Zero, CC); 4123 } 4124 break; 4125 } 4126 case ISD::FCOPYSIGN: { 4127 EVT VT = N->getValueType(0); 4128 if (!VT.isVector()) 4129 break; 4130 // There is a form of VFSGNJ which injects the negated sign of its second 4131 // operand. Try and bubble any FNEG up after the extend/round to produce 4132 // this optimized pattern. Avoid modifying cases where FP_ROUND and 4133 // TRUNC=1. 4134 SDValue In2 = N->getOperand(1); 4135 // Avoid cases where the extend/round has multiple uses, as duplicating 4136 // those is typically more expensive than removing a fneg. 4137 if (!In2.hasOneUse()) 4138 break; 4139 if (In2.getOpcode() != ISD::FP_EXTEND && 4140 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0)) 4141 break; 4142 In2 = In2.getOperand(0); 4143 if (In2.getOpcode() != ISD::FNEG) 4144 break; 4145 SDLoc DL(N); 4146 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT); 4147 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0), 4148 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound)); 4149 } 4150 } 4151 4152 return SDValue(); 4153 } 4154 4155 bool RISCVTargetLowering::isDesirableToCommuteWithShift( 4156 const SDNode *N, CombineLevel Level) const { 4157 // The following folds are only desirable if `(OP _, c1 << c2)` can be 4158 // materialised in fewer instructions than `(OP _, c1)`: 4159 // 4160 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) 4161 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2) 4162 SDValue N0 = N->getOperand(0); 4163 EVT Ty = N0.getValueType(); 4164 if (Ty.isScalarInteger() && 4165 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) { 4166 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 4167 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)); 4168 if (C1 && C2) { 4169 const APInt &C1Int = C1->getAPIntValue(); 4170 APInt ShiftedC1Int = C1Int << C2->getAPIntValue(); 4171 4172 // We can materialise `c1 << c2` into an add immediate, so it's "free", 4173 // and the combine should happen, to potentially allow further combines 4174 // later. 4175 if (ShiftedC1Int.getMinSignedBits() <= 64 && 4176 isLegalAddImmediate(ShiftedC1Int.getSExtValue())) 4177 return true; 4178 4179 // We can materialise `c1` in an add immediate, so it's "free", and the 4180 // combine should be prevented. 4181 if (C1Int.getMinSignedBits() <= 64 && 4182 isLegalAddImmediate(C1Int.getSExtValue())) 4183 return false; 4184 4185 // Neither constant will fit into an immediate, so find materialisation 4186 // costs. 4187 int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), 4188 Subtarget.is64Bit()); 4189 int ShiftedC1Cost = RISCVMatInt::getIntMatCost( 4190 ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit()); 4191 4192 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the 4193 // combine should be prevented. 
4194 if (C1Cost < ShiftedC1Cost) 4195 return false; 4196 } 4197 } 4198 return true; 4199 } 4200 4201 bool RISCVTargetLowering::targetShrinkDemandedConstant( 4202 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, 4203 TargetLoweringOpt &TLO) const { 4204 // Delay this optimization as late as possible. 4205 if (!TLO.LegalOps) 4206 return false; 4207 4208 EVT VT = Op.getValueType(); 4209 if (VT.isVector()) 4210 return false; 4211 4212 // Only handle AND for now. 4213 if (Op.getOpcode() != ISD::AND) 4214 return false; 4215 4216 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); 4217 if (!C) 4218 return false; 4219 4220 const APInt &Mask = C->getAPIntValue(); 4221 4222 // Clear all non-demanded bits initially. 4223 APInt ShrunkMask = Mask & DemandedBits; 4224 4225 // If the shrunk mask fits in sign extended 12 bits, let the target 4226 // independent code apply it. 4227 if (ShrunkMask.isSignedIntN(12)) 4228 return false; 4229 4230 // Try to make a smaller immediate by setting undemanded bits. 4231 4232 // We need to be able to make a negative number through a combination of mask 4233 // and undemanded bits. 4234 APInt ExpandedMask = Mask | ~DemandedBits; 4235 if (!ExpandedMask.isNegative()) 4236 return false; 4237 4238 // What is the fewest number of bits we need to represent the negative number. 4239 unsigned MinSignedBits = ExpandedMask.getMinSignedBits(); 4240 4241 // Try to make a 12 bit negative immediate. If that fails try to make a 32 4242 // bit negative immediate unless the shrunk immediate already fits in 32 bits. 4243 APInt NewMask = ShrunkMask; 4244 if (MinSignedBits <= 12) 4245 NewMask.setBitsFrom(11); 4246 else if (MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32)) 4247 NewMask.setBitsFrom(31); 4248 else 4249 return false; 4250 4251 // Sanity check that our new mask is a subset of the demanded mask. 4252 assert(NewMask.isSubsetOf(ExpandedMask)); 4253 4254 // If we aren't changing the mask, just return true to keep it and prevent 4255 // the caller from optimizing. 4256 if (NewMask == Mask) 4257 return true; 4258 4259 // Replace the constant with the new mask. 4260 SDLoc DL(Op); 4261 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT); 4262 SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC); 4263 return TLO.CombineTo(Op, NewOp); 4264 } 4265 4266 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, 4267 KnownBits &Known, 4268 const APInt &DemandedElts, 4269 const SelectionDAG &DAG, 4270 unsigned Depth) const { 4271 unsigned BitWidth = Known.getBitWidth(); 4272 unsigned Opc = Op.getOpcode(); 4273 assert((Opc >= ISD::BUILTIN_OP_END || 4274 Opc == ISD::INTRINSIC_WO_CHAIN || 4275 Opc == ISD::INTRINSIC_W_CHAIN || 4276 Opc == ISD::INTRINSIC_VOID) && 4277 "Should use MaskedValueIsZero if you don't know whether Op" 4278 " is a target node!"); 4279 4280 Known.resetAll(); 4281 switch (Opc) { 4282 default: break; 4283 case RISCVISD::REMUW: { 4284 KnownBits Known2; 4285 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4286 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 4287 // We only care about the lower 32 bits. 4288 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32)); 4289 // Restore the original width by sign extending. 
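// (REMUW is defined to sign-extend its 32-bit result to XLEN, so widening the
// known bits with a sign extension matches the instruction's behaviour.)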
4290 Known = Known.sext(BitWidth); 4291 break; 4292 } 4293 case RISCVISD::DIVUW: { 4294 KnownBits Known2; 4295 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4296 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 4297 // We only care about the lower 32 bits. 4298 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32)); 4299 // Restore the original width by sign extending. 4300 Known = Known.sext(BitWidth); 4301 break; 4302 } 4303 case RISCVISD::READ_VLENB: 4304 // We assume VLENB is at least 8 bytes. 4305 // FIXME: The 1.0 draft spec defines minimum VLEN as 128 bits. 4306 Known.Zero.setLowBits(3); 4307 break; 4308 } 4309 } 4310 4311 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( 4312 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, 4313 unsigned Depth) const { 4314 switch (Op.getOpcode()) { 4315 default: 4316 break; 4317 case RISCVISD::SLLW: 4318 case RISCVISD::SRAW: 4319 case RISCVISD::SRLW: 4320 case RISCVISD::DIVW: 4321 case RISCVISD::DIVUW: 4322 case RISCVISD::REMUW: 4323 case RISCVISD::ROLW: 4324 case RISCVISD::RORW: 4325 case RISCVISD::GREVIW: 4326 case RISCVISD::GORCIW: 4327 case RISCVISD::FSLW: 4328 case RISCVISD::FSRW: 4329 // TODO: As the result is sign-extended, this is conservatively correct. A 4330 // more precise answer could be calculated for SRAW depending on known 4331 // bits in the shift amount. 4332 return 33; 4333 case RISCVISD::SHFLI: { 4334 // There is no SHFLIW, but a i64 SHFLI with bit 4 of the control word 4335 // cleared doesn't affect bit 31. The upper 32 bits will be shuffled, but 4336 // will stay within the upper 32 bits. If there were more than 32 sign bits 4337 // before there will be at least 33 sign bits after. 4338 if (Op.getValueType() == MVT::i64 && 4339 (Op.getConstantOperandVal(1) & 0x10) == 0) { 4340 unsigned Tmp = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1); 4341 if (Tmp > 32) 4342 return 33; 4343 } 4344 break; 4345 } 4346 case RISCVISD::VMV_X_S: 4347 // The number of sign bits of the scalar result is computed by obtaining the 4348 // element type of the input vector operand, subtracting its width from the 4349 // XLEN, and then adding one (sign bit within the element type). If the 4350 // element type is wider than XLen, the least-significant XLEN bits are 4351 // taken. 4352 if (Op.getOperand(0).getScalarValueSizeInBits() > Subtarget.getXLen()) 4353 return 1; 4354 return Subtarget.getXLen() - Op.getOperand(0).getScalarValueSizeInBits() + 1; 4355 } 4356 4357 return 1; 4358 } 4359 4360 static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI, 4361 MachineBasicBlock *BB) { 4362 assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction"); 4363 4364 // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves. 4365 // Should the count have wrapped while it was being read, we need to try 4366 // again. 4367 // ... 4368 // read: 4369 // rdcycleh x3 # load high word of cycle 4370 // rdcycle x2 # load low word of cycle 4371 // rdcycleh x4 # load high word of cycle 4372 // bne x3, x4, read # check if high word reads match, otherwise try again 4373 // ... 
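  // For illustration (values hypothetical): if the low word wraps from
  // 0xffffffff to 0x0 between the two rdcycleh reads, the first and second
  // high-word values differ, the bne branches back to 'read', and the halves
  // are re-sampled until a consistent {high, low} pair is observed.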
4374 4375 MachineFunction &MF = *BB->getParent(); 4376 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 4377 MachineFunction::iterator It = ++BB->getIterator(); 4378 4379 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB); 4380 MF.insert(It, LoopMBB); 4381 4382 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB); 4383 MF.insert(It, DoneMBB); 4384 4385 // Transfer the remainder of BB and its successor edges to DoneMBB. 4386 DoneMBB->splice(DoneMBB->begin(), BB, 4387 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 4388 DoneMBB->transferSuccessorsAndUpdatePHIs(BB); 4389 4390 BB->addSuccessor(LoopMBB); 4391 4392 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 4393 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 4394 Register LoReg = MI.getOperand(0).getReg(); 4395 Register HiReg = MI.getOperand(1).getReg(); 4396 DebugLoc DL = MI.getDebugLoc(); 4397 4398 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); 4399 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg) 4400 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) 4401 .addReg(RISCV::X0); 4402 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg) 4403 .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding) 4404 .addReg(RISCV::X0); 4405 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg) 4406 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) 4407 .addReg(RISCV::X0); 4408 4409 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE)) 4410 .addReg(HiReg) 4411 .addReg(ReadAgainReg) 4412 .addMBB(LoopMBB); 4413 4414 LoopMBB->addSuccessor(LoopMBB); 4415 LoopMBB->addSuccessor(DoneMBB); 4416 4417 MI.eraseFromParent(); 4418 4419 return DoneMBB; 4420 } 4421 4422 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI, 4423 MachineBasicBlock *BB) { 4424 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction"); 4425 4426 MachineFunction &MF = *BB->getParent(); 4427 DebugLoc DL = MI.getDebugLoc(); 4428 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 4429 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 4430 Register LoReg = MI.getOperand(0).getReg(); 4431 Register HiReg = MI.getOperand(1).getReg(); 4432 Register SrcReg = MI.getOperand(2).getReg(); 4433 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass; 4434 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); 4435 4436 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC, 4437 RI); 4438 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 4439 MachineMemOperand *MMOLo = 4440 MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8)); 4441 MachineMemOperand *MMOHi = MF.getMachineMemOperand( 4442 MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8)); 4443 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg) 4444 .addFrameIndex(FI) 4445 .addImm(0) 4446 .addMemOperand(MMOLo); 4447 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg) 4448 .addFrameIndex(FI) 4449 .addImm(4) 4450 .addMemOperand(MMOHi); 4451 MI.eraseFromParent(); // The pseudo instruction is gone now. 
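  // Sketch of the expansion this produces on RV32D (mnemonics indicative
  // only, offsets from the MoveF64 frame index):
  //   fsd <src>, 0(<FI>)   ; via storeRegToStackSlot
  //   lw  <lo>,  0(<FI>)
  //   lw  <hi>,  4(<FI>)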
4452 return BB; 4453 } 4454 4455 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI, 4456 MachineBasicBlock *BB) { 4457 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo && 4458 "Unexpected instruction"); 4459 4460 MachineFunction &MF = *BB->getParent(); 4461 DebugLoc DL = MI.getDebugLoc(); 4462 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 4463 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 4464 Register DstReg = MI.getOperand(0).getReg(); 4465 Register LoReg = MI.getOperand(1).getReg(); 4466 Register HiReg = MI.getOperand(2).getReg(); 4467 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass; 4468 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); 4469 4470 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 4471 MachineMemOperand *MMOLo = 4472 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8)); 4473 MachineMemOperand *MMOHi = MF.getMachineMemOperand( 4474 MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8)); 4475 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 4476 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill())) 4477 .addFrameIndex(FI) 4478 .addImm(0) 4479 .addMemOperand(MMOLo); 4480 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 4481 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill())) 4482 .addFrameIndex(FI) 4483 .addImm(4) 4484 .addMemOperand(MMOHi); 4485 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI); 4486 MI.eraseFromParent(); // The pseudo instruction is gone now. 4487 return BB; 4488 } 4489 4490 static bool isSelectPseudo(MachineInstr &MI) { 4491 switch (MI.getOpcode()) { 4492 default: 4493 return false; 4494 case RISCV::Select_GPR_Using_CC_GPR: 4495 case RISCV::Select_FPR16_Using_CC_GPR: 4496 case RISCV::Select_FPR32_Using_CC_GPR: 4497 case RISCV::Select_FPR64_Using_CC_GPR: 4498 return true; 4499 } 4500 } 4501 4502 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI, 4503 MachineBasicBlock *BB) { 4504 // To "insert" Select_* instructions, we actually have to insert the triangle 4505 // control-flow pattern. The incoming instructions know the destination vreg 4506 // to set, the condition code register to branch on, the true/false values to 4507 // select between, and the condcode to use to select the appropriate branch. 4508 // 4509 // We produce the following control flow: 4510 // HeadMBB 4511 // | \ 4512 // | IfFalseMBB 4513 // | / 4514 // TailMBB 4515 // 4516 // When we find a sequence of selects we attempt to optimize their emission 4517 // by sharing the control flow. Currently we only handle cases where we have 4518 // multiple selects with the exact same condition (same LHS, RHS and CC). 4519 // The selects may be interleaved with other instructions if the other 4520 // instructions meet some requirements we deem safe: 4521 // - They are debug instructions. Otherwise, 4522 // - They do not have side-effects, do not access memory and their inputs do 4523 // not depend on the results of the select pseudo-instructions. 4524 // The TrueV/FalseV operands of the selects cannot depend on the result of 4525 // previous selects in the sequence. 4526 // These conditions could be further relaxed. See the X86 target for a 4527 // related approach and more information. 
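  // Illustrative shareable sequence (operand order simplified):
  //   %a = Select_GPR_Using_CC_GPR %x, %y, cc, %t0, %f0
  //   %b = Select_GPR_Using_CC_GPR %x, %y, cc, %t1, %f1
  // Both selects compare the same %x and %y under the same cc, and %t1/%f1 do
  // not read %a, so a single branch plus two PHIs in TailMBB covers the pair.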
4528 Register LHS = MI.getOperand(1).getReg(); 4529 Register RHS = MI.getOperand(2).getReg(); 4530 auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm()); 4531 4532 SmallVector<MachineInstr *, 4> SelectDebugValues; 4533 SmallSet<Register, 4> SelectDests; 4534 SelectDests.insert(MI.getOperand(0).getReg()); 4535 4536 MachineInstr *LastSelectPseudo = &MI; 4537 4538 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI); 4539 SequenceMBBI != E; ++SequenceMBBI) { 4540 if (SequenceMBBI->isDebugInstr()) 4541 continue; 4542 else if (isSelectPseudo(*SequenceMBBI)) { 4543 if (SequenceMBBI->getOperand(1).getReg() != LHS || 4544 SequenceMBBI->getOperand(2).getReg() != RHS || 4545 SequenceMBBI->getOperand(3).getImm() != CC || 4546 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) || 4547 SelectDests.count(SequenceMBBI->getOperand(5).getReg())) 4548 break; 4549 LastSelectPseudo = &*SequenceMBBI; 4550 SequenceMBBI->collectDebugValues(SelectDebugValues); 4551 SelectDests.insert(SequenceMBBI->getOperand(0).getReg()); 4552 } else { 4553 if (SequenceMBBI->hasUnmodeledSideEffects() || 4554 SequenceMBBI->mayLoadOrStore()) 4555 break; 4556 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) { 4557 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg()); 4558 })) 4559 break; 4560 } 4561 } 4562 4563 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); 4564 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 4565 DebugLoc DL = MI.getDebugLoc(); 4566 MachineFunction::iterator I = ++BB->getIterator(); 4567 4568 MachineBasicBlock *HeadMBB = BB; 4569 MachineFunction *F = BB->getParent(); 4570 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB); 4571 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB); 4572 4573 F->insert(I, IfFalseMBB); 4574 F->insert(I, TailMBB); 4575 4576 // Transfer debug instructions associated with the selects to TailMBB. 4577 for (MachineInstr *DebugInstr : SelectDebugValues) { 4578 TailMBB->push_back(DebugInstr->removeFromParent()); 4579 } 4580 4581 // Move all instructions after the sequence to TailMBB. 4582 TailMBB->splice(TailMBB->end(), HeadMBB, 4583 std::next(LastSelectPseudo->getIterator()), HeadMBB->end()); 4584 // Update machine-CFG edges by transferring all successors of the current 4585 // block to the new block which will contain the Phi nodes for the selects. 4586 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB); 4587 // Set the successors for HeadMBB. 4588 HeadMBB->addSuccessor(IfFalseMBB); 4589 HeadMBB->addSuccessor(TailMBB); 4590 4591 // Insert appropriate branch. 4592 unsigned Opcode = getBranchOpcodeForIntCondCode(CC); 4593 4594 BuildMI(HeadMBB, DL, TII.get(Opcode)) 4595 .addReg(LHS) 4596 .addReg(RHS) 4597 .addMBB(TailMBB); 4598 4599 // IfFalseMBB just falls through to TailMBB. 4600 IfFalseMBB->addSuccessor(TailMBB); 4601 4602 // Create PHIs for all of the select pseudo-instructions. 
4603 auto SelectMBBI = MI.getIterator(); 4604 auto SelectEnd = std::next(LastSelectPseudo->getIterator()); 4605 auto InsertionPoint = TailMBB->begin(); 4606 while (SelectMBBI != SelectEnd) { 4607 auto Next = std::next(SelectMBBI); 4608 if (isSelectPseudo(*SelectMBBI)) { 4609 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ] 4610 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(), 4611 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg()) 4612 .addReg(SelectMBBI->getOperand(4).getReg()) 4613 .addMBB(HeadMBB) 4614 .addReg(SelectMBBI->getOperand(5).getReg()) 4615 .addMBB(IfFalseMBB); 4616 SelectMBBI->eraseFromParent(); 4617 } 4618 SelectMBBI = Next; 4619 } 4620 4621 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs); 4622 return TailMBB; 4623 } 4624 4625 static MachineBasicBlock *addVSetVL(MachineInstr &MI, MachineBasicBlock *BB, 4626 int VLIndex, unsigned SEWIndex, 4627 RISCVVLMUL VLMul, bool ForceTailAgnostic) { 4628 MachineFunction &MF = *BB->getParent(); 4629 DebugLoc DL = MI.getDebugLoc(); 4630 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 4631 4632 unsigned SEW = MI.getOperand(SEWIndex).getImm(); 4633 assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW"); 4634 RISCVVSEW ElementWidth = static_cast<RISCVVSEW>(Log2_32(SEW / 8)); 4635 4636 MachineRegisterInfo &MRI = MF.getRegInfo(); 4637 4638 auto BuildVSETVLI = [&]() { 4639 if (VLIndex >= 0) { 4640 Register DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); 4641 Register VLReg = MI.getOperand(VLIndex).getReg(); 4642 4643 // VL might be a compile time constant, but isel would have to put it 4644 // in a register. See if VL comes from an ADDI X0, imm. 4645 if (VLReg.isVirtual()) { 4646 MachineInstr *Def = MRI.getVRegDef(VLReg); 4647 if (Def && Def->getOpcode() == RISCV::ADDI && 4648 Def->getOperand(1).getReg() == RISCV::X0 && 4649 Def->getOperand(2).isImm()) { 4650 uint64_t Imm = Def->getOperand(2).getImm(); 4651 // VSETIVLI allows a 5-bit zero extended immediate. 4652 if (isUInt<5>(Imm)) 4653 return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETIVLI)) 4654 .addReg(DestReg, RegState::Define | RegState::Dead) 4655 .addImm(Imm); 4656 } 4657 } 4658 4659 return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI)) 4660 .addReg(DestReg, RegState::Define | RegState::Dead) 4661 .addReg(VLReg); 4662 } 4663 4664 // With no VL operator in the pseudo, do not modify VL (rd = X0, rs1 = X0). 4665 return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI)) 4666 .addReg(RISCV::X0, RegState::Define | RegState::Dead) 4667 .addReg(RISCV::X0, RegState::Kill); 4668 }; 4669 4670 MachineInstrBuilder MIB = BuildVSETVLI(); 4671 4672 // Default to tail agnostic unless the destination is tied to a source. In 4673 // that case the user would have some control over the tail values. The tail 4674 // policy is also ignored on instructions that only update element 0 like 4675 // vmv.s.x or reductions so use agnostic there to match the common case. 4676 // FIXME: This is conservatively correct, but we might want to detect that 4677 // the input is undefined. 4678 bool TailAgnostic = true; 4679 unsigned UseOpIdx; 4680 if (!ForceTailAgnostic && MI.isRegTiedToUseOperand(0, &UseOpIdx)) { 4681 TailAgnostic = false; 4682 // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic. 
4683 const MachineOperand &UseMO = MI.getOperand(UseOpIdx); 4684 MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg()); 4685 if (UseMI && UseMI->isImplicitDef()) 4686 TailAgnostic = true; 4687 } 4688 4689 // For simplicity we reuse the vtype representation here. 4690 MIB.addImm(RISCVVType::encodeVTYPE(VLMul, ElementWidth, 4691 /*TailAgnostic*/ TailAgnostic, 4692 /*MaskAgnostic*/ false)); 4693 4694 // Remove (now) redundant operands from pseudo 4695 if (VLIndex >= 0) { 4696 MI.getOperand(VLIndex).setReg(RISCV::NoRegister); 4697 MI.getOperand(VLIndex).setIsKill(false); 4698 } 4699 4700 return BB; 4701 } 4702 4703 MachineBasicBlock * 4704 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, 4705 MachineBasicBlock *BB) const { 4706 uint64_t TSFlags = MI.getDesc().TSFlags; 4707 4708 if (TSFlags & RISCVII::HasSEWOpMask) { 4709 unsigned NumOperands = MI.getNumExplicitOperands(); 4710 int VLIndex = (TSFlags & RISCVII::HasVLOpMask) ? NumOperands - 2 : -1; 4711 unsigned SEWIndex = NumOperands - 1; 4712 bool ForceTailAgnostic = TSFlags & RISCVII::ForceTailAgnosticMask; 4713 4714 RISCVVLMUL VLMul = static_cast<RISCVVLMUL>((TSFlags & RISCVII::VLMulMask) >> 4715 RISCVII::VLMulShift); 4716 return addVSetVL(MI, BB, VLIndex, SEWIndex, VLMul, ForceTailAgnostic); 4717 } 4718 4719 switch (MI.getOpcode()) { 4720 default: 4721 llvm_unreachable("Unexpected instr type to insert"); 4722 case RISCV::ReadCycleWide: 4723 assert(!Subtarget.is64Bit() && 4724 "ReadCycleWrite is only to be used on riscv32"); 4725 return emitReadCycleWidePseudo(MI, BB); 4726 case RISCV::Select_GPR_Using_CC_GPR: 4727 case RISCV::Select_FPR16_Using_CC_GPR: 4728 case RISCV::Select_FPR32_Using_CC_GPR: 4729 case RISCV::Select_FPR64_Using_CC_GPR: 4730 return emitSelectPseudo(MI, BB); 4731 case RISCV::BuildPairF64Pseudo: 4732 return emitBuildPairF64Pseudo(MI, BB); 4733 case RISCV::SplitF64Pseudo: 4734 return emitSplitF64Pseudo(MI, BB); 4735 } 4736 } 4737 4738 // Calling Convention Implementation. 4739 // The expectations for frontend ABI lowering vary from target to target. 4740 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI 4741 // details, but this is a longer term goal. For now, we simply try to keep the 4742 // role of the frontend as simple and well-defined as possible. The rules can 4743 // be summarised as: 4744 // * Never split up large scalar arguments. We handle them here. 4745 // * If a hardfloat calling convention is being used, and the struct may be 4746 // passed in a pair of registers (fp+fp, int+fp), and both registers are 4747 // available, then pass as two separate arguments. If either the GPRs or FPRs 4748 // are exhausted, then pass according to the rule below. 4749 // * If a struct could never be passed in registers or directly in a stack 4750 // slot (as it is larger than 2*XLEN and the floating point rules don't 4751 // apply), then pass it using a pointer with the byval attribute. 4752 // * If a struct is less than 2*XLEN, then coerce to either a two-element 4753 // word-sized array or a 2*XLEN scalar (depending on alignment). 4754 // * The frontend can determine whether a struct is returned by reference or 4755 // not based on its size and fields. If it will be returned by reference, the 4756 // frontend must modify the prototype so a pointer with the sret annotation is 4757 // passed as the first argument. This is not necessary for large scalar 4758 // returns. 
4759 // * Struct return values and varargs should be coerced to structs containing 4760 // register-size fields in the same situations they would be for fixed 4761 // arguments. 4762 4763 static const MCPhysReg ArgGPRs[] = { 4764 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, 4765 RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17 4766 }; 4767 static const MCPhysReg ArgFPR16s[] = { 4768 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, 4769 RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H 4770 }; 4771 static const MCPhysReg ArgFPR32s[] = { 4772 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, 4773 RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F 4774 }; 4775 static const MCPhysReg ArgFPR64s[] = { 4776 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, 4777 RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D 4778 }; 4779 // This is an interim calling convention and it may be changed in the future. 4780 static const MCPhysReg ArgVRs[] = { 4781 RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13, 4782 RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19, 4783 RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23}; 4784 static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2, 4785 RISCV::V14M2, RISCV::V16M2, RISCV::V18M2, 4786 RISCV::V20M2, RISCV::V22M2}; 4787 static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4, 4788 RISCV::V20M4}; 4789 static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8}; 4790 4791 // Pass a 2*XLEN argument that has been split into two XLEN values through 4792 // registers or the stack as necessary. 4793 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, 4794 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, 4795 MVT ValVT2, MVT LocVT2, 4796 ISD::ArgFlagsTy ArgFlags2) { 4797 unsigned XLenInBytes = XLen / 8; 4798 if (Register Reg = State.AllocateReg(ArgGPRs)) { 4799 // At least one half can be passed via register. 4800 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg, 4801 VA1.getLocVT(), CCValAssign::Full)); 4802 } else { 4803 // Both halves must be passed on the stack, with proper alignment. 4804 Align StackAlign = 4805 std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign()); 4806 State.addLoc( 4807 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(), 4808 State.AllocateStack(XLenInBytes, StackAlign), 4809 VA1.getLocVT(), CCValAssign::Full)); 4810 State.addLoc(CCValAssign::getMem( 4811 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)), 4812 LocVT2, CCValAssign::Full)); 4813 return false; 4814 } 4815 4816 if (Register Reg = State.AllocateReg(ArgGPRs)) { 4817 // The second half can also be passed via register. 4818 State.addLoc( 4819 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full)); 4820 } else { 4821 // The second half is passed via the stack, without additional alignment. 4822 State.addLoc(CCValAssign::getMem( 4823 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)), 4824 LocVT2, CCValAssign::Full)); 4825 } 4826 4827 return false; 4828 } 4829 4830 // Implements the RISC-V calling convention. Returns true upon failure. 
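// For example, an i64 argument on RV32 that has been split into two i32
// halves is handled by CC_RISCVAssign2XLen above: both halves go in GPRs when
// two argument registers remain, the second half spills to the stack when
// only one remains, and both halves go on the stack (the first with its
// original alignment) when none remain.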
4831 static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, 4832 MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, 4833 ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, 4834 bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, 4835 Optional<unsigned> FirstMaskArgument) { 4836 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits(); 4837 assert(XLen == 32 || XLen == 64); 4838 MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64; 4839 4840 // Any return value split in to more than two values can't be returned 4841 // directly. 4842 if (IsRet && ValNo > 1) 4843 return true; 4844 4845 // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a 4846 // variadic argument, or if no F16/F32 argument registers are available. 4847 bool UseGPRForF16_F32 = true; 4848 // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a 4849 // variadic argument, or if no F64 argument registers are available. 4850 bool UseGPRForF64 = true; 4851 4852 switch (ABI) { 4853 default: 4854 llvm_unreachable("Unexpected ABI"); 4855 case RISCVABI::ABI_ILP32: 4856 case RISCVABI::ABI_LP64: 4857 break; 4858 case RISCVABI::ABI_ILP32F: 4859 case RISCVABI::ABI_LP64F: 4860 UseGPRForF16_F32 = !IsFixed; 4861 break; 4862 case RISCVABI::ABI_ILP32D: 4863 case RISCVABI::ABI_LP64D: 4864 UseGPRForF16_F32 = !IsFixed; 4865 UseGPRForF64 = !IsFixed; 4866 break; 4867 } 4868 4869 // FPR16, FPR32, and FPR64 alias each other. 4870 if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) { 4871 UseGPRForF16_F32 = true; 4872 UseGPRForF64 = true; 4873 } 4874 4875 // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and 4876 // similar local variables rather than directly checking against the target 4877 // ABI. 4878 4879 if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::f32)) { 4880 LocVT = XLenVT; 4881 LocInfo = CCValAssign::BCvt; 4882 } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) { 4883 LocVT = MVT::i64; 4884 LocInfo = CCValAssign::BCvt; 4885 } 4886 4887 // If this is a variadic argument, the RISC-V calling convention requires 4888 // that it is assigned an 'even' or 'aligned' register if it has 8-byte 4889 // alignment (RV32) or 16-byte alignment (RV64). An aligned register should 4890 // be used regardless of whether the original argument was split during 4891 // legalisation or not. The argument will not be passed by registers if the 4892 // original type is larger than 2*XLEN, so the register alignment rule does 4893 // not apply. 4894 unsigned TwoXLenInBytes = (2 * XLen) / 8; 4895 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes && 4896 DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) { 4897 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs); 4898 // Skip 'odd' register if necessary. 4899 if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1) 4900 State.AllocateReg(ArgGPRs); 4901 } 4902 4903 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs(); 4904 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags = 4905 State.getPendingArgFlags(); 4906 4907 assert(PendingLocs.size() == PendingArgFlags.size() && 4908 "PendingLocs and PendingArgFlags out of sync"); 4909 4910 // Handle passing f64 on RV32D with a soft float ABI or when floating point 4911 // registers are exhausted. 
4912 if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) { 4913 assert(!ArgFlags.isSplit() && PendingLocs.empty() && 4914 "Can't lower f64 if it is split"); 4915 // Depending on available argument GPRS, f64 may be passed in a pair of 4916 // GPRs, split between a GPR and the stack, or passed completely on the 4917 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these 4918 // cases. 4919 Register Reg = State.AllocateReg(ArgGPRs); 4920 LocVT = MVT::i32; 4921 if (!Reg) { 4922 unsigned StackOffset = State.AllocateStack(8, Align(8)); 4923 State.addLoc( 4924 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 4925 return false; 4926 } 4927 if (!State.AllocateReg(ArgGPRs)) 4928 State.AllocateStack(4, Align(4)); 4929 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 4930 return false; 4931 } 4932 4933 // Split arguments might be passed indirectly, so keep track of the pending 4934 // values. 4935 if (ArgFlags.isSplit() || !PendingLocs.empty()) { 4936 LocVT = XLenVT; 4937 LocInfo = CCValAssign::Indirect; 4938 PendingLocs.push_back( 4939 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); 4940 PendingArgFlags.push_back(ArgFlags); 4941 if (!ArgFlags.isSplitEnd()) { 4942 return false; 4943 } 4944 } 4945 4946 // If the split argument only had two elements, it should be passed directly 4947 // in registers or on the stack. 4948 if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) { 4949 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()"); 4950 // Apply the normal calling convention rules to the first half of the 4951 // split argument. 4952 CCValAssign VA = PendingLocs[0]; 4953 ISD::ArgFlagsTy AF = PendingArgFlags[0]; 4954 PendingLocs.clear(); 4955 PendingArgFlags.clear(); 4956 return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT, 4957 ArgFlags); 4958 } 4959 4960 // Allocate to a register if possible, or else a stack slot. 4961 Register Reg; 4962 if (ValVT == MVT::f16 && !UseGPRForF16_F32) 4963 Reg = State.AllocateReg(ArgFPR16s); 4964 else if (ValVT == MVT::f32 && !UseGPRForF16_F32) 4965 Reg = State.AllocateReg(ArgFPR32s); 4966 else if (ValVT == MVT::f64 && !UseGPRForF64) 4967 Reg = State.AllocateReg(ArgFPR64s); 4968 else if (ValVT.isScalableVector()) { 4969 const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT); 4970 if (RC == &RISCV::VRRegClass) { 4971 // Assign the first mask argument to V0. 4972 // This is an interim calling convention and it may be changed in the 4973 // future. 4974 if (FirstMaskArgument.hasValue() && 4975 ValNo == FirstMaskArgument.getValue()) { 4976 Reg = State.AllocateReg(RISCV::V0); 4977 } else { 4978 Reg = State.AllocateReg(ArgVRs); 4979 } 4980 } else if (RC == &RISCV::VRM2RegClass) { 4981 Reg = State.AllocateReg(ArgVRM2s); 4982 } else if (RC == &RISCV::VRM4RegClass) { 4983 Reg = State.AllocateReg(ArgVRM4s); 4984 } else if (RC == &RISCV::VRM8RegClass) { 4985 Reg = State.AllocateReg(ArgVRM8s); 4986 } else { 4987 llvm_unreachable("Unhandled class register for ValueType"); 4988 } 4989 if (!Reg) { 4990 LocInfo = CCValAssign::Indirect; 4991 // Try using a GPR to pass the address 4992 Reg = State.AllocateReg(ArgGPRs); 4993 LocVT = XLenVT; 4994 } 4995 } else 4996 Reg = State.AllocateReg(ArgGPRs); 4997 unsigned StackOffset = 4998 Reg ? 0 : State.AllocateStack(XLen / 8, Align(XLen / 8)); 4999 5000 // If we reach this point and PendingLocs is non-empty, we must be at the 5001 // end of a split argument that must be passed indirectly. 
5002 if (!PendingLocs.empty()) { 5003 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()"); 5004 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()"); 5005 5006 for (auto &It : PendingLocs) { 5007 if (Reg) 5008 It.convertToReg(Reg); 5009 else 5010 It.convertToMem(StackOffset); 5011 State.addLoc(It); 5012 } 5013 PendingLocs.clear(); 5014 PendingArgFlags.clear(); 5015 return false; 5016 } 5017 5018 assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT || 5019 (TLI.getSubtarget().hasStdExtV() && ValVT.isScalableVector())) && 5020 "Expected an XLenVT or scalable vector types at this stage"); 5021 5022 if (Reg) { 5023 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 5024 return false; 5025 } 5026 5027 // When a floating-point value is passed on the stack, no bit-conversion is 5028 // needed. 5029 if (ValVT.isFloatingPoint()) { 5030 LocVT = ValVT; 5031 LocInfo = CCValAssign::Full; 5032 } 5033 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 5034 return false; 5035 } 5036 5037 template <typename ArgTy> 5038 static Optional<unsigned> preAssignMask(const ArgTy &Args) { 5039 for (const auto &ArgIdx : enumerate(Args)) { 5040 MVT ArgVT = ArgIdx.value().VT; 5041 if (ArgVT.isScalableVector() && 5042 ArgVT.getVectorElementType().SimpleTy == MVT::i1) 5043 return ArgIdx.index(); 5044 } 5045 return None; 5046 } 5047 5048 void RISCVTargetLowering::analyzeInputArgs( 5049 MachineFunction &MF, CCState &CCInfo, 5050 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const { 5051 unsigned NumArgs = Ins.size(); 5052 FunctionType *FType = MF.getFunction().getFunctionType(); 5053 5054 Optional<unsigned> FirstMaskArgument; 5055 if (Subtarget.hasStdExtV()) 5056 FirstMaskArgument = preAssignMask(Ins); 5057 5058 for (unsigned i = 0; i != NumArgs; ++i) { 5059 MVT ArgVT = Ins[i].VT; 5060 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; 5061 5062 Type *ArgTy = nullptr; 5063 if (IsRet) 5064 ArgTy = FType->getReturnType(); 5065 else if (Ins[i].isOrigArg()) 5066 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex()); 5067 5068 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 5069 if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, 5070 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this, 5071 FirstMaskArgument)) { 5072 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " 5073 << EVT(ArgVT).getEVTString() << '\n'); 5074 llvm_unreachable(nullptr); 5075 } 5076 } 5077 } 5078 5079 void RISCVTargetLowering::analyzeOutputArgs( 5080 MachineFunction &MF, CCState &CCInfo, 5081 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet, 5082 CallLoweringInfo *CLI) const { 5083 unsigned NumArgs = Outs.size(); 5084 5085 Optional<unsigned> FirstMaskArgument; 5086 if (Subtarget.hasStdExtV()) 5087 FirstMaskArgument = preAssignMask(Outs); 5088 5089 for (unsigned i = 0; i != NumArgs; i++) { 5090 MVT ArgVT = Outs[i].VT; 5091 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 5092 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr; 5093 5094 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 5095 if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, 5096 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this, 5097 FirstMaskArgument)) { 5098 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " 5099 << EVT(ArgVT).getEVTString() << "\n"); 5100 llvm_unreachable(nullptr); 5101 } 5102 } 5103 } 5104 5105 // Convert Val to a ValVT. 
Should not be called for CCValAssign::Indirect 5106 // values. 5107 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, 5108 const CCValAssign &VA, const SDLoc &DL) { 5109 switch (VA.getLocInfo()) { 5110 default: 5111 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 5112 case CCValAssign::Full: 5113 break; 5114 case CCValAssign::BCvt: 5115 if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16) 5116 Val = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, Val); 5117 else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 5118 Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val); 5119 else 5120 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); 5121 break; 5122 } 5123 return Val; 5124 } 5125 5126 // The caller is responsible for loading the full value if the argument is 5127 // passed with CCValAssign::Indirect. 5128 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, 5129 const CCValAssign &VA, const SDLoc &DL, 5130 const RISCVTargetLowering &TLI) { 5131 MachineFunction &MF = DAG.getMachineFunction(); 5132 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 5133 EVT LocVT = VA.getLocVT(); 5134 SDValue Val; 5135 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT()); 5136 Register VReg = RegInfo.createVirtualRegister(RC); 5137 RegInfo.addLiveIn(VA.getLocReg(), VReg); 5138 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); 5139 5140 if (VA.getLocInfo() == CCValAssign::Indirect) 5141 return Val; 5142 5143 return convertLocVTToValVT(DAG, Val, VA, DL); 5144 } 5145 5146 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, 5147 const CCValAssign &VA, const SDLoc &DL) { 5148 EVT LocVT = VA.getLocVT(); 5149 5150 switch (VA.getLocInfo()) { 5151 default: 5152 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 5153 case CCValAssign::Full: 5154 break; 5155 case CCValAssign::BCvt: 5156 if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16) 5157 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val); 5158 else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 5159 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val); 5160 else 5161 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val); 5162 break; 5163 } 5164 return Val; 5165 } 5166 5167 // The caller is responsible for loading the full value if the argument is 5168 // passed with CCValAssign::Indirect. 
5169 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
5170                                 const CCValAssign &VA, const SDLoc &DL) {
5171   MachineFunction &MF = DAG.getMachineFunction();
5172   MachineFrameInfo &MFI = MF.getFrameInfo();
5173   EVT LocVT = VA.getLocVT();
5174   EVT ValVT = VA.getValVT();
5175   EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
5176   int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
5177                                  VA.getLocMemOffset(), /*Immutable=*/true);
5178   SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
5179   SDValue Val;
5180 
5181   ISD::LoadExtType ExtType;
5182   switch (VA.getLocInfo()) {
5183   default:
5184     llvm_unreachable("Unexpected CCValAssign::LocInfo");
5185   case CCValAssign::Full:
5186   case CCValAssign::Indirect:
5187   case CCValAssign::BCvt:
5188     ExtType = ISD::NON_EXTLOAD;
5189     break;
5190   }
5191   Val = DAG.getExtLoad(
5192       ExtType, DL, LocVT, Chain, FIN,
5193       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
5194   return Val;
5195 }
5196 
5197 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
5198                                        const CCValAssign &VA, const SDLoc &DL) {
5199   assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
5200          "Unexpected VA");
5201   MachineFunction &MF = DAG.getMachineFunction();
5202   MachineFrameInfo &MFI = MF.getFrameInfo();
5203   MachineRegisterInfo &RegInfo = MF.getRegInfo();
5204 
5205   if (VA.isMemLoc()) {
5206     // f64 is passed on the stack.
5207     int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
5208     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
5209     return DAG.getLoad(MVT::f64, DL, Chain, FIN,
5210                        MachinePointerInfo::getFixedStack(MF, FI));
5211   }
5212 
5213   assert(VA.isRegLoc() && "Expected register VA assignment");
5214 
5215   Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
5216   RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
5217   SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
5218   SDValue Hi;
5219   if (VA.getLocReg() == RISCV::X17) {
5220     // Second half of f64 is passed on the stack.
5221     int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
5222     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
5223     Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
5224                      MachinePointerInfo::getFixedStack(MF, FI));
5225   } else {
5226     // Second half of f64 is passed in another GPR.
5227     Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
5228     RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
5229     Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
5230   }
5231   return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
5232 }
5233 
5234 // FastCC gives less than a 1% performance improvement on some particular
5235 // benchmarks, but in theory it may still benefit some cases.
5236 static bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
5237                             CCValAssign::LocInfo LocInfo,
5238                             ISD::ArgFlagsTy ArgFlags, CCState &State) {
5239 
5240   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
5241     // X5 and X6 might be used for save-restore libcall.
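    // Hence the list below uses a0-a7 plus t2 (x7) and t3-t6 (x28-x31),
    // leaving x5/x6 free for that purpose.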
5242 static const MCPhysReg GPRList[] = { 5243 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, 5244 RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28, 5245 RISCV::X29, RISCV::X30, RISCV::X31}; 5246 if (unsigned Reg = State.AllocateReg(GPRList)) { 5247 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 5248 return false; 5249 } 5250 } 5251 5252 if (LocVT == MVT::f16) { 5253 static const MCPhysReg FPR16List[] = { 5254 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H, 5255 RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H, 5256 RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H, 5257 RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H}; 5258 if (unsigned Reg = State.AllocateReg(FPR16List)) { 5259 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 5260 return false; 5261 } 5262 } 5263 5264 if (LocVT == MVT::f32) { 5265 static const MCPhysReg FPR32List[] = { 5266 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F, 5267 RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F, 5268 RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F, 5269 RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F}; 5270 if (unsigned Reg = State.AllocateReg(FPR32List)) { 5271 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 5272 return false; 5273 } 5274 } 5275 5276 if (LocVT == MVT::f64) { 5277 static const MCPhysReg FPR64List[] = { 5278 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D, 5279 RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D, 5280 RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D, 5281 RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D}; 5282 if (unsigned Reg = State.AllocateReg(FPR64List)) { 5283 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 5284 return false; 5285 } 5286 } 5287 5288 if (LocVT == MVT::i32 || LocVT == MVT::f32) { 5289 unsigned Offset4 = State.AllocateStack(4, Align(4)); 5290 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo)); 5291 return false; 5292 } 5293 5294 if (LocVT == MVT::i64 || LocVT == MVT::f64) { 5295 unsigned Offset5 = State.AllocateStack(8, Align(8)); 5296 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo)); 5297 return false; 5298 } 5299 5300 return true; // CC didn't match. 5301 } 5302 5303 static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, 5304 CCValAssign::LocInfo LocInfo, 5305 ISD::ArgFlagsTy ArgFlags, CCState &State) { 5306 5307 if (LocVT == MVT::i32 || LocVT == MVT::i64) { 5308 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim 5309 // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11 5310 static const MCPhysReg GPRList[] = { 5311 RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22, 5312 RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27}; 5313 if (unsigned Reg = State.AllocateReg(GPRList)) { 5314 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 5315 return false; 5316 } 5317 } 5318 5319 if (LocVT == MVT::f32) { 5320 // Pass in STG registers: F1, ..., F6 5321 // fs0 ... 
fs5
5322     static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
5323                                           RISCV::F18_F, RISCV::F19_F,
5324                                           RISCV::F20_F, RISCV::F21_F};
5325     if (unsigned Reg = State.AllocateReg(FPR32List)) {
5326       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5327       return false;
5328     }
5329   }
5330 
5331   if (LocVT == MVT::f64) {
5332     // Pass in STG registers: D1, ..., D6
5333     //                        fs6 ... fs11
5334     static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
5335                                           RISCV::F24_D, RISCV::F25_D,
5336                                           RISCV::F26_D, RISCV::F27_D};
5337     if (unsigned Reg = State.AllocateReg(FPR64List)) {
5338       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5339       return false;
5340     }
5341   }
5342 
5343   report_fatal_error("No registers left in GHC calling convention");
5344   return true;
5345 }
5346 
5347 // Transform physical registers into virtual registers.
5348 SDValue RISCVTargetLowering::LowerFormalArguments(
5349     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
5350     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
5351     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5352 
5353   MachineFunction &MF = DAG.getMachineFunction();
5354 
5355   switch (CallConv) {
5356   default:
5357     report_fatal_error("Unsupported calling convention");
5358   case CallingConv::C:
5359   case CallingConv::Fast:
5360     break;
5361   case CallingConv::GHC:
5362     if (!MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtF] ||
5363         !MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtD])
5364       report_fatal_error(
5365         "GHC calling convention requires the F and D instruction set extensions");
5366   }
5367 
5368   const Function &Func = MF.getFunction();
5369   if (Func.hasFnAttribute("interrupt")) {
5370     if (!Func.arg_empty())
5371       report_fatal_error(
5372         "Functions with the interrupt attribute cannot have arguments!");
5373 
5374     StringRef Kind =
5375       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
5376 
5377     if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
5378       report_fatal_error(
5379         "Function interrupt attribute argument not supported!");
5380   }
5381 
5382   EVT PtrVT = getPointerTy(DAG.getDataLayout());
5383   MVT XLenVT = Subtarget.getXLenVT();
5384   unsigned XLenInBytes = Subtarget.getXLen() / 8;
5385   // Used with varargs to accumulate store chains.
5386   std::vector<SDValue> OutChains;
5387 
5388   // Assign locations to all of the incoming arguments.
5389   SmallVector<CCValAssign, 16> ArgLocs;
5390   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5391 
5392   if (CallConv == CallingConv::Fast)
5393     CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_FastCC);
5394   else if (CallConv == CallingConv::GHC)
5395     CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
5396   else
5397     analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);
5398 
5399   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
5400     CCValAssign &VA = ArgLocs[i];
5401     SDValue ArgValue;
5402     // Passing f64 on RV32D with a soft float ABI must be handled as a special
5403     // case.
5404     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
5405       ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
5406     else if (VA.isRegLoc())
5407       ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
5408     else
5409       ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
5410 
5411     if (VA.getLocInfo() == CCValAssign::Indirect) {
5412       // If the original argument was split and passed by reference (e.g. i128
5413       // on RV32), we need to load all parts of it here (using the same
5414       // address).
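      // Continuing the i128-on-RV32 illustration: the load below reads the
      // part at offset 0, and the loop that follows reads the remaining parts
      // at their recorded PartOffsets (4, 8 and 12) from the same address.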
5415 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue, 5416 MachinePointerInfo())); 5417 unsigned ArgIndex = Ins[i].OrigArgIndex; 5418 assert(Ins[i].PartOffset == 0); 5419 while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) { 5420 CCValAssign &PartVA = ArgLocs[i + 1]; 5421 unsigned PartOffset = Ins[i + 1].PartOffset; 5422 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, 5423 DAG.getIntPtrConstant(PartOffset, DL)); 5424 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address, 5425 MachinePointerInfo())); 5426 ++i; 5427 } 5428 continue; 5429 } 5430 InVals.push_back(ArgValue); 5431 } 5432 5433 if (IsVarArg) { 5434 ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs); 5435 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs); 5436 const TargetRegisterClass *RC = &RISCV::GPRRegClass; 5437 MachineFrameInfo &MFI = MF.getFrameInfo(); 5438 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 5439 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); 5440 5441 // Offset of the first variable argument from stack pointer, and size of 5442 // the vararg save area. For now, the varargs save area is either zero or 5443 // large enough to hold a0-a7. 5444 int VaArgOffset, VarArgsSaveSize; 5445 5446 // If all registers are allocated, then all varargs must be passed on the 5447 // stack and we don't need to save any argregs. 5448 if (ArgRegs.size() == Idx) { 5449 VaArgOffset = CCInfo.getNextStackOffset(); 5450 VarArgsSaveSize = 0; 5451 } else { 5452 VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx); 5453 VaArgOffset = -VarArgsSaveSize; 5454 } 5455 5456 // Record the frame index of the first variable argument 5457 // which is a value necessary to VASTART. 5458 int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true); 5459 RVFI->setVarArgsFrameIndex(FI); 5460 5461 // If saving an odd number of registers then create an extra stack slot to 5462 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures 5463 // offsets to even-numbered registered remain 2*XLEN-aligned. 5464 if (Idx % 2) { 5465 MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true); 5466 VarArgsSaveSize += XLenInBytes; 5467 } 5468 5469 // Copy the integer registers that may have been used for passing varargs 5470 // to the vararg save area. 5471 for (unsigned I = Idx; I < ArgRegs.size(); 5472 ++I, VaArgOffset += XLenInBytes) { 5473 const Register Reg = RegInfo.createVirtualRegister(RC); 5474 RegInfo.addLiveIn(ArgRegs[I], Reg); 5475 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT); 5476 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true); 5477 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 5478 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff, 5479 MachinePointerInfo::getFixedStack(MF, FI)); 5480 cast<StoreSDNode>(Store.getNode()) 5481 ->getMemOperand() 5482 ->setValue((Value *)nullptr); 5483 OutChains.push_back(Store); 5484 } 5485 RVFI->setVarArgsSaveSize(VarArgsSaveSize); 5486 } 5487 5488 // All stores are grouped in one node to allow the matching between 5489 // the size of Ins and InVals. This only happens for vararg functions. 5490 if (!OutChains.empty()) { 5491 OutChains.push_back(Chain); 5492 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains); 5493 } 5494 5495 return Chain; 5496 } 5497 5498 /// isEligibleForTailCallOptimization - Check whether the call is eligible 5499 /// for tail call optimization. 
5500 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization. 5501 bool RISCVTargetLowering::isEligibleForTailCallOptimization( 5502 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, 5503 const SmallVector<CCValAssign, 16> &ArgLocs) const { 5504 5505 auto &Callee = CLI.Callee; 5506 auto CalleeCC = CLI.CallConv; 5507 auto &Outs = CLI.Outs; 5508 auto &Caller = MF.getFunction(); 5509 auto CallerCC = Caller.getCallingConv(); 5510 5511 // Exception-handling functions need a special set of instructions to 5512 // indicate a return to the hardware. Tail-calling another function would 5513 // probably break this. 5514 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This 5515 // should be expanded as new function attributes are introduced. 5516 if (Caller.hasFnAttribute("interrupt")) 5517 return false; 5518 5519 // Do not tail call opt if the stack is used to pass parameters. 5520 if (CCInfo.getNextStackOffset() != 0) 5521 return false; 5522 5523 // Do not tail call opt if any parameters need to be passed indirectly. 5524 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are 5525 // passed indirectly. So the address of the value will be passed in a 5526 // register, or if not available, then the address is put on the stack. In 5527 // order to pass indirectly, space on the stack often needs to be allocated 5528 // in order to store the value. In this case the CCInfo.getNextStackOffset() 5529 // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs 5530 // are passed CCValAssign::Indirect. 5531 for (auto &VA : ArgLocs) 5532 if (VA.getLocInfo() == CCValAssign::Indirect) 5533 return false; 5534 5535 // Do not tail call opt if either caller or callee uses struct return 5536 // semantics. 5537 auto IsCallerStructRet = Caller.hasStructRetAttr(); 5538 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); 5539 if (IsCallerStructRet || IsCalleeStructRet) 5540 return false; 5541 5542 // Externally-defined functions with weak linkage should not be 5543 // tail-called. The behaviour of branch instructions in this situation (as 5544 // used for tail calls) is implementation-defined, so we cannot rely on the 5545 // linker replacing the tail call with a return. 5546 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 5547 const GlobalValue *GV = G->getGlobal(); 5548 if (GV->hasExternalWeakLinkage()) 5549 return false; 5550 } 5551 5552 // The callee has to preserve all registers the caller needs to preserve. 5553 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); 5554 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); 5555 if (CalleeCC != CallerCC) { 5556 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); 5557 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) 5558 return false; 5559 } 5560 5561 // Byval parameters hand the function a pointer directly into the stack area 5562 // we want to reuse during a tail call. Working around this *is* possible 5563 // but less efficient and uglier in LowerCall. 5564 for (auto &Arg : Outs) 5565 if (Arg.Flags.isByVal()) 5566 return false; 5567 5568 return true; 5569 } 5570 5571 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input 5572 // and output parameter nodes. 
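// Schematic DAG for a simple non-tail call (shape only, names illustrative):
//   ch             = callseq_start ch, #stackbytes, 0
//   ch, glue       = CopyToReg ch, a0, %arg
//   ch, glue       = RISCVISD::CALL ch, target, a0, regmask, glue
//   ch, glue       = callseq_end ch, #stackbytes, 0, glue
//   %ret, ch, glue = CopyFromReg ch, a0, glue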
5573 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, 5574 SmallVectorImpl<SDValue> &InVals) const { 5575 SelectionDAG &DAG = CLI.DAG; 5576 SDLoc &DL = CLI.DL; 5577 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 5578 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 5579 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 5580 SDValue Chain = CLI.Chain; 5581 SDValue Callee = CLI.Callee; 5582 bool &IsTailCall = CLI.IsTailCall; 5583 CallingConv::ID CallConv = CLI.CallConv; 5584 bool IsVarArg = CLI.IsVarArg; 5585 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 5586 MVT XLenVT = Subtarget.getXLenVT(); 5587 5588 MachineFunction &MF = DAG.getMachineFunction(); 5589 5590 // Analyze the operands of the call, assigning locations to each operand. 5591 SmallVector<CCValAssign, 16> ArgLocs; 5592 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 5593 5594 if (CallConv == CallingConv::Fast) 5595 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC); 5596 else if (CallConv == CallingConv::GHC) 5597 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC); 5598 else 5599 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI); 5600 5601 // Check if it's really possible to do a tail call. 5602 if (IsTailCall) 5603 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs); 5604 5605 if (IsTailCall) 5606 ++NumTailCalls; 5607 else if (CLI.CB && CLI.CB->isMustTailCall()) 5608 report_fatal_error("failed to perform tail call elimination on a call " 5609 "site marked musttail"); 5610 5611 // Get a count of how many bytes are to be pushed on the stack. 5612 unsigned NumBytes = ArgCCInfo.getNextStackOffset(); 5613 5614 // Create local copies for byval args 5615 SmallVector<SDValue, 8> ByValArgs; 5616 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 5617 ISD::ArgFlagsTy Flags = Outs[i].Flags; 5618 if (!Flags.isByVal()) 5619 continue; 5620 5621 SDValue Arg = OutVals[i]; 5622 unsigned Size = Flags.getByValSize(); 5623 Align Alignment = Flags.getNonZeroByValAlign(); 5624 5625 int FI = 5626 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false); 5627 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 5628 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT); 5629 5630 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment, 5631 /*IsVolatile=*/false, 5632 /*AlwaysInline=*/false, IsTailCall, 5633 MachinePointerInfo(), MachinePointerInfo()); 5634 ByValArgs.push_back(FIPtr); 5635 } 5636 5637 if (!IsTailCall) 5638 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); 5639 5640 // Copy argument values to their designated locations. 5641 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass; 5642 SmallVector<SDValue, 8> MemOpChains; 5643 SDValue StackPtr; 5644 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) { 5645 CCValAssign &VA = ArgLocs[i]; 5646 SDValue ArgValue = OutVals[i]; 5647 ISD::ArgFlagsTy Flags = Outs[i].Flags; 5648 5649 // Handle passing f64 on RV32D with a soft float ABI as a special case. 5650 bool IsF64OnRV32DSoftABI = 5651 VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64; 5652 if (IsF64OnRV32DSoftABI && VA.isRegLoc()) { 5653 SDValue SplitF64 = DAG.getNode( 5654 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue); 5655 SDValue Lo = SplitF64.getValue(0); 5656 SDValue Hi = SplitF64.getValue(1); 5657 5658 Register RegLo = VA.getLocReg(); 5659 RegsToPass.push_back(std::make_pair(RegLo, Lo)); 5660 5661 if (RegLo == RISCV::X17) { 5662 // Second half of f64 is passed on the stack. 
5663 // Work out the address of the stack slot. 5664 if (!StackPtr.getNode()) 5665 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); 5666 // Emit the store. 5667 MemOpChains.push_back( 5668 DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo())); 5669 } else { 5670 // Second half of f64 is passed in another GPR. 5671 assert(RegLo < RISCV::X31 && "Invalid register pair"); 5672 Register RegHigh = RegLo + 1; 5673 RegsToPass.push_back(std::make_pair(RegHigh, Hi)); 5674 } 5675 continue; 5676 } 5677 5678 // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way 5679 // as any other MemLoc. 5680 5681 // Promote the value if needed. 5682 // For now, only handle fully promoted and indirect arguments. 5683 if (VA.getLocInfo() == CCValAssign::Indirect) { 5684 // Store the argument in a stack slot and pass its address. 5685 SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT); 5686 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); 5687 MemOpChains.push_back( 5688 DAG.getStore(Chain, DL, ArgValue, SpillSlot, 5689 MachinePointerInfo::getFixedStack(MF, FI))); 5690 // If the original argument was split (e.g. i128), we need 5691 // to store all parts of it here (and pass just one address). 5692 unsigned ArgIndex = Outs[i].OrigArgIndex; 5693 assert(Outs[i].PartOffset == 0); 5694 while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) { 5695 SDValue PartValue = OutVals[i + 1]; 5696 unsigned PartOffset = Outs[i + 1].PartOffset; 5697 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, 5698 DAG.getIntPtrConstant(PartOffset, DL)); 5699 MemOpChains.push_back( 5700 DAG.getStore(Chain, DL, PartValue, Address, 5701 MachinePointerInfo::getFixedStack(MF, FI))); 5702 ++i; 5703 } 5704 ArgValue = SpillSlot; 5705 } else { 5706 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL); 5707 } 5708 5709 // Use local copy if it is a byval arg. 5710 if (Flags.isByVal()) 5711 ArgValue = ByValArgs[j++]; 5712 5713 if (VA.isRegLoc()) { 5714 // Queue up the argument copies and emit them at the end. 5715 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); 5716 } else { 5717 assert(VA.isMemLoc() && "Argument not register or memory"); 5718 assert(!IsTailCall && "Tail call not allowed if stack is used " 5719 "for passing parameters"); 5720 5721 // Work out the address of the stack slot. 5722 if (!StackPtr.getNode()) 5723 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); 5724 SDValue Address = 5725 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, 5726 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL)); 5727 5728 // Emit the store. 5729 MemOpChains.push_back( 5730 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo())); 5731 } 5732 } 5733 5734 // Join the stores, which are independent of one another. 5735 if (!MemOpChains.empty()) 5736 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); 5737 5738 SDValue Glue; 5739 5740 // Build a sequence of copy-to-reg nodes, chained and glued together. 5741 for (auto &Reg : RegsToPass) { 5742 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue); 5743 Glue = Chain.getValue(1); 5744 } 5745 5746 // Validate that none of the argument registers have been marked as 5747 // reserved, if so report an error. Do the same for the return address if this 5748 // is not a tailcall. 
  validateCCReservedRegs(RegsToPass, MF);
  if (!IsTailCall &&
      MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
    MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
        MF.getFunction(),
        "Return address register required, but has been reserved."});

  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
  // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
  // split it and then direct call can be matched by PseudoCALL.
  if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = S->getGlobal();

    unsigned OpFlags = RISCVII::MO_CALL;
    if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
      OpFlags = RISCVII::MO_PLT;

    Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    unsigned OpFlags = RISCVII::MO_CALL;

    if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
                                                 nullptr))
      OpFlags = RISCVII::MO_PLT;

    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
  }

  // The first call operand is the chain and the second is the target address.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (auto &Reg : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));

  if (!IsTailCall) {
    // Add a register mask operand representing the call-preserved registers.
    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
    const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
    assert(Mask && "Missing call preserved mask for calling convention");
    Ops.push_back(DAG.getRegisterMask(Mask));
  }

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);

  if (IsTailCall) {
    MF.getFrameInfo().setHasTailCall();
    return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
  }

  Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
  Glue = Chain.getValue(1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumBytes, DL, PtrVT, true),
                             DAG.getConstant(0, DL, PtrVT, true),
                             Glue, DL);
  Glue = Chain.getValue(1);

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
  analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true);

  // Copy all of the result registers out of their specified physreg.
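  // Each copy is glued to the previous one so the returned register values
  // are read back immediately after the call sequence.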
  for (auto &VA : RVLocs) {
    // Copy the value out
    SDValue RetValue =
        DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
    // Glue the RetValue to the end of the call sequence
    Chain = RetValue.getValue(1);
    Glue = RetValue.getValue(2);

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
      SDValue RetValue2 =
          DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
      Chain = RetValue2.getValue(1);
      Glue = RetValue2.getValue(2);
      RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
                             RetValue2);
    }

    RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);

    InVals.push_back(RetValue);
  }

  return Chain;
}

bool RISCVTargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);

  Optional<unsigned> FirstMaskArgument;
  if (Subtarget.hasStdExtV())
    FirstMaskArgument = preAssignMask(Outs);

  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    MVT VT = Outs[i].VT;
    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
    RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
    if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
                 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr,
                 *this, FirstMaskArgument))
      return false;
  }
  return true;
}

SDValue
RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                 bool IsVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
                                 const SDLoc &DL, SelectionDAG &DAG) const {
  const MachineFunction &MF = DAG.getMachineFunction();
  const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();

  // Stores the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;

  // Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
                    nullptr);

  if (CallConv == CallingConv::GHC && !RVLocs.empty())
    report_fatal_error("GHC functions return void only");

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
    SDValue Val = OutVals[i];
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      // Handle returning f64 on RV32D with a soft float ABI.
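      // The double is split into two i32 halves; the low half goes in the
      // assigned GPR and the high half in the next consecutive GPR.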
      assert(VA.isRegLoc() && "Expected return via registers");
      SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
                                     DAG.getVTList(MVT::i32, MVT::i32), Val);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);
      Register RegLo = VA.getLocReg();
      assert(RegLo < RISCV::X31 && "Invalid register pair");
      Register RegHi = RegLo + 1;

      if (STI.isRegisterReservedByUser(RegLo) ||
          STI.isRegisterReservedByUser(RegHi))
        MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
            MF.getFunction(),
            "Return value register required, but has been reserved."});

      Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
      Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
    } else {
      // Handle a 'normal' return.
      Val = convertValVTToLocVT(DAG, Val, VA, DL);
      Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);

      if (STI.isRegisterReservedByUser(VA.getLocReg()))
        MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
            MF.getFunction(),
            "Return value register required, but has been reserved."});

      // Guarantee that all emitted copies are stuck together.
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
    }
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue node if we have it.
  if (Glue.getNode()) {
    RetOps.push_back(Glue);
  }

  // Interrupt service routines use different return instructions.
  const Function &Func = DAG.getMachineFunction().getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.getReturnType()->isVoidTy())
      report_fatal_error(
          "Functions with the interrupt attribute must have void return type!");

    MachineFunction &MF = DAG.getMachineFunction();
    StringRef Kind =
        MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    unsigned RetOpc;
    if (Kind == "user")
      RetOpc = RISCVISD::URET_FLAG;
    else if (Kind == "supervisor")
      RetOpc = RISCVISD::SRET_FLAG;
    else
      RetOpc = RISCVISD::MRET_FLAG;

    return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
  }

  return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
}

void RISCVTargetLowering::validateCCReservedRegs(
    const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
    MachineFunction &MF) const {
  const Function &F = MF.getFunction();
  const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();

  if (llvm::any_of(Regs, [&STI](auto Reg) {
        return STI.isRegisterReservedByUser(Reg.first);
      }))
    F.getContext().diagnose(DiagnosticInfoUnsupported{
        F, "Argument register required, but has been reserved."});
}

bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  return CI->isTailCall();
}

const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
#define NODE_NAME_CASE(NODE)                                                   \
  case RISCVISD::NODE:                                                         \
    return "RISCVISD::" #NODE;
  // clang-format off
  switch ((RISCVISD::NodeType)Opcode) {
  case RISCVISD::FIRST_NUMBER:
    break;
  NODE_NAME_CASE(RET_FLAG)
  NODE_NAME_CASE(URET_FLAG)
  NODE_NAME_CASE(SRET_FLAG)
  NODE_NAME_CASE(MRET_FLAG)
  NODE_NAME_CASE(CALL)
  NODE_NAME_CASE(SELECT_CC)
  NODE_NAME_CASE(BuildPairF64)
  NODE_NAME_CASE(SplitF64)
  NODE_NAME_CASE(TAIL)
  NODE_NAME_CASE(SLLW)
  NODE_NAME_CASE(SRAW)
  NODE_NAME_CASE(SRLW)
  NODE_NAME_CASE(DIVW)
  NODE_NAME_CASE(DIVUW)
  NODE_NAME_CASE(REMUW)
  NODE_NAME_CASE(ROLW)
  NODE_NAME_CASE(RORW)
  NODE_NAME_CASE(FSLW)
  NODE_NAME_CASE(FSRW)
  NODE_NAME_CASE(FSL)
  NODE_NAME_CASE(FSR)
  NODE_NAME_CASE(FMV_H_X)
  NODE_NAME_CASE(FMV_X_ANYEXTH)
  NODE_NAME_CASE(FMV_W_X_RV64)
  NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
  NODE_NAME_CASE(READ_CYCLE_WIDE)
  NODE_NAME_CASE(GREVI)
  NODE_NAME_CASE(GREVIW)
  NODE_NAME_CASE(GORCI)
  NODE_NAME_CASE(GORCIW)
  NODE_NAME_CASE(SHFLI)
  NODE_NAME_CASE(VMV_V_X_VL)
  NODE_NAME_CASE(VFMV_V_F_VL)
  NODE_NAME_CASE(VMV_X_S)
  NODE_NAME_CASE(VMV_S_XF_VL)
  NODE_NAME_CASE(SPLAT_VECTOR_I64)
  NODE_NAME_CASE(READ_VLENB)
  NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
  NODE_NAME_CASE(VLEFF)
  NODE_NAME_CASE(VLEFF_MASK)
  NODE_NAME_CASE(VSLIDEUP_VL)
  NODE_NAME_CASE(VSLIDEDOWN_VL)
  NODE_NAME_CASE(VID_VL)
  NODE_NAME_CASE(VFNCVT_ROD_VL)
  NODE_NAME_CASE(VECREDUCE_ADD_VL)
  NODE_NAME_CASE(VECREDUCE_UMAX_VL)
  NODE_NAME_CASE(VECREDUCE_SMAX_VL)
  NODE_NAME_CASE(VECREDUCE_UMIN_VL)
  NODE_NAME_CASE(VECREDUCE_SMIN_VL)
  NODE_NAME_CASE(VECREDUCE_AND_VL)
  NODE_NAME_CASE(VECREDUCE_OR_VL)
  NODE_NAME_CASE(VECREDUCE_XOR_VL)
  NODE_NAME_CASE(VECREDUCE_FADD_VL)
  NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
  NODE_NAME_CASE(ADD_VL)
  NODE_NAME_CASE(AND_VL)
  NODE_NAME_CASE(MUL_VL)
  NODE_NAME_CASE(OR_VL)
  NODE_NAME_CASE(SDIV_VL)
  NODE_NAME_CASE(SHL_VL)
  NODE_NAME_CASE(SREM_VL)
  NODE_NAME_CASE(SRA_VL)
  NODE_NAME_CASE(SRL_VL)
  NODE_NAME_CASE(SUB_VL)
  NODE_NAME_CASE(UDIV_VL)
  NODE_NAME_CASE(UREM_VL)
  NODE_NAME_CASE(XOR_VL)
  NODE_NAME_CASE(FADD_VL)
  NODE_NAME_CASE(FSUB_VL)
  NODE_NAME_CASE(FMUL_VL)
  NODE_NAME_CASE(FDIV_VL)
  NODE_NAME_CASE(FNEG_VL)
  NODE_NAME_CASE(FABS_VL)
  NODE_NAME_CASE(FSQRT_VL)
  NODE_NAME_CASE(FMA_VL)
  NODE_NAME_CASE(SMIN_VL)
  NODE_NAME_CASE(SMAX_VL)
  NODE_NAME_CASE(UMIN_VL)
  NODE_NAME_CASE(UMAX_VL)
  NODE_NAME_CASE(MULHS_VL)
  NODE_NAME_CASE(MULHU_VL)
  NODE_NAME_CASE(FP_TO_SINT_VL)
  NODE_NAME_CASE(FP_TO_UINT_VL)
  NODE_NAME_CASE(SINT_TO_FP_VL)
  NODE_NAME_CASE(UINT_TO_FP_VL)
  NODE_NAME_CASE(FP_EXTEND_VL)
  NODE_NAME_CASE(FP_ROUND_VL)
  NODE_NAME_CASE(SETCC_VL)
  NODE_NAME_CASE(VSELECT_VL)
  NODE_NAME_CASE(VMAND_VL)
  NODE_NAME_CASE(VMOR_VL)
  NODE_NAME_CASE(VMXOR_VL)
  NODE_NAME_CASE(VMCLR_VL)
  NODE_NAME_CASE(VMSET_VL)
  NODE_NAME_CASE(VRGATHER_VX_VL)
  NODE_NAME_CASE(VRGATHER_VV_VL)
  NODE_NAME_CASE(VRGATHEREI16_VV_VL)
  NODE_NAME_CASE(VSEXT_VL)
  NODE_NAME_CASE(VZEXT_VL)
  NODE_NAME_CASE(VLE_VL)
  NODE_NAME_CASE(VSE_VL)
  }
  // clang-format on
  return nullptr;
#undef NODE_NAME_CASE
}

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
RISCVTargetLowering::ConstraintType
RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:
      break;
    case 'f':
      return C_RegisterClass;
    case 'I':
    case 'J':
    case 'K':
      return C_Immediate;
    case 'A':
      return C_Memory;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

std::pair<unsigned, const TargetRegisterClass *>
RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                  StringRef Constraint,
                                                  MVT VT) const {
  // First, see if this is a constraint that directly corresponds to a
  // RISCV register class.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      return std::make_pair(0U, &RISCV::GPRRegClass);
    case 'f':
      if (Subtarget.hasStdExtZfh() && VT == MVT::f16)
        return std::make_pair(0U, &RISCV::FPR16RegClass);
      if (Subtarget.hasStdExtF() && VT == MVT::f32)
        return std::make_pair(0U, &RISCV::FPR32RegClass);
      if (Subtarget.hasStdExtD() && VT == MVT::f64)
        return std::make_pair(0U, &RISCV::FPR64RegClass);
      break;
    default:
      break;
    }
  }

  // Clang will correctly decode the usage of register name aliases into their
  // official names. However, other frontends like `rustc` do not. This allows
  // users of these frontends to use the ABI names for registers in LLVM-style
  // register constraints.
  unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
                               .Case("{zero}", RISCV::X0)
                               .Case("{ra}", RISCV::X1)
                               .Case("{sp}", RISCV::X2)
                               .Case("{gp}", RISCV::X3)
                               .Case("{tp}", RISCV::X4)
                               .Case("{t0}", RISCV::X5)
                               .Case("{t1}", RISCV::X6)
                               .Case("{t2}", RISCV::X7)
                               .Cases("{s0}", "{fp}", RISCV::X8)
                               .Case("{s1}", RISCV::X9)
                               .Case("{a0}", RISCV::X10)
                               .Case("{a1}", RISCV::X11)
                               .Case("{a2}", RISCV::X12)
                               .Case("{a3}", RISCV::X13)
                               .Case("{a4}", RISCV::X14)
                               .Case("{a5}", RISCV::X15)
                               .Case("{a6}", RISCV::X16)
                               .Case("{a7}", RISCV::X17)
                               .Case("{s2}", RISCV::X18)
                               .Case("{s3}", RISCV::X19)
                               .Case("{s4}", RISCV::X20)
                               .Case("{s5}", RISCV::X21)
                               .Case("{s6}", RISCV::X22)
                               .Case("{s7}", RISCV::X23)
                               .Case("{s8}", RISCV::X24)
                               .Case("{s9}", RISCV::X25)
                               .Case("{s10}", RISCV::X26)
                               .Case("{s11}", RISCV::X27)
                               .Case("{t3}", RISCV::X28)
                               .Case("{t4}", RISCV::X29)
                               .Case("{t5}", RISCV::X30)
                               .Case("{t6}", RISCV::X31)
                               .Default(RISCV::NoRegister);
  if (XRegFromAlias != RISCV::NoRegister)
    return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);

  // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
  // TableGen record rather than the AsmName to choose registers for InlineAsm
  // constraints, plus we want to match those names to the widest floating point
  // register type available, manually select floating point registers here.
  //
  // The second case is the ABI name of the register, so that frontends can also
  // use the ABI names in register constraint lists.
  if (Subtarget.hasStdExtF()) {
    unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
                        .Cases("{f0}", "{ft0}", RISCV::F0_F)
                        .Cases("{f1}", "{ft1}", RISCV::F1_F)
                        .Cases("{f2}", "{ft2}", RISCV::F2_F)
                        .Cases("{f3}", "{ft3}", RISCV::F3_F)
                        .Cases("{f4}", "{ft4}", RISCV::F4_F)
                        .Cases("{f5}", "{ft5}", RISCV::F5_F)
                        .Cases("{f6}", "{ft6}", RISCV::F6_F)
                        .Cases("{f7}", "{ft7}", RISCV::F7_F)
                        .Cases("{f8}", "{fs0}", RISCV::F8_F)
                        .Cases("{f9}", "{fs1}", RISCV::F9_F)
                        .Cases("{f10}", "{fa0}", RISCV::F10_F)
                        .Cases("{f11}", "{fa1}", RISCV::F11_F)
                        .Cases("{f12}", "{fa2}", RISCV::F12_F)
                        .Cases("{f13}", "{fa3}", RISCV::F13_F)
                        .Cases("{f14}", "{fa4}", RISCV::F14_F)
                        .Cases("{f15}", "{fa5}", RISCV::F15_F)
                        .Cases("{f16}", "{fa6}", RISCV::F16_F)
                        .Cases("{f17}", "{fa7}", RISCV::F17_F)
                        .Cases("{f18}", "{fs2}", RISCV::F18_F)
                        .Cases("{f19}", "{fs3}", RISCV::F19_F)
                        .Cases("{f20}", "{fs4}", RISCV::F20_F)
                        .Cases("{f21}", "{fs5}", RISCV::F21_F)
                        .Cases("{f22}", "{fs6}", RISCV::F22_F)
                        .Cases("{f23}", "{fs7}", RISCV::F23_F)
                        .Cases("{f24}", "{fs8}", RISCV::F24_F)
                        .Cases("{f25}", "{fs9}", RISCV::F25_F)
                        .Cases("{f26}", "{fs10}", RISCV::F26_F)
                        .Cases("{f27}", "{fs11}", RISCV::F27_F)
                        .Cases("{f28}", "{ft8}", RISCV::F28_F)
                        .Cases("{f29}", "{ft9}", RISCV::F29_F)
                        .Cases("{f30}", "{ft10}", RISCV::F30_F)
                        .Cases("{f31}", "{ft11}", RISCV::F31_F)
                        .Default(RISCV::NoRegister);
    if (FReg != RISCV::NoRegister) {
      assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
      if (Subtarget.hasStdExtD()) {
        unsigned RegNo = FReg - RISCV::F0_F;
        unsigned DReg = RISCV::F0_D + RegNo;
        return std::make_pair(DReg, &RISCV::FPR64RegClass);
      }
      return std::make_pair(FReg, &RISCV::FPR32RegClass);
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

unsigned
RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
  // Currently only support length 1 constraints.
  if (ConstraintCode.size() == 1) {
    switch (ConstraintCode[0]) {
    case 'A':
      return InlineAsm::Constraint_A;
    default:
      break;
    }
  }

  return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}

void RISCVTargetLowering::LowerAsmOperandForConstraint(
    SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
    SelectionDAG &DAG) const {
  // Currently only support length 1 constraints.
  if (Constraint.length() == 1) {
    switch (Constraint[0]) {
    case 'I':
      // Validate & create a 12-bit signed immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getSExtValue();
        if (isInt<12>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
      }
      return;
    case 'J':
      // Validate & create an integer zero operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (C->getZExtValue() == 0)
          Ops.push_back(
              DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
      return;
    case 'K':
      // Validate & create a 5-bit unsigned immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getZExtValue();
        if (isUInt<5>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
      }
      return;
    default:
      break;
    }
  }
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
                                                   Instruction *Inst,
                                                   AtomicOrdering Ord) const {
  if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
    return Builder.CreateFence(Ord);
  if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Release);
  return nullptr;
}

Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
                                                    Instruction *Inst,
                                                    AtomicOrdering Ord) const {
  if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Acquire);
  return nullptr;
}

TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
  // point operations can't be used in an lr/sc sequence without breaking the
  // forward-progress guarantee.
  if (AI->isFloatingPointOperation())
    return AtomicExpansionKind::CmpXChg;

  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

static Intrinsic::ID
getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
  if (XLen == 32) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i32;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i32;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i32;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i32;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i32;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i32;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i32;
    }
  }

  if (XLen == 64) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i64;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i64;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i64;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i64;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i64;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i64;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i64;
    }
  }

  llvm_unreachable("Unexpected XLen\n");
}

Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilder<> &Builder,
    AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  Value *Ordering =
      Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
  Type *Tys[] = {AlignedAddr->getType()};
  Function *LrwOpScwLoop = Intrinsic::getDeclaration(
      AI->getModule(),
      getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);

  if (XLen == 64) {
    Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
  }

  Value *Result;

  // Must pass the shift amount needed to sign extend the loaded value prior
  // to performing a signed comparison for min/max. ShiftAmt is the number of
  // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
  // is the number of bits to left+right shift the value in order to
  // sign-extend.
  if (AI->getOperation() == AtomicRMWInst::Min ||
      AI->getOperation() == AtomicRMWInst::Max) {
    const DataLayout &DL = AI->getModule()->getDataLayout();
    unsigned ValWidth =
        DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
    Value *SextShamt =
        Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
    Result = Builder.CreateCall(LrwOpScwLoop,
                                {AlignedAddr, Incr, Mask, SextShamt, Ordering});
  } else {
    Result =
        Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
  }

  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}

TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
    AtomicCmpXchgInst *CI) const {
  unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
    IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
  Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
  if (XLen == 64) {
    CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
    NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
  }
  Type *Tys[] = {AlignedAddr->getType()};
  Function *MaskedCmpXchg =
      Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
  Value *Result = Builder.CreateCall(
      MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}

bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                                     EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f16:
    return Subtarget.hasStdExtZfh();
  case MVT::f32:
    return Subtarget.hasStdExtF();
  case MVT::f64:
    return Subtarget.hasStdExtD();
  default:
    break;
  }

  return false;
}

Register RISCVTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X10;
}

Register RISCVTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X11;
}

bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
  // Return false to suppress the unnecessary extensions if the LibCall
  // arguments or return value is f32 type for LP64 ABI.
  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32))
    return false;

  return true;
}

bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
  if (Subtarget.is64Bit() && Type == MVT::i32)
    return true;

  return IsSigned;
}

bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
                                                 SDValue C) const {
  // Check integral scalar types.
  if (VT.isScalarInteger()) {
    // Omit the optimization if the sub target has the M extension and the data
    // size exceeds XLen.
    if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen())
      return false;
    if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
      // Break the MUL to a SLLI and an ADD/SUB.
      const APInt &Imm = ConstNode->getAPIntValue();
      if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
          (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
        return true;
      // Omit the following optimization if the sub target has the M extension
      // and the data size >= XLen.
      if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen())
        return false;
      // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
      // a pair of LUI/ADDI.
      if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) {
        APInt ImmS = Imm.ashr(Imm.countTrailingZeros());
        if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
            (1 - ImmS).isPowerOf2())
          return true;
      }
    }
  }

  return false;
}

bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
  if (!Subtarget.useRVVForFixedLengthVectors())
    return false;

  if (!VT.isFixedLengthVector())
    return false;

  // Don't use RVV for vectors we cannot scalarize if required.
  switch (VT.getVectorElementType().SimpleTy) {
  // i1 is supported but has different rules.
  default:
    return false;
  case MVT::i1:
    // Masks can only use a single register.
    if (VT.getVectorNumElements() > Subtarget.getMinRVVVectorSizeInBits())
      return false;
    break;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
    break;
  case MVT::f16:
    if (!Subtarget.hasStdExtZfh())
      return false;
    break;
  case MVT::f32:
    if (!Subtarget.hasStdExtF())
      return false;
    break;
  case MVT::f64:
    if (!Subtarget.hasStdExtD())
      return false;
    break;
  }

  unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
  // Don't use RVV for types that don't fit.
  if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
    return false;

  // TODO: Perhaps an artificial restriction, but worth having whilst getting
  // the base fixed length RVV support in place.
  if (!VT.isPow2VectorType())
    return false;

  return true;
}

bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
    bool *Fast) const {
  if (!VT.isScalableVector())
    return false;

  EVT ElemVT = VT.getVectorElementType();
  if (Alignment >= ElemVT.getStoreSize()) {
    if (Fast)
      *Fast = true;
    return true;
  }

  return false;
}

#define GET_REGISTER_MATCHER
#include "RISCVGenAsmMatcher.inc"

Register
RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                       const MachineFunction &MF) const {
  Register Reg = MatchRegisterAltName(RegName);
  if (Reg == RISCV::NoRegister)
    Reg = MatchRegisterName(RegName);
  if (Reg == RISCV::NoRegister)
    report_fatal_error(
        Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
  BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
  if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
    report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
                             StringRef(RegName) + "\"."));
  return Reg;
}

namespace llvm {
namespace RISCVVIntrinsicsTable {

#define GET_RISCVVIntrinsicsTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // namespace RISCVVIntrinsicsTable

} // namespace llvm