//===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISCV uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "RISCVISelLowering.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVRegisterInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-lower"

STATISTIC(NumTailCalls, "Number of tail calls");

RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                         const RISCVSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  if (Subtarget.isRV32E())
    report_fatal_error("Codegen not yet implemented for RV32E");

  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");

  if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
      !Subtarget.hasStdExtF()) {
    errs() << "Hard-float 'f' ABI can't be used for a target that "
              "doesn't support the F instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
             !Subtarget.hasStdExtD()) {
    errs() << "Hard-float 'd' ABI can't be used for a target that "
              "doesn't support the D instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  }

  switch (ABI) {
  default:
    report_fatal_error("Don't know how to lower this ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64:
  case RISCVABI::ABI_LP64F:
  case RISCVABI::ABI_LP64D:
    break;
  }

  MVT XLenVT = Subtarget.getXLenVT();

  // Set up the register classes.
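  // XLenVT is i32 on RV32 and i64 on RV64; scalar integers of XLen width live
  // in the GPR register file.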
  addRegisterClass(XLenVT, &RISCV::GPRRegClass);

  if (Subtarget.hasStdExtZfh())
    addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
  if (Subtarget.hasStdExtF())
    addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
  if (Subtarget.hasStdExtD())
    addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);

  static const MVT::SimpleValueType BoolVecVTs[] = {
      MVT::nxv1i1,  MVT::nxv2i1,  MVT::nxv4i1, MVT::nxv8i1,
      MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
  static const MVT::SimpleValueType IntVecVTs[] = {
      MVT::nxv1i8,  MVT::nxv2i8,   MVT::nxv4i8,   MVT::nxv8i8,  MVT::nxv16i8,
      MVT::nxv32i8, MVT::nxv64i8,  MVT::nxv1i16,  MVT::nxv2i16, MVT::nxv4i16,
      MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
      MVT::nxv4i32, MVT::nxv8i32,  MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
      MVT::nxv4i64, MVT::nxv8i64};
  static const MVT::SimpleValueType F16VecVTs[] = {
      MVT::nxv1f16, MVT::nxv2f16,  MVT::nxv4f16,
      MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
  static const MVT::SimpleValueType F32VecVTs[] = {
      MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
  static const MVT::SimpleValueType F64VecVTs[] = {
      MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};

  if (Subtarget.hasStdExtV()) {
    auto addRegClassForRVV = [this](MVT VT) {
      unsigned Size = VT.getSizeInBits().getKnownMinValue();
      assert(Size <= 512 && isPowerOf2_32(Size));
      const TargetRegisterClass *RC;
      if (Size <= 64)
        RC = &RISCV::VRRegClass;
      else if (Size == 128)
        RC = &RISCV::VRM2RegClass;
      else if (Size == 256)
        RC = &RISCV::VRM4RegClass;
      else
        RC = &RISCV::VRM8RegClass;

      addRegisterClass(VT, RC);
    };

    for (MVT VT : BoolVecVTs)
      addRegClassForRVV(VT);
    for (MVT VT : IntVecVTs)
      addRegClassForRVV(VT);

    if (Subtarget.hasStdExtZfh())
      for (MVT VT : F16VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasStdExtF())
      for (MVT VT : F32VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasStdExtD())
      for (MVT VT : F64VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.useRVVForFixedLengthVectors()) {
      auto addRegClassForFixedVectors = [this](MVT VT) {
        unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
        const TargetRegisterClass *RC;
        if (LMul == 1 || VT.getVectorElementType() == MVT::i1)
          RC = &RISCV::VRRegClass;
        else if (LMul == 2)
          RC = &RISCV::VRM2RegClass;
        else if (LMul == 4)
          RC = &RISCV::VRM4RegClass;
        else if (LMul == 8)
          RC = &RISCV::VRM8RegClass;
        else
          llvm_unreachable("Unexpected LMul!");

        addRegisterClass(VT, RC);
      };
      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);

      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);
    }
  }

  // Compute derived properties from the register classes.
  computeRegisterProperties(STI.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(RISCV::X2);

  for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
    setLoadExtAction(N, XLenVT, MVT::i1, Promote);

  // TODO: add all necessary setOperationAction calls.
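  // Operations not configured below keep the default action, which is Legal
  // for any type that was given a register class above.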
  setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, XLenVT, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::SELECT_CC, XLenVT, Expand);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  if (!Subtarget.hasStdExtZbb()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  }

  if (Subtarget.hasStdExtZbb() && Subtarget.is64Bit())
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::ADD, MVT::i32, Custom);
    setOperationAction(ISD::SUB, MVT::i32, Custom);
    setOperationAction(ISD::SHL, MVT::i32, Custom);
    setOperationAction(ISD::SRA, MVT::i32, Custom);
    setOperationAction(ISD::SRL, MVT::i32, Custom);

    setOperationAction(ISD::UADDO, MVT::i32, Custom);
    setOperationAction(ISD::USUBO, MVT::i32, Custom);
    setOperationAction(ISD::UADDSAT, MVT::i32, Custom);
    setOperationAction(ISD::USUBSAT, MVT::i32, Custom);
  }

  if (!Subtarget.hasStdExtM()) {
    setOperationAction(ISD::MUL, XLenVT, Expand);
    setOperationAction(ISD::MULHS, XLenVT, Expand);
    setOperationAction(ISD::MULHU, XLenVT, Expand);
    setOperationAction(ISD::SDIV, XLenVT, Expand);
    setOperationAction(ISD::UDIV, XLenVT, Expand);
    setOperationAction(ISD::SREM, XLenVT, Expand);
    setOperationAction(ISD::UREM, XLenVT, Expand);
  } else {
    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::MUL, MVT::i32, Custom);
      setOperationAction(ISD::MUL, MVT::i128, Custom);

      setOperationAction(ISD::SDIV, MVT::i8, Custom);
      setOperationAction(ISD::UDIV, MVT::i8, Custom);
      setOperationAction(ISD::UREM, MVT::i8, Custom);
      setOperationAction(ISD::SDIV, MVT::i16, Custom);
      setOperationAction(ISD::UDIV, MVT::i16, Custom);
      setOperationAction(ISD::UREM, MVT::i16, Custom);
      setOperationAction(ISD::SDIV, MVT::i32, Custom);
      setOperationAction(ISD::UDIV, MVT::i32, Custom);
      setOperationAction(ISD::UREM, MVT::i32, Custom);
    } else {
      setOperationAction(ISD::MUL, MVT::i64, Custom);
    }
  }

  setOperationAction(ISD::SDIVREM, XLenVT, Expand);
  setOperationAction(ISD::UDIVREM, XLenVT, Expand);
  setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
  setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);

  setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
  setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
  setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);

  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::ROTL, MVT::i32, Custom);
      setOperationAction(ISD::ROTR, MVT::i32, Custom);
    }
  } else {
    setOperationAction(ISD::ROTL, XLenVT, Expand);
    setOperationAction(ISD::ROTR, XLenVT, Expand);
  }

  if (Subtarget.hasStdExtZbp()) {
    // Custom lower bswap/bitreverse so we can convert them to GREVI to enable
    // more combining.
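    // (For example, on RV64 bitreverse becomes GREVI with immediate 63 and
    // bswap becomes GREVI with immediate 56; see the BSWAP/BITREVERSE case in
    // LowerOperation.)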
    setOperationAction(ISD::BITREVERSE, XLenVT, Custom);
    setOperationAction(ISD::BSWAP, XLenVT, Custom);

    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
      setOperationAction(ISD::BSWAP, MVT::i32, Custom);
    }
  } else {
    // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
    // pattern match it directly in isel.
    setOperationAction(ISD::BSWAP, XLenVT,
                       Subtarget.hasStdExtZbb() ? Legal : Expand);
  }

  if (Subtarget.hasStdExtZbb()) {
    setOperationAction(ISD::SMIN, XLenVT, Legal);
    setOperationAction(ISD::SMAX, XLenVT, Legal);
    setOperationAction(ISD::UMIN, XLenVT, Legal);
    setOperationAction(ISD::UMAX, XLenVT, Legal);

    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::CTTZ, MVT::i32, Custom);
      setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);
      setOperationAction(ISD::CTLZ, MVT::i32, Custom);
      setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
    }
  } else {
    setOperationAction(ISD::CTTZ, XLenVT, Expand);
    setOperationAction(ISD::CTLZ, XLenVT, Expand);
    setOperationAction(ISD::CTPOP, XLenVT, Expand);
  }

  if (Subtarget.hasStdExtZbt()) {
    setOperationAction(ISD::FSHL, XLenVT, Custom);
    setOperationAction(ISD::FSHR, XLenVT, Custom);
    setOperationAction(ISD::SELECT, XLenVT, Legal);

    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::FSHL, MVT::i32, Custom);
      setOperationAction(ISD::FSHR, MVT::i32, Custom);
    }
  } else {
    setOperationAction(ISD::SELECT, XLenVT, Custom);
  }

  ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
      ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
      ISD::SETGE,  ISD::SETNE,  ISD::SETO,   ISD::SETUO};

  ISD::NodeType FPOpToExpand[] = {
      ISD::FSIN, ISD::FCOS,       ISD::FSINCOS,   ISD::FPOW,
      ISD::FREM, ISD::FP16_TO_FP, ISD::FP_TO_FP16};

  if (Subtarget.hasStdExtZfh())
    setOperationAction(ISD::BITCAST, MVT::i16, Custom);

  if (Subtarget.hasStdExtZfh()) {
    setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f16, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
    setOperationAction(ISD::SELECT, MVT::f16, Custom);
    setOperationAction(ISD::BR_CC, MVT::f16, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f16, Expand);
  }

  if (Subtarget.hasStdExtF()) {
    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Custom);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  }

  if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);

  if (Subtarget.hasStdExtD()) {
    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Custom);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
  }

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
  }

  setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
  setOperationAction(ISD::BlockAddress, XLenVT, Custom);
  setOperationAction(ISD::ConstantPool, XLenVT, Custom);
  setOperationAction(ISD::JumpTable, XLenVT, Custom);

  setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);

  // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
  // Unfortunately this can't be determined just from the ISA naming string.
  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
                     Subtarget.is64Bit() ? Legal : Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);
  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  if (Subtarget.hasStdExtA()) {
    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
    setMinCmpXchgSizeInBits(32);
  } else {
    setMaxAtomicSizeInBitsSupported(0);
  }

  setBooleanContents(ZeroOrOneBooleanContent);

  if (Subtarget.hasStdExtV()) {
    setBooleanVectorContents(ZeroOrOneBooleanContent);

    setOperationAction(ISD::VSCALE, XLenVT, Custom);

    // RVV intrinsics may have illegal operands.
    // We also need to custom legalize vmv.x.s.
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);

    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);

    if (!Subtarget.is64Bit()) {
      // We must custom-lower certain vXi64 operations on RV32 due to the
      // vector element type being illegal.
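      // (RV32 has no i64 GPR, so the scalar operand or result of these nodes
      // must be split into and reassembled from 32-bit halves.)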
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i64, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::i64, Custom);

      setOperationAction(ISD::VECREDUCE_ADD, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_AND, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_OR, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_XOR, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_SMAX, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_SMIN, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_UMAX, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_UMIN, MVT::i64, Custom);
    }

    for (MVT VT : BoolVecVTs) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);

      // Mask VTs are custom-expanded into a series of standard nodes.
      setOperationAction(ISD::TRUNCATE, VT, Custom);
      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);

      // Expand all extending loads to types larger than this, and truncating
      // stores from types larger than this.
      for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
        setTruncStoreAction(OtherVT, VT, Expand);
        setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
      }
    }

    for (MVT VT : IntVecVTs) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
      setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);

      setOperationAction(ISD::SMIN, VT, Legal);
      setOperationAction(ISD::SMAX, VT, Legal);
      setOperationAction(ISD::UMIN, VT, Legal);
      setOperationAction(ISD::UMAX, VT, Legal);

      setOperationAction(ISD::ROTL, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      // Custom-lower extensions and truncations from/to mask types.
      setOperationAction(ISD::ANY_EXTEND, VT, Custom);
      setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
      setOperationAction(ISD::ZERO_EXTEND, VT, Custom);

      // RVV has native int->float & float->int conversions where the
      // element type sizes are within one power-of-two of each other. Any
      // wider distances between type sizes have to be lowered as sequences
      // which progressively narrow the gap in stages.
      setOperationAction(ISD::SINT_TO_FP, VT, Custom);
      setOperationAction(ISD::UINT_TO_FP, VT, Custom);
      setOperationAction(ISD::FP_TO_SINT, VT, Custom);
      setOperationAction(ISD::FP_TO_UINT, VT, Custom);

      // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
      // nodes which truncate by one power of two at a time.
      setOperationAction(ISD::TRUNCATE, VT, Custom);

      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);

      // Custom-lower reduction operations to set up the corresponding custom
      // nodes' operands.
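      // (RVV reductions deliver their scalar result in element 0 of a vector
      // register, so the result is extracted afterwards.)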
      setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
      setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
      setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
      setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
      setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
      setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
      setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
      setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);

      setOperationAction(ISD::MLOAD, VT, Custom);
      setOperationAction(ISD::MSTORE, VT, Custom);
      setOperationAction(ISD::MGATHER, VT, Custom);
      setOperationAction(ISD::MSCATTER, VT, Custom);

      setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

      setOperationAction(ISD::STEP_VECTOR, VT, Custom);
      setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);

      for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
        setTruncStoreAction(VT, OtherVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
      }
    }

    // Expand various CCs to best match the RVV ISA, which natively supports
    // UNE but no other unordered comparisons, and supports all ordered
    // comparisons except ONE. Additionally, we expand GT,OGT,GE,OGE for
    // optimization purposes; they are expanded to their swapped-operand CCs
    // (LT,OLT,LE,OLE), and we pattern-match those back to the "original",
    // swapping operands once more. This way we catch both operations and both
    // "vf" and "fv" forms with fewer patterns.
    ISD::CondCode VFPCCToExpand[] = {
        ISD::SETO,   ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
        ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
        ISD::SETGT,  ISD::SETOGT, ISD::SETGE,  ISD::SETOGE,
    };

    // Sets common operation actions on RVV floating-point vector types.
    const auto SetCommonVFPActions = [&](MVT VT) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
      // RVV has native FP_ROUND & FP_EXTEND conversions where the element
      // type sizes are within one power-of-two of each other. Therefore
      // conversions between vXf16 and vXf64 must be lowered as sequences
      // which convert via vXf32.
      setOperationAction(ISD::FP_ROUND, VT, Custom);
      setOperationAction(ISD::FP_EXTEND, VT, Custom);
      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
      // Expand various condition codes (explained above).
      for (auto CC : VFPCCToExpand)
        setCondCodeAction(CC, VT, Expand);

      setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
      setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
      setOperationAction(ISD::FCOPYSIGN, VT, Legal);

      setOperationAction(ISD::MLOAD, VT, Custom);
      setOperationAction(ISD::MSTORE, VT, Custom);
      setOperationAction(ISD::MGATHER, VT, Custom);
      setOperationAction(ISD::MSCATTER, VT, Custom);

      setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

      setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
    };

    // Sets common extload/truncstore actions on RVV floating-point vector
    // types.
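    // (Expanding these means, for example, an extending load from nxv2f16 to
    // nxv2f32 becomes a plain nxv2f16 load followed by an FP_EXTEND.)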
    const auto SetCommonVFPExtLoadTruncStoreActions =
        [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
          for (auto SmallVT : SmallerVTs) {
            setTruncStoreAction(VT, SmallVT, Expand);
            setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
          }
        };

    if (Subtarget.hasStdExtZfh())
      for (MVT VT : F16VecVTs)
        SetCommonVFPActions(VT);

    for (MVT VT : F32VecVTs) {
      if (Subtarget.hasStdExtF())
        SetCommonVFPActions(VT);
      SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
    }

    for (MVT VT : F64VecVTs) {
      if (Subtarget.hasStdExtD())
        SetCommonVFPActions(VT);
      SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
      SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
    }

    if (Subtarget.useRVVForFixedLengthVectors()) {
      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
        if (!useRVVForFixedLengthVectorVT(VT))
          continue;

        // By default everything must be expanded.
        for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
          setOperationAction(Op, VT, Expand);
        for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
          setTruncStoreAction(VT, OtherVT, Expand);
          setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
          setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
          setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
        }

        // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
        setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
        setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

        setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
        setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);

        setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);

        setOperationAction(ISD::LOAD, VT, Custom);
        setOperationAction(ISD::STORE, VT, Custom);

        setOperationAction(ISD::SETCC, VT, Custom);

        setOperationAction(ISD::TRUNCATE, VT, Custom);

        setOperationAction(ISD::BITCAST, VT, Custom);

        // Operations below differ between mask vectors and other vectors.
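        // Mask vectors get just the logical operations on top of the common
        // actions set above; all remaining operations stay Expand.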
        if (VT.getVectorElementType() == MVT::i1) {
          setOperationAction(ISD::AND, VT, Custom);
          setOperationAction(ISD::OR, VT, Custom);
          setOperationAction(ISD::XOR, VT, Custom);
          continue;
        }

        setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
        setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);

        setOperationAction(ISD::MLOAD, VT, Custom);
        setOperationAction(ISD::MSTORE, VT, Custom);
        setOperationAction(ISD::MGATHER, VT, Custom);
        setOperationAction(ISD::MSCATTER, VT, Custom);
        setOperationAction(ISD::ADD, VT, Custom);
        setOperationAction(ISD::MUL, VT, Custom);
        setOperationAction(ISD::SUB, VT, Custom);
        setOperationAction(ISD::AND, VT, Custom);
        setOperationAction(ISD::OR, VT, Custom);
        setOperationAction(ISD::XOR, VT, Custom);
        setOperationAction(ISD::SDIV, VT, Custom);
        setOperationAction(ISD::SREM, VT, Custom);
        setOperationAction(ISD::UDIV, VT, Custom);
        setOperationAction(ISD::UREM, VT, Custom);
        setOperationAction(ISD::SHL, VT, Custom);
        setOperationAction(ISD::SRA, VT, Custom);
        setOperationAction(ISD::SRL, VT, Custom);

        setOperationAction(ISD::SMIN, VT, Custom);
        setOperationAction(ISD::SMAX, VT, Custom);
        setOperationAction(ISD::UMIN, VT, Custom);
        setOperationAction(ISD::UMAX, VT, Custom);
        setOperationAction(ISD::ABS, VT, Custom);

        setOperationAction(ISD::MULHS, VT, Custom);
        setOperationAction(ISD::MULHU, VT, Custom);

        setOperationAction(ISD::SINT_TO_FP, VT, Custom);
        setOperationAction(ISD::UINT_TO_FP, VT, Custom);
        setOperationAction(ISD::FP_TO_SINT, VT, Custom);
        setOperationAction(ISD::FP_TO_UINT, VT, Custom);

        setOperationAction(ISD::VSELECT, VT, Custom);

        setOperationAction(ISD::ANY_EXTEND, VT, Custom);
        setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
        setOperationAction(ISD::ZERO_EXTEND, VT, Custom);

        // Custom-lower reduction operations to set up the corresponding
        // custom nodes' operands.
        setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
        setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
        setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
        setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
        setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
        setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
        setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
        setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
      }

      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
        if (!useRVVForFixedLengthVectorVT(VT))
          continue;

        // By default everything must be expanded.
        for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
          setOperationAction(Op, VT, Expand);
        for (MVT OtherVT : MVT::fp_fixedlen_vector_valuetypes()) {
          setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
          setTruncStoreAction(VT, OtherVT, Expand);
        }

        // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
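        // (convertToScalableVector/convertFromScalableVector below implement
        // these "casts" via INSERT_SUBVECTOR/EXTRACT_SUBVECTOR at index 0.)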
        setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
        setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

        setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
        setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
        setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);

        setOperationAction(ISD::LOAD, VT, Custom);
        setOperationAction(ISD::STORE, VT, Custom);
        setOperationAction(ISD::MLOAD, VT, Custom);
        setOperationAction(ISD::MSTORE, VT, Custom);
        setOperationAction(ISD::MGATHER, VT, Custom);
        setOperationAction(ISD::MSCATTER, VT, Custom);
        setOperationAction(ISD::FADD, VT, Custom);
        setOperationAction(ISD::FSUB, VT, Custom);
        setOperationAction(ISD::FMUL, VT, Custom);
        setOperationAction(ISD::FDIV, VT, Custom);
        setOperationAction(ISD::FNEG, VT, Custom);
        setOperationAction(ISD::FABS, VT, Custom);
        setOperationAction(ISD::FCOPYSIGN, VT, Custom);
        setOperationAction(ISD::FSQRT, VT, Custom);
        setOperationAction(ISD::FMA, VT, Custom);

        setOperationAction(ISD::FP_ROUND, VT, Custom);
        setOperationAction(ISD::FP_EXTEND, VT, Custom);

        for (auto CC : VFPCCToExpand)
          setCondCodeAction(CC, VT, Expand);

        setOperationAction(ISD::VSELECT, VT, Custom);

        setOperationAction(ISD::BITCAST, VT, Custom);

        setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
        setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
      }

      // Custom-legalize bitcasts from fixed-length vectors to scalar types.
      setOperationAction(ISD::BITCAST, MVT::i8, Custom);
      setOperationAction(ISD::BITCAST, MVT::i16, Custom);
      setOperationAction(ISD::BITCAST, MVT::i32, Custom);
      setOperationAction(ISD::BITCAST, MVT::i64, Custom);
      setOperationAction(ISD::BITCAST, MVT::f16, Custom);
      setOperationAction(ISD::BITCAST, MVT::f32, Custom);
      setOperationAction(ISD::BITCAST, MVT::f64, Custom);
    }
  }

  // Function alignments.
  const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
  setMinFunctionAlignment(FunctionAlignment);
  setPrefFunctionAlignment(FunctionAlignment);

  setMinimumJumpTableEntries(5);

  // Jumps are expensive, compared to logic.
  setJumpIsExpensive();

  // We can use any register for comparisons.
  setHasMultipleConditionRegisters();

  if (Subtarget.hasStdExtZbp()) {
    setTargetDAGCombine(ISD::OR);
  }
  if (Subtarget.hasStdExtV()) {
    setTargetDAGCombine(ISD::FCOPYSIGN);
    setTargetDAGCombine(ISD::MGATHER);
    setTargetDAGCombine(ISD::MSCATTER);
  }
}

EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
                                            LLVMContext &Context,
                                            EVT VT) const {
  if (!VT.isVector())
    return getPointerTy(DL);
  if (Subtarget.hasStdExtV() &&
      (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
    return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
  return VT.changeVectorElementTypeToInteger();
}

bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                             const CallInst &I,
                                             MachineFunction &MF,
                                             unsigned Intrinsic) const {
  switch (Intrinsic) {
  default:
    return false;
  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
  case Intrinsic::riscv_masked_atomicrmw_add_i32:
  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
  case Intrinsic::riscv_masked_atomicrmw_max_i32:
  case Intrinsic::riscv_masked_atomicrmw_min_i32:
  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
  case Intrinsic::riscv_masked_cmpxchg_i32:
    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::getVT(PtrTy->getElementType());
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    return true;
  }
}

bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                const AddrMode &AM, Type *Ty,
                                                unsigned AS,
                                                Instruction *I) const {
  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // Require a 12-bit signed offset.
  if (!isInt<12>(AM.BaseOffs))
    return false;

  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (!AM.HasBaseReg) // allow "r+i".
      break;
    return false; // disallow "r+r" or "r+r+i".
  default:
    return false;
  }

  return true;
}

bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

// On RV32, 64-bit integers are split into their high and low parts and held
// in two different registers, so the trunc is free since the low register can
// just be used.
bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
  if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
    return false;
  unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
  unsigned DestBits = DstTy->getPrimitiveSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
  if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
      !SrcVT.isInteger() || !DstVT.isInteger())
    return false;
  unsigned SrcBits = SrcVT.getSizeInBits();
  unsigned DestBits = DstVT.getSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Zexts are free if they can be combined with a load.
  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
         (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  return TargetLowering::isZExtFree(Val, VT2);
}

bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}

bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
  return Subtarget.hasStdExtZbb();
}

bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
  return Subtarget.hasStdExtZbb();
}

bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                       bool ForCodeSize) const {
  if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
    return false;
  if (VT == MVT::f32 && !Subtarget.hasStdExtF())
    return false;
  if (VT == MVT::f64 && !Subtarget.hasStdExtD())
    return false;
  if (Imm.isNegZero())
    return false;
  return Imm.isZero();
}

bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
  return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) ||
         (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
         (VT == MVT::f64 && Subtarget.hasStdExtD());
}

MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
                                                       CallingConv::ID CC,
                                                       EVT VT) const {
  // Use f32 to pass f16 if it is legal and Zfh is not enabled. We might still
  // end up using a GPR but that will be decided based on ABI.
  if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
    return MVT::f32;

  return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
}

unsigned RISCVTargetLowering::getNumRegistersForCallingConv(
    LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
  // Use f32 to pass f16 if it is legal and Zfh is not enabled. We might still
  // end up using a GPR but that will be decided based on ABI.
  if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
    return 1;

  return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
}

// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly by branches
// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
// with 1/-1.
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
                                    ISD::CondCode &CC, SelectionDAG &DAG) {
  // Convert X > -1 to X >= 0.
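  // (A compare against zero can use the x0 register directly, so no constant
  // needs to be materialized.)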
  if (CC == ISD::SETGT && isAllOnesConstant(RHS)) {
    RHS = DAG.getConstant(0, DL, RHS.getValueType());
    CC = ISD::SETGE;
    return;
  }
  // Convert X < 1 to 0 >= X.
  if (CC == ISD::SETLT && isOneConstant(RHS)) {
    RHS = LHS;
    LHS = DAG.getConstant(0, DL, RHS.getValueType());
    CC = ISD::SETGE;
    return;
  }

  switch (CC) {
  default:
    break;
  case ISD::SETGT:
  case ISD::SETLE:
  case ISD::SETUGT:
  case ISD::SETULE:
    CC = ISD::getSetCCSwappedOperands(CC);
    std::swap(LHS, RHS);
    break;
  }
}

// Return the RISC-V branch opcode that matches the given DAG integer
// condition code. The CondCode must be one of those supported by the RISC-V
// ISA (see translateSetCCForBranch).
static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported CondCode");
  case ISD::SETEQ:
    return RISCV::BEQ;
  case ISD::SETNE:
    return RISCV::BNE;
  case ISD::SETLT:
    return RISCV::BLT;
  case ISD::SETGE:
    return RISCV::BGE;
  case ISD::SETULT:
    return RISCV::BLTU;
  case ISD::SETUGE:
    return RISCV::BGEU;
  }
}

RISCVVLMUL RISCVTargetLowering::getLMUL(MVT VT) {
  assert(VT.isScalableVector() && "Expecting a scalable vector type");
  unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
  if (VT.getVectorElementType() == MVT::i1)
    KnownSize *= 8;

  switch (KnownSize) {
  default:
    llvm_unreachable("Invalid LMUL.");
  case 8:
    return RISCVVLMUL::LMUL_F8;
  case 16:
    return RISCVVLMUL::LMUL_F4;
  case 32:
    return RISCVVLMUL::LMUL_F2;
  case 64:
    return RISCVVLMUL::LMUL_1;
  case 128:
    return RISCVVLMUL::LMUL_2;
  case 256:
    return RISCVVLMUL::LMUL_4;
  case 512:
    return RISCVVLMUL::LMUL_8;
  }
}

unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVVLMUL LMul) {
  switch (LMul) {
  default:
    llvm_unreachable("Invalid LMUL.");
  case RISCVVLMUL::LMUL_F8:
  case RISCVVLMUL::LMUL_F4:
  case RISCVVLMUL::LMUL_F2:
  case RISCVVLMUL::LMUL_1:
    return RISCV::VRRegClassID;
  case RISCVVLMUL::LMUL_2:
    return RISCV::VRM2RegClassID;
  case RISCVVLMUL::LMUL_4:
    return RISCV::VRM4RegClassID;
  case RISCVVLMUL::LMUL_8:
    return RISCV::VRM8RegClassID;
  }
}

unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
  RISCVVLMUL LMUL = getLMUL(VT);
  if (LMUL == RISCVVLMUL::LMUL_F8 || LMUL == RISCVVLMUL::LMUL_F4 ||
      LMUL == RISCVVLMUL::LMUL_F2 || LMUL == RISCVVLMUL::LMUL_1) {
    static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm1_0 + Index;
  }
  if (LMUL == RISCVVLMUL::LMUL_2) {
    static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm2_0 + Index;
  }
  if (LMUL == RISCVVLMUL::LMUL_4) {
    static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm4_0 + Index;
  }
  llvm_unreachable("Invalid vector type.");
}

unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
  if (VT.getVectorElementType() == MVT::i1)
    return RISCV::VRRegClassID;
  return getRegClassIDForLMUL(getLMUL(VT));
}

// Attempt to decompose a subvector insert/extract between VecVT and
// SubVecVT via subregister indices.
// Returns the subregister index that
// can perform the subvector insert/extract with the given element index, as
// well as the index corresponding to any leftover subvectors that must be
// further inserted/extracted within the register class for SubVecVT.
std::pair<unsigned, unsigned>
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
    MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
    const RISCVRegisterInfo *TRI) {
  static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
                 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
                 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
                "Register classes not ordered");
  unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
  unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
  // Try to compose a subregister index that takes us from the incoming
  // LMUL>1 register class down to the outgoing one. At each step we halve
  // the LMUL:
  //   nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
  // Note that this is not guaranteed to find a subregister index, such as
  // when we are extracting from one VR type to another.
  unsigned SubRegIdx = RISCV::NoSubRegister;
  for (const unsigned RCID :
       {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
    if (VecRegClassID > RCID && SubRegClassID <= RCID) {
      VecVT = VecVT.getHalfNumVectorElementsVT();
      bool IsHi =
          InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
      SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
                                            getSubregIndexByMVT(VecVT, IsHi));
      if (IsHi)
        InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
    }
  return {SubRegIdx, InsertExtractIdx};
}

// Return the largest legal scalable vector type that matches VT's element
// type.
MVT RISCVTargetLowering::getContainerForFixedLengthVector(
    const TargetLowering &TLI, MVT VT, const RISCVSubtarget &Subtarget) {
  assert(VT.isFixedLengthVector() && TLI.isTypeLegal(VT) &&
         "Expected legal fixed length vector!");

  unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
  assert(LMul <= 8 && isPowerOf2_32(LMul) && "Unexpected LMUL!");

  MVT EltVT = VT.getVectorElementType();
  switch (EltVT.SimpleTy) {
  default:
    llvm_unreachable("unexpected element type for RVV container");
  case MVT::i1: {
    // Masks are calculated assuming 8-bit elements since that's when we need
    // the most elements.
    unsigned EltsPerBlock = RISCV::RVVBitsPerBlock / 8;
    return MVT::getScalableVectorVT(MVT::i1, LMul * EltsPerBlock);
  }
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
  case MVT::f16:
  case MVT::f32:
  case MVT::f64: {
    unsigned EltsPerBlock = RISCV::RVVBitsPerBlock / EltVT.getSizeInBits();
    return MVT::getScalableVectorVT(EltVT, LMul * EltsPerBlock);
  }
  }
}

MVT RISCVTargetLowering::getContainerForFixedLengthVector(
    SelectionDAG &DAG, MVT VT, const RISCVSubtarget &Subtarget) {
  return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
                                          Subtarget);
}

MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
  return getContainerForFixedLengthVector(*this, VT, getSubtarget());
}

// Grow V to consume an entire RVV register.
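// The operand is placed at element 0 of an otherwise-undef scalable vector
// via INSERT_SUBVECTOR; the elements beyond the fixed length are undefined.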
static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
                                       const RISCVSubtarget &Subtarget) {
  assert(VT.isScalableVector() &&
         "Expected to convert into a scalable vector!");
  assert(V.getValueType().isFixedLengthVector() &&
         "Expected a fixed length vector operand!");
  SDLoc DL(V);
  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
}

// Shrink V so it's just big enough to maintain a VT's worth of data.
static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
                                         const RISCVSubtarget &Subtarget) {
  assert(VT.isFixedLengthVector() &&
         "Expected to convert into a fixed length vector!");
  assert(V.getValueType().isScalableVector() &&
         "Expected a scalable vector operand!");
  SDLoc DL(V);
  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
}

// Gets the two common "VL" operands: an all-ones mask and the vector length.
// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
// the vector type that it is contained in.
static std::pair<SDValue, SDValue>
getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
                const RISCVSubtarget &Subtarget) {
  assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
  MVT XLenVT = Subtarget.getXLenVT();
  SDValue VL = VecVT.isFixedLengthVector()
                   ? DAG.getConstant(VecVT.getVectorNumElements(), DL, XLenVT)
                   : DAG.getRegister(RISCV::X0, XLenVT);
  MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
  SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
  return {Mask, VL};
}

// As above but assuming the given type is a scalable vector type.
static std::pair<SDValue, SDValue>
getDefaultScalableVLOps(MVT VecVT, SDLoc DL, SelectionDAG &DAG,
                        const RISCVSubtarget &Subtarget) {
  assert(VecVT.isScalableVector() && "Expecting a scalable vector");
  return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
}

// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very
// little of either is (currently) supported. This can get us into an infinite
// loop where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a
// BUILD_VECTOR as a ..., etc.
// Until either (or both) of these can reliably lower any node, reporting that
// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
// which is not desirable.
bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
    EVT VT, unsigned DefinedValues) const {
  return false;
}

bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
  // Only splats are currently supported.
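  // (Splat shuffles lower to a single vrgather.vx; see lowerVECTOR_SHUFFLE
  // below.)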
  if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
    return true;

  return false;
}

static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  MVT VT = Op.getSimpleValueType();
  assert(VT.isFixedLengthVector() && "Unexpected vector!");

  MVT ContainerVT =
      RISCVTargetLowering::getContainerForFixedLengthVector(DAG, VT, Subtarget);

  SDLoc DL(Op);
  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  MVT XLenVT = Subtarget.getXLenVT();
  unsigned NumElts = Op.getNumOperands();

  if (VT.getVectorElementType() == MVT::i1) {
    if (ISD::isBuildVectorAllZeros(Op.getNode())) {
      SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
      return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
    }

    if (ISD::isBuildVectorAllOnes(Op.getNode())) {
      SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
      return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
    }

    // Lower constant mask BUILD_VECTORs via an integer vector type, in
    // scalar integer chunks whose bit-width depends on the number of mask
    // bits and XLEN.
    // First, determine the most appropriate scalar integer type to use. This
    // is at most XLenVT, but may be shrunk to a smaller vector element type
    // according to the size of the final vector - use i8 chunks rather than
    // XLenVT if we're producing a v8i1. This results in more consistent
    // codegen across RV32 and RV64.
    // If we have to use more than one INSERT_VECTOR_ELT then this
    // optimization is likely to increase code size; avoid performing it in
    // such a case.
    unsigned NumViaIntegerBits =
        std::min(std::max(NumElts, 8u), Subtarget.getXLen());
    if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
        (!DAG.shouldOptForSize() || NumElts <= NumViaIntegerBits)) {
      // Now we can create our integer vector type. Note that it may be larger
      // than the resulting mask type: v4i1 would use v1i8 as its integer type.
      MVT IntegerViaVecVT =
          MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
                           divideCeil(NumElts, NumViaIntegerBits));

      uint64_t Bits = 0;
      unsigned BitPos = 0, IntegerEltIdx = 0;
      SDValue Vec = DAG.getUNDEF(IntegerViaVecVT);

      for (unsigned I = 0; I < NumElts; I++, BitPos++) {
        // Once we accumulate enough bits to fill our scalar type, insert into
        // our vector and clear our accumulated data.
        if (I != 0 && I % NumViaIntegerBits == 0) {
          if (NumViaIntegerBits <= 32)
            Bits = SignExtend64(Bits, 32);
          SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
          Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec,
                            Elt, DAG.getConstant(IntegerEltIdx, DL, XLenVT));
          Bits = 0;
          BitPos = 0;
          IntegerEltIdx++;
        }
        SDValue V = Op.getOperand(I);
        bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
        Bits |= ((uint64_t)BitValue << BitPos);
      }

      // Insert the (remaining) scalar value into position in our integer
      // vector type.
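      // (As in the loop above, i32-or-narrower chunks are sign-extended,
      // which gives better constant materialization on RV64.)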
      if (NumViaIntegerBits <= 32)
        Bits = SignExtend64(Bits, 32);
      SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
      Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec, Elt,
                        DAG.getConstant(IntegerEltIdx, DL, XLenVT));

      if (NumElts < NumViaIntegerBits) {
        // If we're producing a smaller vector than our minimum legal integer
        // type, bitcast to the equivalent (known-legal) mask type, and extract
        // our final mask.
        assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
        Vec = DAG.getBitcast(MVT::v8i1, Vec);
        Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
                          DAG.getConstant(0, DL, XLenVT));
      } else {
        // Else we must have produced an integer type with the same size as the
        // mask type; bitcast for the final result.
        assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
        Vec = DAG.getBitcast(VT, Vec);
      }

      return Vec;
    }

    return SDValue();
  }

  if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
    unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
                                        : RISCVISD::VMV_V_X_VL;
    Splat = DAG.getNode(Opc, DL, ContainerVT, Splat, VL);
    return convertFromScalableVector(VT, Splat, DAG, Subtarget);
  }

  // Try and match an index sequence, which we can lower directly to the vid
  // instruction. An all-undef vector is matched by getSplatValue, above.
  if (VT.isInteger()) {
    bool IsVID = true;
    for (unsigned I = 0; I < NumElts && IsVID; I++)
      IsVID &= Op.getOperand(I).isUndef() ||
               (isa<ConstantSDNode>(Op.getOperand(I)) &&
                Op.getConstantOperandVal(I) == I);

    if (IsVID) {
      SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
      return convertFromScalableVector(VT, VID, DAG, Subtarget);
    }
  }

  // Attempt to detect "hidden" splats, which only reveal themselves as splats
  // when re-interpreted as a vector with a larger element type. For example,
  //   v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
  // could instead be splat as
  //   v2i32 = build_vector i32 0x00010000, i32 0x00010000
  // TODO: This optimization could also work on non-constant splats, but it
  // would require bit-manipulation instructions to construct the splat value.
  SmallVector<SDValue> Sequence;
  unsigned EltBitSize = VT.getScalarSizeInBits();
  const auto *BV = cast<BuildVectorSDNode>(Op);
  if (VT.isInteger() && EltBitSize < 64 &&
      ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
      BV->getRepeatedSequence(Sequence) &&
      (Sequence.size() * EltBitSize) <= 64) {
    unsigned SeqLen = Sequence.size();
    MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
    MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, NumElts / SeqLen);
    assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
            ViaIntVT == MVT::i64) &&
           "Unexpected sequence type");

    unsigned EltIdx = 0;
    uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
    uint64_t SplatValue = 0;
    // Construct the amalgamated value which can be splatted as this larger
    // vector type.
    for (const auto &SeqV : Sequence) {
      if (!SeqV.isUndef())
        SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
                       << (EltIdx * EltBitSize));
      EltIdx++;
    }

    // On RV64, sign-extend from 32 to 64 bits where possible in order to
    // achieve better constant materialization.
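    // (e.g. a 0xFFFFFFFF splat value becomes -1, which materializes with a
    // single ADDI.)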
    if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
      SplatValue = SignExtend64(SplatValue, 32);

    // Since we can't introduce illegal i64 types at this stage, we can only
    // perform an i64 splat on RV32 if it is its own sign-extended value. That
    // way we can use RVV instructions to splat.
    assert((ViaIntVT.bitsLE(XLenVT) ||
            (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
           "Unexpected bitcast sequence");
    if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
      SDValue ViaVL =
          DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
      MVT ViaContainerVT =
          RISCVTargetLowering::getContainerForFixedLengthVector(DAG, ViaVecVT,
                                                                Subtarget);
      SDValue Splat =
          DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
                      DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
      Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
      return DAG.getBitcast(VT, Splat);
    }
  }

  // Try and optimize BUILD_VECTORs with "dominant values" - these are values
  // which constitute a large proportion of the elements. In such cases we can
  // splat a vector with the dominant element and make up the shortfall with
  // INSERT_VECTOR_ELTs.
  // Note that this includes vectors of 2 elements by association. The
  // upper-most element is the "dominant" one, allowing us to use a splat to
  // "insert" the upper element, and an insert of the lower element at position
  // 0, which improves codegen.
  SDValue DominantValue;
  unsigned MostCommonCount = 0;
  DenseMap<SDValue, unsigned> ValueCounts;
  unsigned NumUndefElts =
      count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });

  for (SDValue V : Op->op_values()) {
    if (V.isUndef())
      continue;

    ValueCounts.insert(std::make_pair(V, 0));
    unsigned &Count = ValueCounts[V];

    // Is this value dominant? In case of a tie, prefer the highest element as
    // it's cheaper to insert near the beginning of a vector than it is at the
    // end.
    if (++Count >= MostCommonCount) {
      DominantValue = V;
      MostCommonCount = Count;
    }
  }

  assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
  unsigned NumDefElts = NumElts - NumUndefElts;
  unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;

  // Don't perform this optimization when optimizing for size, since
  // materializing elements and inserting them tends to cause code bloat.
  if (!DAG.shouldOptForSize() &&
      ((MostCommonCount > DominantValueCountThreshold) ||
       (ValueCounts.size() <= Log2_32(NumDefElts)))) {
    // Start by splatting the most common element.
    SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);

    DenseSet<SDValue> Processed{DominantValue};
    MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
    for (const auto &OpIdx : enumerate(Op->ops())) {
      const SDValue &V = OpIdx.value();
      if (V.isUndef() || !Processed.insert(V).second)
        continue;
      if (ValueCounts[V] == 1) {
        Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
                          DAG.getConstant(OpIdx.index(), DL, XLenVT));
      } else {
        // Blend in all instances of this value using a VSELECT, using a
        // mask where each bit signals whether that element is the one
        // we're after.
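        // (The mask is a constant i1 BUILD_VECTOR, which the constant mask
        // path of lowerBUILD_VECTOR handles cheaply.)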
        SmallVector<SDValue> Ops;
        transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
          return DAG.getConstant(V == V1, DL, XLenVT);
        });
        Vec = DAG.getNode(ISD::VSELECT, DL, VT,
                          DAG.getBuildVector(SelMaskTy, DL, Ops),
                          DAG.getSplatBuildVector(VT, DL, V), Vec);
      }
    }

    return Vec;
  }

  return SDValue();
}

static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
                                   const RISCVSubtarget &Subtarget) {
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();
  MVT VT = Op.getSimpleValueType();
  unsigned NumElts = VT.getVectorNumElements();
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());

  if (SVN->isSplat()) {
    int Lane = SVN->getSplatIndex();
    if (Lane >= 0) {
      MVT ContainerVT = RISCVTargetLowering::getContainerForFixedLengthVector(
          DAG, VT, Subtarget);

      V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
      assert(Lane < (int)NumElts && "Unexpected lane!");

      SDValue Mask, VL;
      std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
      SDValue Gather =
          DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, V1,
                      DAG.getConstant(Lane, DL, XLenVT), Mask, VL);
      return convertFromScalableVector(VT, Gather, DAG, Subtarget);
    }
  }

  // Detect shuffles which can be re-expressed as vector selects.
  SmallVector<SDValue> MaskVals;
  // By default we preserve the original operand order, and select LHS as true
  // and RHS as false. However, since RVV vector selects may feature splats but
  // only on the LHS, we may choose to invert our mask and instead select
  // between RHS and LHS.
  bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);

  bool IsSelect = all_of(enumerate(SVN->getMask()), [&](const auto &MaskIdx) {
    int MaskIndex = MaskIdx.value();
    bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ SwapOps;
    MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
    return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
  });

  if (IsSelect) {
    assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
    MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
    SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
    return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SwapOps ? V2 : V1,
                       SwapOps ? V1 : V2);
  }

  return SDValue();
}

static SDValue getRVVFPExtendOrRound(SDValue Op, MVT VT, MVT ContainerVT,
                                     SDLoc DL, SelectionDAG &DAG,
                                     const RISCVSubtarget &Subtarget) {
  if (VT.isScalableVector())
    return DAG.getFPExtendOrRound(Op, DL, VT);
  assert(VT.isFixedLengthVector() &&
         "Unexpected value type for RVV FP extend/round lowering");
  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
  unsigned RVVOpc = ContainerVT.bitsGT(Op.getSimpleValueType())
RISCVISD::FP_EXTEND_VL
1516                          : RISCVISD::FP_ROUND_VL;
1517   return DAG.getNode(RVVOpc, DL, ContainerVT, Op, Mask, VL);
1518 }
1519
1520 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
1521                                             SelectionDAG &DAG) const {
1522   switch (Op.getOpcode()) {
1523   default:
1524     report_fatal_error("unimplemented operand");
1525   case ISD::GlobalAddress:
1526     return lowerGlobalAddress(Op, DAG);
1527   case ISD::BlockAddress:
1528     return lowerBlockAddress(Op, DAG);
1529   case ISD::ConstantPool:
1530     return lowerConstantPool(Op, DAG);
1531   case ISD::JumpTable:
1532     return lowerJumpTable(Op, DAG);
1533   case ISD::GlobalTLSAddress:
1534     return lowerGlobalTLSAddress(Op, DAG);
1535   case ISD::SELECT:
1536     return lowerSELECT(Op, DAG);
1537   case ISD::BRCOND:
1538     return lowerBRCOND(Op, DAG);
1539   case ISD::VASTART:
1540     return lowerVASTART(Op, DAG);
1541   case ISD::FRAMEADDR:
1542     return lowerFRAMEADDR(Op, DAG);
1543   case ISD::RETURNADDR:
1544     return lowerRETURNADDR(Op, DAG);
1545   case ISD::SHL_PARTS:
1546     return lowerShiftLeftParts(Op, DAG);
1547   case ISD::SRA_PARTS:
1548     return lowerShiftRightParts(Op, DAG, true);
1549   case ISD::SRL_PARTS:
1550     return lowerShiftRightParts(Op, DAG, false);
1551   case ISD::BITCAST: {
1552     SDLoc DL(Op);
1553     EVT VT = Op.getValueType();
1554     SDValue Op0 = Op.getOperand(0);
1555     EVT Op0VT = Op0.getValueType();
1556     MVT XLenVT = Subtarget.getXLenVT();
1557     if (VT.isFixedLengthVector()) {
1558       // We can handle fixed length vector bitcasts with a simple replacement
1559       // in isel.
1560       if (Op0VT.isFixedLengthVector())
1561         return Op;
1562       // When bitcasting from scalar to fixed-length vector, insert the scalar
1563       // into a one-element vector of the result type, and perform a vector
1564       // bitcast.
1565       if (!Op0VT.isVector()) {
1566         auto BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
1567         return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
1568                                               DAG.getUNDEF(BVT), Op0,
1569                                               DAG.getConstant(0, DL, XLenVT)));
1570       }
1571       return SDValue();
1572     }
1573     // Custom-legalize bitcasts from fixed-length vector types to scalar types
1574     // thus: bitcast the vector to a one-element vector type whose element type
1575     // is the same as the result type, and extract the first element.
1576     if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
1577       LLVMContext &Context = *DAG.getContext();
1578       SDValue BVec = DAG.getBitcast(EVT::getVectorVT(Context, VT, 1), Op0);
1579       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
1580                          DAG.getConstant(0, DL, XLenVT));
1581     }
1582     if (VT == MVT::f16 && Op0VT == MVT::i16 && Subtarget.hasStdExtZfh()) {
1583       SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
1584       SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
1585       return FPConv;
1586     }
1587     if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
1588         Subtarget.hasStdExtF()) {
1589       SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
1590       SDValue FPConv =
1591           DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
1592       return FPConv;
1593     }
1594     return SDValue();
1595   }
1596   case ISD::INTRINSIC_WO_CHAIN:
1597     return LowerINTRINSIC_WO_CHAIN(Op, DAG);
1598   case ISD::INTRINSIC_W_CHAIN:
1599     return LowerINTRINSIC_W_CHAIN(Op, DAG);
1600   case ISD::BSWAP:
1601   case ISD::BITREVERSE: {
1602     // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
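    // For example, with XLen == 64 BITREVERSE uses immediate 63 (a full bit
    // reversal), while BSWAP clears the low 3 bits to get 56, reversing at
    // byte granularity only (a whole-register byte swap).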
1603     assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
1604     MVT VT = Op.getSimpleValueType();
1605     SDLoc DL(Op);
1606     // Start with the maximum immediate value, which is the bitwidth - 1.
1607     unsigned Imm = VT.getSizeInBits() - 1;
1608     // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
1609     if (Op.getOpcode() == ISD::BSWAP)
1610       Imm &= ~0x7U;
1611     return DAG.getNode(RISCVISD::GREVI, DL, VT, Op.getOperand(0),
1612                        DAG.getTargetConstant(Imm, DL, Subtarget.getXLenVT()));
1613   }
1614   case ISD::FSHL:
1615   case ISD::FSHR: {
1616     MVT VT = Op.getSimpleValueType();
1617     assert(VT == Subtarget.getXLenVT() && "Unexpected custom legalization");
1618     SDLoc DL(Op);
1619     if (Op.getOperand(2).getOpcode() == ISD::Constant)
1620       return Op;
1621     // FSL/FSR take a log2(XLen)+1 bit shift amount but XLenVT FSHL/FSHR only
1622     // use log2(XLen) bits. Mask the shift amount accordingly.
1623     unsigned ShAmtWidth = Subtarget.getXLen() - 1;
1624     SDValue ShAmt = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(2),
1625                                 DAG.getConstant(ShAmtWidth, DL, VT));
1626     unsigned Opc = Op.getOpcode() == ISD::FSHL ? RISCVISD::FSL : RISCVISD::FSR;
1627     return DAG.getNode(Opc, DL, VT, Op.getOperand(0), Op.getOperand(1), ShAmt);
1628   }
1629   case ISD::TRUNCATE: {
1630     SDLoc DL(Op);
1631     MVT VT = Op.getSimpleValueType();
1632     // Only custom-lower vector truncates.
1633     if (!VT.isVector())
1634       return Op;
1635
1636     // Truncates to mask types are handled differently.
1637     if (VT.getVectorElementType() == MVT::i1)
1638       return lowerVectorMaskTrunc(Op, DAG);
1639
1640     // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
1641     // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
1642     // truncate by one power of two at a time.
1643     MVT DstEltVT = VT.getVectorElementType();
1644
1645     SDValue Src = Op.getOperand(0);
1646     MVT SrcVT = Src.getSimpleValueType();
1647     MVT SrcEltVT = SrcVT.getVectorElementType();
1648
1649     assert(DstEltVT.bitsLT(SrcEltVT) &&
1650            isPowerOf2_64(DstEltVT.getSizeInBits()) &&
1651            isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
1652            "Unexpected vector truncate lowering");
1653
1654     MVT ContainerVT = SrcVT;
1655     if (SrcVT.isFixedLengthVector()) {
1656       ContainerVT = getContainerForFixedLengthVector(SrcVT);
1657       Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
1658     }
1659
1660     SDValue Result = Src;
1661     SDValue Mask, VL;
1662     std::tie(Mask, VL) =
1663         getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
1664     LLVMContext &Context = *DAG.getContext();
1665     const ElementCount Count = ContainerVT.getVectorElementCount();
1666     do {
1667       SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
1668       EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
1669       Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
1670                            Mask, VL);
1671     } while (SrcEltVT != DstEltVT);
1672
1673     if (SrcVT.isFixedLengthVector())
1674       Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
1675
1676     return Result;
1677   }
1678   case ISD::ANY_EXTEND:
1679   case ISD::ZERO_EXTEND:
1680     if (Op.getOperand(0).getValueType().isVector() &&
1681         Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
1682       return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
1683     return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
1684   case ISD::SIGN_EXTEND:
1685     if (Op.getOperand(0).getValueType().isVector() &&
1686         Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
1687       return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
1688     return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
1689   case ISD::SPLAT_VECTOR_PARTS:
1690     return lowerSPLAT_VECTOR_PARTS(Op, DAG);
1691   case ISD::INSERT_VECTOR_ELT:
1692     return lowerINSERT_VECTOR_ELT(Op, DAG);
1693   case ISD::EXTRACT_VECTOR_ELT:
1694     return lowerEXTRACT_VECTOR_ELT(Op, DAG);
1695   case ISD::VSCALE: {
1696     MVT VT = Op.getSimpleValueType();
1697     SDLoc DL(Op);
1698     SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
1699     // We define our scalable vector types for lmul=1 to use a 64-bit known
1700     // minimum size, e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
1701     // vscale as VLENB / 8.
1702     SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
1703                                  DAG.getConstant(3, DL, VT));
1704     return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
1705   }
1706   case ISD::FP_EXTEND: {
1707     // RVV can only do fp_extend to types double the size of the source. We
1708     // custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going
1709     // via f32.
1710     SDLoc DL(Op);
1711     MVT VT = Op.getSimpleValueType();
1712     SDValue Src = Op.getOperand(0);
1713     MVT SrcVT = Src.getSimpleValueType();
1714
1715     // Prepare any fixed-length vector operands.
1716     MVT ContainerVT = VT;
1717     if (SrcVT.isFixedLengthVector()) {
1718       ContainerVT = getContainerForFixedLengthVector(VT);
1719       MVT SrcContainerVT =
1720           ContainerVT.changeVectorElementType(SrcVT.getVectorElementType());
1721       Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
1722     }
1723
1724     if (!VT.isVector() || VT.getVectorElementType() != MVT::f64 ||
1725         SrcVT.getVectorElementType() != MVT::f16) {
1726       // For scalable vectors, we only need to close the gap between
1727       // vXf16 and vXf64.
1728       if (!VT.isFixedLengthVector())
1729         return Op;
1730       // For fixed-length vectors, lower the FP_EXTEND to a custom "VL" version.
1731       Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
1732       return convertFromScalableVector(VT, Src, DAG, Subtarget);
1733     }
1734
1735     MVT InterVT = VT.changeVectorElementType(MVT::f32);
1736     MVT InterContainerVT = ContainerVT.changeVectorElementType(MVT::f32);
1737     SDValue IntermediateExtend = getRVVFPExtendOrRound(
1738         Src, InterVT, InterContainerVT, DL, DAG, Subtarget);
1739
1740     SDValue Extend = getRVVFPExtendOrRound(IntermediateExtend, VT, ContainerVT,
1741                                            DL, DAG, Subtarget);
1742     if (VT.isFixedLengthVector())
1743       return convertFromScalableVector(VT, Extend, DAG, Subtarget);
1744     return Extend;
1745   }
1746   case ISD::FP_ROUND: {
1747     // RVV can only do fp_round to types half the size of the source. We
1748     // custom-lower f64->f16 rounds via RVV's round-to-odd float
1749     // conversion instruction.
1750     SDLoc DL(Op);
1751     MVT VT = Op.getSimpleValueType();
1752     SDValue Src = Op.getOperand(0);
1753     MVT SrcVT = Src.getSimpleValueType();
1754
1755     // Prepare any fixed-length vector operands.
1756     MVT ContainerVT = VT;
1757     if (VT.isFixedLengthVector()) {
1758       MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
1759       ContainerVT =
1760           SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
1761       Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
1762     }
1763
1764     if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 ||
1765         SrcVT.getVectorElementType() != MVT::f64) {
1766       // For scalable vectors, we only need to close the gap between
1767       // vXf64 and vXf16.
1768       if (!VT.isFixedLengthVector())
1769         return Op;
1770       // For fixed-length vectors, lower the FP_ROUND to a custom "VL" version.
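      // (For example, a v2f32->v2f16 fp_round stays a single narrowing step
      // and is emitted as one FP_ROUND_VL, roughly a single vfncvt.f.f.w on
      // the container type.)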
1771       Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
1772       return convertFromScalableVector(VT, Src, DAG, Subtarget);
1773     }
1774
1775     SDValue Mask, VL;
1776     std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1777
1778     MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
1779     SDValue IntermediateRound =
1780         DAG.getNode(RISCVISD::VFNCVT_ROD_VL, DL, InterVT, Src, Mask, VL);
1781     SDValue Round = getRVVFPExtendOrRound(IntermediateRound, VT, ContainerVT,
1782                                           DL, DAG, Subtarget);
1783
1784     if (VT.isFixedLengthVector())
1785       return convertFromScalableVector(VT, Round, DAG, Subtarget);
1786     return Round;
1787   }
1788   case ISD::FP_TO_SINT:
1789   case ISD::FP_TO_UINT:
1790   case ISD::SINT_TO_FP:
1791   case ISD::UINT_TO_FP: {
1792     // RVV can only do fp<->int conversions to types half/double the size of
1793     // the source. We custom-lower any conversions that do two hops into
1794     // sequences.
1795     MVT VT = Op.getSimpleValueType();
1796     if (!VT.isVector())
1797       return Op;
1798     SDLoc DL(Op);
1799     SDValue Src = Op.getOperand(0);
1800     MVT EltVT = VT.getVectorElementType();
1801     MVT SrcVT = Src.getSimpleValueType();
1802     MVT SrcEltVT = SrcVT.getVectorElementType();
1803     unsigned EltSize = EltVT.getSizeInBits();
1804     unsigned SrcEltSize = SrcEltVT.getSizeInBits();
1805     assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
1806            "Unexpected vector element types");
1807
1808     bool IsInt2FP = SrcEltVT.isInteger();
1809     // Widening conversions
1810     if (EltSize > SrcEltSize && (EltSize / SrcEltSize >= 4)) {
1811       if (IsInt2FP) {
1812         // Do a regular integer sign/zero extension then convert to float.
1813         MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltVT.getSizeInBits()),
1814                                       VT.getVectorElementCount());
1815         unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP
1816                                  ? ISD::ZERO_EXTEND
1817                                  : ISD::SIGN_EXTEND;
1818         SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
1819         return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
1820       }
1821       // FP2Int
1822       assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
1823       // Do one doubling fp_extend then complete the operation by converting
1824       // to int.
1825       MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1826       SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
1827       return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
1828     }
1829
1830     // Narrowing conversions
1831     if (SrcEltSize > EltSize && (SrcEltSize / EltSize >= 4)) {
1832       if (IsInt2FP) {
1833         // One narrowing int_to_fp, then an fp_round.
1834         assert(EltVT == MVT::f16 && "Unexpected [US]INT_TO_FP lowering");
1835         MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1836         SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
1837         return DAG.getFPExtendOrRound(Int2FP, DL, VT);
1838       }
1839       // FP2Int
1840       // One narrowing fp_to_int, then truncate the integer. If the float value
1841       // isn't representable in the integer type, the result is poison.
1842       MVT IVecVT =
1843           MVT::getVectorVT(MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2),
1844                            VT.getVectorElementCount());
1845       SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
1846       return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
1847     }
1848
1849     // Scalable vectors can exit here. Patterns will handle equally-sized
1850     // conversions as well as halving/doubling ones.
1851     if (!VT.isFixedLengthVector())
1852       return Op;
1853
1854     // For fixed-length vectors we lower to a custom "VL" node.
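    // (e.g. a v4f32->v4i32 fp_to_sint becomes a single FP_TO_SINT_VL, which
    // corresponds roughly to vfcvt.rtz.x.f.v on the container type.)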
1855 unsigned RVVOpc = 0; 1856 switch (Op.getOpcode()) { 1857 default: 1858 llvm_unreachable("Impossible opcode"); 1859 case ISD::FP_TO_SINT: 1860 RVVOpc = RISCVISD::FP_TO_SINT_VL; 1861 break; 1862 case ISD::FP_TO_UINT: 1863 RVVOpc = RISCVISD::FP_TO_UINT_VL; 1864 break; 1865 case ISD::SINT_TO_FP: 1866 RVVOpc = RISCVISD::SINT_TO_FP_VL; 1867 break; 1868 case ISD::UINT_TO_FP: 1869 RVVOpc = RISCVISD::UINT_TO_FP_VL; 1870 break; 1871 } 1872 1873 MVT ContainerVT, SrcContainerVT; 1874 // Derive the reference container type from the larger vector type. 1875 if (SrcEltSize > EltSize) { 1876 SrcContainerVT = getContainerForFixedLengthVector(SrcVT); 1877 ContainerVT = 1878 SrcContainerVT.changeVectorElementType(VT.getVectorElementType()); 1879 } else { 1880 ContainerVT = getContainerForFixedLengthVector(VT); 1881 SrcContainerVT = ContainerVT.changeVectorElementType(SrcEltVT); 1882 } 1883 1884 SDValue Mask, VL; 1885 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 1886 1887 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); 1888 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL); 1889 return convertFromScalableVector(VT, Src, DAG, Subtarget); 1890 } 1891 case ISD::VECREDUCE_ADD: 1892 case ISD::VECREDUCE_UMAX: 1893 case ISD::VECREDUCE_SMAX: 1894 case ISD::VECREDUCE_UMIN: 1895 case ISD::VECREDUCE_SMIN: 1896 case ISD::VECREDUCE_AND: 1897 case ISD::VECREDUCE_OR: 1898 case ISD::VECREDUCE_XOR: 1899 return lowerVECREDUCE(Op, DAG); 1900 case ISD::VECREDUCE_FADD: 1901 case ISD::VECREDUCE_SEQ_FADD: 1902 return lowerFPVECREDUCE(Op, DAG); 1903 case ISD::INSERT_SUBVECTOR: 1904 return lowerINSERT_SUBVECTOR(Op, DAG); 1905 case ISD::EXTRACT_SUBVECTOR: 1906 return lowerEXTRACT_SUBVECTOR(Op, DAG); 1907 case ISD::STEP_VECTOR: 1908 return lowerSTEP_VECTOR(Op, DAG); 1909 case ISD::VECTOR_REVERSE: 1910 return lowerVECTOR_REVERSE(Op, DAG); 1911 case ISD::BUILD_VECTOR: 1912 return lowerBUILD_VECTOR(Op, DAG, Subtarget); 1913 case ISD::VECTOR_SHUFFLE: 1914 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget); 1915 case ISD::CONCAT_VECTORS: { 1916 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is 1917 // better than going through the stack, as the default expansion does. 
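    // For example, (concat_vectors v8i16:a, v8i16:b) becomes
    //   (insert_subvector (insert_subvector undef:v16i16, a, 0), b, 8).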
1918 SDLoc DL(Op); 1919 MVT VT = Op.getSimpleValueType(); 1920 unsigned NumOpElts = 1921 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements(); 1922 SDValue Vec = DAG.getUNDEF(VT); 1923 for (const auto &OpIdx : enumerate(Op->ops())) 1924 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, OpIdx.value(), 1925 DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL)); 1926 return Vec; 1927 } 1928 case ISD::LOAD: 1929 return lowerFixedLengthVectorLoadToRVV(Op, DAG); 1930 case ISD::STORE: 1931 return lowerFixedLengthVectorStoreToRVV(Op, DAG); 1932 case ISD::MLOAD: 1933 return lowerMLOAD(Op, DAG); 1934 case ISD::MSTORE: 1935 return lowerMSTORE(Op, DAG); 1936 case ISD::SETCC: 1937 return lowerFixedLengthVectorSetccToRVV(Op, DAG); 1938 case ISD::ADD: 1939 return lowerToScalableOp(Op, DAG, RISCVISD::ADD_VL); 1940 case ISD::SUB: 1941 return lowerToScalableOp(Op, DAG, RISCVISD::SUB_VL); 1942 case ISD::MUL: 1943 return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL); 1944 case ISD::MULHS: 1945 return lowerToScalableOp(Op, DAG, RISCVISD::MULHS_VL); 1946 case ISD::MULHU: 1947 return lowerToScalableOp(Op, DAG, RISCVISD::MULHU_VL); 1948 case ISD::AND: 1949 return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMAND_VL, 1950 RISCVISD::AND_VL); 1951 case ISD::OR: 1952 return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMOR_VL, 1953 RISCVISD::OR_VL); 1954 case ISD::XOR: 1955 return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMXOR_VL, 1956 RISCVISD::XOR_VL); 1957 case ISD::SDIV: 1958 return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL); 1959 case ISD::SREM: 1960 return lowerToScalableOp(Op, DAG, RISCVISD::SREM_VL); 1961 case ISD::UDIV: 1962 return lowerToScalableOp(Op, DAG, RISCVISD::UDIV_VL); 1963 case ISD::UREM: 1964 return lowerToScalableOp(Op, DAG, RISCVISD::UREM_VL); 1965 case ISD::SHL: 1966 return lowerToScalableOp(Op, DAG, RISCVISD::SHL_VL); 1967 case ISD::SRA: 1968 return lowerToScalableOp(Op, DAG, RISCVISD::SRA_VL); 1969 case ISD::SRL: 1970 return lowerToScalableOp(Op, DAG, RISCVISD::SRL_VL); 1971 case ISD::FADD: 1972 return lowerToScalableOp(Op, DAG, RISCVISD::FADD_VL); 1973 case ISD::FSUB: 1974 return lowerToScalableOp(Op, DAG, RISCVISD::FSUB_VL); 1975 case ISD::FMUL: 1976 return lowerToScalableOp(Op, DAG, RISCVISD::FMUL_VL); 1977 case ISD::FDIV: 1978 return lowerToScalableOp(Op, DAG, RISCVISD::FDIV_VL); 1979 case ISD::FNEG: 1980 return lowerToScalableOp(Op, DAG, RISCVISD::FNEG_VL); 1981 case ISD::FABS: 1982 return lowerToScalableOp(Op, DAG, RISCVISD::FABS_VL); 1983 case ISD::FSQRT: 1984 return lowerToScalableOp(Op, DAG, RISCVISD::FSQRT_VL); 1985 case ISD::FMA: 1986 return lowerToScalableOp(Op, DAG, RISCVISD::FMA_VL); 1987 case ISD::SMIN: 1988 return lowerToScalableOp(Op, DAG, RISCVISD::SMIN_VL); 1989 case ISD::SMAX: 1990 return lowerToScalableOp(Op, DAG, RISCVISD::SMAX_VL); 1991 case ISD::UMIN: 1992 return lowerToScalableOp(Op, DAG, RISCVISD::UMIN_VL); 1993 case ISD::UMAX: 1994 return lowerToScalableOp(Op, DAG, RISCVISD::UMAX_VL); 1995 case ISD::ABS: 1996 return lowerABS(Op, DAG); 1997 case ISD::VSELECT: 1998 return lowerFixedLengthVectorSelectToRVV(Op, DAG); 1999 case ISD::FCOPYSIGN: 2000 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG); 2001 case ISD::MGATHER: 2002 return lowerMGATHER(Op, DAG); 2003 case ISD::MSCATTER: 2004 return lowerMSCATTER(Op, DAG); 2005 } 2006 } 2007 2008 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty, 2009 SelectionDAG &DAG, unsigned Flags) { 2010 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 
Flags);
2011 }
2012
2013 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
2014                              SelectionDAG &DAG, unsigned Flags) {
2015   return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
2016                                    Flags);
2017 }
2018
2019 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
2020                              SelectionDAG &DAG, unsigned Flags) {
2021   return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
2022                                    N->getOffset(), Flags);
2023 }
2024
2025 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
2026                              SelectionDAG &DAG, unsigned Flags) {
2027   return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
2028 }
2029
2030 template <class NodeTy>
2031 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
2032                                      bool IsLocal) const {
2033   SDLoc DL(N);
2034   EVT Ty = getPointerTy(DAG.getDataLayout());
2035
2036   if (isPositionIndependent()) {
2037     SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
2038     if (IsLocal)
2039       // Use PC-relative addressing to access the symbol. This generates the
2040       // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
2041       // %pcrel_lo(auipc)).
2042       return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
2043
2044     // Use PC-relative addressing to access the GOT for this symbol, then load
2045     // the address from the GOT. This generates the pattern (PseudoLA sym),
2046     // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
2047     return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
2048   }
2049
2050   switch (getTargetMachine().getCodeModel()) {
2051   default:
2052     report_fatal_error("Unsupported code model for lowering");
2053   case CodeModel::Small: {
2054     // Generate a sequence for accessing addresses within the first 2 GiB of
2055     // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
2056     SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
2057     SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
2058     SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
2059     return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0);
2060   }
2061   case CodeModel::Medium: {
2062     // Generate a sequence for accessing addresses within any 2 GiB range
2063     // within the address space. This generates the pattern (PseudoLLA sym),
2064     // which expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
2065     SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
2066     return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
2067   }
2068   }
2069 }
2070
2071 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
2072                                                 SelectionDAG &DAG) const {
2073   SDLoc DL(Op);
2074   EVT Ty = Op.getValueType();
2075   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
2076   int64_t Offset = N->getOffset();
2077   MVT XLenVT = Subtarget.getXLenVT();
2078
2079   const GlobalValue *GV = N->getGlobal();
2080   bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
2081   SDValue Addr = getAddr(N, DAG, IsLocal);
2082
2083   // In order to maximise the opportunity for common subexpression elimination,
2084   // emit a separate ADD node for the global address offset instead of folding
2085   // it into the global address node. Later peephole optimisations may choose
2086   // to fold it back in when profitable.
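  // For example, loads from (sym + 4) and (sym + 8) can then share a single
  // (addi (lui %hi(sym)) %lo(sym)) materialisation of the base address.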
2087 if (Offset != 0) 2088 return DAG.getNode(ISD::ADD, DL, Ty, Addr, 2089 DAG.getConstant(Offset, DL, XLenVT)); 2090 return Addr; 2091 } 2092 2093 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op, 2094 SelectionDAG &DAG) const { 2095 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op); 2096 2097 return getAddr(N, DAG); 2098 } 2099 2100 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op, 2101 SelectionDAG &DAG) const { 2102 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); 2103 2104 return getAddr(N, DAG); 2105 } 2106 2107 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op, 2108 SelectionDAG &DAG) const { 2109 JumpTableSDNode *N = cast<JumpTableSDNode>(Op); 2110 2111 return getAddr(N, DAG); 2112 } 2113 2114 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, 2115 SelectionDAG &DAG, 2116 bool UseGOT) const { 2117 SDLoc DL(N); 2118 EVT Ty = getPointerTy(DAG.getDataLayout()); 2119 const GlobalValue *GV = N->getGlobal(); 2120 MVT XLenVT = Subtarget.getXLenVT(); 2121 2122 if (UseGOT) { 2123 // Use PC-relative addressing to access the GOT for this TLS symbol, then 2124 // load the address from the GOT and add the thread pointer. This generates 2125 // the pattern (PseudoLA_TLS_IE sym), which expands to 2126 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)). 2127 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); 2128 SDValue Load = 2129 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0); 2130 2131 // Add the thread pointer. 2132 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); 2133 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg); 2134 } 2135 2136 // Generate a sequence for accessing the address relative to the thread 2137 // pointer, with the appropriate adjustment for the thread pointer offset. 2138 // This generates the pattern 2139 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym)) 2140 SDValue AddrHi = 2141 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI); 2142 SDValue AddrAdd = 2143 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD); 2144 SDValue AddrLo = 2145 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO); 2146 2147 SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0); 2148 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); 2149 SDValue MNAdd = SDValue( 2150 DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd), 2151 0); 2152 return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0); 2153 } 2154 2155 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, 2156 SelectionDAG &DAG) const { 2157 SDLoc DL(N); 2158 EVT Ty = getPointerTy(DAG.getDataLayout()); 2159 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits()); 2160 const GlobalValue *GV = N->getGlobal(); 2161 2162 // Use a PC-relative addressing mode to access the global dynamic GOT address. 2163 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to 2164 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)). 2165 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); 2166 SDValue Load = 2167 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0); 2168 2169 // Prepare argument list to generate call. 2170 ArgListTy Args; 2171 ArgListEntry Entry; 2172 Entry.Node = Load; 2173 Entry.Ty = CallTy; 2174 Args.push_back(Entry); 2175 2176 // Setup call to __tls_get_addr. 
2177   TargetLowering::CallLoweringInfo CLI(DAG);
2178   CLI.setDebugLoc(DL)
2179       .setChain(DAG.getEntryNode())
2180       .setLibCallee(CallingConv::C, CallTy,
2181                     DAG.getExternalSymbol("__tls_get_addr", Ty),
2182                     std::move(Args));
2183
2184   return LowerCallTo(CLI).first;
2185 }
2186
2187 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
2188                                                    SelectionDAG &DAG) const {
2189   SDLoc DL(Op);
2190   EVT Ty = Op.getValueType();
2191   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
2192   int64_t Offset = N->getOffset();
2193   MVT XLenVT = Subtarget.getXLenVT();
2194
2195   TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
2196
2197   if (DAG.getMachineFunction().getFunction().getCallingConv() ==
2198       CallingConv::GHC)
2199     report_fatal_error("In GHC calling convention TLS is not supported");
2200
2201   SDValue Addr;
2202   switch (Model) {
2203   case TLSModel::LocalExec:
2204     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
2205     break;
2206   case TLSModel::InitialExec:
2207     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
2208     break;
2209   case TLSModel::LocalDynamic:
2210   case TLSModel::GeneralDynamic:
2211     Addr = getDynamicTLSAddr(N, DAG);
2212     break;
2213   }
2214
2215   // In order to maximise the opportunity for common subexpression elimination,
2216   // emit a separate ADD node for the global address offset instead of folding
2217   // it into the global address node. Later peephole optimisations may choose
2218   // to fold it back in when profitable.
2219   if (Offset != 0)
2220     return DAG.getNode(ISD::ADD, DL, Ty, Addr,
2221                        DAG.getConstant(Offset, DL, XLenVT));
2222   return Addr;
2223 }
2224
2225 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
2226   SDValue CondV = Op.getOperand(0);
2227   SDValue TrueV = Op.getOperand(1);
2228   SDValue FalseV = Op.getOperand(2);
2229   SDLoc DL(Op);
2230   MVT XLenVT = Subtarget.getXLenVT();
2231
2232   // If the result type is XLenVT and CondV is the output of a SETCC node
2233   // which also operated on XLenVT inputs, then merge the SETCC node into the
2234   // lowered RISCVISD::SELECT_CC to take advantage of the integer
2235   // compare+branch instructions.
i.e.: 2236 // (select (setcc lhs, rhs, cc), truev, falsev) 2237 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev) 2238 if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC && 2239 CondV.getOperand(0).getSimpleValueType() == XLenVT) { 2240 SDValue LHS = CondV.getOperand(0); 2241 SDValue RHS = CondV.getOperand(1); 2242 auto CC = cast<CondCodeSDNode>(CondV.getOperand(2)); 2243 ISD::CondCode CCVal = CC->get(); 2244 2245 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG); 2246 2247 SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT); 2248 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV}; 2249 return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops); 2250 } 2251 2252 // Otherwise: 2253 // (select condv, truev, falsev) 2254 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev) 2255 SDValue Zero = DAG.getConstant(0, DL, XLenVT); 2256 SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT); 2257 2258 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV}; 2259 2260 return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops); 2261 } 2262 2263 SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const { 2264 SDValue CondV = Op.getOperand(1); 2265 SDLoc DL(Op); 2266 MVT XLenVT = Subtarget.getXLenVT(); 2267 2268 if (CondV.getOpcode() == ISD::SETCC && 2269 CondV.getOperand(0).getValueType() == XLenVT) { 2270 SDValue LHS = CondV.getOperand(0); 2271 SDValue RHS = CondV.getOperand(1); 2272 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get(); 2273 2274 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG); 2275 2276 SDValue TargetCC = DAG.getCondCode(CCVal); 2277 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0), 2278 LHS, RHS, TargetCC, Op.getOperand(2)); 2279 } 2280 2281 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0), 2282 CondV, DAG.getConstant(0, DL, XLenVT), 2283 DAG.getCondCode(ISD::SETNE), Op.getOperand(2)); 2284 } 2285 2286 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { 2287 MachineFunction &MF = DAG.getMachineFunction(); 2288 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>(); 2289 2290 SDLoc DL(Op); 2291 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), 2292 getPointerTy(MF.getDataLayout())); 2293 2294 // vastart just stores the address of the VarArgsFrameIndex slot into the 2295 // memory location argument. 
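  // (The RISC-V psABI va_list is a single pointer, so this is one XLen-sized
  // store.)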
2296 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 2297 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1), 2298 MachinePointerInfo(SV)); 2299 } 2300 2301 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op, 2302 SelectionDAG &DAG) const { 2303 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 2304 MachineFunction &MF = DAG.getMachineFunction(); 2305 MachineFrameInfo &MFI = MF.getFrameInfo(); 2306 MFI.setFrameAddressIsTaken(true); 2307 Register FrameReg = RI.getFrameRegister(MF); 2308 int XLenInBytes = Subtarget.getXLen() / 8; 2309 2310 EVT VT = Op.getValueType(); 2311 SDLoc DL(Op); 2312 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT); 2313 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 2314 while (Depth--) { 2315 int Offset = -(XLenInBytes * 2); 2316 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr, 2317 DAG.getIntPtrConstant(Offset, DL)); 2318 FrameAddr = 2319 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo()); 2320 } 2321 return FrameAddr; 2322 } 2323 2324 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op, 2325 SelectionDAG &DAG) const { 2326 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 2327 MachineFunction &MF = DAG.getMachineFunction(); 2328 MachineFrameInfo &MFI = MF.getFrameInfo(); 2329 MFI.setReturnAddressIsTaken(true); 2330 MVT XLenVT = Subtarget.getXLenVT(); 2331 int XLenInBytes = Subtarget.getXLen() / 8; 2332 2333 if (verifyReturnAddressArgumentIsConstant(Op, DAG)) 2334 return SDValue(); 2335 2336 EVT VT = Op.getValueType(); 2337 SDLoc DL(Op); 2338 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 2339 if (Depth) { 2340 int Off = -XLenInBytes; 2341 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG); 2342 SDValue Offset = DAG.getConstant(Off, DL, VT); 2343 return DAG.getLoad(VT, DL, DAG.getEntryNode(), 2344 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), 2345 MachinePointerInfo()); 2346 } 2347 2348 // Return the value of the return address register, marking it an implicit 2349 // live-in. 
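  // (On RISC-V the return address register is x1/ra.)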
2350 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT)); 2351 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT); 2352 } 2353 2354 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op, 2355 SelectionDAG &DAG) const { 2356 SDLoc DL(Op); 2357 SDValue Lo = Op.getOperand(0); 2358 SDValue Hi = Op.getOperand(1); 2359 SDValue Shamt = Op.getOperand(2); 2360 EVT VT = Lo.getValueType(); 2361 2362 // if Shamt-XLEN < 0: // Shamt < XLEN 2363 // Lo = Lo << Shamt 2364 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt)) 2365 // else: 2366 // Lo = 0 2367 // Hi = Lo << (Shamt-XLEN) 2368 2369 SDValue Zero = DAG.getConstant(0, DL, VT); 2370 SDValue One = DAG.getConstant(1, DL, VT); 2371 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); 2372 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); 2373 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); 2374 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); 2375 2376 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); 2377 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One); 2378 SDValue ShiftRightLo = 2379 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt); 2380 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); 2381 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); 2382 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen); 2383 2384 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); 2385 2386 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero); 2387 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 2388 2389 SDValue Parts[2] = {Lo, Hi}; 2390 return DAG.getMergeValues(Parts, DL); 2391 } 2392 2393 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, 2394 bool IsSRA) const { 2395 SDLoc DL(Op); 2396 SDValue Lo = Op.getOperand(0); 2397 SDValue Hi = Op.getOperand(1); 2398 SDValue Shamt = Op.getOperand(2); 2399 EVT VT = Lo.getValueType(); 2400 2401 // SRA expansion: 2402 // if Shamt-XLEN < 0: // Shamt < XLEN 2403 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt)) 2404 // Hi = Hi >>s Shamt 2405 // else: 2406 // Lo = Hi >>s (Shamt-XLEN); 2407 // Hi = Hi >>s (XLEN-1) 2408 // 2409 // SRL expansion: 2410 // if Shamt-XLEN < 0: // Shamt < XLEN 2411 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt)) 2412 // Hi = Hi >>u Shamt 2413 // else: 2414 // Lo = Hi >>u (Shamt-XLEN); 2415 // Hi = 0; 2416 2417 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL; 2418 2419 SDValue Zero = DAG.getConstant(0, DL, VT); 2420 SDValue One = DAG.getConstant(1, DL, VT); 2421 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); 2422 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); 2423 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); 2424 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); 2425 2426 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt); 2427 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One); 2428 SDValue ShiftLeftHi = 2429 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt); 2430 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi); 2431 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt); 2432 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen); 2433 SDValue HiFalse = 2434 IsSRA ? 
DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
2435
2436   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
2437
2438   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
2439   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
2440
2441   SDValue Parts[2] = {Lo, Hi};
2442   return DAG.getMergeValues(Parts, DL);
2443 }
2444
2445 // Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
2446 // illegal (currently only vXi64 RV32).
2447 // FIXME: We could also catch non-constant sign-extended i32 values and lower
2448 // them to SPLAT_VECTOR_I64.
2449 SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
2450                                                      SelectionDAG &DAG) const {
2451   SDLoc DL(Op);
2452   EVT VecVT = Op.getValueType();
2453   assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
2454          "Unexpected SPLAT_VECTOR_PARTS lowering");
2455
2456   assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
2457   SDValue Lo = Op.getOperand(0);
2458   SDValue Hi = Op.getOperand(1);
2459
2460   if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
2461     int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
2462     int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
2463     // If Hi is the sign-extension of Lo (every bit of Hi equals Lo's sign
2464     // bit), lower this as a custom node to try and match RVV vector/scalar
2465     // instructions.
2466     if ((LoC >> 31) == HiC)
2467       return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
2468   }
2469
2470   // Else, on RV32 we lower an i64-element SPLAT_VECTOR thus, being careful not
2471   // to accidentally sign-extend the 32-bit halves to the e64 SEW:
2472   // vmv.v.x vX, hi
2473   // vsll.vx vX, vX, /*32*/
2474   // vmv.v.x vY, lo
2475   // vsll.vx vY, vY, /*32*/
2476   // vsrl.vx vY, vY, /*32*/
2477   // vor.vv vX, vX, vY
2478   SDValue ThirtyTwoV = DAG.getConstant(32, DL, VecVT);
2479
2480   Lo = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
2481   Lo = DAG.getNode(ISD::SHL, DL, VecVT, Lo, ThirtyTwoV);
2482   Lo = DAG.getNode(ISD::SRL, DL, VecVT, Lo, ThirtyTwoV);
2483
2484   if (isNullConstant(Hi))
2485     return Lo;
2486
2487   Hi = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Hi);
2488   Hi = DAG.getNode(ISD::SHL, DL, VecVT, Hi, ThirtyTwoV);
2489
2490   return DAG.getNode(ISD::OR, DL, VecVT, Lo, Hi);
2491 }
2492
2493 // Custom-lower extensions from mask vectors by using a vselect either with 1
2494 // for zero/any-extension or -1 for sign-extension:
2495 //   (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
2496 // Note that any-extension is lowered identically to zero-extension.
2497 SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
2498                                                 int64_t ExtTrueVal) const {
2499   SDLoc DL(Op);
2500   MVT VecVT = Op.getSimpleValueType();
2501   SDValue Src = Op.getOperand(0);
2502   // Only custom-lower extensions from mask types.
2503   assert(Src.getValueType().isVector() &&
2504          Src.getValueType().getVectorElementType() == MVT::i1);
2505
2506   MVT XLenVT = Subtarget.getXLenVT();
2507   SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
2508   SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);
2509
2510   if (VecVT.isScalableVector()) {
2511     // Be careful not to introduce illegal scalar types at this stage, and be
2512     // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is
2513     // illegal and must be expanded. Since we know that the constants are
2514     // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly.
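    // (Here the splatted constants are only 0 and ExtTrueVal (1 or -1), each
    // of which is its own 32-bit sign-extension, so a single vmv.v.x of the
    // low half is enough.)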
2514     bool IsRV32E64 =
2515         !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64;
2516
2517     if (!IsRV32E64) {
2518       SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero);
2519       SplatTrueVal = DAG.getSplatVector(VecVT, DL, SplatTrueVal);
2520     } else {
2521       SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero);
2522       SplatTrueVal =
2523           DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatTrueVal);
2524     }
2525
2526     return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
2527   }
2528
2529   MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
2530   MVT I1ContainerVT =
2531       MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
2532
2533   SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
2534
2535   SDValue Mask, VL;
2536   std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
2537
2538   SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero, VL);
2539   SplatTrueVal =
2540       DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatTrueVal, VL);
2541   SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC,
2542                                SplatTrueVal, SplatZero, VL);
2543
2544   return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
2545 }
2546
2547 SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
2548     SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
2549   MVT ExtVT = Op.getSimpleValueType();
2550   // Only custom-lower extensions from fixed-length vector types.
2551   if (!ExtVT.isFixedLengthVector())
2552     return Op;
2553   MVT VT = Op.getOperand(0).getSimpleValueType();
2554   // Grab the canonical container type for the extended type. Infer the smaller
2555   // type from that to ensure the same number of vector elements, as we know
2556   // the LMUL will be sufficient to hold the smaller type.
2557   MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
2558   // Build the source container type manually, reusing the extended container's
2559   // element count, so that source and dest have the same number of elements.
2560   MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
2561                                      ContainerExtVT.getVectorElementCount());
2562
2563   SDValue Op1 =
2564       convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
2565
2566   SDLoc DL(Op);
2567   SDValue Mask, VL;
2568   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2569
2570   SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
2571
2572   return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
2573 }
2574
2575 // Custom-lower truncations from vectors to mask vectors by using a mask and a
2576 // setcc operation:
2577 //   (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
2578 SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op,
2579                                                   SelectionDAG &DAG) const {
2580   SDLoc DL(Op);
2581   EVT MaskVT = Op.getValueType();
2582   // Only expect to custom-lower truncations to mask types.
2583   assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
2584          "Unexpected type for vector mask lowering");
2585   SDValue Src = Op.getOperand(0);
2586   MVT VecVT = Src.getSimpleValueType();
2587
2588   // If this is a fixed vector, we need to convert it to a scalable vector.
2589 MVT ContainerVT = VecVT; 2590 if (VecVT.isFixedLengthVector()) { 2591 ContainerVT = getContainerForFixedLengthVector(VecVT); 2592 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); 2593 } 2594 2595 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT()); 2596 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); 2597 2598 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatOne); 2599 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero); 2600 2601 if (VecVT.isScalableVector()) { 2602 SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne); 2603 return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE); 2604 } 2605 2606 SDValue Mask, VL; 2607 std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 2608 2609 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1); 2610 SDValue Trunc = 2611 DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne, Mask, VL); 2612 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, Trunc, SplatZero, 2613 DAG.getCondCode(ISD::SETNE), Mask, VL); 2614 return convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget); 2615 } 2616 2617 // Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the 2618 // first position of a vector, and that vector is slid up to the insert index. 2619 // By limiting the active vector length to index+1 and merging with the 2620 // original vector (with an undisturbed tail policy for elements >= VL), we 2621 // achieve the desired result of leaving all elements untouched except the one 2622 // at VL-1, which is replaced with the desired value. 2623 SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, 2624 SelectionDAG &DAG) const { 2625 SDLoc DL(Op); 2626 MVT VecVT = Op.getSimpleValueType(); 2627 SDValue Vec = Op.getOperand(0); 2628 SDValue Val = Op.getOperand(1); 2629 SDValue Idx = Op.getOperand(2); 2630 2631 MVT ContainerVT = VecVT; 2632 // If the operand is a fixed-length vector, convert to a scalable one. 2633 if (VecVT.isFixedLengthVector()) { 2634 ContainerVT = getContainerForFixedLengthVector(VecVT); 2635 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 2636 } 2637 2638 MVT XLenVT = Subtarget.getXLenVT(); 2639 2640 SDValue Zero = DAG.getConstant(0, DL, XLenVT); 2641 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64; 2642 // Even i64-element vectors on RV32 can be lowered without scalar 2643 // legalization if the most-significant 32 bits of the value are not affected 2644 // by the sign-extension of the lower 32 bits. 2645 // TODO: We could also catch sign extensions of a 32-bit value. 2646 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) { 2647 const auto *CVal = cast<ConstantSDNode>(Val); 2648 if (isInt<32>(CVal->getSExtValue())) { 2649 IsLegalInsert = true; 2650 Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32); 2651 } 2652 } 2653 2654 SDValue Mask, VL; 2655 std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 2656 2657 SDValue ValInVec; 2658 2659 if (IsLegalInsert) { 2660 unsigned Opc = 2661 VecVT.isFloatingPoint() ? 
RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
2662     if (isNullConstant(Idx)) {
2663       Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
2664       if (!VecVT.isFixedLengthVector())
2665         return Vec;
2666       return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
2667     }
2668     ValInVec =
2669         DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Val, VL);
2670   } else {
2671     // On RV32, i64-element vectors must be specially handled to place the
2672     // value at element 0, by using two vslide1up instructions in sequence on
2673     // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
2674     // this.
2675     SDValue One = DAG.getConstant(1, DL, XLenVT);
2676     SDValue ValLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, Zero);
2677     SDValue ValHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, One);
2678     MVT I32ContainerVT =
2679         MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
2680     SDValue I32Mask =
2681         getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
2682     // Limit the active VL to two.
2683     SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
2684     // Note: We can't pass an UNDEF to the first VSLIDE1UP_VL since an untied
2685     // undef doesn't obey the earlyclobber constraint. Just splat a zero value.
2686     ValInVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, I32ContainerVT, Zero,
2687                            InsertI64VL);
2688     // First slide in the hi value, then slide the lo value in underneath it.
2689     ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec,
2690                            ValHi, I32Mask, InsertI64VL);
2691     ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec,
2692                            ValLo, I32Mask, InsertI64VL);
2693     // Bitcast back to the right container type.
2694     ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
2695   }
2696
2697   // Now that the value is in a vector, slide it into position.
2698   SDValue InsertVL =
2699       DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
2700   SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec,
2701                                 ValInVec, Idx, Mask, InsertVL);
2702   if (!VecVT.isFixedLengthVector())
2703     return Slideup;
2704   return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
2705 }
2706
2707 // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
2708 // extract the first element: (extractelt (slidedown vec, idx), 0). For integer
2709 // types this is done using VMV_X_S to allow us to glean information about the
2710 // sign bits of the result.
2711 SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
2712                                                      SelectionDAG &DAG) const {
2713   SDLoc DL(Op);
2714   SDValue Idx = Op.getOperand(1);
2715   SDValue Vec = Op.getOperand(0);
2716   EVT EltVT = Op.getValueType();
2717   MVT VecVT = Vec.getSimpleValueType();
2718   MVT XLenVT = Subtarget.getXLenVT();
2719
2720   if (VecVT.getVectorElementType() == MVT::i1) {
2721     // FIXME: For now we just promote to an i8 vector and extract from that,
2722     // but this is probably not optimal.
2723     MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
2724     Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
2725     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
2726   }
2727
2728   // If this is a fixed vector, we need to convert it to a scalable vector.
2729   MVT ContainerVT = VecVT;
2730   if (VecVT.isFixedLengthVector()) {
2731     ContainerVT = getContainerForFixedLengthVector(VecVT);
2732     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
2733   }
2734
2735   // If the index is 0, the vector is already in the right position.
2736   if (!isNullConstant(Idx)) {
2737     // Use a VL of 1 to avoid processing more elements than we need.
2738     SDValue VL = DAG.getConstant(1, DL, XLenVT);
2739     MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
2740     SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2741     Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
2742                       DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
2743   }
2744
2745   if (!EltVT.isInteger()) {
2746     // Floating-point extracts are handled in TableGen.
2747     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
2748                        DAG.getConstant(0, DL, XLenVT));
2749   }
2750
2751   SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
2752   return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
2753 }
2754
2755 // Called by type legalization to handle splat of i64 on RV32.
2756 // FIXME: We can optimize this when one of the halves is known to be all sign
2757 // bits or all zero bits.
2758 static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Scalar,
2759                                    SDValue VL, SelectionDAG &DAG) {
2760   SDValue ThirtyTwoV = DAG.getConstant(32, DL, VT);
2761   SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
2762                            DAG.getConstant(0, DL, MVT::i32));
2763   SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
2764                            DAG.getConstant(1, DL, MVT::i32));
2765
2766   // vmv.v.x vX, hi
2767   // vsll.vx vX, vX, /*32*/
2768   // vmv.v.x vY, lo
2769   // vsll.vx vY, vY, /*32*/
2770   // vsrl.vx vY, vY, /*32*/
2771   // vor.vv vX, vX, vY
2772   MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
2773   SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2774   Lo = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Lo, VL);
2775   Lo = DAG.getNode(RISCVISD::SHL_VL, DL, VT, Lo, ThirtyTwoV, Mask, VL);
2776   Lo = DAG.getNode(RISCVISD::SRL_VL, DL, VT, Lo, ThirtyTwoV, Mask, VL);
2777
2778   Hi = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Hi, VL);
2779   Hi = DAG.getNode(RISCVISD::SHL_VL, DL, VT, Hi, ThirtyTwoV, Mask, VL);
2780
2781   return DAG.getNode(RISCVISD::OR_VL, DL, VT, Lo, Hi, Mask, VL);
2782 }
2783
2784 // Some RVV intrinsics may claim that they want an integer operand to be
2785 // promoted or expanded.
2786 static SDValue lowerVectorIntrinsicSplats(SDValue Op, SelectionDAG &DAG,
2787                                           const RISCVSubtarget &Subtarget) {
2788   assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2789           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2790          "Unexpected opcode");
2791
2792   if (!Subtarget.hasStdExtV())
2793     return SDValue();
2794
2795   bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2796   unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2797   SDLoc DL(Op);
2798
2799   const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
2800       RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2801   if (!II || !II->SplatOperand)
2802     return SDValue();
2803
2804   unsigned SplatOp = II->SplatOperand + HasChain;
2805   assert(SplatOp < Op.getNumOperands());
2806
2807   SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
2808   SDValue &ScalarOp = Operands[SplatOp];
2809   MVT OpVT = ScalarOp.getSimpleValueType();
2810   MVT XLenVT = Subtarget.getXLenVT();
2811
2812   // If this isn't a scalar, or if its type is already XLenVT, we're done.
2813   if (!OpVT.isScalarInteger() || OpVT == XLenVT)
2814     return SDValue();
2815
2816   // Simplest case is that the operand needs to be promoted to XLenVT.
2817   if (OpVT.bitsLT(XLenVT)) {
2818     // If the operand is a constant, sign extend to increase our chances
2819     // of being able to use a .vi instruction. ANY_EXTEND would become a
2820     // zero extend and the simm5 check in isel would fail.
2821     // FIXME: Should we ignore the upper bits in isel instead?
2822     unsigned ExtOpc =
2823         isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
2824     ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
2825     return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
2826   }
2827
2828   // Use the previous operand to get the vXi64 VT. The result might be a mask
2829   // VT for compares. Using the previous operand assumes that the previous
2830   // operand will never have a smaller element size than a scalar operand and
2831   // that a widening operation never uses SEW=64.
2832   // NOTE: If this ever trips the assert below, we can probably just find the
2833   // element count from any operand or result and use it to construct the VT.
2834   assert(II->SplatOperand > 1 && "Unexpected splat operand!");
2835   MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
2836
2837   // The more complex case is when the scalar is larger than XLenVT.
2838   assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
2839          VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
2840
2841   // If this is a sign-extended 32-bit constant, we can truncate it and rely
2842   // on the instruction to sign-extend since SEW>XLEN.
2843   if (auto *CVal = dyn_cast<ConstantSDNode>(ScalarOp)) {
2844     if (isInt<32>(CVal->getSExtValue())) {
2845       ScalarOp = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
2846       return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
2847     }
2848   }
2849
2850   // We need to convert the scalar to a splat vector.
2851   // FIXME: Can we implicitly truncate the scalar if it is known to
2852   // be sign extended?
2853   // VL should be the last operand.
2854   SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
2855   assert(VL.getValueType() == XLenVT);
2856   ScalarOp = splatSplitI64WithVL(DL, VT, ScalarOp, VL, DAG);
2857   return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
2858 }
2859
2860 SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
2861                                                      SelectionDAG &DAG) const {
2862   unsigned IntNo = Op.getConstantOperandVal(0);
2863   SDLoc DL(Op);
2864   MVT XLenVT = Subtarget.getXLenVT();
2865
2866   switch (IntNo) {
2867   default:
2868     break; // Don't custom lower most intrinsics.
2869   case Intrinsic::thread_pointer: {
2870     EVT PtrVT = getPointerTy(DAG.getDataLayout());
2871     return DAG.getRegister(RISCV::X4, PtrVT);
2872   }
2873   case Intrinsic::riscv_orc_b:
2874     // Lower to the GORCI encoding for orc.b.
2875     return DAG.getNode(RISCVISD::GORCI, DL, XLenVT, Op.getOperand(1),
2876                        DAG.getTargetConstant(7, DL, XLenVT));
2877   case Intrinsic::riscv_vmv_x_s:
2878     assert(Op.getValueType() == XLenVT && "Unexpected VT!");
2879     return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
2880                        Op.getOperand(1));
2881   case Intrinsic::riscv_vmv_v_x: {
2882     SDValue Scalar = Op.getOperand(1);
2883     if (Scalar.getValueType().bitsLE(XLenVT)) {
2884       unsigned ExtOpc =
2885           isa<ConstantSDNode>(Scalar) ?
ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
2886       Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
2887       return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, Op.getValueType(), Scalar,
2888                          Op.getOperand(2));
2889     }
2890
2891     assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
2892
2893     // If this is a sign-extended 32-bit constant, we can truncate it and rely
2894     // on the instruction to sign-extend since SEW>XLEN.
2895     if (auto *CVal = dyn_cast<ConstantSDNode>(Scalar)) {
2896       if (isInt<32>(CVal->getSExtValue()))
2897         return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, Op.getValueType(),
2898                            DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32),
2899                            Op.getOperand(2));
2900     }
2901
2902     // Otherwise use the more complicated splatting algorithm.
2903     return splatSplitI64WithVL(DL, Op.getSimpleValueType(), Scalar,
2904                                Op.getOperand(2), DAG);
2905   }
2906   case Intrinsic::riscv_vfmv_v_f:
2907     return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
2908                        Op.getOperand(1), Op.getOperand(2));
2909   case Intrinsic::riscv_vmv_s_x: {
2910     SDValue Scalar = Op.getOperand(2);
2911
2912     if (Scalar.getValueType().bitsLE(XLenVT)) {
2913       Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
2914       return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
2915                          Op.getOperand(1), Scalar, Op.getOperand(3));
2916     }
2917
2918     assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
2919
2920     // This is an i64 value that lives in two scalar registers. We have to
2921     // insert this in a convoluted way. First we build a vXi64 splat containing
2922     // the two values, which we assemble using some bit math. Next we'll use
2923     // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
2924     // to merge element 0 from our splat into the source vector.
2925     // FIXME: This is probably not the best way to do this, but it is
2926     // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
2927     // point.
2928 // vmv.v.x vX, hi 2929 // vsll.vx vX, vX, /*32*/ 2930 // vmv.v.x vY, lo 2931 // vsll.vx vY, vY, /*32*/ 2932 // vsrl.vx vY, vY, /*32*/ 2933 // vor.vv vX, vX, vY 2934 // 2935 // vid.v vVid 2936 // vmseq.vx mMask, vVid, 0 2937 // vmerge.vvm vDest, vSrc, vVal, mMask 2938 MVT VT = Op.getSimpleValueType(); 2939 SDValue Vec = Op.getOperand(1); 2940 SDValue VL = Op.getOperand(3); 2941 2942 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, Scalar, VL, DAG); 2943 SDValue SplattedIdx = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, 2944 DAG.getConstant(0, DL, MVT::i32), VL); 2945 2946 MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount()); 2947 SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); 2948 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL); 2949 SDValue SelectCond = 2950 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, VID, SplattedIdx, 2951 DAG.getCondCode(ISD::SETEQ), Mask, VL); 2952 return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal, 2953 Vec, VL); 2954 } 2955 } 2956 2957 return lowerVectorIntrinsicSplats(Op, DAG, Subtarget); 2958 } 2959 2960 SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, 2961 SelectionDAG &DAG) const { 2962 return lowerVectorIntrinsicSplats(Op, DAG, Subtarget); 2963 } 2964 2965 static MVT getLMUL1VT(MVT VT) { 2966 assert(VT.getVectorElementType().getSizeInBits() <= 64 && 2967 "Unexpected vector MVT"); 2968 return MVT::getScalableVectorVT( 2969 VT.getVectorElementType(), 2970 RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits()); 2971 } 2972 2973 static unsigned getRVVReductionOp(unsigned ISDOpcode) { 2974 switch (ISDOpcode) { 2975 default: 2976 llvm_unreachable("Unhandled reduction"); 2977 case ISD::VECREDUCE_ADD: 2978 return RISCVISD::VECREDUCE_ADD_VL; 2979 case ISD::VECREDUCE_UMAX: 2980 return RISCVISD::VECREDUCE_UMAX_VL; 2981 case ISD::VECREDUCE_SMAX: 2982 return RISCVISD::VECREDUCE_SMAX_VL; 2983 case ISD::VECREDUCE_UMIN: 2984 return RISCVISD::VECREDUCE_UMIN_VL; 2985 case ISD::VECREDUCE_SMIN: 2986 return RISCVISD::VECREDUCE_SMIN_VL; 2987 case ISD::VECREDUCE_AND: 2988 return RISCVISD::VECREDUCE_AND_VL; 2989 case ISD::VECREDUCE_OR: 2990 return RISCVISD::VECREDUCE_OR_VL; 2991 case ISD::VECREDUCE_XOR: 2992 return RISCVISD::VECREDUCE_XOR_VL; 2993 } 2994 } 2995 2996 SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op, 2997 SelectionDAG &DAG) const { 2998 SDLoc DL(Op); 2999 SDValue Vec = Op.getOperand(0); 3000 EVT VecEVT = Vec.getValueType(); 3001 3002 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode()); 3003 3004 // Due to ordering in legalize types we may have a vector type that needs to 3005 // be split. Do that manually so we can get down to a legal type. 3006 while (getTypeAction(*DAG.getContext(), VecEVT) == 3007 TargetLowering::TypeSplitVector) { 3008 SDValue Lo, Hi; 3009 std::tie(Lo, Hi) = DAG.SplitVector(Vec, DL); 3010 VecEVT = Lo.getValueType(); 3011 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi); 3012 } 3013 3014 // TODO: The type may need to be widened rather than split. Or widened before 3015 // it can be split. 
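  // Illustrative example (a sketch, not from the original comments): with the
  // largest legal integer vector type being nxv8i64, a VECREDUCE_ADD of
  // nxv16i64 is split by the loop above into
  //   vecreduce_add(add(nxv8i64 lo, nxv8i64 hi))
  // i.e. the two halves are combined with the base opcode (ADD) until the
  // operand type is legal, after which a single RVV reduction is emitted.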
3016 if (!isTypeLegal(VecEVT)) 3017 return SDValue(); 3018 3019 MVT VecVT = VecEVT.getSimpleVT(); 3020 MVT VecEltVT = VecVT.getVectorElementType(); 3021 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode()); 3022 3023 MVT ContainerVT = VecVT; 3024 if (VecVT.isFixedLengthVector()) { 3025 ContainerVT = getContainerForFixedLengthVector(VecVT); 3026 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 3027 } 3028 3029 MVT M1VT = getLMUL1VT(ContainerVT); 3030 3031 SDValue Mask, VL; 3032 std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 3033 3034 // FIXME: This is a VLMAX splat which might be too large and can prevent 3035 // vsetvli removal. 3036 SDValue NeutralElem = 3037 DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags()); 3038 SDValue IdentitySplat = DAG.getSplatVector(M1VT, DL, NeutralElem); 3039 SDValue Reduction = 3040 DAG.getNode(RVVOpcode, DL, M1VT, Vec, IdentitySplat, Mask, VL); 3041 SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction, 3042 DAG.getConstant(0, DL, Subtarget.getXLenVT())); 3043 return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType()); 3044 } 3045 3046 // Given a reduction op, this function returns the matching reduction opcode, 3047 // the vector SDValue and the scalar SDValue required to lower this to a 3048 // RISCVISD node. 3049 static std::tuple<unsigned, SDValue, SDValue> 3050 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) { 3051 SDLoc DL(Op); 3052 switch (Op.getOpcode()) { 3053 default: 3054 llvm_unreachable("Unhandled reduction"); 3055 case ISD::VECREDUCE_FADD: 3056 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), 3057 DAG.getConstantFP(0.0, DL, EltVT)); 3058 case ISD::VECREDUCE_SEQ_FADD: 3059 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1), 3060 Op.getOperand(0)); 3061 } 3062 } 3063 3064 SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op, 3065 SelectionDAG &DAG) const { 3066 SDLoc DL(Op); 3067 MVT VecEltVT = Op.getSimpleValueType(); 3068 3069 unsigned RVVOpcode; 3070 SDValue VectorVal, ScalarVal; 3071 std::tie(RVVOpcode, VectorVal, ScalarVal) = 3072 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT); 3073 MVT VecVT = VectorVal.getSimpleValueType(); 3074 3075 MVT ContainerVT = VecVT; 3076 if (VecVT.isFixedLengthVector()) { 3077 ContainerVT = getContainerForFixedLengthVector(VecVT); 3078 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget); 3079 } 3080 3081 MVT M1VT = getLMUL1VT(VectorVal.getSimpleValueType()); 3082 3083 SDValue Mask, VL; 3084 std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 3085 3086 // FIXME: This is a VLMAX splat which might be too large and can prevent 3087 // vsetvli removal. 
3088   SDValue ScalarSplat = DAG.getSplatVector(M1VT, DL, ScalarVal);
3089   SDValue Reduction =
3090       DAG.getNode(RVVOpcode, DL, M1VT, VectorVal, ScalarSplat, Mask, VL);
3091   return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
3092                      DAG.getConstant(0, DL, Subtarget.getXLenVT()));
3093 }
3094
3095 SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
3096                                                    SelectionDAG &DAG) const {
3097   SDValue Vec = Op.getOperand(0);
3098   SDValue SubVec = Op.getOperand(1);
3099   MVT VecVT = Vec.getSimpleValueType();
3100   MVT SubVecVT = SubVec.getSimpleValueType();
3101
3102   SDLoc DL(Op);
3103   MVT XLenVT = Subtarget.getXLenVT();
3104   unsigned OrigIdx = Op.getConstantOperandVal(2);
3105   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
3106
3107   // We don't have the ability to slide mask vectors up indexed by their i1
3108   // elements; the smallest we can do is i8. Often we are able to bitcast to
3109   // equivalent i8 vectors. Note that when inserting a fixed-length vector
3110   // into a scalable one, we might not necessarily have enough scalable
3111   // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
3112   if (SubVecVT.getVectorElementType() == MVT::i1 &&
3113       (OrigIdx != 0 || !Vec.isUndef())) {
3114     if (VecVT.getVectorMinNumElements() >= 8 &&
3115         SubVecVT.getVectorMinNumElements() >= 8) {
3116       assert(OrigIdx % 8 == 0 && "Invalid index");
3117       assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
3118              SubVecVT.getVectorMinNumElements() % 8 == 0 &&
3119              "Unexpected mask vector lowering");
3120       OrigIdx /= 8;
3121       SubVecVT =
3122           MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
3123                            SubVecVT.isScalableVector());
3124       VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
3125                                VecVT.isScalableVector());
3126       Vec = DAG.getBitcast(VecVT, Vec);
3127       SubVec = DAG.getBitcast(SubVecVT, SubVec);
3128     } else {
3129       // We can't slide this mask vector up indexed by its i1 elements.
3130       // This poses a problem when we wish to insert a scalable vector which
3131       // can't be re-expressed as a larger type. Just choose the slow path and
3132       // extend to a larger type, then truncate back down.
3133       MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
3134       MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
3135       Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
3136       SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
3137       Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
3138                         Op.getOperand(2));
3139       SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
3140       return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
3141     }
3142   }
3143
3144   // If the subvector is a fixed-length type, we cannot use subregister
3145   // manipulation to simplify the codegen; we don't know which register of an
3146   // LMUL group contains the specific subvector as we only know the minimum
3147   // register size. Therefore we must slide the vector group up the full
3148   // amount.
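  // Illustrative example (a sketch, not from the original comments): inserting
  // v2i64 into v4i64 at index 2 lowers to a VSLIDEUP with slide amount 2 and
  // VL = 4 (the index plus the number of subvector elements), leaving elements
  // 0 and 1 of the destination undisturbed while elements 2 and 3 receive the
  // subvector.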
3149   if (SubVecVT.isFixedLengthVector()) {
3150     if (OrigIdx == 0 && Vec.isUndef())
3151       return Op;
3152     MVT ContainerVT = VecVT;
3153     if (VecVT.isFixedLengthVector()) {
3154       ContainerVT = getContainerForFixedLengthVector(VecVT);
3155       Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3156     }
3157     SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
3158                          DAG.getUNDEF(ContainerVT), SubVec,
3159                          DAG.getConstant(0, DL, XLenVT));
3160     SDValue Mask =
3161         getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
3162     // Set the vector length to only the number of elements we care about. Note
3163     // that for slideup this includes the offset.
3164     SDValue VL =
3165         DAG.getConstant(OrigIdx + SubVecVT.getVectorNumElements(), DL, XLenVT);
3166     SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
3167     SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec,
3168                                   SubVec, SlideupAmt, Mask, VL);
3169     if (VecVT.isFixedLengthVector())
3170       Slideup = convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
3171     return DAG.getBitcast(Op.getValueType(), Slideup);
3172   }
3173
3174   unsigned SubRegIdx, RemIdx;
3175   std::tie(SubRegIdx, RemIdx) =
3176       RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
3177           VecVT, SubVecVT, OrigIdx, TRI);
3178
3179   RISCVVLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
3180   bool IsSubVecPartReg = SubVecLMUL == RISCVVLMUL::LMUL_F2 ||
3181                          SubVecLMUL == RISCVVLMUL::LMUL_F4 ||
3182                          SubVecLMUL == RISCVVLMUL::LMUL_F8;
3183
3184   // 1. If the Idx has been completely eliminated and this subvector's size is
3185   // a vector register or a multiple thereof, or the surrounding elements are
3186   // undef, then this is a subvector insert which naturally aligns to a vector
3187   // register. These can easily be handled using subregister manipulation.
3188   // 2. If the subvector is smaller than a vector register, then the insertion
3189   // must preserve the undisturbed elements of the register. We do this by
3190   // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
3191   // (which resolves to a subregister copy), performing a VSLIDEUP to place the
3192   // subvector within the vector register, and an INSERT_SUBVECTOR of that
3193   // LMUL=1 type back into the larger vector (resolving to another subregister
3194   // operation). See below for how our VSLIDEUP works. We go via an LMUL=1 type
3195   // to avoid allocating a large register group to hold our subvector.
3196   if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
3197     return Op;
3198
3199   // VSLIDEUP works by leaving elements 0<=i<OFFSET undisturbed, elements
3200   // OFFSET<=i<VL set to the "subvector" and VL<=i<VLMAX set to the tail policy
3201   // (in our case undisturbed). This means we can set up a subvector insertion
3202   // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
3203   // size of the subvector.
3204   MVT InterSubVT = VecVT;
3205   SDValue AlignedExtract = Vec;
3206   unsigned AlignedIdx = OrigIdx - RemIdx;
3207   if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
3208     InterSubVT = getLMUL1VT(VecVT);
3209     // Extract a subvector equal to the nearest full vector register type. This
3210     // should resolve to an EXTRACT_SUBREG instruction.
3211     AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
3212                                  DAG.getConstant(AlignedIdx, DL, XLenVT));
3213   }
3214
3215   SDValue SlideupAmt = DAG.getConstant(RemIdx, DL, XLenVT);
3216   // For scalable vectors this must be further multiplied by vscale.
3217 SlideupAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlideupAmt); 3218 3219 SDValue Mask, VL; 3220 std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget); 3221 3222 // Construct the vector length corresponding to RemIdx + length(SubVecVT). 3223 VL = DAG.getConstant(SubVecVT.getVectorMinNumElements(), DL, XLenVT); 3224 VL = DAG.getNode(ISD::VSCALE, DL, XLenVT, VL); 3225 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL); 3226 3227 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT, 3228 DAG.getUNDEF(InterSubVT), SubVec, 3229 DAG.getConstant(0, DL, XLenVT)); 3230 3231 SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, InterSubVT, 3232 AlignedExtract, SubVec, SlideupAmt, Mask, VL); 3233 3234 // If required, insert this subvector back into the correct vector register. 3235 // This should resolve to an INSERT_SUBREG instruction. 3236 if (VecVT.bitsGT(InterSubVT)) 3237 Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, Slideup, 3238 DAG.getConstant(AlignedIdx, DL, XLenVT)); 3239 3240 // We might have bitcast from a mask type: cast back to the original type if 3241 // required. 3242 return DAG.getBitcast(Op.getSimpleValueType(), Slideup); 3243 } 3244 3245 SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op, 3246 SelectionDAG &DAG) const { 3247 SDValue Vec = Op.getOperand(0); 3248 MVT SubVecVT = Op.getSimpleValueType(); 3249 MVT VecVT = Vec.getSimpleValueType(); 3250 3251 SDLoc DL(Op); 3252 MVT XLenVT = Subtarget.getXLenVT(); 3253 unsigned OrigIdx = Op.getConstantOperandVal(1); 3254 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); 3255 3256 // We don't have the ability to slide mask vectors down indexed by their i1 3257 // elements; the smallest we can do is i8. Often we are able to bitcast to 3258 // equivalent i8 vectors. Note that when extracting a fixed-length vector 3259 // from a scalable one, we might not necessarily have enough scalable 3260 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid. 3261 if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) { 3262 if (VecVT.getVectorMinNumElements() >= 8 && 3263 SubVecVT.getVectorMinNumElements() >= 8) { 3264 assert(OrigIdx % 8 == 0 && "Invalid index"); 3265 assert(VecVT.getVectorMinNumElements() % 8 == 0 && 3266 SubVecVT.getVectorMinNumElements() % 8 == 0 && 3267 "Unexpected mask vector lowering"); 3268 OrigIdx /= 8; 3269 SubVecVT = 3270 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8, 3271 SubVecVT.isScalableVector()); 3272 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8, 3273 VecVT.isScalableVector()); 3274 Vec = DAG.getBitcast(VecVT, Vec); 3275 } else { 3276 // We can't slide this mask vector down, indexed by its i1 elements. 3277 // This poses a problem when we wish to extract a scalable vector which 3278 // can't be re-expressed as a larger type. Just choose the slow path and 3279 // extend to a larger type, then truncate back down. 3280 // TODO: We could probably improve this when extracting certain fixed 3281 // from fixed, where we can extract as i8 and shift the correct element 3282 // right to reach the desired subvector? 
3283       MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
3284       MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
3285       Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
3286       Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
3287                         Op.getOperand(1));
3288       SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
3289       return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
3290     }
3291   }
3292
3293   // If the subvector is a fixed-length type, we cannot use subregister
3294   // manipulation to simplify the codegen; we don't know which register of an
3295   // LMUL group contains the specific subvector as we only know the minimum
3296   // register size. Therefore we must slide the vector group down the full
3297   // amount.
3298   if (SubVecVT.isFixedLengthVector()) {
3299     // With an index of 0 this is a cast-like subvector extract, which can be
3300     // performed with subregister operations.
3301     if (OrigIdx == 0)
3302       return Op;
3303     MVT ContainerVT = VecVT;
3304     if (VecVT.isFixedLengthVector()) {
3305       ContainerVT = getContainerForFixedLengthVector(VecVT);
3306       Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3307     }
3308     SDValue Mask =
3309         getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
3310     // Set the vector length to only the number of elements we care about. This
3311     // avoids sliding down elements we're going to discard straight away.
3312     SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
3313     SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
3314     SDValue Slidedown =
3315         DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
3316                     DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
3317     // Now we can use a cast-like subvector extract to get the result.
3318     Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
3319                             DAG.getConstant(0, DL, XLenVT));
3320     return DAG.getBitcast(Op.getValueType(), Slidedown);
3321   }
3322
3323   unsigned SubRegIdx, RemIdx;
3324   std::tie(SubRegIdx, RemIdx) =
3325       RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
3326           VecVT, SubVecVT, OrigIdx, TRI);
3327
3328   // If the Idx has been completely eliminated then this is a subvector extract
3329   // which naturally aligns to a vector register. These can easily be handled
3330   // using subregister manipulation.
3331   if (RemIdx == 0)
3332     return Op;
3333
3334   // Else we must shift our vector register directly to extract the subvector.
3335   // Do this using VSLIDEDOWN.
3336
3337   // If the vector type is an LMUL-group type, extract a subvector equal to the
3338   // nearest full vector register type. This should resolve to an
3339   // EXTRACT_SUBREG instruction.
3340   MVT InterSubVT = VecVT;
3341   if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
3342     InterSubVT = getLMUL1VT(VecVT);
3343     Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
3344                       DAG.getConstant(OrigIdx - RemIdx, DL, XLenVT));
3345   }
3346
3347   // Slide this vector register down by the desired number of elements in order
3348   // to place the desired subvector starting at element 0.
3349   SDValue SlidedownAmt = DAG.getConstant(RemIdx, DL, XLenVT);
3350   // For scalable vectors this must be further multiplied by vscale.
3351   SlidedownAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlidedownAmt);
3352
3353   SDValue Mask, VL;
3354   std::tie(Mask, VL) = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
3355   SDValue Slidedown =
3356       DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, InterSubVT,
3357                   DAG.getUNDEF(InterSubVT), Vec, SlidedownAmt, Mask, VL);
3358
3359   // Now that the vector is in the right position, extract our final subvector.
3360   // This should resolve to a COPY.
3361   Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
3362                           DAG.getConstant(0, DL, XLenVT));
3363
3364   // We might have bitcast from a mask type: cast back to the original type if
3365   // required.
3366   return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
3367 }
3368
3369 // Implement step_vector using the vid instruction.
3370 SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
3371                                               SelectionDAG &DAG) const {
3372   SDLoc DL(Op);
3373   assert(Op.getConstantOperandAPInt(0) == 1 && "Unexpected step value");
3374   MVT VT = Op.getSimpleValueType();
3375   SDValue Mask, VL;
3376   std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
3377   return DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
3378 }
3379
3380 // Implement vector_reverse using vrgather.vv with indices determined by
3381 // subtracting the id of each element from (VLMAX-1). This will convert
3382 // the indices like so:
3383 // (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
3384 // TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
3385 SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
3386                                                  SelectionDAG &DAG) const {
3387   SDLoc DL(Op);
3388   MVT VecVT = Op.getSimpleValueType();
3389   unsigned EltSize = VecVT.getScalarSizeInBits();
3390   unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
3391
3392   unsigned MaxVLMAX = 0;
3393   unsigned VectorBitsMax = Subtarget.getMaxRVVVectorSizeInBits();
3394   if (VectorBitsMax != 0)
3395     MaxVLMAX = ((VectorBitsMax / EltSize) * MinSize) / RISCV::RVVBitsPerBlock;
3396
3397   unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
3398   MVT IntVT = VecVT.changeVectorElementTypeToInteger();
3399
3400   // If this is SEW=8 and VLMAX is unknown or more than 256, we need
3401   // to use vrgatherei16.vv.
3402   // TODO: It's also possible to use vrgatherei16.vv for other types to
3403   // decrease register width for the index calculation.
3404   if ((MaxVLMAX == 0 || MaxVLMAX > 256) && EltSize == 8) {
3405     // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
3406     // Reverse each half, then reassemble them in reverse order.
3407     // NOTE: It's also possible that after splitting, VLMAX no longer
3408     // requires vrgatherei16.vv.
3409     if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
3410       SDValue Lo, Hi;
3411       std::tie(Lo, Hi) = DAG.SplitVectorOperand(Op.getNode(), 0);
3412       EVT LoVT, HiVT;
3413       std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT);
3414       Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
3415       Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
3416       // Reassemble the low and high pieces reversed.
3417       // FIXME: This is a CONCAT_VECTORS.
3418       SDValue Res =
3419           DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
3420                       DAG.getIntPtrConstant(0, DL));
3421       return DAG.getNode(
3422           ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
3423           DAG.getIntPtrConstant(LoVT.getVectorMinNumElements(), DL));
3424     }
3425
3426     // Just promote the int type to i16 which will double the LMUL.
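    // Illustrative example (a sketch, not from the original comments):
    // reversing nxv8i8 with unknown VLMAX computes its indices in nxv8i16,
    // where VLMAX-1 cannot overflow the index element type (given the
    // VLMAX <= 65536 assumption above), and gathers with vrgatherei16.vv
    // instead of vrgather.vv.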
3427 IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount()); 3428 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL; 3429 } 3430 3431 MVT XLenVT = Subtarget.getXLenVT(); 3432 SDValue Mask, VL; 3433 std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget); 3434 3435 // Calculate VLMAX-1 for the desired SEW. 3436 unsigned MinElts = VecVT.getVectorMinNumElements(); 3437 SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT, 3438 DAG.getConstant(MinElts, DL, XLenVT)); 3439 SDValue VLMinus1 = 3440 DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DAG.getConstant(1, DL, XLenVT)); 3441 3442 // Splat VLMAX-1 taking care to handle SEW==64 on RV32. 3443 bool IsRV32E64 = 3444 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64; 3445 SDValue SplatVL; 3446 if (!IsRV32E64) 3447 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1); 3448 else 3449 SplatVL = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, IntVT, VLMinus1); 3450 3451 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL); 3452 SDValue Indices = 3453 DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID, Mask, VL); 3454 3455 return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices, Mask, VL); 3456 } 3457 3458 SDValue 3459 RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op, 3460 SelectionDAG &DAG) const { 3461 auto *Load = cast<LoadSDNode>(Op); 3462 3463 SDLoc DL(Op); 3464 MVT VT = Op.getSimpleValueType(); 3465 MVT ContainerVT = getContainerForFixedLengthVector(VT); 3466 3467 SDValue VL = 3468 DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT()); 3469 3470 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 3471 SDValue NewLoad = DAG.getMemIntrinsicNode( 3472 RISCVISD::VLE_VL, DL, VTs, {Load->getChain(), Load->getBasePtr(), VL}, 3473 Load->getMemoryVT(), Load->getMemOperand()); 3474 3475 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget); 3476 return DAG.getMergeValues({Result, Load->getChain()}, DL); 3477 } 3478 3479 SDValue 3480 RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op, 3481 SelectionDAG &DAG) const { 3482 auto *Store = cast<StoreSDNode>(Op); 3483 3484 SDLoc DL(Op); 3485 MVT VT = Store->getValue().getSimpleValueType(); 3486 3487 // FIXME: We probably need to zero any extra bits in a byte for mask stores. 3488 // This is tricky to do. 
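  // Illustrative example (an assumption for exposition, not verified
  // behaviour): storing a v4i1 mask writes a whole byte to memory, but only
  // bits 0-3 are defined by the source value; bits 4-7 come from whatever the
  // container register's tail happened to hold.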
3489 3490 MVT ContainerVT = getContainerForFixedLengthVector(VT); 3491 3492 SDValue VL = 3493 DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT()); 3494 3495 SDValue NewValue = 3496 convertToScalableVector(ContainerVT, Store->getValue(), DAG, Subtarget); 3497 return DAG.getMemIntrinsicNode( 3498 RISCVISD::VSE_VL, DL, DAG.getVTList(MVT::Other), 3499 {Store->getChain(), NewValue, Store->getBasePtr(), VL}, 3500 Store->getMemoryVT(), Store->getMemOperand()); 3501 } 3502 3503 SDValue RISCVTargetLowering::lowerMLOAD(SDValue Op, SelectionDAG &DAG) const { 3504 auto *Load = cast<MaskedLoadSDNode>(Op); 3505 3506 SDLoc DL(Op); 3507 MVT VT = Op.getSimpleValueType(); 3508 MVT XLenVT = Subtarget.getXLenVT(); 3509 3510 SDValue Mask = Load->getMask(); 3511 SDValue PassThru = Load->getPassThru(); 3512 SDValue VL; 3513 3514 MVT ContainerVT = VT; 3515 if (VT.isFixedLengthVector()) { 3516 ContainerVT = getContainerForFixedLengthVector(VT); 3517 MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 3518 3519 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 3520 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget); 3521 VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT); 3522 } else 3523 VL = DAG.getRegister(RISCV::X0, XLenVT); 3524 3525 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 3526 SDValue IntID = DAG.getTargetConstant(Intrinsic::riscv_vle_mask, DL, XLenVT); 3527 SDValue Ops[] = {Load->getChain(), IntID, PassThru, 3528 Load->getBasePtr(), Mask, VL}; 3529 SDValue Result = 3530 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, 3531 Load->getMemoryVT(), Load->getMemOperand()); 3532 SDValue Chain = Result.getValue(1); 3533 3534 if (VT.isFixedLengthVector()) 3535 Result = convertFromScalableVector(VT, Result, DAG, Subtarget); 3536 3537 return DAG.getMergeValues({Result, Chain}, DL); 3538 } 3539 3540 SDValue RISCVTargetLowering::lowerMSTORE(SDValue Op, SelectionDAG &DAG) const { 3541 auto *Store = cast<MaskedStoreSDNode>(Op); 3542 3543 SDLoc DL(Op); 3544 SDValue Val = Store->getValue(); 3545 SDValue Mask = Store->getMask(); 3546 MVT VT = Val.getSimpleValueType(); 3547 MVT XLenVT = Subtarget.getXLenVT(); 3548 SDValue VL; 3549 3550 MVT ContainerVT = VT; 3551 if (VT.isFixedLengthVector()) { 3552 ContainerVT = getContainerForFixedLengthVector(VT); 3553 MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 3554 3555 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget); 3556 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 3557 VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT); 3558 } else 3559 VL = DAG.getRegister(RISCV::X0, XLenVT); 3560 3561 SDValue IntID = DAG.getTargetConstant(Intrinsic::riscv_vse_mask, DL, XLenVT); 3562 return DAG.getMemIntrinsicNode( 3563 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), 3564 {Store->getChain(), IntID, Val, Store->getBasePtr(), Mask, VL}, 3565 Store->getMemoryVT(), Store->getMemOperand()); 3566 } 3567 3568 SDValue 3569 RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op, 3570 SelectionDAG &DAG) const { 3571 MVT InVT = Op.getOperand(0).getSimpleValueType(); 3572 MVT ContainerVT = getContainerForFixedLengthVector(InVT); 3573 3574 MVT VT = Op.getSimpleValueType(); 3575 3576 SDValue Op1 = 3577 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget); 3578 SDValue Op2 = 3579 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget); 3580 3581 SDLoc DL(Op); 3582 SDValue VL = 3583 
DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
3584
3585   MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3586   SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
3587
3588   SDValue Cmp = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op1, Op2,
3589                             Op.getOperand(2), Mask, VL);
3590
3591   return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
3592 }
3593
3594 SDValue RISCVTargetLowering::lowerFixedLengthVectorLogicOpToRVV(
3595     SDValue Op, SelectionDAG &DAG, unsigned MaskOpc, unsigned VecOpc) const {
3596   MVT VT = Op.getSimpleValueType();
3597
3598   if (VT.getVectorElementType() == MVT::i1)
3599     return lowerToScalableOp(Op, DAG, MaskOpc, /*HasMask*/ false);
3600
3601   return lowerToScalableOp(Op, DAG, VecOpc, /*HasMask*/ true);
3602 }
3603
3604 // Lower vector ABS to smax(X, sub(0, X)).
3605 SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
3606   SDLoc DL(Op);
3607   MVT VT = Op.getSimpleValueType();
3608   SDValue X = Op.getOperand(0);
3609
3610   assert(VT.isFixedLengthVector() && "Unexpected type");
3611
3612   MVT ContainerVT = getContainerForFixedLengthVector(VT);
3613   X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
3614
3615   SDValue Mask, VL;
3616   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3617
3618   SDValue SplatZero =
3619       DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
3620                   DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
3621   SDValue NegX =
3622       DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X, Mask, VL);
3623   SDValue Max =
3624       DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX, Mask, VL);
3625
3626   return convertFromScalableVector(VT, Max, DAG, Subtarget);
3627 }
3628
3629 SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
3630     SDValue Op, SelectionDAG &DAG) const {
3631   SDLoc DL(Op);
3632   MVT VT = Op.getSimpleValueType();
3633   SDValue Mag = Op.getOperand(0);
3634   SDValue Sign = Op.getOperand(1);
3635   assert(Mag.getValueType() == Sign.getValueType() &&
3636          "Can only handle COPYSIGN with matching types.");
3637
3638   MVT ContainerVT = getContainerForFixedLengthVector(VT);
3639   Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
3640   Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
3641
3642   SDValue Mask, VL;
3643   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3644
3645   SDValue CopySign =
3646       DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag, Sign, Mask, VL);
3647
3648   return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
3649 }
3650
3651 SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
3652     SDValue Op, SelectionDAG &DAG) const {
3653   MVT VT = Op.getSimpleValueType();
3654   MVT ContainerVT = getContainerForFixedLengthVector(VT);
3655
3656   MVT I1ContainerVT =
3657       MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3658
3659   SDValue CC =
3660       convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
3661   SDValue Op1 =
3662       convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
3663   SDValue Op2 =
3664       convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
3665
3666   SDLoc DL(Op);
3667   SDValue Mask, VL;
3668   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3669
3670   SDValue Select =
3671       DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, Op1, Op2, VL);
3672
3673   return convertFromScalableVector(VT, Select, DAG, Subtarget);
3674 }
3675
3676 SDValue
RISCVTargetLowering::lowerToScalableOp(SDValue Op, SelectionDAG &DAG,
3677                                        unsigned NewOpc,
3678                                        bool HasMask) const {
3679   MVT VT = Op.getSimpleValueType();
3680   assert(useRVVForFixedLengthVectorVT(VT) &&
3681          "Only expected to lower fixed length vector operation!");
3682   MVT ContainerVT = getContainerForFixedLengthVector(VT);
3683
3684   // Create list of operands by converting existing ones to scalable types.
3685   SmallVector<SDValue, 6> Ops;
3686   for (const SDValue &V : Op->op_values()) {
3687     assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
3688
3689     // Pass through non-vector operands.
3690     if (!V.getValueType().isVector()) {
3691       Ops.push_back(V);
3692       continue;
3693     }
3694
3695     // "cast" fixed length vector to a scalable vector.
3696     assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
3697            "Only fixed length vectors are supported!");
3698     Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
3699   }
3700
3701   SDLoc DL(Op);
3702   SDValue Mask, VL;
3703   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3704   if (HasMask)
3705     Ops.push_back(Mask);
3706   Ops.push_back(VL);
3707
3708   SDValue ScalableRes = DAG.getNode(NewOpc, DL, ContainerVT, Ops);
3709   return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
3710 }
3711
3712 // Custom lower MGATHER to a legalized form for RVV. It will then be matched to
3713 // an RVV indexed load. The RVV indexed load instructions only support the
3714 // "unsigned unscaled" addressing mode; indices are implicitly zero-extended or
3715 // truncated to XLEN and are treated as byte offsets. Any signed or scaled
3716 // indexing is extended to the XLEN value type and scaled accordingly.
3717 SDValue RISCVTargetLowering::lowerMGATHER(SDValue Op, SelectionDAG &DAG) const {
3718   auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
3719   SDLoc DL(Op);
3720
3721   SDValue Index = MGN->getIndex();
3722   SDValue Mask = MGN->getMask();
3723   SDValue PassThru = MGN->getPassThru();
3724
3725   MVT VT = Op.getSimpleValueType();
3726   MVT IndexVT = Index.getSimpleValueType();
3727   MVT XLenVT = Subtarget.getXLenVT();
3728
3729   assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
3730          "Unexpected VTs!");
3731   assert(MGN->getBasePtr().getSimpleValueType() == XLenVT &&
3732          "Unexpected pointer type");
3733   // Targets have to explicitly opt in to extending vector loads.
3734   assert(MGN->getExtensionType() == ISD::NON_EXTLOAD &&
3735          "Unexpected extending MGATHER");
3736
3737   // If the mask is known to be all ones, optimize to an unmasked intrinsic;
3738   // the selection of the masked intrinsics doesn't do this for us.
3739   bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
3740
3741   SDValue VL;
3742   MVT ContainerVT = VT;
3743   if (VT.isFixedLengthVector()) {
3744     // We need to use the larger of the result and index type to determine the
3745     // scalable type to use so we don't increase LMUL for any operand/result.
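    // Illustrative example (a sketch, not from the original comments): for a
    // v4i32 gather indexed by v4i64, the index type is the larger, so its
    // container is chosen first and the result type is re-derived as an i32
    // vector with the container's element count, rather than widening the i32
    // result to match the i64 container's LMUL.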
3746     if (VT.bitsGE(IndexVT)) {
3747       ContainerVT = getContainerForFixedLengthVector(VT);
3748       IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
3749                                  ContainerVT.getVectorElementCount());
3750     } else {
3751       IndexVT = getContainerForFixedLengthVector(IndexVT);
3752       ContainerVT = MVT::getVectorVT(ContainerVT.getVectorElementType(),
3753                                      IndexVT.getVectorElementCount());
3754     }
3755
3756     Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
3757
3758     if (!IsUnmasked) {
3759       MVT MaskVT =
3760           MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3761       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
3762       PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
3763     }
3764
3765     VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
3766   } else
3767     VL = DAG.getRegister(RISCV::X0, XLenVT);
3768
3769   unsigned IntID =
3770       IsUnmasked ? Intrinsic::riscv_vloxei : Intrinsic::riscv_vloxei_mask;
3771   SmallVector<SDValue, 8> Ops{MGN->getChain(),
3772                               DAG.getTargetConstant(IntID, DL, XLenVT)};
3773   if (!IsUnmasked)
3774     Ops.push_back(PassThru);
3775   Ops.push_back(MGN->getBasePtr());
3776   Ops.push_back(Index);
3777   if (!IsUnmasked)
3778     Ops.push_back(Mask);
3779   Ops.push_back(VL);
3780
3781   SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
3782   SDValue Result =
3783       DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
3784                               MGN->getMemoryVT(), MGN->getMemOperand());
3785   SDValue Chain = Result.getValue(1);
3786
3787   if (VT.isFixedLengthVector())
3788     Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
3789
3790   return DAG.getMergeValues({Result, Chain}, DL);
3791 }
3792
3793 // Custom lower MSCATTER to a legalized form for RVV. It will then be matched to
3794 // an RVV indexed store. The RVV indexed store instructions only support the
3795 // "unsigned unscaled" addressing mode; indices are implicitly zero-extended or
3796 // truncated to XLEN and are treated as byte offsets. Any signed or scaled
3797 // indexing is extended to the XLEN value type and scaled accordingly.
3798 SDValue RISCVTargetLowering::lowerMSCATTER(SDValue Op,
3799                                            SelectionDAG &DAG) const {
3800   auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
3801   SDLoc DL(Op);
3802   SDValue Index = MSN->getIndex();
3803   SDValue Mask = MSN->getMask();
3804   SDValue Val = MSN->getValue();
3805
3806   MVT VT = Val.getSimpleValueType();
3807   MVT IndexVT = Index.getSimpleValueType();
3808   MVT XLenVT = Subtarget.getXLenVT();
3809
3810   assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
3811          "Unexpected VTs!");
3812   assert(MSN->getBasePtr().getSimpleValueType() == XLenVT &&
3813          "Unexpected pointer type");
3814   // Targets have to explicitly opt in to extending vector loads and
3815   // truncating vector stores.
3816   assert(!MSN->isTruncatingStore() && "Unexpected truncating MSCATTER");
3817
3818   // If the mask is known to be all ones, optimize to an unmasked intrinsic;
3819   // the selection of the masked intrinsics doesn't do this for us.
3820   bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
3821
3822   SDValue VL;
3823   if (VT.isFixedLengthVector()) {
3824     // We need to use the larger of the value and index type to determine the
3825     // scalable type to use so we don't increase LMUL for any operand/result.
3826     if (VT.bitsGE(IndexVT)) {
3827       VT = getContainerForFixedLengthVector(VT);
3828       IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
3829                                  VT.getVectorElementCount());
3830     } else {
3831       IndexVT = getContainerForFixedLengthVector(IndexVT);
3832       VT = MVT::getVectorVT(VT.getVectorElementType(),
3833                             IndexVT.getVectorElementCount());
3834     }
3835
3836     Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
3837     Val = convertToScalableVector(VT, Val, DAG, Subtarget);
3838
3839     if (!IsUnmasked) {
3840       MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
3841       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
3842     }
3843
3844     VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
3845   } else
3846     VL = DAG.getRegister(RISCV::X0, XLenVT);
3847
3848   unsigned IntID =
3849       IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
3850   SmallVector<SDValue, 8> Ops{MSN->getChain(),
3851                               DAG.getTargetConstant(IntID, DL, XLenVT)};
3852   Ops.push_back(Val);
3853   Ops.push_back(MSN->getBasePtr());
3854   Ops.push_back(Index);
3855   if (!IsUnmasked)
3856     Ops.push_back(Mask);
3857   Ops.push_back(VL);
3858
3859   return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, MSN->getVTList(), Ops,
3860                                  MSN->getMemoryVT(), MSN->getMemOperand());
3861 }
3862
3863 // Returns the opcode of the target-specific SDNode that implements the 32-bit
3864 // form of the given Opcode.
3865 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
3866   switch (Opcode) {
3867   default:
3868     llvm_unreachable("Unexpected opcode");
3869   case ISD::SHL:
3870     return RISCVISD::SLLW;
3871   case ISD::SRA:
3872     return RISCVISD::SRAW;
3873   case ISD::SRL:
3874     return RISCVISD::SRLW;
3875   case ISD::SDIV:
3876     return RISCVISD::DIVW;
3877   case ISD::UDIV:
3878     return RISCVISD::DIVUW;
3879   case ISD::UREM:
3880     return RISCVISD::REMUW;
3881   case ISD::ROTL:
3882     return RISCVISD::ROLW;
3883   case ISD::ROTR:
3884     return RISCVISD::RORW;
3885   case RISCVISD::GREVI:
3886     return RISCVISD::GREVIW;
3887   case RISCVISD::GORCI:
3888     return RISCVISD::GORCIW;
3889   }
3890 }
3891
3892 // Converts the given 32-bit operation to a target-specific SelectionDAG node.
3893 // Because i32 isn't a legal type for RV64, these operations would otherwise
3894 // be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
3895 // later on because the fact that the operation was originally of type i32 is
3896 // lost.
3897 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
3898                                    unsigned ExtOpc = ISD::ANY_EXTEND) {
3899   SDLoc DL(N);
3900   RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
3901   SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
3902   SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
3903   SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
3904   // ReplaceNodeResults requires we maintain the same type for the return value.
3905   return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
3906 }
3907
3908 // Converts the given 32-bit operation to an i64 operation with sign-extension
3909 // semantics, to reduce the number of sign-extension instructions.
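// For example (a sketch, not from the original comments): an i32 ADD on RV64
// becomes
//   (trunc (sext_inreg (add (anyext x), (anyext y)), i32))
// which selects to a single ADDW, whose result is already sign extended.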
3910 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) { 3911 SDLoc DL(N); 3912 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 3913 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 3914 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1); 3915 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp, 3916 DAG.getValueType(MVT::i32)); 3917 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes); 3918 } 3919 3920 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, 3921 SmallVectorImpl<SDValue> &Results, 3922 SelectionDAG &DAG) const { 3923 SDLoc DL(N); 3924 switch (N->getOpcode()) { 3925 default: 3926 llvm_unreachable("Don't know how to custom type legalize this operation!"); 3927 case ISD::STRICT_FP_TO_SINT: 3928 case ISD::STRICT_FP_TO_UINT: 3929 case ISD::FP_TO_SINT: 3930 case ISD::FP_TO_UINT: { 3931 bool IsStrict = N->isStrictFPOpcode(); 3932 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 3933 "Unexpected custom legalisation"); 3934 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0); 3935 // If the FP type needs to be softened, emit a library call using the 'si' 3936 // version. If we left it to default legalization we'd end up with 'di'. If 3937 // the FP type doesn't need to be softened just let generic type 3938 // legalization promote the result type. 3939 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) != 3940 TargetLowering::TypeSoftenFloat) 3941 return; 3942 RTLIB::Libcall LC; 3943 if (N->getOpcode() == ISD::FP_TO_SINT || 3944 N->getOpcode() == ISD::STRICT_FP_TO_SINT) 3945 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0)); 3946 else 3947 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0)); 3948 MakeLibCallOptions CallOptions; 3949 EVT OpVT = Op0.getValueType(); 3950 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true); 3951 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); 3952 SDValue Result; 3953 std::tie(Result, Chain) = 3954 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain); 3955 Results.push_back(Result); 3956 if (IsStrict) 3957 Results.push_back(Chain); 3958 break; 3959 } 3960 case ISD::READCYCLECOUNTER: { 3961 assert(!Subtarget.is64Bit() && 3962 "READCYCLECOUNTER only has custom type legalization on riscv32"); 3963 3964 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); 3965 SDValue RCW = 3966 DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0)); 3967 3968 Results.push_back( 3969 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1))); 3970 Results.push_back(RCW.getValue(2)); 3971 break; 3972 } 3973 case ISD::MUL: { 3974 unsigned Size = N->getSimpleValueType(0).getSizeInBits(); 3975 unsigned XLen = Subtarget.getXLen(); 3976 // This multiply needs to be expanded, try to use MULHSU+MUL if possible. 3977 if (Size > XLen) { 3978 assert(Size == (XLen * 2) && "Unexpected custom legalisation"); 3979 SDValue LHS = N->getOperand(0); 3980 SDValue RHS = N->getOperand(1); 3981 APInt HighMask = APInt::getHighBitsSet(Size, XLen); 3982 3983 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask); 3984 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask); 3985 // We need exactly one side to be unsigned. 
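    // Illustrative example (a sketch, not from the original comments): for an
    // i64 multiply on RV32 where the RHS has its upper 32 bits known zero and
    // the LHS is sign extended from i32, the product can be formed as
    //   Lo = mul(trunc(LHS), trunc(RHS)); Hi = mulhsu(trunc(LHS), trunc(RHS))
    // instead of the generic multi-multiply expansion.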
3986 if (LHSIsU == RHSIsU) 3987 return; 3988 3989 auto MakeMULPair = [&](SDValue S, SDValue U) { 3990 MVT XLenVT = Subtarget.getXLenVT(); 3991 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S); 3992 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U); 3993 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U); 3994 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U); 3995 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi); 3996 }; 3997 3998 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen; 3999 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen; 4000 4001 // The other operand should be signed, but still prefer MULH when 4002 // possible. 4003 if (RHSIsU && LHSIsS && !RHSIsS) 4004 Results.push_back(MakeMULPair(LHS, RHS)); 4005 else if (LHSIsU && RHSIsS && !LHSIsS) 4006 Results.push_back(MakeMULPair(RHS, LHS)); 4007 4008 return; 4009 } 4010 LLVM_FALLTHROUGH; 4011 } 4012 case ISD::ADD: 4013 case ISD::SUB: 4014 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 4015 "Unexpected custom legalisation"); 4016 if (N->getOperand(1).getOpcode() == ISD::Constant) 4017 return; 4018 Results.push_back(customLegalizeToWOpWithSExt(N, DAG)); 4019 break; 4020 case ISD::SHL: 4021 case ISD::SRA: 4022 case ISD::SRL: 4023 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 4024 "Unexpected custom legalisation"); 4025 if (N->getOperand(1).getOpcode() == ISD::Constant) 4026 return; 4027 Results.push_back(customLegalizeToWOp(N, DAG)); 4028 break; 4029 case ISD::ROTL: 4030 case ISD::ROTR: 4031 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 4032 "Unexpected custom legalisation"); 4033 Results.push_back(customLegalizeToWOp(N, DAG)); 4034 break; 4035 case ISD::CTTZ: 4036 case ISD::CTTZ_ZERO_UNDEF: 4037 case ISD::CTLZ: 4038 case ISD::CTLZ_ZERO_UNDEF: { 4039 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 4040 "Unexpected custom legalisation"); 4041 4042 SDValue NewOp0 = 4043 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 4044 bool IsCTZ = 4045 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF; 4046 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW; 4047 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0); 4048 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 4049 return; 4050 } 4051 case ISD::SDIV: 4052 case ISD::UDIV: 4053 case ISD::UREM: { 4054 MVT VT = N->getSimpleValueType(0); 4055 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) && 4056 Subtarget.is64Bit() && Subtarget.hasStdExtM() && 4057 "Unexpected custom legalisation"); 4058 if (N->getOperand(0).getOpcode() == ISD::Constant || 4059 N->getOperand(1).getOpcode() == ISD::Constant) 4060 return; 4061 4062 // If the input is i32, use ANY_EXTEND since the W instructions don't read 4063 // the upper 32 bits. For other types we need to sign or zero extend 4064 // based on the opcode. 4065 unsigned ExtOpc = ISD::ANY_EXTEND; 4066 if (VT != MVT::i32) 4067 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND 4068 : ISD::ZERO_EXTEND; 4069 4070 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc)); 4071 break; 4072 } 4073 case ISD::UADDO: 4074 case ISD::USUBO: { 4075 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 4076 "Unexpected custom legalisation"); 4077 bool IsAdd = N->getOpcode() == ISD::UADDO; 4078 // Create an ADDW or SUBW. 
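    // Sketch of the overflow check below (UADDO case, illustrative): with
    //   Res = sext_inreg(add(anyext a, anyext b), i32)
    // a wrap of the low 32 bits makes Res compare unsigned-less-than sext(a);
    // e.g. a = b = 0x80000000 yields Res = 0, which is below a's extension.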
4079 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 4080 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 4081 SDValue Res = 4082 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS); 4083 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res, 4084 DAG.getValueType(MVT::i32)); 4085 4086 // Sign extend the LHS and perform an unsigned compare with the ADDW result. 4087 // Since the inputs are sign extended from i32, this is equivalent to 4088 // comparing the lower 32 bits. 4089 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0)); 4090 SDValue Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS, 4091 IsAdd ? ISD::SETULT : ISD::SETUGT); 4092 4093 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 4094 Results.push_back(Overflow); 4095 return; 4096 } 4097 case ISD::UADDSAT: 4098 case ISD::USUBSAT: { 4099 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 4100 "Unexpected custom legalisation"); 4101 if (Subtarget.hasStdExtZbb()) { 4102 // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using 4103 // sign extend allows overflow of the lower 32 bits to be detected on 4104 // the promoted size. 4105 SDValue LHS = 4106 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0)); 4107 SDValue RHS = 4108 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1)); 4109 SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS); 4110 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 4111 return; 4112 } 4113 4114 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom 4115 // promotion for UADDO/USUBO. 4116 Results.push_back(expandAddSubSat(N, DAG)); 4117 return; 4118 } 4119 case ISD::BITCAST: { 4120 EVT VT = N->getValueType(0); 4121 SDValue Op0 = N->getOperand(0); 4122 EVT Op0VT = Op0.getValueType(); 4123 MVT XLenVT = Subtarget.getXLenVT(); 4124 if (VT == MVT::i16 && Op0VT == MVT::f16 && Subtarget.hasStdExtZfh()) { 4125 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0); 4126 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv)); 4127 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() && 4128 Subtarget.hasStdExtF()) { 4129 SDValue FPConv = 4130 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0); 4131 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv)); 4132 } else if (!VT.isVector() && Op0VT.isFixedLengthVector()) { 4133 // Custom-legalize bitcasts from fixed-length vector types to illegal 4134 // scalar types in order to improve codegen. Bitcast the vector to a 4135 // one-element vector type whose element type is the same as the result 4136 // type, and extract the first element. 4137 LLVMContext &Context = *DAG.getContext(); 4138 SDValue BVec = DAG.getBitcast(EVT::getVectorVT(Context, VT, 1), Op0); 4139 Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec, 4140 DAG.getConstant(0, DL, XLenVT))); 4141 } 4142 break; 4143 } 4144 case RISCVISD::GREVI: 4145 case RISCVISD::GORCI: { 4146 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 4147 "Unexpected custom legalisation"); 4148 // This is similar to customLegalizeToWOp, except that we pass the second 4149 // operand (a TargetConstant) straight through: it is already of type 4150 // XLenVT. 
4151 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode()); 4152 SDValue NewOp0 = 4153 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 4154 SDValue NewRes = 4155 DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, N->getOperand(1)); 4156 // ReplaceNodeResults requires we maintain the same type for the return 4157 // value. 4158 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes)); 4159 break; 4160 } 4161 case RISCVISD::SHFLI: { 4162 // There is no SHFLIW instruction, but we can just promote the operation. 4163 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 4164 "Unexpected custom legalisation"); 4165 SDValue NewOp0 = 4166 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 4167 SDValue NewRes = 4168 DAG.getNode(RISCVISD::SHFLI, DL, MVT::i64, NewOp0, N->getOperand(1)); 4169 // ReplaceNodeResults requires we maintain the same type for the return 4170 // value. 4171 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes)); 4172 break; 4173 } 4174 case ISD::BSWAP: 4175 case ISD::BITREVERSE: { 4176 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 4177 Subtarget.hasStdExtZbp() && "Unexpected custom legalisation"); 4178 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, 4179 N->getOperand(0)); 4180 unsigned Imm = N->getOpcode() == ISD::BITREVERSE ? 31 : 24; 4181 SDValue GREVIW = DAG.getNode(RISCVISD::GREVIW, DL, MVT::i64, NewOp0, 4182 DAG.getTargetConstant(Imm, DL, 4183 Subtarget.getXLenVT())); 4184 // ReplaceNodeResults requires we maintain the same type for the return 4185 // value. 4186 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, GREVIW)); 4187 break; 4188 } 4189 case ISD::FSHL: 4190 case ISD::FSHR: { 4191 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 4192 Subtarget.hasStdExtZbt() && "Unexpected custom legalisation"); 4193 SDValue NewOp0 = 4194 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 4195 SDValue NewOp1 = 4196 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 4197 SDValue NewOp2 = 4198 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); 4199 // FSLW/FSRW take a 6 bit shift amount but i32 FSHL/FSHR only use 5 bits. 4200 // Mask the shift amount to 5 bits. 4201 NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2, 4202 DAG.getConstant(0x1f, DL, MVT::i64)); 4203 unsigned Opc = 4204 N->getOpcode() == ISD::FSHL ? RISCVISD::FSLW : RISCVISD::FSRW; 4205 SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewOp2); 4206 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp)); 4207 break; 4208 } 4209 case ISD::EXTRACT_VECTOR_ELT: { 4210 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element 4211 // type is illegal (currently only vXi64 RV32). 4212 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are 4213 // transferred to the destination register. We issue two of these from the 4214 // upper- and lower- halves of the SEW-bit vector element, slid down to the 4215 // first element. 4216 SDValue Vec = N->getOperand(0); 4217 SDValue Idx = N->getOperand(1); 4218 4219 // The vector type hasn't been legalized yet so we can't issue target 4220 // specific nodes if it needs legalization. 4221 // FIXME: We would manually legalize if it's important. 
4222 if (!isTypeLegal(Vec.getValueType())) 4223 return; 4224 4225 MVT VecVT = Vec.getSimpleValueType(); 4226 4227 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 && 4228 VecVT.getVectorElementType() == MVT::i64 && 4229 "Unexpected EXTRACT_VECTOR_ELT legalization"); 4230 4231 // If this is a fixed vector, we need to convert it to a scalable vector. 4232 MVT ContainerVT = VecVT; 4233 if (VecVT.isFixedLengthVector()) { 4234 ContainerVT = getContainerForFixedLengthVector(VecVT); 4235 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 4236 } 4237 4238 MVT XLenVT = Subtarget.getXLenVT(); 4239 4240 // Use a VL of 1 to avoid processing more elements than we need. 4241 MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount()); 4242 SDValue VL = DAG.getConstant(1, DL, XLenVT); 4243 SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); 4244 4245 // Unless the index is known to be 0, we must slide the vector down to get 4246 // the desired element into index 0. 4247 if (!isNullConstant(Idx)) { 4248 Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT, 4249 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL); 4250 } 4251 4252 // Extract the lower XLEN bits of the correct vector element. 4253 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec); 4254 4255 // To extract the upper XLEN bits of the vector element, shift the first 4256 // element right by 32 bits and re-extract the lower XLEN bits. 4257 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 4258 DAG.getConstant(32, DL, XLenVT), VL); 4259 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, 4260 ThirtyTwoV, Mask, VL); 4261 4262 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32); 4263 4264 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi)); 4265 break; 4266 } 4267 case ISD::INTRINSIC_WO_CHAIN: { 4268 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 4269 switch (IntNo) { 4270 default: 4271 llvm_unreachable( 4272 "Don't know how to custom type legalize this intrinsic!"); 4273 case Intrinsic::riscv_orc_b: { 4274 // Lower to the GORCI encoding for orc.b with the operand extended. 4275 SDValue NewOp = 4276 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 4277 // If Zbp is enabled, use GORCIW which will sign extend the result. 4278 unsigned Opc = 4279 Subtarget.hasStdExtZbp() ? RISCVISD::GORCIW : RISCVISD::GORCI; 4280 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp, 4281 DAG.getTargetConstant(7, DL, MVT::i64)); 4282 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 4283 return; 4284 } 4285 case Intrinsic::riscv_vmv_x_s: { 4286 EVT VT = N->getValueType(0); 4287 MVT XLenVT = Subtarget.getXLenVT(); 4288 if (VT.bitsLT(XLenVT)) { 4289 // Simple case just extract using vmv.x.s and truncate. 4290 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL, 4291 Subtarget.getXLenVT(), N->getOperand(1)); 4292 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract)); 4293 return; 4294 } 4295 4296 assert(VT == MVT::i64 && !Subtarget.is64Bit() && 4297 "Unexpected custom legalization"); 4298 4299 // We need to do the move in two steps. 4300 SDValue Vec = N->getOperand(1); 4301 MVT VecVT = Vec.getSimpleValueType(); 4302 4303 // First extract the lower XLEN bits of the element. 
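    // (Illustrative overall shape, a sketch rather than guaranteed output:
    // for nxv1i64 on RV32 this is roughly
    //   vmv.x.s  t0, v8        ; low 32 bits
    //   vsrl.vx  v9, v8, a0    ; a0 = 32
    //   vmv.x.s  t1, v9        ; high 32 bits
    // with the i64 result rebuilt as the pair (t0, t1).)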
4304       SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
4305
4306       // To extract the upper XLEN bits of the vector element, shift the first
4307       // element right by 32 bits and re-extract the lower XLEN bits.
4308       SDValue VL = DAG.getConstant(1, DL, XLenVT);
4309       MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount());
4310       SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
4311       SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT,
4312                                        DAG.getConstant(32, DL, XLenVT), VL);
4313       SDValue LShr32 =
4314           DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV, Mask, VL);
4315       SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
4316
4317       Results.push_back(
4318           DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
4319       break;
4320     }
4321     }
4322     break;
4323   }
4324   case ISD::VECREDUCE_ADD:
4325   case ISD::VECREDUCE_AND:
4326   case ISD::VECREDUCE_OR:
4327   case ISD::VECREDUCE_XOR:
4328   case ISD::VECREDUCE_SMAX:
4329   case ISD::VECREDUCE_UMAX:
4330   case ISD::VECREDUCE_SMIN:
4331   case ISD::VECREDUCE_UMIN:
4332     if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
4333       Results.push_back(V);
4334     break;
4335   }
4336 }
4337
4338 // A structure to hold one of the bit-manipulation patterns below. Together, a
4339 // SHL and non-SHL pattern may form a bit-manipulation pair on a single source:
4340 //   (or (and (shl x, 1), 0xAAAAAAAA),
4341 //       (and (srl x, 1), 0x55555555))
4342 struct RISCVBitmanipPat {
4343   SDValue Op;
4344   unsigned ShAmt;
4345   bool IsSHL;
4346
4347   bool formsPairWith(const RISCVBitmanipPat &Other) const {
4348     return Op == Other.Op && ShAmt == Other.ShAmt && IsSHL != Other.IsSHL;
4349   }
4350 };
4351
4352 // Matches patterns of the form
4353 //   (and (shl x, C2), (C1 << C2))
4354 //   (and (srl x, C2), C1)
4355 //   (shl (and x, C1), C2)
4356 //   (srl (and x, (C1 << C2)), C2)
4357 // where C2 is a power of 2 and C1 has at least that many leading zeroes.
4358 // The expected masks for each shift amount are specified in BitmanipMasks,
4359 // where BitmanipMasks[log2(C2)] specifies the expected C1 value.
4360 // The maximum allowed shift amount is XLen/2 if BitmanipMasks contains 6
4361 // entries, or XLen/4 if it contains 5, assuming the maximum possible XLen
4362 // is 64.
4363 static Optional<RISCVBitmanipPat>
4364 matchRISCVBitmanipPat(SDValue Op, ArrayRef<uint64_t> BitmanipMasks) {
4365   assert((BitmanipMasks.size() == 5 || BitmanipMasks.size() == 6) &&
4366          "Unexpected number of masks");
4367   Optional<uint64_t> Mask;
4368   // Optionally consume a mask around the shift operation.
4369   if (Op.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op.getOperand(1))) {
4370     Mask = Op.getConstantOperandVal(1);
4371     Op = Op.getOperand(0);
4372   }
4373   if (Op.getOpcode() != ISD::SHL && Op.getOpcode() != ISD::SRL)
4374     return None;
4375   bool IsSHL = Op.getOpcode() == ISD::SHL;
4376
4377   if (!isa<ConstantSDNode>(Op.getOperand(1)))
4378     return None;
4379   uint64_t ShAmt = Op.getConstantOperandVal(1);
4380
4381   unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
4382   if (ShAmt >= Width || !isPowerOf2_64(ShAmt))
4383     return None;
4384   // If we don't have enough masks for 64-bit, then we must be trying to
4385   // match SHFL, so we're only allowed to shift 1/4 of the width.
4386   if (BitmanipMasks.size() == 5 && ShAmt >= (Width / 2))
4387     return None;
4388
4389   SDValue Src = Op.getOperand(0);
4390
4391   // The expected mask is shifted left when the AND is found around SHL
4392   // patterns.
4393   //   ((x >> 1) & 0x55555555)
4394   //   ((x << 1) & 0xAAAAAAAA)
4395   bool SHLExpMask = IsSHL;
4396
4397   if (!Mask) {
4398     // Sometimes LLVM keeps the mask as an operand of the shift, typically when
4399     // the mask is all ones: consume that now.
4400     if (Src.getOpcode() == ISD::AND && isa<ConstantSDNode>(Src.getOperand(1))) {
4401       Mask = Src.getConstantOperandVal(1);
4402       Src = Src.getOperand(0);
4403       // The expected mask is now in fact shifted left for SRL, so reverse the
4404       // decision.
4405       //   ((x & 0xAAAAAAAA) >> 1)
4406       //   ((x & 0x55555555) << 1)
4407       SHLExpMask = !SHLExpMask;
4408     } else {
4409       // Use a default shifted mask of all-ones if there's no AND, truncated
4410       // down to the expected width. This simplifies the logic later on.
4411       Mask = maskTrailingOnes<uint64_t>(Width);
4412       *Mask &= (IsSHL ? *Mask << ShAmt : *Mask >> ShAmt);
4413     }
4414   }
4415
4416   unsigned MaskIdx = Log2_32(ShAmt);
4417   uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);
4418
4419   if (SHLExpMask)
4420     ExpMask <<= ShAmt;
4421
4422   if (Mask != ExpMask)
4423     return None;
4424
4425   return RISCVBitmanipPat{Src, (unsigned)ShAmt, IsSHL};
4426 }
4427
4428 // Matches any of the following bit-manipulation patterns:
4429 //   (and (shl x, 1), (0x55555555 << 1))
4430 //   (and (srl x, 1), 0x55555555)
4431 //   (shl (and x, 0x55555555), 1)
4432 //   (srl (and x, (0x55555555 << 1)), 1)
4433 // where the shift amount and mask may vary thus:
4434 //   [1]  = 0x55555555 / 0xAAAAAAAA
4435 //   [2]  = 0x33333333 / 0xCCCCCCCC
4436 //   [4]  = 0x0F0F0F0F / 0xF0F0F0F0
4437 //   [8]  = 0x00FF00FF / 0xFF00FF00
4438 //   [16] = 0x0000FFFF / 0xFFFF0000
4439 //   [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64)
4440 static Optional<RISCVBitmanipPat> matchGREVIPat(SDValue Op) {
4441   // These are the unshifted masks which we use to match bit-manipulation
4442   // patterns. They may be shifted left in certain circumstances.
4443   static const uint64_t BitmanipMasks[] = {
4444       0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
4445       0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
4446
4447   return matchRISCVBitmanipPat(Op, BitmanipMasks);
4448 }
4449
4450 // Match the following pattern as a GREVI(W) operation
4451 //   (or (BITMANIP_SHL x), (BITMANIP_SRL x))
4452 static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG,
4453                                const RISCVSubtarget &Subtarget) {
4454   assert(Subtarget.hasStdExtZbp() && "Expected Zbp extension");
4455   EVT VT = Op.getValueType();
4456
4457   if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
4458     auto LHS = matchGREVIPat(Op.getOperand(0));
4459     auto RHS = matchGREVIPat(Op.getOperand(1));
4460     if (LHS && RHS && LHS->formsPairWith(*RHS)) {
4461       SDLoc DL(Op);
4462       return DAG.getNode(
4463           RISCVISD::GREVI, DL, VT, LHS->Op,
4464           DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT()));
4465     }
4466   }
4467   return SDValue();
4468 }
4469
4470 // Matches any of the following patterns as a GORCI(W) operation
4471 // 1. (or (GREVI x, shamt), x) if shamt is a power of 2
4472 // 2. (or x, (GREVI x, shamt)) if shamt is a power of 2
4473 // 3. (or (or (BITMANIP_SHL x), x), (BITMANIP_SRL x))
4474 // Note that for the variant of 3.,
4475 //   (or (or (BITMANIP_SHL x), (BITMANIP_SRL x)), x)
4476 // the inner pattern will first be matched as GREVI and then the outer
4477 // pattern will be matched to GORC via the first rule above.
4478 // 4. (or (rotl/rotr x, bitwidth/2), x)
4479 static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG,
4480                                const RISCVSubtarget &Subtarget) {
4481   assert(Subtarget.hasStdExtZbp() && "Expected Zbp extension");
4482   EVT VT = Op.getValueType();
4483
4484   if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
4485     SDLoc DL(Op);
4486     SDValue Op0 = Op.getOperand(0);
4487     SDValue Op1 = Op.getOperand(1);
4488
4489     auto MatchOROfReverse = [&](SDValue Reverse, SDValue X) {
4490       if (Reverse.getOpcode() == RISCVISD::GREVI && Reverse.getOperand(0) == X &&
4491           isPowerOf2_32(Reverse.getConstantOperandVal(1)))
4492         return DAG.getNode(RISCVISD::GORCI, DL, VT, X, Reverse.getOperand(1));
4493       // We can also form GORCI from ROTL/ROTR by half the bitwidth.
4494       if ((Reverse.getOpcode() == ISD::ROTL ||
4495            Reverse.getOpcode() == ISD::ROTR) &&
4496           Reverse.getOperand(0) == X &&
4497           isa<ConstantSDNode>(Reverse.getOperand(1))) {
4498         uint64_t RotAmt = Reverse.getConstantOperandVal(1);
4499         if (RotAmt == (VT.getSizeInBits() / 2))
4500           return DAG.getNode(
4501               RISCVISD::GORCI, DL, VT, X,
4502               DAG.getTargetConstant(RotAmt, DL, Subtarget.getXLenVT()));
4503       }
4504       return SDValue();
4505     };
4506
4507     // Check for either commutable permutation of (or (GREVI x, shamt), x)
4508     if (SDValue V = MatchOROfReverse(Op0, Op1))
4509       return V;
4510     if (SDValue V = MatchOROfReverse(Op1, Op0))
4511       return V;
4512
4513     // OR is commutable so canonicalize its OR operand to the left
4514     if (Op0.getOpcode() != ISD::OR && Op1.getOpcode() == ISD::OR)
4515       std::swap(Op0, Op1);
4516     if (Op0.getOpcode() != ISD::OR)
4517       return SDValue();
4518     SDValue OrOp0 = Op0.getOperand(0);
4519     SDValue OrOp1 = Op0.getOperand(1);
4520     auto LHS = matchGREVIPat(OrOp0);
4521     // OR is commutable so swap the operands and try again: x might have been
4522     // on the left
4523     if (!LHS) {
4524       std::swap(OrOp0, OrOp1);
4525       LHS = matchGREVIPat(OrOp0);
4526     }
4527     auto RHS = matchGREVIPat(Op1);
4528     if (LHS && RHS && LHS->formsPairWith(*RHS) && LHS->Op == OrOp1) {
4529       return DAG.getNode(
4530           RISCVISD::GORCI, DL, VT, LHS->Op,
4531           DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT()));
4532     }
4533   }
4534   return SDValue();
4535 }
4536
4537 // Matches any of the following bit-manipulation patterns:
4538 //   (and (shl x, 1), (0x22222222 << 1))
4539 //   (and (srl x, 1), 0x22222222)
4540 //   (shl (and x, 0x22222222), 1)
4541 //   (srl (and x, (0x22222222 << 1)), 1)
4542 // where the shift amount and mask may vary thus:
4543 //   [1]  = 0x22222222 / 0x44444444
4544 //   [2]  = 0x0C0C0C0C / 0x30303030
4545 //   [4]  = 0x00F000F0 / 0x0F000F00
4546 //   [8]  = 0x0000FF00 / 0x00FF0000
4547 //   [16] = 0x00000000FFFF0000 / 0x0000FFFF00000000 (for RV64)
4548 static Optional<RISCVBitmanipPat> matchSHFLPat(SDValue Op) {
4549   // These are the unshifted masks which we use to match bit-manipulation
4550   // patterns. They may be shifted left in certain circumstances.
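  // For example, with ShAmt == 4 the expected unshifted mask below is
  // 0x00F000F000F000F0; when the AND wraps an SHL, the expected mask is its
  // shifted form 0x0F000F000F000F00 instead.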
4551   static const uint64_t BitmanipMasks[] = {
4552       0x2222222222222222ULL, 0x0C0C0C0C0C0C0C0CULL, 0x00F000F000F000F0ULL,
4553       0x0000FF000000FF00ULL, 0x00000000FFFF0000ULL};
4554
4555   return matchRISCVBitmanipPat(Op, BitmanipMasks);
4556 }
4557
4558 // Match (or (or (SHFL_SHL x), (SHFL_SHR x)), (SHFL_AND x))
4559 static SDValue combineORToSHFL(SDValue Op, SelectionDAG &DAG,
4560                                const RISCVSubtarget &Subtarget) {
4561   assert(Subtarget.hasStdExtZbp() && "Expected Zbp extension");
4562   EVT VT = Op.getValueType();
4563
4564   if (VT != MVT::i32 && VT != Subtarget.getXLenVT())
4565     return SDValue();
4566
4567   SDValue Op0 = Op.getOperand(0);
4568   SDValue Op1 = Op.getOperand(1);
4569
4570   // OR is commutable so canonicalize the second OR to the LHS.
4571   if (Op0.getOpcode() != ISD::OR)
4572     std::swap(Op0, Op1);
4573   if (Op0.getOpcode() != ISD::OR)
4574     return SDValue();
4575
4576   // We found an inner OR, so our operands are the operands of the inner OR
4577   // and the other operand of the outer OR.
4578   SDValue A = Op0.getOperand(0);
4579   SDValue B = Op0.getOperand(1);
4580   SDValue C = Op1;
4581
4582   auto Match1 = matchSHFLPat(A);
4583   auto Match2 = matchSHFLPat(B);
4584
4585   // If neither matched, we failed.
4586   if (!Match1 && !Match2)
4587     return SDValue();
4588
4589   // We had at least one match. If one failed, try the remaining C operand.
4590   if (!Match1) {
4591     std::swap(A, C);
4592     Match1 = matchSHFLPat(A);
4593     if (!Match1)
4594       return SDValue();
4595   } else if (!Match2) {
4596     std::swap(B, C);
4597     Match2 = matchSHFLPat(B);
4598     if (!Match2)
4599       return SDValue();
4600   }
4601   assert(Match1 && Match2);
4602
4603   // Make sure our matches pair up.
4604   if (!Match1->formsPairWith(*Match2))
4605     return SDValue();
4606
4607   // All that remains is to make sure C is an AND with the same input that
4608   // masks out the bits that are being shuffled.
4609   if (C.getOpcode() != ISD::AND || !isa<ConstantSDNode>(C.getOperand(1)) ||
4610       C.getOperand(0) != Match1->Op)
4611     return SDValue();
4612
4613   uint64_t Mask = C.getConstantOperandVal(1);
4614
4615   static const uint64_t BitmanipMasks[] = {
4616       0x9999999999999999ULL, 0xC3C3C3C3C3C3C3C3ULL, 0xF00FF00FF00FF00FULL,
4617       0xFF0000FFFF0000FFULL, 0xFFFF00000000FFFFULL,
4618   };
4619
4620   unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
4621   unsigned MaskIdx = Log2_32(Match1->ShAmt);
4622   uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);
4623
4624   if (Mask != ExpMask)
4625     return SDValue();
4626
4627   SDLoc DL(Op);
4628   return DAG.getNode(
4629       RISCVISD::SHFLI, DL, VT, Match1->Op,
4630       DAG.getTargetConstant(Match1->ShAmt, DL, Subtarget.getXLenVT()));
4631 }
4632
4633 // Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is
4634 // non-zero, and to x when it is zero. Any repeated GREVI stage undoes itself.
4635 // Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). Repeated stages do
4636 // not undo themselves, but they are redundant.
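// For example, (GREVI (GREVI x, 1), 2) folds to (GREVI x, 3), while
// (GREVI (GREVI x, 3), 3) folds all the way back to x; likewise
// (GORCI (GORCI x, 1), 2) folds to (GORCI x, 3).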
4637 static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) { 4638 unsigned ShAmt1 = N->getConstantOperandVal(1); 4639 SDValue Src = N->getOperand(0); 4640 4641 if (Src.getOpcode() != N->getOpcode()) 4642 return SDValue(); 4643 4644 unsigned ShAmt2 = Src.getConstantOperandVal(1); 4645 Src = Src.getOperand(0); 4646 4647 unsigned CombinedShAmt; 4648 if (N->getOpcode() == RISCVISD::GORCI || N->getOpcode() == RISCVISD::GORCIW) 4649 CombinedShAmt = ShAmt1 | ShAmt2; 4650 else 4651 CombinedShAmt = ShAmt1 ^ ShAmt2; 4652 4653 if (CombinedShAmt == 0) 4654 return Src; 4655 4656 SDLoc DL(N); 4657 return DAG.getNode(N->getOpcode(), DL, N->getValueType(0), Src, 4658 DAG.getTargetConstant(CombinedShAmt, DL, 4659 N->getOperand(1).getValueType())); 4660 } 4661 4662 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, 4663 DAGCombinerInfo &DCI) const { 4664 SelectionDAG &DAG = DCI.DAG; 4665 4666 switch (N->getOpcode()) { 4667 default: 4668 break; 4669 case RISCVISD::SplitF64: { 4670 SDValue Op0 = N->getOperand(0); 4671 // If the input to SplitF64 is just BuildPairF64 then the operation is 4672 // redundant. Instead, use BuildPairF64's operands directly. 4673 if (Op0->getOpcode() == RISCVISD::BuildPairF64) 4674 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1)); 4675 4676 SDLoc DL(N); 4677 4678 // It's cheaper to materialise two 32-bit integers than to load a double 4679 // from the constant pool and transfer it to integer registers through the 4680 // stack. 4681 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) { 4682 APInt V = C->getValueAPF().bitcastToAPInt(); 4683 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32); 4684 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32); 4685 return DCI.CombineTo(N, Lo, Hi); 4686 } 4687 4688 // This is a target-specific version of a DAGCombine performed in 4689 // DAGCombiner::visitBITCAST. It performs the equivalent of: 4690 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 4691 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 4692 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || 4693 !Op0.getNode()->hasOneUse()) 4694 break; 4695 SDValue NewSplitF64 = 4696 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), 4697 Op0.getOperand(0)); 4698 SDValue Lo = NewSplitF64.getValue(0); 4699 SDValue Hi = NewSplitF64.getValue(1); 4700 APInt SignBit = APInt::getSignMask(32); 4701 if (Op0.getOpcode() == ISD::FNEG) { 4702 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi, 4703 DAG.getConstant(SignBit, DL, MVT::i32)); 4704 return DCI.CombineTo(N, Lo, NewHi); 4705 } 4706 assert(Op0.getOpcode() == ISD::FABS); 4707 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi, 4708 DAG.getConstant(~SignBit, DL, MVT::i32)); 4709 return DCI.CombineTo(N, Lo, NewHi); 4710 } 4711 case RISCVISD::SLLW: 4712 case RISCVISD::SRAW: 4713 case RISCVISD::SRLW: 4714 case RISCVISD::ROLW: 4715 case RISCVISD::RORW: { 4716 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read. 
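    // For example, an (and y, 31) feeding the shift-amount operand of SLLW is
    // redundant and can be stripped by the SimplifyDemandedBits calls below.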
4717     SDValue LHS = N->getOperand(0);
4718     SDValue RHS = N->getOperand(1);
4719     APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
4720     APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
4721     if (SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI) ||
4722         SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)) {
4723       if (N->getOpcode() != ISD::DELETED_NODE)
4724         DCI.AddToWorklist(N);
4725       return SDValue(N, 0);
4726     }
4727     break;
4728   }
4729   case RISCVISD::CLZW:
4730   case RISCVISD::CTZW: {
4731     // Only the lower 32 bits of the first operand are read.
4732     SDValue Op0 = N->getOperand(0);
4733     APInt Mask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
4734     if (SimplifyDemandedBits(Op0, Mask, DCI)) {
4735       if (N->getOpcode() != ISD::DELETED_NODE)
4736         DCI.AddToWorklist(N);
4737       return SDValue(N, 0);
4738     }
4739     break;
4740   }
4741   case RISCVISD::FSL:
4742   case RISCVISD::FSR: {
4743     // Only the lower log2(Bitwidth)+1 bits of the shift amount are read.
4744     SDValue ShAmt = N->getOperand(2);
4745     unsigned BitWidth = ShAmt.getValueSizeInBits();
4746     assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
4747     APInt ShAmtMask(BitWidth, (BitWidth * 2) - 1);
4748     if (SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
4749       if (N->getOpcode() != ISD::DELETED_NODE)
4750         DCI.AddToWorklist(N);
4751       return SDValue(N, 0);
4752     }
4753     break;
4754   }
4755   case RISCVISD::FSLW:
4756   case RISCVISD::FSRW: {
4757     // Only the lower 32 bits of Values and lower 6 bits of shift amount are
4758     // read.
4759     SDValue Op0 = N->getOperand(0);
4760     SDValue Op1 = N->getOperand(1);
4761     SDValue ShAmt = N->getOperand(2);
4762     APInt OpMask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
4763     APInt ShAmtMask = APInt::getLowBitsSet(ShAmt.getValueSizeInBits(), 6);
4764     if (SimplifyDemandedBits(Op0, OpMask, DCI) ||
4765         SimplifyDemandedBits(Op1, OpMask, DCI) ||
4766         SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
4767       if (N->getOpcode() != ISD::DELETED_NODE)
4768         DCI.AddToWorklist(N);
4769       return SDValue(N, 0);
4770     }
4771     break;
4772   }
4773   case RISCVISD::GREVIW:
4774   case RISCVISD::GORCIW: {
4775     // Only the lower 32 bits of the first operand are read.
4776     SDValue Op0 = N->getOperand(0);
4777     APInt Mask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
4778     if (SimplifyDemandedBits(Op0, Mask, DCI)) {
4779       if (N->getOpcode() != ISD::DELETED_NODE)
4780         DCI.AddToWorklist(N);
4781       return SDValue(N, 0);
4782     }
4783
4784     return combineGREVI_GORCI(N, DCI.DAG);
4785   }
4786   case RISCVISD::FMV_X_ANYEXTW_RV64: {
4787     SDLoc DL(N);
4788     SDValue Op0 = N->getOperand(0);
4789     // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
4790     // conversion is unnecessary and can be replaced with an ANY_EXTEND
4791     // of the FMV_W_X_RV64 operand.
4792     if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) {
4793       assert(Op0.getOperand(0).getValueType() == MVT::i64 &&
4794              "Unexpected value type!");
4795       return Op0.getOperand(0);
4796     }
4797
4798     // This is a target-specific version of a DAGCombine performed in
4799     // DAGCombiner::visitBITCAST. It performs the equivalent of:
4800     //   fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
4801     //   fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
4802     if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
4803         !Op0.getNode()->hasOneUse())
4804       break;
4805     SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64,
4806                                  Op0.getOperand(0));
4807     APInt SignBit = APInt::getSignMask(32).sext(64);
4808     if (Op0.getOpcode() == ISD::FNEG)
4809       return DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV,
4810                          DAG.getConstant(SignBit, DL, MVT::i64));
4811
4812     assert(Op0.getOpcode() == ISD::FABS);
4813     return DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV,
4814                        DAG.getConstant(~SignBit, DL, MVT::i64));
4815   }
4816   case RISCVISD::GREVI:
4817   case RISCVISD::GORCI:
4818     return combineGREVI_GORCI(N, DCI.DAG);
4819   case ISD::OR:
4820     if (auto GREV = combineORToGREV(SDValue(N, 0), DCI.DAG, Subtarget))
4821       return GREV;
4822     if (auto GORC = combineORToGORC(SDValue(N, 0), DCI.DAG, Subtarget))
4823       return GORC;
4824     if (auto SHFL = combineORToSHFL(SDValue(N, 0), DCI.DAG, Subtarget))
4825       return SHFL;
4826     break;
4827   case RISCVISD::SELECT_CC: {
4828     // Try to simplify the integer-equality select_cc patterns below.
4829     SDValue LHS = N->getOperand(0);
4830     SDValue RHS = N->getOperand(1);
4831     auto CCVal = static_cast<ISD::CondCode>(N->getConstantOperandVal(2));
4832     if (!ISD::isIntEqualitySetCC(CCVal))
4833       break;
4834
4835     // Fold (select_cc (setlt X, Y), 0, ne, trueV, falseV) ->
4836     //      (select_cc X, Y, lt, trueV, falseV)
4837     // Sometimes the setcc is introduced after select_cc has been formed.
4838     if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
4839         LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
4840       // If we're looking for eq 0 instead of ne 0, we need to invert the
4841       // condition.
4842       bool Invert = CCVal == ISD::SETEQ;
4843       CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
4844       if (Invert)
4845         CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
4846
4847       SDLoc DL(N);
4848       RHS = LHS.getOperand(1);
4849       LHS = LHS.getOperand(0);
4850       translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
4851
4852       SDValue TargetCC = DAG.getConstant(CCVal, DL, Subtarget.getXLenVT());
4853       return DAG.getNode(
4854           RISCVISD::SELECT_CC, DL, N->getValueType(0),
4855           {LHS, RHS, TargetCC, N->getOperand(3), N->getOperand(4)});
4856     }
4857
4858     // Fold (select_cc (xor X, Y), 0, eq/ne, trueV, falseV) ->
4859     //      (select_cc X, Y, eq/ne, trueV, falseV)
4860     if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS))
4861       return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), N->getValueType(0),
4862                          {LHS.getOperand(0), LHS.getOperand(1),
4863                           N->getOperand(2), N->getOperand(3),
4864                           N->getOperand(4)});
4865     // (select_cc X, 1, setne, trueV, falseV) ->
4866     // (select_cc X, 0, seteq, trueV, falseV) if we can prove X is 0/1.
4867     // This can occur when legalizing some floating point comparisons.
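    // For example, if X is the 0/1 result of a lowered setcc, the mask check
    // below proves that no bits above bit 0 can be set, so the fold applies.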
4868     APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
4869     if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
4870       SDLoc DL(N);
4871       CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
4872       SDValue TargetCC = DAG.getConstant(CCVal, DL, Subtarget.getXLenVT());
4873       RHS = DAG.getConstant(0, DL, LHS.getValueType());
4874       return DAG.getNode(
4875           RISCVISD::SELECT_CC, DL, N->getValueType(0),
4876           {LHS, RHS, TargetCC, N->getOperand(3), N->getOperand(4)});
4877     }
4878
4879     break;
4880   }
4881   case RISCVISD::BR_CC: {
4882     SDValue LHS = N->getOperand(1);
4883     SDValue RHS = N->getOperand(2);
4884     ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(3))->get();
4885     if (!ISD::isIntEqualitySetCC(CCVal))
4886       break;
4887
4888     // Fold (br_cc (setlt X, Y), 0, ne, dest) ->
4889     //      (br_cc X, Y, lt, dest)
4890     // Sometimes the setcc is introduced after br_cc has been formed.
4891     if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
4892         LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
4893       // If we're looking for eq 0 instead of ne 0, we need to invert the
4894       // condition.
4895       bool Invert = CCVal == ISD::SETEQ;
4896       CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
4897       if (Invert)
4898         CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
4899
4900       SDLoc DL(N);
4901       RHS = LHS.getOperand(1);
4902       LHS = LHS.getOperand(0);
4903       translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
4904
4905       return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
4906                          N->getOperand(0), LHS, RHS, DAG.getCondCode(CCVal),
4907                          N->getOperand(4));
4908     }
4909
4910     // Fold (br_cc (xor X, Y), 0, eq/ne, dest) ->
4911     //      (br_cc X, Y, eq/ne, dest)
4912     if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS))
4913       return DAG.getNode(RISCVISD::BR_CC, SDLoc(N), N->getValueType(0),
4914                          N->getOperand(0), LHS.getOperand(0), LHS.getOperand(1),
4915                          N->getOperand(3), N->getOperand(4));
4916
4917     // (br_cc X, 1, setne, dest) ->
4918     // (br_cc X, 0, seteq, dest) if we can prove X is 0/1.
4919     // This can occur when legalizing some floating point comparisons.
4920     APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
4921     if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
4922       SDLoc DL(N);
4923       CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
4924       SDValue TargetCC = DAG.getCondCode(CCVal);
4925       RHS = DAG.getConstant(0, DL, LHS.getValueType());
4926       return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
4927                          N->getOperand(0), LHS, RHS, TargetCC,
4928                          N->getOperand(4));
4929     }
4930     break;
4931   }
4932   case ISD::FCOPYSIGN: {
4933     EVT VT = N->getValueType(0);
4934     if (!VT.isVector())
4935       break;
4936     // There is a form of VFSGNJ which injects the negated sign of its second
4937     // operand. Try and bubble any FNEG up after the extend/round to produce
4938     // this optimized pattern. Avoid modifying cases where FP_ROUND has
4939     // TRUNC=1.
4940     SDValue In2 = N->getOperand(1);
4941     // Avoid cases where the extend/round has multiple uses, as duplicating
4942     // those is typically more expensive than removing a fneg.
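    // For example, (fcopysign x, (fpext (fneg y))) is rewritten below to
    // (fcopysign x, (fneg (fpext y))), which can later select the
    // sign-negating form of VFSGNJ.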
4943     if (!In2.hasOneUse())
4944       break;
4945     if (In2.getOpcode() != ISD::FP_EXTEND &&
4946         (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
4947       break;
4948     In2 = In2.getOperand(0);
4949     if (In2.getOpcode() != ISD::FNEG)
4950       break;
4951     SDLoc DL(N);
4952     SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
4953     return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
4954                        DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
4955   }
4956   case ISD::MGATHER:
4957   case ISD::MSCATTER: {
4958     if (!DCI.isBeforeLegalize())
4959       break;
4960     MaskedGatherScatterSDNode *MGSN = cast<MaskedGatherScatterSDNode>(N);
4961     SDValue Index = MGSN->getIndex();
4962     EVT IndexVT = Index.getValueType();
4963     MVT XLenVT = Subtarget.getXLenVT();
4964     // RISCV indexed loads and stores only support the "unsigned unscaled"
4965     // addressing mode, so anything else must be manually legalized.
4966     bool NeedsIdxLegalization = MGSN->isIndexScaled() ||
4967                                 (MGSN->isIndexSigned() &&
4968                                  IndexVT.getVectorElementType().bitsLT(XLenVT));
4969     if (!NeedsIdxLegalization)
4970       break;
4971
4972     SDLoc DL(N);
4973
4974     // Any index legalization should first promote to XLenVT, so we don't lose
4975     // bits when scaling. This may create an illegal index type, so we let
4976     // LLVM's legalization take care of the splitting.
4977     if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
4978       IndexVT = IndexVT.changeVectorElementType(XLenVT);
4979       Index = DAG.getNode(MGSN->isIndexSigned() ? ISD::SIGN_EXTEND
4980                                                 : ISD::ZERO_EXTEND,
4981                           DL, IndexVT, Index);
4982     }
4983
4984     unsigned Scale = N->getConstantOperandVal(5);
4985     if (MGSN->isIndexScaled() && Scale != 1) {
4986       // Manually scale the indices by the element size.
4987       // TODO: Sanitize the scale operand here?
4988       assert(isPowerOf2_32(Scale) && "Expecting a power-of-two scale");
4989       SDValue SplatScale = DAG.getConstant(Log2_32(Scale), DL, IndexVT);
4990       Index = DAG.getNode(ISD::SHL, DL, IndexVT, Index, SplatScale);
4991     }
4992
4993     ISD::MemIndexType NewIndexTy = ISD::UNSIGNED_UNSCALED;
4994     if (const auto *MGN = dyn_cast<MaskedGatherSDNode>(N)) {
4995       return DAG.getMaskedGather(
4996           N->getVTList(), MGSN->getMemoryVT(), DL,
4997           {MGSN->getChain(), MGN->getPassThru(), MGSN->getMask(),
4998            MGSN->getBasePtr(), Index, MGN->getScale()},
4999           MGN->getMemOperand(), NewIndexTy, MGN->getExtensionType());
5000     }
5001     const auto *MSN = cast<MaskedScatterSDNode>(N);
5002     return DAG.getMaskedScatter(
5003         N->getVTList(), MGSN->getMemoryVT(), DL,
5004         {MGSN->getChain(), MSN->getValue(), MGSN->getMask(), MGSN->getBasePtr(),
5005          Index, MGSN->getScale()},
5006         MGSN->getMemOperand(), NewIndexTy, MSN->isTruncatingStore());
5007   }
5008   }
5009
5010   return SDValue();
5011 }
5012
5013 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
5014     const SDNode *N, CombineLevel Level) const {
5015   // The following folds are only desirable if `(OP _, c1 << c2)` can be
5016   // materialised in fewer instructions than `(OP _, c1)`:
5017   //
5018   //   (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
5019   //   (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
5020   SDValue N0 = N->getOperand(0);
5021   EVT Ty = N0.getValueType();
5022   if (Ty.isScalarInteger() &&
5023       (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
5024     auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
5025     auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
5026     if (C1 && C2) {
5027       const APInt &C1Int = C1->getAPIntValue();
5028       APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
5029
5030       // We can materialise `c1 << c2` into an add immediate, so it's "free",
5031       // and the combine should happen, to potentially allow further combines
5032       // later.
5033       if (ShiftedC1Int.getMinSignedBits() <= 64 &&
5034           isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
5035         return true;
5036
5037       // We can materialise `c1` in an add immediate, so it's "free", and the
5038       // combine should be prevented.
5039       if (C1Int.getMinSignedBits() <= 64 &&
5040           isLegalAddImmediate(C1Int.getSExtValue()))
5041         return false;
5042
5043       // Neither constant will fit into an immediate, so find materialisation
5044       // costs.
5045       int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
5046                                               Subtarget.is64Bit());
5047       int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
5048           ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit());
5049
5050       // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
5051       // combine should be prevented.
5052       if (C1Cost < ShiftedC1Cost)
5053         return false;
5054     }
5055   }
5056   return true;
5057 }
5058
5059 bool RISCVTargetLowering::targetShrinkDemandedConstant(
5060     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
5061     TargetLoweringOpt &TLO) const {
5062   // Delay this optimization as late as possible.
5063   if (!TLO.LegalOps)
5064     return false;
5065
5066   EVT VT = Op.getValueType();
5067   if (VT.isVector())
5068     return false;
5069
5070   // Only handle AND for now.
5071   if (Op.getOpcode() != ISD::AND)
5072     return false;
5073
5074   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
5075   if (!C)
5076     return false;
5077
5078   const APInt &Mask = C->getAPIntValue();
5079
5080   // Clear all non-demanded bits initially.
5081   APInt ShrunkMask = Mask & DemandedBits;
5082
5083   // Try to make a smaller immediate by setting undemanded bits.
5084
5085   APInt ExpandedMask = Mask | ~DemandedBits;
5086
5087   auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
5088     return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
5089   };
5090   auto UseMask = [Mask, Op, VT, &TLO](const APInt &NewMask) -> bool {
5091     if (NewMask == Mask)
5092       return true;
5093     SDLoc DL(Op);
5094     SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
5095     SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
5096     return TLO.CombineTo(Op, NewOp);
5097   };
5098
5099   // If the shrunk mask fits in sign-extended 12 bits, let the target
5100   // independent code apply it.
5101   if (ShrunkMask.isSignedIntN(12))
5102     return false;
5103
5104   // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
5105   if (VT == MVT::i64) {
5106     APInt NewMask = APInt(64, 0xffffffff);
5107     if (IsLegalMask(NewMask))
5108       return UseMask(NewMask);
5109   }
5110
5111   // For the remaining optimizations, we need to be able to make a negative
5112   // number through a combination of mask and undemanded bits.
5113   if (!ExpandedMask.isNegative())
5114     return false;
5115
5116   // Compute the fewest number of bits needed to represent the negative number.
5117   unsigned MinSignedBits = ExpandedMask.getMinSignedBits();
5118
5119   // Try to make a 12-bit negative immediate. If that fails try to make a
5120   // 32-bit negative immediate unless the shrunk immediate already fits in 32 bits.
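  // Worked example (illustrative, i32): for (and x, 0xfff0) with DemandedBits
  // 0x0000ffff, ShrunkMask is 0xfff0, which is too wide for a 12-bit
  // immediate; setting the undemanded bits from bit 11 upwards instead yields
  // 0xfffffff0, i.e. -16, which does fit in ANDI.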
5121 APInt NewMask = ShrunkMask; 5122 if (MinSignedBits <= 12) 5123 NewMask.setBitsFrom(11); 5124 else if (MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32)) 5125 NewMask.setBitsFrom(31); 5126 else 5127 return false; 5128 5129 // Sanity check that our new mask is a subset of the demanded mask. 5130 assert(IsLegalMask(NewMask)); 5131 return UseMask(NewMask); 5132 } 5133 5134 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, 5135 KnownBits &Known, 5136 const APInt &DemandedElts, 5137 const SelectionDAG &DAG, 5138 unsigned Depth) const { 5139 unsigned BitWidth = Known.getBitWidth(); 5140 unsigned Opc = Op.getOpcode(); 5141 assert((Opc >= ISD::BUILTIN_OP_END || 5142 Opc == ISD::INTRINSIC_WO_CHAIN || 5143 Opc == ISD::INTRINSIC_W_CHAIN || 5144 Opc == ISD::INTRINSIC_VOID) && 5145 "Should use MaskedValueIsZero if you don't know whether Op" 5146 " is a target node!"); 5147 5148 Known.resetAll(); 5149 switch (Opc) { 5150 default: break; 5151 case RISCVISD::SELECT_CC: { 5152 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1); 5153 // If we don't know any bits, early out. 5154 if (Known.isUnknown()) 5155 break; 5156 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1); 5157 5158 // Only known if known in both the LHS and RHS. 5159 Known = KnownBits::commonBits(Known, Known2); 5160 break; 5161 } 5162 case RISCVISD::REMUW: { 5163 KnownBits Known2; 5164 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 5165 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 5166 // We only care about the lower 32 bits. 5167 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32)); 5168 // Restore the original width by sign extending. 5169 Known = Known.sext(BitWidth); 5170 break; 5171 } 5172 case RISCVISD::DIVUW: { 5173 KnownBits Known2; 5174 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 5175 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 5176 // We only care about the lower 32 bits. 5177 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32)); 5178 // Restore the original width by sign extending. 5179 Known = Known.sext(BitWidth); 5180 break; 5181 } 5182 case RISCVISD::CTZW: { 5183 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); 5184 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros(); 5185 unsigned LowBits = Log2_32(PossibleTZ) + 1; 5186 Known.Zero.setBitsFrom(LowBits); 5187 break; 5188 } 5189 case RISCVISD::CLZW: { 5190 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); 5191 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros(); 5192 unsigned LowBits = Log2_32(PossibleLZ) + 1; 5193 Known.Zero.setBitsFrom(LowBits); 5194 break; 5195 } 5196 case RISCVISD::READ_VLENB: 5197 // We assume VLENB is at least 8 bytes. 5198 // FIXME: The 1.0 draft spec defines minimum VLEN as 128 bits. 
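    // VLENB = VLEN/8, and VLEN is a power of two of at least 64 under the
    // assumption above, so VLENB is a multiple of 8 and its low 3 bits must
    // be zero.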
5199     Known.Zero.setLowBits(3);
5200     break;
5201   }
5202 }
5203
5204 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
5205     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
5206     unsigned Depth) const {
5207   switch (Op.getOpcode()) {
5208   default:
5209     break;
5210   case RISCVISD::SLLW:
5211   case RISCVISD::SRAW:
5212   case RISCVISD::SRLW:
5213   case RISCVISD::DIVW:
5214   case RISCVISD::DIVUW:
5215   case RISCVISD::REMUW:
5216   case RISCVISD::ROLW:
5217   case RISCVISD::RORW:
5218   case RISCVISD::GREVIW:
5219   case RISCVISD::GORCIW:
5220   case RISCVISD::FSLW:
5221   case RISCVISD::FSRW:
5222     // TODO: As the result is sign-extended, this is conservatively correct. A
5223     // more precise answer could be calculated for SRAW depending on known
5224     // bits in the shift amount.
5225     return 33;
5226   case RISCVISD::SHFLI: {
5227     // There is no SHFLIW, but an i64 SHFLI with bit 4 of the control word
5228     // cleared doesn't affect bit 31. The upper 32 bits will be shuffled, but
5229     // will stay within the upper 32 bits. If there were more than 32 sign bits
5230     // before, there will be at least 33 sign bits after.
5231     if (Op.getValueType() == MVT::i64 &&
5232         (Op.getConstantOperandVal(1) & 0x10) == 0) {
5233       unsigned Tmp = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
5234       if (Tmp > 32)
5235         return 33;
5236     }
5237     break;
5238   }
5239   case RISCVISD::VMV_X_S:
5240     // The number of sign bits of the scalar result is computed by obtaining the
5241     // element type of the input vector operand, subtracting its width from the
5242     // XLEN, and then adding one (sign bit within the element type). If the
5243     // element type is wider than XLen, the least-significant XLEN bits are
5244     // taken.
5245     if (Op.getOperand(0).getScalarValueSizeInBits() > Subtarget.getXLen())
5246       return 1;
5247     return Subtarget.getXLen() - Op.getOperand(0).getScalarValueSizeInBits() + 1;
5248   }
5249
5250   return 1;
5251 }
5252
5253 static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
5254                                                   MachineBasicBlock *BB) {
5255   assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");
5256
5257   // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
5258   // Should the count have wrapped while it was being read, we need to try
5259   // again.
5260   // ...
5261   // read:
5262   //   rdcycleh x3 # load high word of cycle
5263   //   rdcycle  x2 # load low word of cycle
5264   //   rdcycleh x4 # load high word of cycle
5265   //   bne x3, x4, read # check if high word reads match, otherwise try again
5266   // ...
5267
5268   MachineFunction &MF = *BB->getParent();
5269   const BasicBlock *LLVM_BB = BB->getBasicBlock();
5270   MachineFunction::iterator It = ++BB->getIterator();
5271
5272   MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
5273   MF.insert(It, LoopMBB);
5274
5275   MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB);
5276   MF.insert(It, DoneMBB);
5277
5278   // Transfer the remainder of BB and its successor edges to DoneMBB.
5279 DoneMBB->splice(DoneMBB->begin(), BB, 5280 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 5281 DoneMBB->transferSuccessorsAndUpdatePHIs(BB); 5282 5283 BB->addSuccessor(LoopMBB); 5284 5285 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 5286 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 5287 Register LoReg = MI.getOperand(0).getReg(); 5288 Register HiReg = MI.getOperand(1).getReg(); 5289 DebugLoc DL = MI.getDebugLoc(); 5290 5291 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); 5292 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg) 5293 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) 5294 .addReg(RISCV::X0); 5295 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg) 5296 .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding) 5297 .addReg(RISCV::X0); 5298 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg) 5299 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) 5300 .addReg(RISCV::X0); 5301 5302 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE)) 5303 .addReg(HiReg) 5304 .addReg(ReadAgainReg) 5305 .addMBB(LoopMBB); 5306 5307 LoopMBB->addSuccessor(LoopMBB); 5308 LoopMBB->addSuccessor(DoneMBB); 5309 5310 MI.eraseFromParent(); 5311 5312 return DoneMBB; 5313 } 5314 5315 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI, 5316 MachineBasicBlock *BB) { 5317 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction"); 5318 5319 MachineFunction &MF = *BB->getParent(); 5320 DebugLoc DL = MI.getDebugLoc(); 5321 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 5322 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 5323 Register LoReg = MI.getOperand(0).getReg(); 5324 Register HiReg = MI.getOperand(1).getReg(); 5325 Register SrcReg = MI.getOperand(2).getReg(); 5326 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass; 5327 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); 5328 5329 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC, 5330 RI); 5331 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 5332 MachineMemOperand *MMOLo = 5333 MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8)); 5334 MachineMemOperand *MMOHi = MF.getMachineMemOperand( 5335 MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8)); 5336 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg) 5337 .addFrameIndex(FI) 5338 .addImm(0) 5339 .addMemOperand(MMOLo); 5340 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg) 5341 .addFrameIndex(FI) 5342 .addImm(4) 5343 .addMemOperand(MMOHi); 5344 MI.eraseFromParent(); // The pseudo instruction is gone now. 
5345 return BB; 5346 } 5347 5348 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI, 5349 MachineBasicBlock *BB) { 5350 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo && 5351 "Unexpected instruction"); 5352 5353 MachineFunction &MF = *BB->getParent(); 5354 DebugLoc DL = MI.getDebugLoc(); 5355 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 5356 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 5357 Register DstReg = MI.getOperand(0).getReg(); 5358 Register LoReg = MI.getOperand(1).getReg(); 5359 Register HiReg = MI.getOperand(2).getReg(); 5360 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass; 5361 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); 5362 5363 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 5364 MachineMemOperand *MMOLo = 5365 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8)); 5366 MachineMemOperand *MMOHi = MF.getMachineMemOperand( 5367 MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8)); 5368 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 5369 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill())) 5370 .addFrameIndex(FI) 5371 .addImm(0) 5372 .addMemOperand(MMOLo); 5373 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 5374 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill())) 5375 .addFrameIndex(FI) 5376 .addImm(4) 5377 .addMemOperand(MMOHi); 5378 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI); 5379 MI.eraseFromParent(); // The pseudo instruction is gone now. 5380 return BB; 5381 } 5382 5383 static bool isSelectPseudo(MachineInstr &MI) { 5384 switch (MI.getOpcode()) { 5385 default: 5386 return false; 5387 case RISCV::Select_GPR_Using_CC_GPR: 5388 case RISCV::Select_FPR16_Using_CC_GPR: 5389 case RISCV::Select_FPR32_Using_CC_GPR: 5390 case RISCV::Select_FPR64_Using_CC_GPR: 5391 return true; 5392 } 5393 } 5394 5395 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI, 5396 MachineBasicBlock *BB) { 5397 // To "insert" Select_* instructions, we actually have to insert the triangle 5398 // control-flow pattern. The incoming instructions know the destination vreg 5399 // to set, the condition code register to branch on, the true/false values to 5400 // select between, and the condcode to use to select the appropriate branch. 5401 // 5402 // We produce the following control flow: 5403 // HeadMBB 5404 // | \ 5405 // | IfFalseMBB 5406 // | / 5407 // TailMBB 5408 // 5409 // When we find a sequence of selects we attempt to optimize their emission 5410 // by sharing the control flow. Currently we only handle cases where we have 5411 // multiple selects with the exact same condition (same LHS, RHS and CC). 5412 // The selects may be interleaved with other instructions if the other 5413 // instructions meet some requirements we deem safe: 5414 // - They are debug instructions. Otherwise, 5415 // - They do not have side-effects, do not access memory and their inputs do 5416 // not depend on the results of the select pseudo-instructions. 5417 // The TrueV/FalseV operands of the selects cannot depend on the result of 5418 // previous selects in the sequence. 5419 // These conditions could be further relaxed. See the X86 target for a 5420 // related approach and more information. 
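  // For example, two adjacent selects on the same condition, roughly
  //   %a = Select_GPR_Using_CC_GPR %x, %y, cc, %t1, %f1
  //   %b = Select_GPR_Using_CC_GPR %x, %y, cc, %t2, %f2
  // share a single conditional branch and simply become two PHI nodes in
  // TailMBB.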
5421 Register LHS = MI.getOperand(1).getReg(); 5422 Register RHS = MI.getOperand(2).getReg(); 5423 auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm()); 5424 5425 SmallVector<MachineInstr *, 4> SelectDebugValues; 5426 SmallSet<Register, 4> SelectDests; 5427 SelectDests.insert(MI.getOperand(0).getReg()); 5428 5429 MachineInstr *LastSelectPseudo = &MI; 5430 5431 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI); 5432 SequenceMBBI != E; ++SequenceMBBI) { 5433 if (SequenceMBBI->isDebugInstr()) 5434 continue; 5435 else if (isSelectPseudo(*SequenceMBBI)) { 5436 if (SequenceMBBI->getOperand(1).getReg() != LHS || 5437 SequenceMBBI->getOperand(2).getReg() != RHS || 5438 SequenceMBBI->getOperand(3).getImm() != CC || 5439 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) || 5440 SelectDests.count(SequenceMBBI->getOperand(5).getReg())) 5441 break; 5442 LastSelectPseudo = &*SequenceMBBI; 5443 SequenceMBBI->collectDebugValues(SelectDebugValues); 5444 SelectDests.insert(SequenceMBBI->getOperand(0).getReg()); 5445 } else { 5446 if (SequenceMBBI->hasUnmodeledSideEffects() || 5447 SequenceMBBI->mayLoadOrStore()) 5448 break; 5449 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) { 5450 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg()); 5451 })) 5452 break; 5453 } 5454 } 5455 5456 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); 5457 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 5458 DebugLoc DL = MI.getDebugLoc(); 5459 MachineFunction::iterator I = ++BB->getIterator(); 5460 5461 MachineBasicBlock *HeadMBB = BB; 5462 MachineFunction *F = BB->getParent(); 5463 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB); 5464 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB); 5465 5466 F->insert(I, IfFalseMBB); 5467 F->insert(I, TailMBB); 5468 5469 // Transfer debug instructions associated with the selects to TailMBB. 5470 for (MachineInstr *DebugInstr : SelectDebugValues) { 5471 TailMBB->push_back(DebugInstr->removeFromParent()); 5472 } 5473 5474 // Move all instructions after the sequence to TailMBB. 5475 TailMBB->splice(TailMBB->end(), HeadMBB, 5476 std::next(LastSelectPseudo->getIterator()), HeadMBB->end()); 5477 // Update machine-CFG edges by transferring all successors of the current 5478 // block to the new block which will contain the Phi nodes for the selects. 5479 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB); 5480 // Set the successors for HeadMBB. 5481 HeadMBB->addSuccessor(IfFalseMBB); 5482 HeadMBB->addSuccessor(TailMBB); 5483 5484 // Insert appropriate branch. 5485 unsigned Opcode = getBranchOpcodeForIntCondCode(CC); 5486 5487 BuildMI(HeadMBB, DL, TII.get(Opcode)) 5488 .addReg(LHS) 5489 .addReg(RHS) 5490 .addMBB(TailMBB); 5491 5492 // IfFalseMBB just falls through to TailMBB. 5493 IfFalseMBB->addSuccessor(TailMBB); 5494 5495 // Create PHIs for all of the select pseudo-instructions. 
5496   auto SelectMBBI = MI.getIterator();
5497   auto SelectEnd = std::next(LastSelectPseudo->getIterator());
5498   auto InsertionPoint = TailMBB->begin();
5499   while (SelectMBBI != SelectEnd) {
5500     auto Next = std::next(SelectMBBI);
5501     if (isSelectPseudo(*SelectMBBI)) {
5502       // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
5503       BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
5504               TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
5505           .addReg(SelectMBBI->getOperand(4).getReg())
5506           .addMBB(HeadMBB)
5507           .addReg(SelectMBBI->getOperand(5).getReg())
5508           .addMBB(IfFalseMBB);
5509       SelectMBBI->eraseFromParent();
5510     }
5511     SelectMBBI = Next;
5512   }
5513
5514   F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
5515   return TailMBB;
5516 }
5517
5518 static MachineInstr *elideCopies(MachineInstr *MI,
5519                                  const MachineRegisterInfo &MRI) {
5520   while (true) {
5521     if (!MI->isFullCopy())
5522       return MI;
5523     if (!Register::isVirtualRegister(MI->getOperand(1).getReg()))
5524       return nullptr;
5525     MI = MRI.getVRegDef(MI->getOperand(1).getReg());
5526     if (!MI)
5527       return nullptr;
5528   }
5529 }
5530
5531 static MachineBasicBlock *addVSetVL(MachineInstr &MI, MachineBasicBlock *BB,
5532                                     int VLIndex, unsigned SEWIndex,
5533                                     RISCVVLMUL VLMul, bool ForceTailAgnostic) {
5534   MachineFunction &MF = *BB->getParent();
5535   DebugLoc DL = MI.getDebugLoc();
5536   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
5537
5538   unsigned SEW = MI.getOperand(SEWIndex).getImm();
5539   assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
5540   RISCVVSEW ElementWidth = static_cast<RISCVVSEW>(Log2_32(SEW / 8));
5541
5542   MachineRegisterInfo &MRI = MF.getRegInfo();
5543
5544   auto BuildVSETVLI = [&]() {
5545     if (VLIndex >= 0) {
5546       Register DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
5547       Register VLReg = MI.getOperand(VLIndex).getReg();
5548
5549       // VL might be a compile time constant, but isel would have to put it
5550       // in a register. See if VL comes from an ADDI X0, imm.
5551       if (VLReg.isVirtual()) {
5552         MachineInstr *Def = MRI.getVRegDef(VLReg);
5553         if (Def && Def->getOpcode() == RISCV::ADDI &&
5554             Def->getOperand(1).getReg() == RISCV::X0 &&
5555             Def->getOperand(2).isImm()) {
5556           uint64_t Imm = Def->getOperand(2).getImm();
5557           // VSETIVLI allows a 5-bit zero-extended immediate.
5558           if (isUInt<5>(Imm))
5559             return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETIVLI))
5560                 .addReg(DestReg, RegState::Define | RegState::Dead)
5561                 .addImm(Imm);
5562         }
5563       }
5564
5565       return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI))
5566           .addReg(DestReg, RegState::Define | RegState::Dead)
5567           .addReg(VLReg);
5568     }
5569
5570     // With no VL operand in the pseudo, do not modify VL (rd = X0, rs1 = X0).
5571     return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI))
5572         .addReg(RISCV::X0, RegState::Define | RegState::Dead)
5573         .addReg(RISCV::X0, RegState::Kill);
5574   };
5575
5576   MachineInstrBuilder MIB = BuildVSETVLI();
5577
5578   // Default to tail agnostic unless the destination is tied to a source. In
5579   // that case the user would have some control over the tail values. The tail
5580   // policy is also ignored on instructions that only update element 0 like
5581   // vmv.s.x or reductions, so use agnostic there to match the common case.
5582   // FIXME: This is conservatively correct, but we might want to detect that
5583   // the input is undefined.
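  // For example, a pseudo whose destination is tied to a passthru source
  // operand gets a tail-undisturbed vtype below, unless that passthru turns
  // out to be an IMPLICIT_DEF.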
5584   bool TailAgnostic = true;
5585   unsigned UseOpIdx;
5586   if (!ForceTailAgnostic && MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
5587     TailAgnostic = false;
5588     // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic.
5589     const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
5590     MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg());
5591     if (UseMI) {
5592       UseMI = elideCopies(UseMI, MRI);
5593       if (UseMI && UseMI->isImplicitDef())
5594         TailAgnostic = true;
5595     }
5596   }
5597
5598   // For simplicity we reuse the vtype representation here.
5599   MIB.addImm(RISCVVType::encodeVTYPE(VLMul, ElementWidth,
5600                                      /*TailAgnostic*/ TailAgnostic,
5601                                      /*MaskAgnostic*/ false));
5602
5603   // Remove (now) redundant operands from the pseudo.
5604   if (VLIndex >= 0) {
5605     MI.getOperand(VLIndex).setReg(RISCV::NoRegister);
5606     MI.getOperand(VLIndex).setIsKill(false);
5607   }
5608
5609   return BB;
5610 }
5611
5612 MachineBasicBlock *
5613 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
5614                                                  MachineBasicBlock *BB) const {
5615   uint64_t TSFlags = MI.getDesc().TSFlags;
5616
5617   if (TSFlags & RISCVII::HasSEWOpMask) {
5618     unsigned NumOperands = MI.getNumExplicitOperands();
5619     int VLIndex = (TSFlags & RISCVII::HasVLOpMask) ? NumOperands - 2 : -1;
5620     unsigned SEWIndex = NumOperands - 1;
5621     bool ForceTailAgnostic = TSFlags & RISCVII::ForceTailAgnosticMask;
5622
5623     RISCVVLMUL VLMul = static_cast<RISCVVLMUL>((TSFlags & RISCVII::VLMulMask) >>
5624                                                RISCVII::VLMulShift);
5625     return addVSetVL(MI, BB, VLIndex, SEWIndex, VLMul, ForceTailAgnostic);
5626   }
5627
5628   switch (MI.getOpcode()) {
5629   default:
5630     llvm_unreachable("Unexpected instr type to insert");
5631   case RISCV::ReadCycleWide:
5632     assert(!Subtarget.is64Bit() &&
5633            "ReadCycleWide is only to be used on riscv32");
5634     return emitReadCycleWidePseudo(MI, BB);
5635   case RISCV::Select_GPR_Using_CC_GPR:
5636   case RISCV::Select_FPR16_Using_CC_GPR:
5637   case RISCV::Select_FPR32_Using_CC_GPR:
5638   case RISCV::Select_FPR64_Using_CC_GPR:
5639     return emitSelectPseudo(MI, BB);
5640   case RISCV::BuildPairF64Pseudo:
5641     return emitBuildPairF64Pseudo(MI, BB);
5642   case RISCV::SplitF64Pseudo:
5643     return emitSplitF64Pseudo(MI, BB);
5644   }
5645 }
5646
5647 // Calling Convention Implementation.
5648 // The expectations for frontend ABI lowering vary from target to target.
5649 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI
5650 // details, but this is a longer term goal. For now, we simply try to keep the
5651 // role of the frontend as simple and well-defined as possible. The rules can
5652 // be summarised as:
5653 // * Never split up large scalar arguments. We handle them here.
5654 // * If a hardfloat calling convention is being used, and the struct may be
5655 //   passed in a pair of registers (fp+fp, int+fp), and both registers are
5656 //   available, then pass as two separate arguments. If either the GPRs or FPRs
5657 //   are exhausted, then pass according to the rule below.
5658 // * If a struct could never be passed in registers or directly in a stack
5659 //   slot (as it is larger than 2*XLEN and the floating point rules don't
5660 //   apply), then pass it using a pointer with the byval attribute.
5661 // * If a struct is less than 2*XLEN, then coerce to either a two-element
5662 //   word-sized array or a 2*XLEN scalar (depending on alignment).
5663 // * The frontend can determine whether a struct is returned by reference or
5664 //   not based on its size and fields.
If it will be returned by reference, the 5665 // frontend must modify the prototype so a pointer with the sret annotation is 5666 // passed as the first argument. This is not necessary for large scalar 5667 // returns. 5668 // * Struct return values and varargs should be coerced to structs containing 5669 // register-size fields in the same situations they would be for fixed 5670 // arguments. 5671 5672 static const MCPhysReg ArgGPRs[] = { 5673 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, 5674 RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17 5675 }; 5676 static const MCPhysReg ArgFPR16s[] = { 5677 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, 5678 RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H 5679 }; 5680 static const MCPhysReg ArgFPR32s[] = { 5681 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, 5682 RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F 5683 }; 5684 static const MCPhysReg ArgFPR64s[] = { 5685 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, 5686 RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D 5687 }; 5688 // This is an interim calling convention and it may be changed in the future. 5689 static const MCPhysReg ArgVRs[] = { 5690 RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13, 5691 RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19, 5692 RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23}; 5693 static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2, 5694 RISCV::V14M2, RISCV::V16M2, RISCV::V18M2, 5695 RISCV::V20M2, RISCV::V22M2}; 5696 static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4, 5697 RISCV::V20M4}; 5698 static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8}; 5699 5700 // Pass a 2*XLEN argument that has been split into two XLEN values through 5701 // registers or the stack as necessary. 5702 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, 5703 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, 5704 MVT ValVT2, MVT LocVT2, 5705 ISD::ArgFlagsTy ArgFlags2) { 5706 unsigned XLenInBytes = XLen / 8; 5707 if (Register Reg = State.AllocateReg(ArgGPRs)) { 5708 // At least one half can be passed via register. 5709 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg, 5710 VA1.getLocVT(), CCValAssign::Full)); 5711 } else { 5712 // Both halves must be passed on the stack, with proper alignment. 5713 Align StackAlign = 5714 std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign()); 5715 State.addLoc( 5716 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(), 5717 State.AllocateStack(XLenInBytes, StackAlign), 5718 VA1.getLocVT(), CCValAssign::Full)); 5719 State.addLoc(CCValAssign::getMem( 5720 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)), 5721 LocVT2, CCValAssign::Full)); 5722 return false; 5723 } 5724 5725 if (Register Reg = State.AllocateReg(ArgGPRs)) { 5726 // The second half can also be passed via register. 5727 State.addLoc( 5728 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full)); 5729 } else { 5730 // The second half is passed via the stack, without additional alignment. 5731 State.addLoc(CCValAssign::getMem( 5732 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)), 5733 LocVT2, CCValAssign::Full)); 5734 } 5735 5736 return false; 5737 } 5738 5739 // Implements the RISC-V calling convention. Returns true upon failure. 
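// For example, under the ILP32D ABI a fixed f64 argument is assigned to one
// of fa0-fa7, while a variadic f64 falls back to GPRs (or the stack) via the
// UseGPRForF64 logic below.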
5740 static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
5741 MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
5742 ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
5743 bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
5744 Optional<unsigned> FirstMaskArgument) {
5745 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
5746 assert(XLen == 32 || XLen == 64);
5747 MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
5748
5749 // Any return value split into more than two values can't be returned
5750 // directly. Vectors are returned via the available vector registers.
5751 if (!LocVT.isVector() && IsRet && ValNo > 1)
5752 return true;
5753
5754 // UseGPRForF16_F32 is true if targeting one of the soft-float ABIs, if
5755 // passing a variadic argument, or if no F16/F32 argument registers are available.
5756 bool UseGPRForF16_F32 = true;
5757 // UseGPRForF64 is true if targeting soft-float ABIs or an FLEN=32 ABI, if
5758 // passing a variadic argument, or if no F64 argument registers are available.
5759 bool UseGPRForF64 = true;
5760
5761 switch (ABI) {
5762 default:
5763 llvm_unreachable("Unexpected ABI");
5764 case RISCVABI::ABI_ILP32:
5765 case RISCVABI::ABI_LP64:
5766 break;
5767 case RISCVABI::ABI_ILP32F:
5768 case RISCVABI::ABI_LP64F:
5769 UseGPRForF16_F32 = !IsFixed;
5770 break;
5771 case RISCVABI::ABI_ILP32D:
5772 case RISCVABI::ABI_LP64D:
5773 UseGPRForF16_F32 = !IsFixed;
5774 UseGPRForF64 = !IsFixed;
5775 break;
5776 }
5777
5778 // FPR16, FPR32, and FPR64 alias each other.
5779 if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) {
5780 UseGPRForF16_F32 = true;
5781 UseGPRForF64 = true;
5782 }
5783
5784 // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
5785 // similar local variables rather than directly checking against the target
5786 // ABI.
5787
5788 if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::f32)) {
5789 LocVT = XLenVT;
5790 LocInfo = CCValAssign::BCvt;
5791 } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
5792 LocVT = MVT::i64;
5793 LocInfo = CCValAssign::BCvt;
5794 }
5795
5796 // If this is a variadic argument, the RISC-V calling convention requires
5797 // that it is assigned an 'even' or 'aligned' register if it has 8-byte
5798 // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
5799 // be used regardless of whether the original argument was split during
5800 // legalisation or not. The argument will not be passed by registers if the
5801 // original type is larger than 2*XLEN, so the register alignment rule does
5802 // not apply.
5803 unsigned TwoXLenInBytes = (2 * XLen) / 8;
5804 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
5805 DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
5806 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
5807 // Skip 'odd' register if necessary.
5808 if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
5809 State.AllocateReg(ArgGPRs);
5810 }
5811
5812 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
5813 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
5814 State.getPendingArgFlags();
5815
5816 assert(PendingLocs.size() == PendingArgFlags.size() &&
5817 "PendingLocs and PendingArgFlags out of sync");
5818
5819 // Handle passing f64 on RV32D with a soft float ABI or when floating point
5820 // registers are exhausted.
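// For example, a fixed f64 argument on the ILP32 ABI lands here: with enough
// GPRs free it is passed in the next two GPRs; if only a7 remains, the low
// half goes in a7 and 4 bytes of stack are reserved for the high half; with
// no GPRs left, the whole value takes an 8-byte aligned stack slot.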
5821 if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
5822 assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
5823 "Can't lower f64 if it is split");
5824 // Depending on available argument GPRs, f64 may be passed in a pair of
5825 // GPRs, split between a GPR and the stack, or passed completely on the
5826 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
5827 // cases.
5828 Register Reg = State.AllocateReg(ArgGPRs);
5829 LocVT = MVT::i32;
5830 if (!Reg) {
5831 unsigned StackOffset = State.AllocateStack(8, Align(8));
5832 State.addLoc(
5833 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
5834 return false;
5835 }
5836 if (!State.AllocateReg(ArgGPRs))
5837 State.AllocateStack(4, Align(4));
5838 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5839 return false;
5840 }
5841
5842 // Fixed-length vectors are located in the corresponding scalable-vector
5843 // container types.
5844 if (ValVT.isFixedLengthVector())
5845 LocVT = TLI.getContainerForFixedLengthVector(LocVT);
5846
5847 // Split arguments might be passed indirectly, so keep track of the pending
5848 // values. Split vectors are passed via a mix of registers and indirectly, so
5849 // treat them as we would any other argument.
5850 if (!LocVT.isVector() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
5851 LocVT = XLenVT;
5852 LocInfo = CCValAssign::Indirect;
5853 PendingLocs.push_back(
5854 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
5855 PendingArgFlags.push_back(ArgFlags);
5856 if (!ArgFlags.isSplitEnd()) {
5857 return false;
5858 }
5859 }
5860
5861 // If the split argument only had two elements, it should be passed directly
5862 // in registers or on the stack.
5863 if (!LocVT.isVector() && ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) {
5864 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
5865 // Apply the normal calling convention rules to the first half of the
5866 // split argument.
5867 CCValAssign VA = PendingLocs[0];
5868 ISD::ArgFlagsTy AF = PendingArgFlags[0];
5869 PendingLocs.clear();
5870 PendingArgFlags.clear();
5871 return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
5872 ArgFlags);
5873 }
5874
5875 // Allocate to a register if possible, or else a stack slot.
5876 Register Reg;
5877 if (ValVT == MVT::f16 && !UseGPRForF16_F32)
5878 Reg = State.AllocateReg(ArgFPR16s);
5879 else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
5880 Reg = State.AllocateReg(ArgFPR32s);
5881 else if (ValVT == MVT::f64 && !UseGPRForF64)
5882 Reg = State.AllocateReg(ArgFPR64s);
5883 else if (ValVT.isVector()) {
5884 const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT);
5885 if (RC == &RISCV::VRRegClass) {
5886 // Assign the first mask argument to V0.
5887 // This is an interim calling convention and it may be changed in the
5888 // future.
5889 if (FirstMaskArgument.hasValue() &&
5890 ValNo == FirstMaskArgument.getValue()) {
5891 Reg = State.AllocateReg(RISCV::V0);
5892 } else {
5893 Reg = State.AllocateReg(ArgVRs);
5894 }
5895 } else if (RC == &RISCV::VRM2RegClass) {
5896 Reg = State.AllocateReg(ArgVRM2s);
5897 } else if (RC == &RISCV::VRM4RegClass) {
5898 Reg = State.AllocateReg(ArgVRM4s);
5899 } else if (RC == &RISCV::VRM8RegClass) {
5900 Reg = State.AllocateReg(ArgVRM8s);
5901 } else {
5902 llvm_unreachable("Unhandled register class for ValueType");
5903 }
5904 if (!Reg) {
5905 // For return values, the vector must be passed fully via registers or
5906 // via the stack.
5907 // FIXME: The proposed vector ABI only mandates v8-v15 for return values,
5908 // but we're using all of them.
5909 if (IsRet)
5910 return true;
5911 LocInfo = CCValAssign::Indirect;
5912 // Try using a GPR to pass the address.
5913 Reg = State.AllocateReg(ArgGPRs);
5914 LocVT = XLenVT;
5915 }
5916 } else
5917 Reg = State.AllocateReg(ArgGPRs);
5918 unsigned StackOffset =
5919 Reg ? 0 : State.AllocateStack(XLen / 8, Align(XLen / 8));
5920
5921 // If we reach this point and PendingLocs is non-empty, we must be at the
5922 // end of a split argument that must be passed indirectly.
5923 if (!PendingLocs.empty()) {
5924 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
5925 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
5926
5927 for (auto &It : PendingLocs) {
5928 if (Reg)
5929 It.convertToReg(Reg);
5930 else
5931 It.convertToMem(StackOffset);
5932 State.addLoc(It);
5933 }
5934 PendingLocs.clear();
5935 PendingArgFlags.clear();
5936 return false;
5937 }
5938
5939 assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
5940 (TLI.getSubtarget().hasStdExtV() && ValVT.isVector())) &&
5941 "Expected an XLenVT or a vector type at this stage");
5942
5943 if (Reg) {
5944 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5945 return false;
5946 }
5947
5948 // When a floating-point value is passed on the stack, no bit-conversion is
5949 // needed.
5950 if (ValVT.isFloatingPoint()) {
5951 LocVT = ValVT;
5952 LocInfo = CCValAssign::Full;
5953 }
5954 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
5955 return false;
5956 }
5957
5958 template <typename ArgTy>
5959 static Optional<unsigned> preAssignMask(const ArgTy &Args) {
5960 for (const auto &ArgIdx : enumerate(Args)) {
5961 MVT ArgVT = ArgIdx.value().VT;
5962 if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1)
5963 return ArgIdx.index();
5964 }
5965 return None;
5966 }
5967
5968 void RISCVTargetLowering::analyzeInputArgs(
5969 MachineFunction &MF, CCState &CCInfo,
5970 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const {
5971 unsigned NumArgs = Ins.size();
5972 FunctionType *FType = MF.getFunction().getFunctionType();
5973
5974 Optional<unsigned> FirstMaskArgument;
5975 if (Subtarget.hasStdExtV())
5976 FirstMaskArgument = preAssignMask(Ins);
5977
5978 for (unsigned i = 0; i != NumArgs; ++i) {
5979 MVT ArgVT = Ins[i].VT;
5980 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
5981
5982 Type *ArgTy = nullptr;
5983 if (IsRet)
5984 ArgTy = FType->getReturnType();
5985 else if (Ins[i].isOrigArg())
5986 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
5987
5988 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
5989 if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
5990 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
5991 FirstMaskArgument)) {
5992 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
5993 << EVT(ArgVT).getEVTString() << '\n');
5994 llvm_unreachable(nullptr);
5995 }
5996 }
5997 }
5998
5999 void RISCVTargetLowering::analyzeOutputArgs(
6000 MachineFunction &MF, CCState &CCInfo,
6001 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
6002 CallLoweringInfo *CLI) const {
6003 unsigned NumArgs = Outs.size();
6004
6005 Optional<unsigned> FirstMaskArgument;
6006 if (Subtarget.hasStdExtV())
6007 FirstMaskArgument = preAssignMask(Outs);
6008
6009 for (unsigned i = 0; i != NumArgs; i++) {
6010 MVT ArgVT = Outs[i].VT;
6011 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
6012
Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr; 6013 6014 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 6015 if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, 6016 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this, 6017 FirstMaskArgument)) { 6018 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " 6019 << EVT(ArgVT).getEVTString() << "\n"); 6020 llvm_unreachable(nullptr); 6021 } 6022 } 6023 } 6024 6025 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect 6026 // values. 6027 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, 6028 const CCValAssign &VA, const SDLoc &DL, 6029 const RISCVSubtarget &Subtarget) { 6030 switch (VA.getLocInfo()) { 6031 default: 6032 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 6033 case CCValAssign::Full: 6034 if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector()) 6035 Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget); 6036 break; 6037 case CCValAssign::BCvt: 6038 if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16) 6039 Val = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, Val); 6040 else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 6041 Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val); 6042 else 6043 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); 6044 break; 6045 } 6046 return Val; 6047 } 6048 6049 // The caller is responsible for loading the full value if the argument is 6050 // passed with CCValAssign::Indirect. 6051 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, 6052 const CCValAssign &VA, const SDLoc &DL, 6053 const RISCVTargetLowering &TLI) { 6054 MachineFunction &MF = DAG.getMachineFunction(); 6055 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 6056 EVT LocVT = VA.getLocVT(); 6057 SDValue Val; 6058 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT()); 6059 Register VReg = RegInfo.createVirtualRegister(RC); 6060 RegInfo.addLiveIn(VA.getLocReg(), VReg); 6061 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); 6062 6063 if (VA.getLocInfo() == CCValAssign::Indirect) 6064 return Val; 6065 6066 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget()); 6067 } 6068 6069 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, 6070 const CCValAssign &VA, const SDLoc &DL, 6071 const RISCVSubtarget &Subtarget) { 6072 EVT LocVT = VA.getLocVT(); 6073 6074 switch (VA.getLocInfo()) { 6075 default: 6076 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 6077 case CCValAssign::Full: 6078 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector()) 6079 Val = convertToScalableVector(LocVT, Val, DAG, Subtarget); 6080 break; 6081 case CCValAssign::BCvt: 6082 if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16) 6083 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val); 6084 else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 6085 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val); 6086 else 6087 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val); 6088 break; 6089 } 6090 return Val; 6091 } 6092 6093 // The caller is responsible for loading the full value if the argument is 6094 // passed with CCValAssign::Indirect. 
6095 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
6096 const CCValAssign &VA, const SDLoc &DL) {
6097 MachineFunction &MF = DAG.getMachineFunction();
6098 MachineFrameInfo &MFI = MF.getFrameInfo();
6099 EVT LocVT = VA.getLocVT();
6100 EVT ValVT = VA.getValVT();
6101 EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
6102 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
6103 VA.getLocMemOffset(), /*Immutable=*/true);
6104 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
6105 SDValue Val;
6106
6107 ISD::LoadExtType ExtType;
6108 switch (VA.getLocInfo()) {
6109 default:
6110 llvm_unreachable("Unexpected CCValAssign::LocInfo");
6111 case CCValAssign::Full:
6112 case CCValAssign::Indirect:
6113 case CCValAssign::BCvt:
6114 ExtType = ISD::NON_EXTLOAD;
6115 break;
6116 }
6117 Val = DAG.getExtLoad(
6118 ExtType, DL, LocVT, Chain, FIN,
6119 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
6120 return Val;
6121 }
6122
6123 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
6124 const CCValAssign &VA, const SDLoc &DL) {
6125 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
6126 "Unexpected VA");
6127 MachineFunction &MF = DAG.getMachineFunction();
6128 MachineFrameInfo &MFI = MF.getFrameInfo();
6129 MachineRegisterInfo &RegInfo = MF.getRegInfo();
6130
6131 if (VA.isMemLoc()) {
6132 // f64 is passed on the stack.
6133 int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
6134 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
6135 return DAG.getLoad(MVT::f64, DL, Chain, FIN,
6136 MachinePointerInfo::getFixedStack(MF, FI));
6137 }
6138
6139 assert(VA.isRegLoc() && "Expected register VA assignment");
6140
6141 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
6142 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
6143 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
6144 SDValue Hi;
6145 if (VA.getLocReg() == RISCV::X17) {
6146 // Second half of f64 is passed on the stack.
6147 int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
6148 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
6149 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
6150 MachinePointerInfo::getFixedStack(MF, FI));
6151 } else {
6152 // Second half of f64 is passed in another GPR.
6153 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
6154 RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
6155 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
6156 }
6157 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
6158 }
6159
6160 // FastCC has less than 1% performance improvement on some particular
6161 // benchmarks, but theoretically it may benefit some cases.
6162 static bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
6163 CCValAssign::LocInfo LocInfo,
6164 ISD::ArgFlagsTy ArgFlags, CCState &State) {
6165
6166 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
6167 // X5 and X6 might be used for the save-restore libcalls.
6168 static const MCPhysReg GPRList[] = { 6169 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, 6170 RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28, 6171 RISCV::X29, RISCV::X30, RISCV::X31}; 6172 if (unsigned Reg = State.AllocateReg(GPRList)) { 6173 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 6174 return false; 6175 } 6176 } 6177 6178 if (LocVT == MVT::f16) { 6179 static const MCPhysReg FPR16List[] = { 6180 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H, 6181 RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H, 6182 RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H, 6183 RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H}; 6184 if (unsigned Reg = State.AllocateReg(FPR16List)) { 6185 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 6186 return false; 6187 } 6188 } 6189 6190 if (LocVT == MVT::f32) { 6191 static const MCPhysReg FPR32List[] = { 6192 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F, 6193 RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F, 6194 RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F, 6195 RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F}; 6196 if (unsigned Reg = State.AllocateReg(FPR32List)) { 6197 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 6198 return false; 6199 } 6200 } 6201 6202 if (LocVT == MVT::f64) { 6203 static const MCPhysReg FPR64List[] = { 6204 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D, 6205 RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D, 6206 RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D, 6207 RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D}; 6208 if (unsigned Reg = State.AllocateReg(FPR64List)) { 6209 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 6210 return false; 6211 } 6212 } 6213 6214 if (LocVT == MVT::i32 || LocVT == MVT::f32) { 6215 unsigned Offset4 = State.AllocateStack(4, Align(4)); 6216 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo)); 6217 return false; 6218 } 6219 6220 if (LocVT == MVT::i64 || LocVT == MVT::f64) { 6221 unsigned Offset5 = State.AllocateStack(8, Align(8)); 6222 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo)); 6223 return false; 6224 } 6225 6226 return true; // CC didn't match. 6227 } 6228 6229 static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, 6230 CCValAssign::LocInfo LocInfo, 6231 ISD::ArgFlagsTy ArgFlags, CCState &State) { 6232 6233 if (LocVT == MVT::i32 || LocVT == MVT::i64) { 6234 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim 6235 // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11 6236 static const MCPhysReg GPRList[] = { 6237 RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22, 6238 RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27}; 6239 if (unsigned Reg = State.AllocateReg(GPRList)) { 6240 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 6241 return false; 6242 } 6243 } 6244 6245 if (LocVT == MVT::f32) { 6246 // Pass in STG registers: F1, ..., F6 6247 // fs0 ... 
fs5
6248 static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
6249 RISCV::F18_F, RISCV::F19_F,
6250 RISCV::F20_F, RISCV::F21_F};
6251 if (unsigned Reg = State.AllocateReg(FPR32List)) {
6252 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
6253 return false;
6254 }
6255 }
6256
6257 if (LocVT == MVT::f64) {
6258 // Pass in STG registers: D1, ..., D6
6259 // fs6 ... fs11
6260 static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
6261 RISCV::F24_D, RISCV::F25_D,
6262 RISCV::F26_D, RISCV::F27_D};
6263 if (unsigned Reg = State.AllocateReg(FPR64List)) {
6264 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
6265 return false;
6266 }
6267 }
6268
6269 report_fatal_error("No registers left in GHC calling convention");
6270 return true;
6271 }
6272
6273 // Transform physical registers into virtual registers.
6274 SDValue RISCVTargetLowering::LowerFormalArguments(
6275 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
6276 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
6277 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
6278
6279 MachineFunction &MF = DAG.getMachineFunction();
6280
6281 switch (CallConv) {
6282 default:
6283 report_fatal_error("Unsupported calling convention");
6284 case CallingConv::C:
6285 case CallingConv::Fast:
6286 break;
6287 case CallingConv::GHC:
6288 if (!MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtF] ||
6289 !MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtD])
6290 report_fatal_error(
6291 "GHC calling convention requires the F and D instruction set extensions");
6292 }
6293
6294 const Function &Func = MF.getFunction();
6295 if (Func.hasFnAttribute("interrupt")) {
6296 if (!Func.arg_empty())
6297 report_fatal_error(
6298 "Functions with the interrupt attribute cannot have arguments!");
6299
6300 StringRef Kind =
6301 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
6302
6303 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
6304 report_fatal_error(
6305 "Function interrupt attribute argument not supported!");
6306 }
6307
6308 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6309 MVT XLenVT = Subtarget.getXLenVT();
6310 unsigned XLenInBytes = Subtarget.getXLen() / 8;
6311 // Used with varargs to accumulate store chains.
6312 std::vector<SDValue> OutChains;
6313
6314 // Assign locations to all of the incoming arguments.
6315 SmallVector<CCValAssign, 16> ArgLocs;
6316 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
6317
6318 if (CallConv == CallingConv::Fast)
6319 CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_FastCC);
6320 else if (CallConv == CallingConv::GHC)
6321 CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
6322 else
6323 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);
6324
6325 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
6326 CCValAssign &VA = ArgLocs[i];
6327 SDValue ArgValue;
6328 // Passing f64 on RV32D with a soft float ABI must be handled as a special
6329 // case.
6330 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
6331 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
6332 else if (VA.isRegLoc())
6333 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
6334 else
6335 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
6336
6337 if (VA.getLocInfo() == CCValAssign::Indirect) {
6338 // If the original argument was split and passed by reference (e.g. i128
6339 // on RV32), we need to load all parts of it here (using the same
6340 // address).
Vectors may be partly split to registers and partly to the
6341 // stack, in which case the base address is partly offset and subsequent
6342 // stores are relative to that.
6343 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
6344 MachinePointerInfo()));
6345 unsigned ArgIndex = Ins[i].OrigArgIndex;
6346 unsigned ArgPartOffset = Ins[i].PartOffset;
6347 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
6348 while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
6349 CCValAssign &PartVA = ArgLocs[i + 1];
6350 unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
6351 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
6352 DAG.getIntPtrConstant(PartOffset, DL));
6353 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
6354 MachinePointerInfo()));
6355 ++i;
6356 }
6357 continue;
6358 }
6359 InVals.push_back(ArgValue);
6360 }
6361
6362 if (IsVarArg) {
6363 ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
6364 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
6365 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
6366 MachineFrameInfo &MFI = MF.getFrameInfo();
6367 MachineRegisterInfo &RegInfo = MF.getRegInfo();
6368 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
6369
6370 // Offset of the first variable argument from stack pointer, and size of
6371 // the vararg save area. For now, the varargs save area is either zero or
6372 // large enough to hold a0-a7.
6373 int VaArgOffset, VarArgsSaveSize;
6374
6375 // If all registers are allocated, then all varargs must be passed on the
6376 // stack and we don't need to save any argregs.
6377 if (ArgRegs.size() == Idx) {
6378 VaArgOffset = CCInfo.getNextStackOffset();
6379 VarArgsSaveSize = 0;
6380 } else {
6381 VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
6382 VaArgOffset = -VarArgsSaveSize;
6383 }
6384
6385 // Record the frame index of the first variable argument,
6386 // which is needed by VASTART.
6387 int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
6388 RVFI->setVarArgsFrameIndex(FI);
6389
6390 // If saving an odd number of registers, create an extra stack slot to
6391 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
6392 // offsets to even-numbered registers remain 2*XLEN-aligned.
6393 if (Idx % 2) {
6394 MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
6395 VarArgsSaveSize += XLenInBytes;
6396 }
6397
6398 // Copy the integer registers that may have been used for passing varargs
6399 // to the vararg save area.
6400 for (unsigned I = Idx; I < ArgRegs.size();
6401 ++I, VaArgOffset += XLenInBytes) {
6402 const Register Reg = RegInfo.createVirtualRegister(RC);
6403 RegInfo.addLiveIn(ArgRegs[I], Reg);
6404 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
6405 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
6406 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
6407 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
6408 MachinePointerInfo::getFixedStack(MF, FI));
6409 cast<StoreSDNode>(Store.getNode())
6410 ->getMemOperand()
6411 ->setValue((Value *)nullptr);
6412 OutChains.push_back(Store);
6413 }
6414 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
6415 }
6416
6417 // All stores are grouped in one node to allow the matching between
6418 // the size of Ins and InVals. This only happens for vararg functions.
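// For example, for 'int f(int a, ...)' on RV32: a0 holds the fixed argument,
// so Idx == 1 and a1-a7 (28 bytes) are copied to the save area above; since
// Idx is odd, a 4-byte pad grows VarArgsSaveSize to 32 to keep the area
// 2*XLEN-aligned, and the seven register stores are chained through the
// TokenFactor below.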
6419 if (!OutChains.empty()) {
6420 OutChains.push_back(Chain);
6421 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
6422 }
6423
6424 return Chain;
6425 }
6426
6427 /// isEligibleForTailCallOptimization - Check whether the call is eligible
6428 /// for tail call optimization.
6429 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
6430 bool RISCVTargetLowering::isEligibleForTailCallOptimization(
6431 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
6432 const SmallVector<CCValAssign, 16> &ArgLocs) const {
6433
6434 auto &Callee = CLI.Callee;
6435 auto CalleeCC = CLI.CallConv;
6436 auto &Outs = CLI.Outs;
6437 auto &Caller = MF.getFunction();
6438 auto CallerCC = Caller.getCallingConv();
6439
6440 // Exception-handling functions need a special set of instructions to
6441 // indicate a return to the hardware. Tail-calling another function would
6442 // probably break this.
6443 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
6444 // should be expanded as new function attributes are introduced.
6445 if (Caller.hasFnAttribute("interrupt"))
6446 return false;
6447
6448 // Do not tail call opt if the stack is used to pass parameters.
6449 if (CCInfo.getNextStackOffset() != 0)
6450 return false;
6451
6452 // Do not tail call opt if any parameters need to be passed indirectly.
6453 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
6454 // passed indirectly. So the address of the value will be passed in a
6455 // register, or if not available, then the address is put on the stack. To
6456 // pass a value indirectly, stack space often needs to be allocated to
6457 // store it. In this case the CCInfo.getNextStackOffset() != 0 check is
6458 // not enough, and we also need to check whether any CCValAssign in
6459 // ArgLocs is passed as CCValAssign::Indirect.
6460 for (auto &VA : ArgLocs)
6461 if (VA.getLocInfo() == CCValAssign::Indirect)
6462 return false;
6463
6464 // Do not tail call opt if either caller or callee uses struct return
6465 // semantics.
6466 auto IsCallerStructRet = Caller.hasStructRetAttr();
6467 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
6468 if (IsCallerStructRet || IsCalleeStructRet)
6469 return false;
6470
6471 // Externally-defined functions with weak linkage should not be
6472 // tail-called. The behaviour of branch instructions in this situation (as
6473 // used for tail calls) is implementation-defined, so we cannot rely on the
6474 // linker replacing the tail call with a return.
6475 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
6476 const GlobalValue *GV = G->getGlobal();
6477 if (GV->hasExternalWeakLinkage())
6478 return false;
6479 }
6480
6481 // The callee has to preserve all registers the caller needs to preserve.
6482 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
6483 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
6484 if (CalleeCC != CallerCC) {
6485 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
6486 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
6487 return false;
6488 }
6489
6490 // Byval parameters hand the function a pointer directly into the stack area
6491 // we want to reuse during a tail call. Working around this *is* possible
6492 // but less efficient and uglier in LowerCall.
6493 for (auto &Arg : Outs) 6494 if (Arg.Flags.isByVal()) 6495 return false; 6496 6497 return true; 6498 } 6499 6500 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input 6501 // and output parameter nodes. 6502 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, 6503 SmallVectorImpl<SDValue> &InVals) const { 6504 SelectionDAG &DAG = CLI.DAG; 6505 SDLoc &DL = CLI.DL; 6506 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 6507 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 6508 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 6509 SDValue Chain = CLI.Chain; 6510 SDValue Callee = CLI.Callee; 6511 bool &IsTailCall = CLI.IsTailCall; 6512 CallingConv::ID CallConv = CLI.CallConv; 6513 bool IsVarArg = CLI.IsVarArg; 6514 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 6515 MVT XLenVT = Subtarget.getXLenVT(); 6516 6517 MachineFunction &MF = DAG.getMachineFunction(); 6518 6519 // Analyze the operands of the call, assigning locations to each operand. 6520 SmallVector<CCValAssign, 16> ArgLocs; 6521 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 6522 6523 if (CallConv == CallingConv::Fast) 6524 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC); 6525 else if (CallConv == CallingConv::GHC) 6526 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC); 6527 else 6528 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI); 6529 6530 // Check if it's really possible to do a tail call. 6531 if (IsTailCall) 6532 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs); 6533 6534 if (IsTailCall) 6535 ++NumTailCalls; 6536 else if (CLI.CB && CLI.CB->isMustTailCall()) 6537 report_fatal_error("failed to perform tail call elimination on a call " 6538 "site marked musttail"); 6539 6540 // Get a count of how many bytes are to be pushed on the stack. 6541 unsigned NumBytes = ArgCCInfo.getNextStackOffset(); 6542 6543 // Create local copies for byval args 6544 SmallVector<SDValue, 8> ByValArgs; 6545 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 6546 ISD::ArgFlagsTy Flags = Outs[i].Flags; 6547 if (!Flags.isByVal()) 6548 continue; 6549 6550 SDValue Arg = OutVals[i]; 6551 unsigned Size = Flags.getByValSize(); 6552 Align Alignment = Flags.getNonZeroByValAlign(); 6553 6554 int FI = 6555 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false); 6556 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 6557 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT); 6558 6559 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment, 6560 /*IsVolatile=*/false, 6561 /*AlwaysInline=*/false, IsTailCall, 6562 MachinePointerInfo(), MachinePointerInfo()); 6563 ByValArgs.push_back(FIPtr); 6564 } 6565 6566 if (!IsTailCall) 6567 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); 6568 6569 // Copy argument values to their designated locations. 6570 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass; 6571 SmallVector<SDValue, 8> MemOpChains; 6572 SDValue StackPtr; 6573 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) { 6574 CCValAssign &VA = ArgLocs[i]; 6575 SDValue ArgValue = OutVals[i]; 6576 ISD::ArgFlagsTy Flags = Outs[i].Flags; 6577 6578 // Handle passing f64 on RV32D with a soft float ABI as a special case. 
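// For example, on ILP32 the f64 below is split via RISCVISD::SplitF64 and its
// two i32 halves are queued as consecutive GPRs (e.g. lo in a0, hi in a1);
// only when the low half was assigned a7 does the high half get stored at
// the bottom of the outgoing argument area instead.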
6579 bool IsF64OnRV32DSoftABI = 6580 VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64; 6581 if (IsF64OnRV32DSoftABI && VA.isRegLoc()) { 6582 SDValue SplitF64 = DAG.getNode( 6583 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue); 6584 SDValue Lo = SplitF64.getValue(0); 6585 SDValue Hi = SplitF64.getValue(1); 6586 6587 Register RegLo = VA.getLocReg(); 6588 RegsToPass.push_back(std::make_pair(RegLo, Lo)); 6589 6590 if (RegLo == RISCV::X17) { 6591 // Second half of f64 is passed on the stack. 6592 // Work out the address of the stack slot. 6593 if (!StackPtr.getNode()) 6594 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); 6595 // Emit the store. 6596 MemOpChains.push_back( 6597 DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo())); 6598 } else { 6599 // Second half of f64 is passed in another GPR. 6600 assert(RegLo < RISCV::X31 && "Invalid register pair"); 6601 Register RegHigh = RegLo + 1; 6602 RegsToPass.push_back(std::make_pair(RegHigh, Hi)); 6603 } 6604 continue; 6605 } 6606 6607 // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way 6608 // as any other MemLoc. 6609 6610 // Promote the value if needed. 6611 // For now, only handle fully promoted and indirect arguments. 6612 if (VA.getLocInfo() == CCValAssign::Indirect) { 6613 // Store the argument in a stack slot and pass its address. 6614 SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT); 6615 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); 6616 MemOpChains.push_back( 6617 DAG.getStore(Chain, DL, ArgValue, SpillSlot, 6618 MachinePointerInfo::getFixedStack(MF, FI))); 6619 // If the original argument was split (e.g. i128), we need 6620 // to store the required parts of it here (and pass just one address). 6621 // Vectors may be partly split to registers and partly to the stack, in 6622 // which case the base address is partly offset and subsequent stores are 6623 // relative to that. 6624 unsigned ArgIndex = Outs[i].OrigArgIndex; 6625 unsigned ArgPartOffset = Outs[i].PartOffset; 6626 assert(VA.getValVT().isVector() || ArgPartOffset == 0); 6627 while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) { 6628 SDValue PartValue = OutVals[i + 1]; 6629 unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset; 6630 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, 6631 DAG.getIntPtrConstant(PartOffset, DL)); 6632 MemOpChains.push_back( 6633 DAG.getStore(Chain, DL, PartValue, Address, 6634 MachinePointerInfo::getFixedStack(MF, FI))); 6635 ++i; 6636 } 6637 ArgValue = SpillSlot; 6638 } else { 6639 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget); 6640 } 6641 6642 // Use local copy if it is a byval arg. 6643 if (Flags.isByVal()) 6644 ArgValue = ByValArgs[j++]; 6645 6646 if (VA.isRegLoc()) { 6647 // Queue up the argument copies and emit them at the end. 6648 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); 6649 } else { 6650 assert(VA.isMemLoc() && "Argument not register or memory"); 6651 assert(!IsTailCall && "Tail call not allowed if stack is used " 6652 "for passing parameters"); 6653 6654 // Work out the address of the stack slot. 6655 if (!StackPtr.getNode()) 6656 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); 6657 SDValue Address = 6658 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, 6659 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL)); 6660 6661 // Emit the store. 
6662 MemOpChains.push_back(
6663 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
6664 }
6665 }
6666
6667 // Join the stores, which are independent of one another.
6668 if (!MemOpChains.empty())
6669 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
6670
6671 SDValue Glue;
6672
6673 // Build a sequence of copy-to-reg nodes, chained and glued together.
6674 for (auto &Reg : RegsToPass) {
6675 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
6676 Glue = Chain.getValue(1);
6677 }
6678
6679 // Validate that none of the argument registers have been marked as
6680 // reserved; if any have, report an error. Do the same for the return
6681 // address if this is not a tail call.
6682 validateCCReservedRegs(RegsToPass, MF);
6683 if (!IsTailCall &&
6684 MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
6685 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
6686 MF.getFunction(),
6687 "Return address register required, but has been reserved."});
6688
6689 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
6690 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
6691 // split it, and so the direct call can be matched by PseudoCALL.
6692 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
6693 const GlobalValue *GV = S->getGlobal();
6694
6695 unsigned OpFlags = RISCVII::MO_CALL;
6696 if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
6697 OpFlags = RISCVII::MO_PLT;
6698
6699 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
6700 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
6701 unsigned OpFlags = RISCVII::MO_CALL;
6702
6703 if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
6704 nullptr))
6705 OpFlags = RISCVII::MO_PLT;
6706
6707 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
6708 }
6709
6710 // The first call operand is the chain and the second is the target address.
6711 SmallVector<SDValue, 8> Ops;
6712 Ops.push_back(Chain);
6713 Ops.push_back(Callee);
6714
6715 // Add argument registers to the end of the list so that they are
6716 // known live into the call.
6717 for (auto &Reg : RegsToPass)
6718 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
6719
6720 if (!IsTailCall) {
6721 // Add a register mask operand representing the call-preserved registers.
6722 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
6723 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
6724 assert(Mask && "Missing call preserved mask for calling convention");
6725 Ops.push_back(DAG.getRegisterMask(Mask));
6726 }
6727
6728 // Glue the call to the argument copies, if any.
6729 if (Glue.getNode())
6730 Ops.push_back(Glue);
6731
6732 // Emit the call.
6733 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
6734
6735 if (IsTailCall) {
6736 MF.getFrameInfo().setHasTailCall();
6737 return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
6738 }
6739
6740 Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
6741 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
6742 Glue = Chain.getValue(1);
6743
6744 // Mark the end of the call, which is glued to the call itself.
6745 Chain = DAG.getCALLSEQ_END(Chain,
6746 DAG.getConstant(NumBytes, DL, PtrVT, true),
6747 DAG.getConstant(0, DL, PtrVT, true),
6748 Glue, DL);
6749 Glue = Chain.getValue(1);
6750
6751 // Assign locations to each value returned by this call.
6752 SmallVector<CCValAssign, 16> RVLocs; 6753 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); 6754 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true); 6755 6756 // Copy all of the result registers out of their specified physreg. 6757 for (auto &VA : RVLocs) { 6758 // Copy the value out 6759 SDValue RetValue = 6760 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue); 6761 // Glue the RetValue to the end of the call sequence 6762 Chain = RetValue.getValue(1); 6763 Glue = RetValue.getValue(2); 6764 6765 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { 6766 assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment"); 6767 SDValue RetValue2 = 6768 DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue); 6769 Chain = RetValue2.getValue(1); 6770 Glue = RetValue2.getValue(2); 6771 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue, 6772 RetValue2); 6773 } 6774 6775 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget); 6776 6777 InVals.push_back(RetValue); 6778 } 6779 6780 return Chain; 6781 } 6782 6783 bool RISCVTargetLowering::CanLowerReturn( 6784 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, 6785 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { 6786 SmallVector<CCValAssign, 16> RVLocs; 6787 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); 6788 6789 Optional<unsigned> FirstMaskArgument; 6790 if (Subtarget.hasStdExtV()) 6791 FirstMaskArgument = preAssignMask(Outs); 6792 6793 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 6794 MVT VT = Outs[i].VT; 6795 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 6796 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 6797 if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full, 6798 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr, 6799 *this, FirstMaskArgument)) 6800 return false; 6801 } 6802 return true; 6803 } 6804 6805 SDValue 6806 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, 6807 bool IsVarArg, 6808 const SmallVectorImpl<ISD::OutputArg> &Outs, 6809 const SmallVectorImpl<SDValue> &OutVals, 6810 const SDLoc &DL, SelectionDAG &DAG) const { 6811 const MachineFunction &MF = DAG.getMachineFunction(); 6812 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>(); 6813 6814 // Stores the assignment of the return value to a location. 6815 SmallVector<CCValAssign, 16> RVLocs; 6816 6817 // Info about the registers and stack slot. 6818 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, 6819 *DAG.getContext()); 6820 6821 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true, 6822 nullptr); 6823 6824 if (CallConv == CallingConv::GHC && !RVLocs.empty()) 6825 report_fatal_error("GHC functions return void only"); 6826 6827 SDValue Glue; 6828 SmallVector<SDValue, 4> RetOps(1, Chain); 6829 6830 // Copy the result values into the output registers. 6831 for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) { 6832 SDValue Val = OutVals[i]; 6833 CCValAssign &VA = RVLocs[i]; 6834 assert(VA.isRegLoc() && "Can only return in registers!"); 6835 6836 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { 6837 // Handle returning f64 on RV32D with a soft float ABI. 
6838 assert(VA.isRegLoc() && "Expected return via registers"); 6839 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL, 6840 DAG.getVTList(MVT::i32, MVT::i32), Val); 6841 SDValue Lo = SplitF64.getValue(0); 6842 SDValue Hi = SplitF64.getValue(1); 6843 Register RegLo = VA.getLocReg(); 6844 assert(RegLo < RISCV::X31 && "Invalid register pair"); 6845 Register RegHi = RegLo + 1; 6846 6847 if (STI.isRegisterReservedByUser(RegLo) || 6848 STI.isRegisterReservedByUser(RegHi)) 6849 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 6850 MF.getFunction(), 6851 "Return value register required, but has been reserved."}); 6852 6853 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue); 6854 Glue = Chain.getValue(1); 6855 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32)); 6856 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue); 6857 Glue = Chain.getValue(1); 6858 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32)); 6859 } else { 6860 // Handle a 'normal' return. 6861 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget); 6862 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue); 6863 6864 if (STI.isRegisterReservedByUser(VA.getLocReg())) 6865 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 6866 MF.getFunction(), 6867 "Return value register required, but has been reserved."}); 6868 6869 // Guarantee that all emitted copies are stuck together. 6870 Glue = Chain.getValue(1); 6871 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 6872 } 6873 } 6874 6875 RetOps[0] = Chain; // Update chain. 6876 6877 // Add the glue node if we have it. 6878 if (Glue.getNode()) { 6879 RetOps.push_back(Glue); 6880 } 6881 6882 // Interrupt service routines use different return instructions. 6883 const Function &Func = DAG.getMachineFunction().getFunction(); 6884 if (Func.hasFnAttribute("interrupt")) { 6885 if (!Func.getReturnType()->isVoidTy()) 6886 report_fatal_error( 6887 "Functions with the interrupt attribute must have void return type!"); 6888 6889 MachineFunction &MF = DAG.getMachineFunction(); 6890 StringRef Kind = 6891 MF.getFunction().getFnAttribute("interrupt").getValueAsString(); 6892 6893 unsigned RetOpc; 6894 if (Kind == "user") 6895 RetOpc = RISCVISD::URET_FLAG; 6896 else if (Kind == "supervisor") 6897 RetOpc = RISCVISD::SRET_FLAG; 6898 else 6899 RetOpc = RISCVISD::MRET_FLAG; 6900 6901 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps); 6902 } 6903 6904 return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps); 6905 } 6906 6907 void RISCVTargetLowering::validateCCReservedRegs( 6908 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs, 6909 MachineFunction &MF) const { 6910 const Function &F = MF.getFunction(); 6911 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>(); 6912 6913 if (llvm::any_of(Regs, [&STI](auto Reg) { 6914 return STI.isRegisterReservedByUser(Reg.first); 6915 })) 6916 F.getContext().diagnose(DiagnosticInfoUnsupported{ 6917 F, "Argument register required, but has been reserved."}); 6918 } 6919 6920 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { 6921 return CI->isTailCall(); 6922 } 6923 6924 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { 6925 #define NODE_NAME_CASE(NODE) \ 6926 case RISCVISD::NODE: \ 6927 return "RISCVISD::" #NODE; 6928 // clang-format off 6929 switch ((RISCVISD::NodeType)Opcode) { 6930 case RISCVISD::FIRST_NUMBER: 6931 break; 6932 NODE_NAME_CASE(RET_FLAG) 6933 NODE_NAME_CASE(URET_FLAG) 6934 NODE_NAME_CASE(SRET_FLAG) 6935 
NODE_NAME_CASE(MRET_FLAG) 6936 NODE_NAME_CASE(CALL) 6937 NODE_NAME_CASE(SELECT_CC) 6938 NODE_NAME_CASE(BR_CC) 6939 NODE_NAME_CASE(BuildPairF64) 6940 NODE_NAME_CASE(SplitF64) 6941 NODE_NAME_CASE(TAIL) 6942 NODE_NAME_CASE(MULHSU) 6943 NODE_NAME_CASE(SLLW) 6944 NODE_NAME_CASE(SRAW) 6945 NODE_NAME_CASE(SRLW) 6946 NODE_NAME_CASE(DIVW) 6947 NODE_NAME_CASE(DIVUW) 6948 NODE_NAME_CASE(REMUW) 6949 NODE_NAME_CASE(ROLW) 6950 NODE_NAME_CASE(RORW) 6951 NODE_NAME_CASE(CLZW) 6952 NODE_NAME_CASE(CTZW) 6953 NODE_NAME_CASE(FSLW) 6954 NODE_NAME_CASE(FSRW) 6955 NODE_NAME_CASE(FSL) 6956 NODE_NAME_CASE(FSR) 6957 NODE_NAME_CASE(FMV_H_X) 6958 NODE_NAME_CASE(FMV_X_ANYEXTH) 6959 NODE_NAME_CASE(FMV_W_X_RV64) 6960 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64) 6961 NODE_NAME_CASE(READ_CYCLE_WIDE) 6962 NODE_NAME_CASE(GREVI) 6963 NODE_NAME_CASE(GREVIW) 6964 NODE_NAME_CASE(GORCI) 6965 NODE_NAME_CASE(GORCIW) 6966 NODE_NAME_CASE(SHFLI) 6967 NODE_NAME_CASE(VMV_V_X_VL) 6968 NODE_NAME_CASE(VFMV_V_F_VL) 6969 NODE_NAME_CASE(VMV_X_S) 6970 NODE_NAME_CASE(VMV_S_X_VL) 6971 NODE_NAME_CASE(VFMV_S_F_VL) 6972 NODE_NAME_CASE(SPLAT_VECTOR_I64) 6973 NODE_NAME_CASE(READ_VLENB) 6974 NODE_NAME_CASE(TRUNCATE_VECTOR_VL) 6975 NODE_NAME_CASE(VLEFF) 6976 NODE_NAME_CASE(VLEFF_MASK) 6977 NODE_NAME_CASE(VSLIDEUP_VL) 6978 NODE_NAME_CASE(VSLIDE1UP_VL) 6979 NODE_NAME_CASE(VSLIDEDOWN_VL) 6980 NODE_NAME_CASE(VID_VL) 6981 NODE_NAME_CASE(VFNCVT_ROD_VL) 6982 NODE_NAME_CASE(VECREDUCE_ADD_VL) 6983 NODE_NAME_CASE(VECREDUCE_UMAX_VL) 6984 NODE_NAME_CASE(VECREDUCE_SMAX_VL) 6985 NODE_NAME_CASE(VECREDUCE_UMIN_VL) 6986 NODE_NAME_CASE(VECREDUCE_SMIN_VL) 6987 NODE_NAME_CASE(VECREDUCE_AND_VL) 6988 NODE_NAME_CASE(VECREDUCE_OR_VL) 6989 NODE_NAME_CASE(VECREDUCE_XOR_VL) 6990 NODE_NAME_CASE(VECREDUCE_FADD_VL) 6991 NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL) 6992 NODE_NAME_CASE(ADD_VL) 6993 NODE_NAME_CASE(AND_VL) 6994 NODE_NAME_CASE(MUL_VL) 6995 NODE_NAME_CASE(OR_VL) 6996 NODE_NAME_CASE(SDIV_VL) 6997 NODE_NAME_CASE(SHL_VL) 6998 NODE_NAME_CASE(SREM_VL) 6999 NODE_NAME_CASE(SRA_VL) 7000 NODE_NAME_CASE(SRL_VL) 7001 NODE_NAME_CASE(SUB_VL) 7002 NODE_NAME_CASE(UDIV_VL) 7003 NODE_NAME_CASE(UREM_VL) 7004 NODE_NAME_CASE(XOR_VL) 7005 NODE_NAME_CASE(FADD_VL) 7006 NODE_NAME_CASE(FSUB_VL) 7007 NODE_NAME_CASE(FMUL_VL) 7008 NODE_NAME_CASE(FDIV_VL) 7009 NODE_NAME_CASE(FNEG_VL) 7010 NODE_NAME_CASE(FABS_VL) 7011 NODE_NAME_CASE(FSQRT_VL) 7012 NODE_NAME_CASE(FMA_VL) 7013 NODE_NAME_CASE(FCOPYSIGN_VL) 7014 NODE_NAME_CASE(SMIN_VL) 7015 NODE_NAME_CASE(SMAX_VL) 7016 NODE_NAME_CASE(UMIN_VL) 7017 NODE_NAME_CASE(UMAX_VL) 7018 NODE_NAME_CASE(MULHS_VL) 7019 NODE_NAME_CASE(MULHU_VL) 7020 NODE_NAME_CASE(FP_TO_SINT_VL) 7021 NODE_NAME_CASE(FP_TO_UINT_VL) 7022 NODE_NAME_CASE(SINT_TO_FP_VL) 7023 NODE_NAME_CASE(UINT_TO_FP_VL) 7024 NODE_NAME_CASE(FP_EXTEND_VL) 7025 NODE_NAME_CASE(FP_ROUND_VL) 7026 NODE_NAME_CASE(SETCC_VL) 7027 NODE_NAME_CASE(VSELECT_VL) 7028 NODE_NAME_CASE(VMAND_VL) 7029 NODE_NAME_CASE(VMOR_VL) 7030 NODE_NAME_CASE(VMXOR_VL) 7031 NODE_NAME_CASE(VMCLR_VL) 7032 NODE_NAME_CASE(VMSET_VL) 7033 NODE_NAME_CASE(VRGATHER_VX_VL) 7034 NODE_NAME_CASE(VRGATHER_VV_VL) 7035 NODE_NAME_CASE(VRGATHEREI16_VV_VL) 7036 NODE_NAME_CASE(VSEXT_VL) 7037 NODE_NAME_CASE(VZEXT_VL) 7038 NODE_NAME_CASE(VLE_VL) 7039 NODE_NAME_CASE(VSE_VL) 7040 } 7041 // clang-format on 7042 return nullptr; 7043 #undef NODE_NAME_CASE 7044 } 7045 7046 /// getConstraintType - Given a constraint letter, return the type of 7047 /// constraint it is for this target. 
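/// For example, in C inline assembly:
///   asm volatile("addi %0, %1, %2" : "=r"(res) : "r"(src), "I"(42));
/// 'r' selects a GPR via the base class, while 'I' is classified below as an
/// immediate constraint and later validated as a 12-bit signed value.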
7048 RISCVTargetLowering::ConstraintType 7049 RISCVTargetLowering::getConstraintType(StringRef Constraint) const { 7050 if (Constraint.size() == 1) { 7051 switch (Constraint[0]) { 7052 default: 7053 break; 7054 case 'f': 7055 case 'v': 7056 return C_RegisterClass; 7057 case 'I': 7058 case 'J': 7059 case 'K': 7060 return C_Immediate; 7061 case 'A': 7062 return C_Memory; 7063 } 7064 } 7065 return TargetLowering::getConstraintType(Constraint); 7066 } 7067 7068 std::pair<unsigned, const TargetRegisterClass *> 7069 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, 7070 StringRef Constraint, 7071 MVT VT) const { 7072 // First, see if this is a constraint that directly corresponds to a 7073 // RISCV register class. 7074 if (Constraint.size() == 1) { 7075 switch (Constraint[0]) { 7076 case 'r': 7077 return std::make_pair(0U, &RISCV::GPRRegClass); 7078 case 'f': 7079 if (Subtarget.hasStdExtZfh() && VT == MVT::f16) 7080 return std::make_pair(0U, &RISCV::FPR16RegClass); 7081 if (Subtarget.hasStdExtF() && VT == MVT::f32) 7082 return std::make_pair(0U, &RISCV::FPR32RegClass); 7083 if (Subtarget.hasStdExtD() && VT == MVT::f64) 7084 return std::make_pair(0U, &RISCV::FPR64RegClass); 7085 break; 7086 case 'v': 7087 for (const auto *RC : 7088 {&RISCV::VMRegClass, &RISCV::VRRegClass, &RISCV::VRM2RegClass, 7089 &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) { 7090 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) 7091 return std::make_pair(0U, RC); 7092 } 7093 break; 7094 default: 7095 break; 7096 } 7097 } 7098 7099 // Clang will correctly decode the usage of register name aliases into their 7100 // official names. However, other frontends like `rustc` do not. This allows 7101 // users of these frontends to use the ABI names for registers in LLVM-style 7102 // register constraints. 7103 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower()) 7104 .Case("{zero}", RISCV::X0) 7105 .Case("{ra}", RISCV::X1) 7106 .Case("{sp}", RISCV::X2) 7107 .Case("{gp}", RISCV::X3) 7108 .Case("{tp}", RISCV::X4) 7109 .Case("{t0}", RISCV::X5) 7110 .Case("{t1}", RISCV::X6) 7111 .Case("{t2}", RISCV::X7) 7112 .Cases("{s0}", "{fp}", RISCV::X8) 7113 .Case("{s1}", RISCV::X9) 7114 .Case("{a0}", RISCV::X10) 7115 .Case("{a1}", RISCV::X11) 7116 .Case("{a2}", RISCV::X12) 7117 .Case("{a3}", RISCV::X13) 7118 .Case("{a4}", RISCV::X14) 7119 .Case("{a5}", RISCV::X15) 7120 .Case("{a6}", RISCV::X16) 7121 .Case("{a7}", RISCV::X17) 7122 .Case("{s2}", RISCV::X18) 7123 .Case("{s3}", RISCV::X19) 7124 .Case("{s4}", RISCV::X20) 7125 .Case("{s5}", RISCV::X21) 7126 .Case("{s6}", RISCV::X22) 7127 .Case("{s7}", RISCV::X23) 7128 .Case("{s8}", RISCV::X24) 7129 .Case("{s9}", RISCV::X25) 7130 .Case("{s10}", RISCV::X26) 7131 .Case("{s11}", RISCV::X27) 7132 .Case("{t3}", RISCV::X28) 7133 .Case("{t4}", RISCV::X29) 7134 .Case("{t5}", RISCV::X30) 7135 .Case("{t6}", RISCV::X31) 7136 .Default(RISCV::NoRegister); 7137 if (XRegFromAlias != RISCV::NoRegister) 7138 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass); 7139 7140 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the 7141 // TableGen record rather than the AsmName to choose registers for InlineAsm 7142 // constraints, plus we want to match those names to the widest floating point 7143 // register type available, manually select floating point registers here. 7144 // 7145 // The second case is the ABI name of the register, so that frontends can also 7146 // use the ABI names in register constraint lists. 
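// For example, the constraint string "{a0}" resolves to X10 through the
// StringSwitch above, while "{fa0}" resolves below to F10_F, or to F10_D
// when the D extension is available.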
7147 if (Subtarget.hasStdExtF()) { 7148 unsigned FReg = StringSwitch<unsigned>(Constraint.lower()) 7149 .Cases("{f0}", "{ft0}", RISCV::F0_F) 7150 .Cases("{f1}", "{ft1}", RISCV::F1_F) 7151 .Cases("{f2}", "{ft2}", RISCV::F2_F) 7152 .Cases("{f3}", "{ft3}", RISCV::F3_F) 7153 .Cases("{f4}", "{ft4}", RISCV::F4_F) 7154 .Cases("{f5}", "{ft5}", RISCV::F5_F) 7155 .Cases("{f6}", "{ft6}", RISCV::F6_F) 7156 .Cases("{f7}", "{ft7}", RISCV::F7_F) 7157 .Cases("{f8}", "{fs0}", RISCV::F8_F) 7158 .Cases("{f9}", "{fs1}", RISCV::F9_F) 7159 .Cases("{f10}", "{fa0}", RISCV::F10_F) 7160 .Cases("{f11}", "{fa1}", RISCV::F11_F) 7161 .Cases("{f12}", "{fa2}", RISCV::F12_F) 7162 .Cases("{f13}", "{fa3}", RISCV::F13_F) 7163 .Cases("{f14}", "{fa4}", RISCV::F14_F) 7164 .Cases("{f15}", "{fa5}", RISCV::F15_F) 7165 .Cases("{f16}", "{fa6}", RISCV::F16_F) 7166 .Cases("{f17}", "{fa7}", RISCV::F17_F) 7167 .Cases("{f18}", "{fs2}", RISCV::F18_F) 7168 .Cases("{f19}", "{fs3}", RISCV::F19_F) 7169 .Cases("{f20}", "{fs4}", RISCV::F20_F) 7170 .Cases("{f21}", "{fs5}", RISCV::F21_F) 7171 .Cases("{f22}", "{fs6}", RISCV::F22_F) 7172 .Cases("{f23}", "{fs7}", RISCV::F23_F) 7173 .Cases("{f24}", "{fs8}", RISCV::F24_F) 7174 .Cases("{f25}", "{fs9}", RISCV::F25_F) 7175 .Cases("{f26}", "{fs10}", RISCV::F26_F) 7176 .Cases("{f27}", "{fs11}", RISCV::F27_F) 7177 .Cases("{f28}", "{ft8}", RISCV::F28_F) 7178 .Cases("{f29}", "{ft9}", RISCV::F29_F) 7179 .Cases("{f30}", "{ft10}", RISCV::F30_F) 7180 .Cases("{f31}", "{ft11}", RISCV::F31_F) 7181 .Default(RISCV::NoRegister); 7182 if (FReg != RISCV::NoRegister) { 7183 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg"); 7184 if (Subtarget.hasStdExtD()) { 7185 unsigned RegNo = FReg - RISCV::F0_F; 7186 unsigned DReg = RISCV::F0_D + RegNo; 7187 return std::make_pair(DReg, &RISCV::FPR64RegClass); 7188 } 7189 return std::make_pair(FReg, &RISCV::FPR32RegClass); 7190 } 7191 } 7192 7193 if (Subtarget.hasStdExtV()) { 7194 Register VReg = StringSwitch<Register>(Constraint.lower()) 7195 .Case("{v0}", RISCV::V0) 7196 .Case("{v1}", RISCV::V1) 7197 .Case("{v2}", RISCV::V2) 7198 .Case("{v3}", RISCV::V3) 7199 .Case("{v4}", RISCV::V4) 7200 .Case("{v5}", RISCV::V5) 7201 .Case("{v6}", RISCV::V6) 7202 .Case("{v7}", RISCV::V7) 7203 .Case("{v8}", RISCV::V8) 7204 .Case("{v9}", RISCV::V9) 7205 .Case("{v10}", RISCV::V10) 7206 .Case("{v11}", RISCV::V11) 7207 .Case("{v12}", RISCV::V12) 7208 .Case("{v13}", RISCV::V13) 7209 .Case("{v14}", RISCV::V14) 7210 .Case("{v15}", RISCV::V15) 7211 .Case("{v16}", RISCV::V16) 7212 .Case("{v17}", RISCV::V17) 7213 .Case("{v18}", RISCV::V18) 7214 .Case("{v19}", RISCV::V19) 7215 .Case("{v20}", RISCV::V20) 7216 .Case("{v21}", RISCV::V21) 7217 .Case("{v22}", RISCV::V22) 7218 .Case("{v23}", RISCV::V23) 7219 .Case("{v24}", RISCV::V24) 7220 .Case("{v25}", RISCV::V25) 7221 .Case("{v26}", RISCV::V26) 7222 .Case("{v27}", RISCV::V27) 7223 .Case("{v28}", RISCV::V28) 7224 .Case("{v29}", RISCV::V29) 7225 .Case("{v30}", RISCV::V30) 7226 .Case("{v31}", RISCV::V31) 7227 .Default(RISCV::NoRegister); 7228 if (VReg != RISCV::NoRegister) { 7229 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy)) 7230 return std::make_pair(VReg, &RISCV::VMRegClass); 7231 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy)) 7232 return std::make_pair(VReg, &RISCV::VRRegClass); 7233 for (const auto *RC : 7234 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) { 7235 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) { 7236 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC); 7237 return 
    if (VReg != RISCV::NoRegister) {
      if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
        return std::make_pair(VReg, &RISCV::VMRegClass);
      if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
        return std::make_pair(VReg, &RISCV::VRRegClass);
      for (const auto *RC :
           {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
        if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
          VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
          return std::make_pair(VReg, RC);
        }
      }
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

unsigned
RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
  // Currently only support length 1 constraints.
  if (ConstraintCode.size() == 1) {
    switch (ConstraintCode[0]) {
    case 'A':
      return InlineAsm::Constraint_A;
    default:
      break;
    }
  }

  return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}

void RISCVTargetLowering::LowerAsmOperandForConstraint(
    SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
    SelectionDAG &DAG) const {
  // Currently only support length 1 constraints.
  if (Constraint.length() == 1) {
    switch (Constraint[0]) {
    case 'I':
      // Validate & create a 12-bit signed immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getSExtValue();
        if (isInt<12>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
      }
      return;
    case 'J':
      // Validate & create an integer zero operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (C->getZExtValue() == 0)
          Ops.push_back(
              DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
      return;
    case 'K':
      // Validate & create a 5-bit unsigned immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getZExtValue();
        if (isUInt<5>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
      }
      return;
    default:
      break;
    }
  }
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
                                                   Instruction *Inst,
                                                   AtomicOrdering Ord) const {
  if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
    return Builder.CreateFence(Ord);
  if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Release);
  return nullptr;
}

Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
                                                    Instruction *Inst,
                                                    AtomicOrdering Ord) const {
  if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Acquire);
  return nullptr;
}

TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as
  // floating-point operations can't be used in an lr/sc sequence without
  // breaking the forward-progress guarantee.
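  // For example, "atomicrmw fadd float* %p, float %v seq_cst" is expanded to
  // a cmpxchg loop, so the fadd itself executes between the load and the
  // compare-exchange rather than inside an LR/SC pair.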
  if (AI->isFloatingPointOperation())
    return AtomicExpansionKind::CmpXChg;

  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

static Intrinsic::ID
getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
  if (XLen == 32) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i32;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i32;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i32;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i32;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i32;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i32;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i32;
    }
  }

  if (XLen == 64) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i64;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i64;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i64;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i64;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i64;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i64;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i64;
    }
  }

  llvm_unreachable("Unexpected XLen\n");
}

Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  Value *Ordering =
      Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
  Type *Tys[] = {AlignedAddr->getType()};
  Function *LrwOpScwLoop = Intrinsic::getDeclaration(
      AI->getModule(),
      getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);

  if (XLen == 64) {
    Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
  }

  Value *Result;

  // Must pass the shift amount needed to sign extend the loaded value prior
  // to performing a signed comparison for min/max. ShiftAmt is the number of
  // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
  // is the number of bits to left+right shift the value in order to
  // sign-extend.
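  // For example, for an i8 atomicrmw min on RV32 with the byte at bit offset
  // 16: ShiftAmt = 16, ValWidth = 8, so SextShamt = 32 - 16 - 8 = 8.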
  if (AI->getOperation() == AtomicRMWInst::Min ||
      AI->getOperation() == AtomicRMWInst::Max) {
    const DataLayout &DL = AI->getModule()->getDataLayout();
    unsigned ValWidth =
        DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
    Value *SextShamt =
        Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
    Result = Builder.CreateCall(LrwOpScwLoop,
                                {AlignedAddr, Incr, Mask, SextShamt, Ordering});
  } else {
    Result =
        Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
  }

  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}

TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
    AtomicCmpXchgInst *CI) const {
  unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
    IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
  Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
  if (XLen == 64) {
    CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
    NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
  }
  Type *Tys[] = {AlignedAddr->getType()};
  Function *MaskedCmpXchg =
      Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
  Value *Result = Builder.CreateCall(
      MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}

bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const {
  return false;
}

bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                                     EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f16:
    return Subtarget.hasStdExtZfh();
  case MVT::f32:
    return Subtarget.hasStdExtF();
  case MVT::f64:
    return Subtarget.hasStdExtD();
  default:
    break;
  }

  return false;
}

Register RISCVTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X10;
}

Register RISCVTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X11;
}

bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
  // Return false to suppress unnecessary extension when a LibCall argument or
  // return value is of f32 type under the LP64 ABI.
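  // For example, a libcall that takes or returns f32 on an LP64 target keeps
  // the value as f32 instead of extending it to a wider type for the call.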
  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32))
    return false;

  return true;
}

bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type,
                                                        bool IsSigned) const {
  if (Subtarget.is64Bit() && Type == MVT::i32)
    return true;

  return IsSigned;
}

bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
                                                 SDValue C) const {
  // Check integral scalar types.
  if (VT.isScalarInteger()) {
    // Omit the optimization if the subtarget has the M extension and the data
    // size exceeds XLen.
    if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen())
      return false;
    if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
      // Break the MUL into a SLLI and an ADD/SUB.
      const APInt &Imm = ConstNode->getAPIntValue();
      if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
          (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
        return true;
      // Omit the following optimization if the subtarget has the M extension
      // and the data size >= XLen.
      if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen())
        return false;
      // Break the MUL into two SLLI instructions and an ADD/SUB, if Imm needs
      // a pair of LUI/ADDI.
      if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) {
        APInt ImmS = Imm.ashr(Imm.countTrailingZeros());
        if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
            (1 - ImmS).isPowerOf2())
          return true;
      }
    }
  }

  return false;
}

bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
  if (!Subtarget.useRVVForFixedLengthVectors())
    return false;

  if (!VT.isFixedLengthVector())
    return false;

  // Don't use RVV for vectors we cannot scalarize if required.
  switch (VT.getVectorElementType().SimpleTy) {
  // i1 is supported but has different rules.
  default:
    return false;
  case MVT::i1:
    // Masks can only use a single register.
    if (VT.getVectorNumElements() > Subtarget.getMinRVVVectorSizeInBits())
      return false;
    break;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
    break;
  case MVT::f16:
    if (!Subtarget.hasStdExtZfh())
      return false;
    break;
  case MVT::f32:
    if (!Subtarget.hasStdExtF())
      return false;
    break;
  case MVT::f64:
    if (!Subtarget.hasStdExtD())
      return false;
    break;
  }

  unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
  // Don't use RVV for types that don't fit.
  if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
    return false;

  // TODO: Perhaps an artificial restriction, but worth having whilst getting
  // the base fixed length RVV support in place.
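  // For example, a <3 x i32> vector is rejected by this check because its
  // element count is not a power of two.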
  if (!VT.isPow2VectorType())
    return false;

  return true;
}

bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
    bool *Fast) const {
  if (!VT.isScalableVector())
    return false;

  EVT ElemVT = VT.getVectorElementType();
  if (Alignment >= ElemVT.getStoreSize()) {
    if (Fast)
      *Fast = true;
    return true;
  }

  return false;
}

bool RISCVTargetLowering::splitValueIntoRegisterParts(
    SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
    unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
  bool IsABIRegCopy = CC.hasValue();
  EVT ValueVT = Val.getValueType();
  if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
    // Cast the f16 to i16, extend to i32, pad the upper bits with ones to
    // make a float NaN, and cast to f32.
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
    Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
    Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
                      DAG.getConstant(0xFFFF0000, DL, MVT::i32));
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
    Parts[0] = Val;
    return true;
  }

  if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
    LLVMContext &Context = *DAG.getContext();
    EVT ValueEltVT = ValueVT.getVectorElementType();
    EVT PartEltVT = PartVT.getVectorElementType();
    unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinSize();
    unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinSize();
    if (PartVTBitSize % ValueVTBitSize == 0) {
      // If the element types differ, first bitcast to a vector type with the
      // same element type as PartVT.
      if (ValueEltVT != PartEltVT) {
        unsigned Count = ValueVTBitSize / PartEltVT.getSizeInBits();
        assert(Count != 0 && "The number of elements should not be zero.");
        EVT SameEltTypeVT =
            EVT::getVectorVT(Context, PartEltVT, Count, /*IsScalable=*/true);
        Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
      }
      Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
                        Val, DAG.getConstant(0, DL, Subtarget.getXLenVT()));
      Parts[0] = Val;
      return true;
    }
  }
  return false;
}

SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
    SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
    MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const {
  bool IsABIRegCopy = CC.hasValue();
  if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
    SDValue Val = Parts[0];

    // Cast the f32 to i32, truncate to i16, and cast back to f16.
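    // This undoes the NaN-boxing done in splitValueIntoRegisterParts: the
    // upper 16 one-bits are discarded by the truncate, e.g. 0xFFFF3C00
    // becomes 0x3C00 (f16 1.0).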
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
    Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::f16, Val);
    return Val;
  }

  if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
    LLVMContext &Context = *DAG.getContext();
    SDValue Val = Parts[0];
    EVT ValueEltVT = ValueVT.getVectorElementType();
    EVT PartEltVT = PartVT.getVectorElementType();
    unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinSize();
    unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinSize();
    if (PartVTBitSize % ValueVTBitSize == 0) {
      EVT SameEltTypeVT = ValueVT;
      // If the element types differ, extract with a vector type whose element
      // type matches PartVT, then bitcast the result to ValueVT.
      if (ValueEltVT != PartEltVT) {
        unsigned Count = ValueVTBitSize / PartEltVT.getSizeInBits();
        assert(Count != 0 && "The number of elements should not be zero.");
        SameEltTypeVT =
            EVT::getVectorVT(Context, PartEltVT, Count, /*IsScalable=*/true);
      }
      Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SameEltTypeVT, Val,
                        DAG.getConstant(0, DL, Subtarget.getXLenVT()));
      if (ValueEltVT != PartEltVT)
        Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
      return Val;
    }
  }
  return SDValue();
}

#define GET_REGISTER_MATCHER
#include "RISCVGenAsmMatcher.inc"

Register
RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                       const MachineFunction &MF) const {
  Register Reg = MatchRegisterAltName(RegName);
  if (Reg == RISCV::NoRegister)
    Reg = MatchRegisterName(RegName);
  if (Reg == RISCV::NoRegister)
    report_fatal_error(
        Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
  BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
  if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
    report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
                             StringRef(RegName) + "\"."));
  return Reg;
}

namespace llvm {
namespace RISCVVIntrinsicsTable {

#define GET_RISCVVIntrinsicsTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // namespace RISCVVIntrinsicsTable

} // namespace llvm