1 //===- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Subclass of MipsTargetLowering specialized for mips32/64. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "MipsSEISelLowering.h" 14 #include "MipsMachineFunction.h" 15 #include "MipsRegisterInfo.h" 16 #include "MipsSubtarget.h" 17 #include "llvm/ADT/APInt.h" 18 #include "llvm/ADT/ArrayRef.h" 19 #include "llvm/ADT/STLExtras.h" 20 #include "llvm/ADT/SmallVector.h" 21 #include "llvm/ADT/Triple.h" 22 #include "llvm/CodeGen/CallingConvLower.h" 23 #include "llvm/CodeGen/ISDOpcodes.h" 24 #include "llvm/CodeGen/MachineBasicBlock.h" 25 #include "llvm/CodeGen/MachineFunction.h" 26 #include "llvm/CodeGen/MachineInstr.h" 27 #include "llvm/CodeGen/MachineInstrBuilder.h" 28 #include "llvm/CodeGen/MachineMemOperand.h" 29 #include "llvm/CodeGen/MachineRegisterInfo.h" 30 #include "llvm/CodeGen/SelectionDAG.h" 31 #include "llvm/CodeGen/SelectionDAGNodes.h" 32 #include "llvm/CodeGen/TargetInstrInfo.h" 33 #include "llvm/CodeGen/TargetSubtargetInfo.h" 34 #include "llvm/CodeGen/ValueTypes.h" 35 #include "llvm/IR/DebugLoc.h" 36 #include "llvm/IR/Intrinsics.h" 37 #include "llvm/IR/IntrinsicsMips.h" 38 #include "llvm/Support/Casting.h" 39 #include "llvm/Support/CommandLine.h" 40 #include "llvm/Support/Debug.h" 41 #include "llvm/Support/ErrorHandling.h" 42 #include "llvm/Support/MachineValueType.h" 43 #include "llvm/Support/MathExtras.h" 44 #include "llvm/Support/raw_ostream.h" 45 #include <algorithm> 46 #include <cassert> 47 #include <cstdint> 48 #include <iterator> 49 #include <utility> 50 51 using namespace llvm; 52 53 #define DEBUG_TYPE 
"mips-isel" 54 55 static cl::opt<bool> 56 UseMipsTailCalls("mips-tail-calls", cl::Hidden, 57 cl::desc("MIPS: permit tail calls."), cl::init(false)); 58 59 static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false), 60 cl::desc("Expand double precision loads and " 61 "stores to their single precision " 62 "counterparts")); 63 64 MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM, 65 const MipsSubtarget &STI) 66 : MipsTargetLowering(TM, STI) { 67 // Set up the register classes 68 addRegisterClass(MVT::i32, &Mips::GPR32RegClass); 69 70 if (Subtarget.isGP64bit()) 71 addRegisterClass(MVT::i64, &Mips::GPR64RegClass); 72 73 if (Subtarget.hasDSP() || Subtarget.hasMSA()) { 74 // Expand all truncating stores and extending loads. 75 for (MVT VT0 : MVT::fixedlen_vector_valuetypes()) { 76 for (MVT VT1 : MVT::fixedlen_vector_valuetypes()) { 77 setTruncStoreAction(VT0, VT1, Expand); 78 setLoadExtAction(ISD::SEXTLOAD, VT0, VT1, Expand); 79 setLoadExtAction(ISD::ZEXTLOAD, VT0, VT1, Expand); 80 setLoadExtAction(ISD::EXTLOAD, VT0, VT1, Expand); 81 } 82 } 83 } 84 85 if (Subtarget.hasDSP()) { 86 MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8}; 87 88 for (const auto &VecTy : VecTys) { 89 addRegisterClass(VecTy, &Mips::DSPRRegClass); 90 91 // Expand all builtin opcodes. 
92 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) 93 setOperationAction(Opc, VecTy, Expand); 94 95 setOperationAction(ISD::ADD, VecTy, Legal); 96 setOperationAction(ISD::SUB, VecTy, Legal); 97 setOperationAction(ISD::LOAD, VecTy, Legal); 98 setOperationAction(ISD::STORE, VecTy, Legal); 99 setOperationAction(ISD::BITCAST, VecTy, Legal); 100 } 101 102 setTargetDAGCombine( 103 {ISD::SHL, ISD::SRA, ISD::SRL, ISD::SETCC, ISD::VSELECT}); 104 105 if (Subtarget.hasMips32r2()) { 106 setOperationAction(ISD::ADDC, MVT::i32, Legal); 107 setOperationAction(ISD::ADDE, MVT::i32, Legal); 108 } 109 } 110 111 if (Subtarget.hasDSPR2()) 112 setOperationAction(ISD::MUL, MVT::v2i16, Legal); 113 114 if (Subtarget.hasMSA()) { 115 addMSAIntType(MVT::v16i8, &Mips::MSA128BRegClass); 116 addMSAIntType(MVT::v8i16, &Mips::MSA128HRegClass); 117 addMSAIntType(MVT::v4i32, &Mips::MSA128WRegClass); 118 addMSAIntType(MVT::v2i64, &Mips::MSA128DRegClass); 119 addMSAFloatType(MVT::v8f16, &Mips::MSA128HRegClass); 120 addMSAFloatType(MVT::v4f32, &Mips::MSA128WRegClass); 121 addMSAFloatType(MVT::v2f64, &Mips::MSA128DRegClass); 122 123 // f16 is a storage-only type, always promote it to f32. 
124 addRegisterClass(MVT::f16, &Mips::MSA128HRegClass); 125 setOperationAction(ISD::SETCC, MVT::f16, Promote); 126 setOperationAction(ISD::BR_CC, MVT::f16, Promote); 127 setOperationAction(ISD::SELECT_CC, MVT::f16, Promote); 128 setOperationAction(ISD::SELECT, MVT::f16, Promote); 129 setOperationAction(ISD::FADD, MVT::f16, Promote); 130 setOperationAction(ISD::FSUB, MVT::f16, Promote); 131 setOperationAction(ISD::FMUL, MVT::f16, Promote); 132 setOperationAction(ISD::FDIV, MVT::f16, Promote); 133 setOperationAction(ISD::FREM, MVT::f16, Promote); 134 setOperationAction(ISD::FMA, MVT::f16, Promote); 135 setOperationAction(ISD::FNEG, MVT::f16, Promote); 136 setOperationAction(ISD::FABS, MVT::f16, Promote); 137 setOperationAction(ISD::FCEIL, MVT::f16, Promote); 138 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote); 139 setOperationAction(ISD::FCOS, MVT::f16, Promote); 140 setOperationAction(ISD::FP_EXTEND, MVT::f16, Promote); 141 setOperationAction(ISD::FFLOOR, MVT::f16, Promote); 142 setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote); 143 setOperationAction(ISD::FPOW, MVT::f16, Promote); 144 setOperationAction(ISD::FPOWI, MVT::f16, Promote); 145 setOperationAction(ISD::FRINT, MVT::f16, Promote); 146 setOperationAction(ISD::FSIN, MVT::f16, Promote); 147 setOperationAction(ISD::FSINCOS, MVT::f16, Promote); 148 setOperationAction(ISD::FSQRT, MVT::f16, Promote); 149 setOperationAction(ISD::FEXP, MVT::f16, Promote); 150 setOperationAction(ISD::FEXP2, MVT::f16, Promote); 151 setOperationAction(ISD::FLOG, MVT::f16, Promote); 152 setOperationAction(ISD::FLOG2, MVT::f16, Promote); 153 setOperationAction(ISD::FLOG10, MVT::f16, Promote); 154 setOperationAction(ISD::FROUND, MVT::f16, Promote); 155 setOperationAction(ISD::FTRUNC, MVT::f16, Promote); 156 setOperationAction(ISD::FMINNUM, MVT::f16, Promote); 157 setOperationAction(ISD::FMAXNUM, MVT::f16, Promote); 158 setOperationAction(ISD::FMINIMUM, MVT::f16, Promote); 159 setOperationAction(ISD::FMAXIMUM, MVT::f16, 
Promote); 160 161 setTargetDAGCombine({ISD::AND, ISD::OR, ISD::SRA, ISD::VSELECT, ISD::XOR}); 162 } 163 164 if (!Subtarget.useSoftFloat()) { 165 addRegisterClass(MVT::f32, &Mips::FGR32RegClass); 166 167 // When dealing with single precision only, use libcalls 168 if (!Subtarget.isSingleFloat()) { 169 if (Subtarget.isFP64bit()) 170 addRegisterClass(MVT::f64, &Mips::FGR64RegClass); 171 else 172 addRegisterClass(MVT::f64, &Mips::AFGR64RegClass); 173 } 174 } 175 176 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom); 177 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom); 178 setOperationAction(ISD::MULHS, MVT::i32, Custom); 179 setOperationAction(ISD::MULHU, MVT::i32, Custom); 180 181 if (Subtarget.hasCnMips()) 182 setOperationAction(ISD::MUL, MVT::i64, Legal); 183 else if (Subtarget.isGP64bit()) 184 setOperationAction(ISD::MUL, MVT::i64, Custom); 185 186 if (Subtarget.isGP64bit()) { 187 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Custom); 188 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom); 189 setOperationAction(ISD::MULHS, MVT::i64, Custom); 190 setOperationAction(ISD::MULHU, MVT::i64, Custom); 191 setOperationAction(ISD::SDIVREM, MVT::i64, Custom); 192 setOperationAction(ISD::UDIVREM, MVT::i64, Custom); 193 } 194 195 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); 196 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); 197 198 setOperationAction(ISD::SDIVREM, MVT::i32, Custom); 199 setOperationAction(ISD::UDIVREM, MVT::i32, Custom); 200 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); 201 setOperationAction(ISD::LOAD, MVT::i32, Custom); 202 setOperationAction(ISD::STORE, MVT::i32, Custom); 203 204 setTargetDAGCombine(ISD::MUL); 205 206 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 207 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); 208 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); 209 210 if (Subtarget.hasMips32r2() && !Subtarget.useSoftFloat() && 211 
!Subtarget.hasMips64()) { 212 setOperationAction(ISD::BITCAST, MVT::i64, Custom); 213 } 214 215 if (NoDPLoadStore) { 216 setOperationAction(ISD::LOAD, MVT::f64, Custom); 217 setOperationAction(ISD::STORE, MVT::f64, Custom); 218 } 219 220 if (Subtarget.hasMips32r6()) { 221 // MIPS32r6 replaces the accumulator-based multiplies with a three register 222 // instruction 223 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); 224 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); 225 setOperationAction(ISD::MUL, MVT::i32, Legal); 226 setOperationAction(ISD::MULHS, MVT::i32, Legal); 227 setOperationAction(ISD::MULHU, MVT::i32, Legal); 228 229 // MIPS32r6 replaces the accumulator-based division/remainder with separate 230 // three register division and remainder instructions. 231 setOperationAction(ISD::SDIVREM, MVT::i32, Expand); 232 setOperationAction(ISD::UDIVREM, MVT::i32, Expand); 233 setOperationAction(ISD::SDIV, MVT::i32, Legal); 234 setOperationAction(ISD::UDIV, MVT::i32, Legal); 235 setOperationAction(ISD::SREM, MVT::i32, Legal); 236 setOperationAction(ISD::UREM, MVT::i32, Legal); 237 238 // MIPS32r6 replaces conditional moves with an equivalent that removes the 239 // need for three GPR read ports. 
240 setOperationAction(ISD::SETCC, MVT::i32, Legal); 241 setOperationAction(ISD::SELECT, MVT::i32, Legal); 242 setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); 243 244 setOperationAction(ISD::SETCC, MVT::f32, Legal); 245 setOperationAction(ISD::SELECT, MVT::f32, Legal); 246 setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); 247 248 assert(Subtarget.isFP64bit() && "FR=1 is required for MIPS32r6"); 249 setOperationAction(ISD::SETCC, MVT::f64, Legal); 250 setOperationAction(ISD::SELECT, MVT::f64, Custom); 251 setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); 252 253 setOperationAction(ISD::BRCOND, MVT::Other, Legal); 254 255 // Floating point > and >= are supported via < and <= 256 setCondCodeAction(ISD::SETOGE, MVT::f32, Expand); 257 setCondCodeAction(ISD::SETOGT, MVT::f32, Expand); 258 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand); 259 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand); 260 261 setCondCodeAction(ISD::SETOGE, MVT::f64, Expand); 262 setCondCodeAction(ISD::SETOGT, MVT::f64, Expand); 263 setCondCodeAction(ISD::SETUGE, MVT::f64, Expand); 264 setCondCodeAction(ISD::SETUGT, MVT::f64, Expand); 265 } 266 267 if (Subtarget.hasMips64r6()) { 268 // MIPS64r6 replaces the accumulator-based multiplies with a three register 269 // instruction 270 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); 271 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); 272 setOperationAction(ISD::MUL, MVT::i64, Legal); 273 setOperationAction(ISD::MULHS, MVT::i64, Legal); 274 setOperationAction(ISD::MULHU, MVT::i64, Legal); 275 276 // MIPS32r6 replaces the accumulator-based division/remainder with separate 277 // three register division and remainder instructions. 
278 setOperationAction(ISD::SDIVREM, MVT::i64, Expand); 279 setOperationAction(ISD::UDIVREM, MVT::i64, Expand); 280 setOperationAction(ISD::SDIV, MVT::i64, Legal); 281 setOperationAction(ISD::UDIV, MVT::i64, Legal); 282 setOperationAction(ISD::SREM, MVT::i64, Legal); 283 setOperationAction(ISD::UREM, MVT::i64, Legal); 284 285 // MIPS64r6 replaces conditional moves with an equivalent that removes the 286 // need for three GPR read ports. 287 setOperationAction(ISD::SETCC, MVT::i64, Legal); 288 setOperationAction(ISD::SELECT, MVT::i64, Legal); 289 setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); 290 } 291 292 computeRegisterProperties(Subtarget.getRegisterInfo()); 293 } 294 295 const MipsTargetLowering * 296 llvm::createMipsSETargetLowering(const MipsTargetMachine &TM, 297 const MipsSubtarget &STI) { 298 return new MipsSETargetLowering(TM, STI); 299 } 300 301 const TargetRegisterClass * 302 MipsSETargetLowering::getRepRegClassFor(MVT VT) const { 303 if (VT == MVT::Untyped) 304 return Subtarget.hasDSP() ? &Mips::ACC64DSPRegClass : &Mips::ACC64RegClass; 305 306 return TargetLowering::getRepRegClassFor(VT); 307 } 308 309 // Enable MSA support for the given integer type and Register class. 310 void MipsSETargetLowering:: 311 addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { 312 addRegisterClass(Ty, RC); 313 314 // Expand all builtin opcodes. 
315 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) 316 setOperationAction(Opc, Ty, Expand); 317 318 setOperationAction(ISD::BITCAST, Ty, Legal); 319 setOperationAction(ISD::LOAD, Ty, Legal); 320 setOperationAction(ISD::STORE, Ty, Legal); 321 setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Custom); 322 setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal); 323 setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); 324 setOperationAction(ISD::UNDEF, Ty, Legal); 325 326 setOperationAction(ISD::ADD, Ty, Legal); 327 setOperationAction(ISD::AND, Ty, Legal); 328 setOperationAction(ISD::CTLZ, Ty, Legal); 329 setOperationAction(ISD::CTPOP, Ty, Legal); 330 setOperationAction(ISD::MUL, Ty, Legal); 331 setOperationAction(ISD::OR, Ty, Legal); 332 setOperationAction(ISD::SDIV, Ty, Legal); 333 setOperationAction(ISD::SREM, Ty, Legal); 334 setOperationAction(ISD::SHL, Ty, Legal); 335 setOperationAction(ISD::SRA, Ty, Legal); 336 setOperationAction(ISD::SRL, Ty, Legal); 337 setOperationAction(ISD::SUB, Ty, Legal); 338 setOperationAction(ISD::SMAX, Ty, Legal); 339 setOperationAction(ISD::SMIN, Ty, Legal); 340 setOperationAction(ISD::UDIV, Ty, Legal); 341 setOperationAction(ISD::UREM, Ty, Legal); 342 setOperationAction(ISD::UMAX, Ty, Legal); 343 setOperationAction(ISD::UMIN, Ty, Legal); 344 setOperationAction(ISD::VECTOR_SHUFFLE, Ty, Custom); 345 setOperationAction(ISD::VSELECT, Ty, Legal); 346 setOperationAction(ISD::XOR, Ty, Legal); 347 348 if (Ty == MVT::v4i32 || Ty == MVT::v2i64) { 349 setOperationAction(ISD::FP_TO_SINT, Ty, Legal); 350 setOperationAction(ISD::FP_TO_UINT, Ty, Legal); 351 setOperationAction(ISD::SINT_TO_FP, Ty, Legal); 352 setOperationAction(ISD::UINT_TO_FP, Ty, Legal); 353 } 354 355 setOperationAction(ISD::SETCC, Ty, Legal); 356 setCondCodeAction(ISD::SETNE, Ty, Expand); 357 setCondCodeAction(ISD::SETGE, Ty, Expand); 358 setCondCodeAction(ISD::SETGT, Ty, Expand); 359 setCondCodeAction(ISD::SETUGE, Ty, Expand); 360 setCondCodeAction(ISD::SETUGT, Ty, Expand); 
361 } 362 363 // Enable MSA support for the given floating-point type and Register class. 364 void MipsSETargetLowering:: 365 addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { 366 addRegisterClass(Ty, RC); 367 368 // Expand all builtin opcodes. 369 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) 370 setOperationAction(Opc, Ty, Expand); 371 372 setOperationAction(ISD::LOAD, Ty, Legal); 373 setOperationAction(ISD::STORE, Ty, Legal); 374 setOperationAction(ISD::BITCAST, Ty, Legal); 375 setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Legal); 376 setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal); 377 setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); 378 379 if (Ty != MVT::v8f16) { 380 setOperationAction(ISD::FABS, Ty, Legal); 381 setOperationAction(ISD::FADD, Ty, Legal); 382 setOperationAction(ISD::FDIV, Ty, Legal); 383 setOperationAction(ISD::FEXP2, Ty, Legal); 384 setOperationAction(ISD::FLOG2, Ty, Legal); 385 setOperationAction(ISD::FMA, Ty, Legal); 386 setOperationAction(ISD::FMUL, Ty, Legal); 387 setOperationAction(ISD::FRINT, Ty, Legal); 388 setOperationAction(ISD::FSQRT, Ty, Legal); 389 setOperationAction(ISD::FSUB, Ty, Legal); 390 setOperationAction(ISD::VSELECT, Ty, Legal); 391 392 setOperationAction(ISD::SETCC, Ty, Legal); 393 setCondCodeAction(ISD::SETOGE, Ty, Expand); 394 setCondCodeAction(ISD::SETOGT, Ty, Expand); 395 setCondCodeAction(ISD::SETUGE, Ty, Expand); 396 setCondCodeAction(ISD::SETUGT, Ty, Expand); 397 setCondCodeAction(ISD::SETGE, Ty, Expand); 398 setCondCodeAction(ISD::SETGT, Ty, Expand); 399 } 400 } 401 402 SDValue MipsSETargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { 403 if(!Subtarget.hasMips32r6()) 404 return MipsTargetLowering::LowerOperation(Op, DAG); 405 406 EVT ResTy = Op->getValueType(0); 407 SDLoc DL(Op); 408 409 // Although MTC1_D64 takes an i32 and writes an f64, the upper 32 bits of the 410 // floating point register are undefined. 
Not really an issue as sel.d, which 411 // is produced from an FSELECT node, only looks at bit 0. 412 SDValue Tmp = DAG.getNode(MipsISD::MTC1_D64, DL, MVT::f64, Op->getOperand(0)); 413 return DAG.getNode(MipsISD::FSELECT, DL, ResTy, Tmp, Op->getOperand(1), 414 Op->getOperand(2)); 415 } 416 417 bool MipsSETargetLowering::allowsMisalignedMemoryAccesses( 418 EVT VT, unsigned, Align, MachineMemOperand::Flags, bool *Fast) const { 419 MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy; 420 421 if (Subtarget.systemSupportsUnalignedAccess()) { 422 // MIPS32r6/MIPS64r6 is required to support unaligned access. It's 423 // implementation defined whether this is handled by hardware, software, or 424 // a hybrid of the two but it's expected that most implementations will 425 // handle the majority of cases in hardware. 426 if (Fast) 427 *Fast = true; 428 return true; 429 } 430 431 switch (SVT) { 432 case MVT::i64: 433 case MVT::i32: 434 if (Fast) 435 *Fast = true; 436 return true; 437 default: 438 return false; 439 } 440 } 441 442 SDValue MipsSETargetLowering::LowerOperation(SDValue Op, 443 SelectionDAG &DAG) const { 444 switch(Op.getOpcode()) { 445 case ISD::LOAD: return lowerLOAD(Op, DAG); 446 case ISD::STORE: return lowerSTORE(Op, DAG); 447 case ISD::SMUL_LOHI: return lowerMulDiv(Op, MipsISD::Mult, true, true, DAG); 448 case ISD::UMUL_LOHI: return lowerMulDiv(Op, MipsISD::Multu, true, true, DAG); 449 case ISD::MULHS: return lowerMulDiv(Op, MipsISD::Mult, false, true, DAG); 450 case ISD::MULHU: return lowerMulDiv(Op, MipsISD::Multu, false, true, DAG); 451 case ISD::MUL: return lowerMulDiv(Op, MipsISD::Mult, true, false, DAG); 452 case ISD::SDIVREM: return lowerMulDiv(Op, MipsISD::DivRem, true, true, DAG); 453 case ISD::UDIVREM: return lowerMulDiv(Op, MipsISD::DivRemU, true, true, 454 DAG); 455 case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG); 456 case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG); 457 case ISD::INTRINSIC_VOID: return 
lowerINTRINSIC_VOID(Op, DAG); 458 case ISD::EXTRACT_VECTOR_ELT: return lowerEXTRACT_VECTOR_ELT(Op, DAG); 459 case ISD::BUILD_VECTOR: return lowerBUILD_VECTOR(Op, DAG); 460 case ISD::VECTOR_SHUFFLE: return lowerVECTOR_SHUFFLE(Op, DAG); 461 case ISD::SELECT: return lowerSELECT(Op, DAG); 462 case ISD::BITCAST: return lowerBITCAST(Op, DAG); 463 } 464 465 return MipsTargetLowering::LowerOperation(Op, DAG); 466 } 467 468 // Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT 469 // 470 // Performs the following transformations: 471 // - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its 472 // sign/zero-extension is completely overwritten by the new one performed by 473 // the ISD::AND. 474 // - Removes redundant zero extensions performed by an ISD::AND. 475 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, 476 TargetLowering::DAGCombinerInfo &DCI, 477 const MipsSubtarget &Subtarget) { 478 if (!Subtarget.hasMSA()) 479 return SDValue(); 480 481 SDValue Op0 = N->getOperand(0); 482 SDValue Op1 = N->getOperand(1); 483 unsigned Op0Opcode = Op0->getOpcode(); 484 485 // (and (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d) 486 // where $d + 1 == 2^n and n == 32 487 // or $d + 1 == 2^n and n <= 32 and ZExt 488 // -> (MipsVExtractZExt $a, $b, $c) 489 if (Op0Opcode == MipsISD::VEXTRACT_SEXT_ELT || 490 Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT) { 491 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Op1); 492 493 if (!Mask) 494 return SDValue(); 495 496 int32_t Log2IfPositive = (Mask->getAPIntValue() + 1).exactLogBase2(); 497 498 if (Log2IfPositive <= 0) 499 return SDValue(); // Mask+1 is not a power of 2 500 501 SDValue Op0Op2 = Op0->getOperand(2); 502 EVT ExtendTy = cast<VTSDNode>(Op0Op2)->getVT(); 503 unsigned ExtendTySize = ExtendTy.getSizeInBits(); 504 unsigned Log2 = Log2IfPositive; 505 506 if ((Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT && Log2 >= ExtendTySize) || 507 Log2 == ExtendTySize) { 508 SDValue Ops[] = { Op0->getOperand(0), Op0->getOperand(1), Op0Op2 
}; 509 return DAG.getNode(MipsISD::VEXTRACT_ZEXT_ELT, SDLoc(Op0), 510 Op0->getVTList(), 511 makeArrayRef(Ops, Op0->getNumOperands())); 512 } 513 } 514 515 return SDValue(); 516 } 517 518 // Determine if the specified node is a constant vector splat. 519 // 520 // Returns true and sets Imm if: 521 // * N is a ISD::BUILD_VECTOR representing a constant splat 522 // 523 // This function is quite similar to MipsSEDAGToDAGISel::selectVSplat. The 524 // differences are that it assumes the MSA has already been checked and the 525 // arbitrary requirement for a maximum of 32-bit integers isn't applied (and 526 // must not be in order for binsri.d to be selectable). 527 static bool isVSplat(SDValue N, APInt &Imm, bool IsLittleEndian) { 528 BuildVectorSDNode *Node = dyn_cast<BuildVectorSDNode>(N.getNode()); 529 530 if (!Node) 531 return false; 532 533 APInt SplatValue, SplatUndef; 534 unsigned SplatBitSize; 535 bool HasAnyUndefs; 536 537 if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, 538 8, !IsLittleEndian)) 539 return false; 540 541 Imm = SplatValue; 542 543 return true; 544 } 545 546 // Test whether the given node is an all-ones build_vector. 547 static bool isVectorAllOnes(SDValue N) { 548 // Look through bitcasts. Endianness doesn't matter because we are looking 549 // for an all-ones value. 550 if (N->getOpcode() == ISD::BITCAST) 551 N = N->getOperand(0); 552 553 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N); 554 555 if (!BVN) 556 return false; 557 558 APInt SplatValue, SplatUndef; 559 unsigned SplatBitSize; 560 bool HasAnyUndefs; 561 562 // Endianness doesn't matter in this context because we are looking for 563 // an all-ones value. 564 if (BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs)) 565 return SplatValue.isAllOnes(); 566 567 return false; 568 } 569 570 // Test whether N is the bitwise inverse of OfNode. 
571 static bool isBitwiseInverse(SDValue N, SDValue OfNode) { 572 if (N->getOpcode() != ISD::XOR) 573 return false; 574 575 if (isVectorAllOnes(N->getOperand(0))) 576 return N->getOperand(1) == OfNode; 577 578 if (isVectorAllOnes(N->getOperand(1))) 579 return N->getOperand(0) == OfNode; 580 581 return false; 582 } 583 584 // Perform combines where ISD::OR is the root node. 585 // 586 // Performs the following transformations: 587 // - (or (and $a, $mask), (and $b, $inv_mask)) => (vselect $mask, $a, $b) 588 // where $inv_mask is the bitwise inverse of $mask and the 'or' has a 128-bit 589 // vector type. 590 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, 591 TargetLowering::DAGCombinerInfo &DCI, 592 const MipsSubtarget &Subtarget) { 593 if (!Subtarget.hasMSA()) 594 return SDValue(); 595 596 EVT Ty = N->getValueType(0); 597 598 if (!Ty.is128BitVector()) 599 return SDValue(); 600 601 SDValue Op0 = N->getOperand(0); 602 SDValue Op1 = N->getOperand(1); 603 604 if (Op0->getOpcode() == ISD::AND && Op1->getOpcode() == ISD::AND) { 605 SDValue Op0Op0 = Op0->getOperand(0); 606 SDValue Op0Op1 = Op0->getOperand(1); 607 SDValue Op1Op0 = Op1->getOperand(0); 608 SDValue Op1Op1 = Op1->getOperand(1); 609 bool IsLittleEndian = !Subtarget.isLittle(); 610 611 SDValue IfSet, IfClr, Cond; 612 bool IsConstantMask = false; 613 APInt Mask, InvMask; 614 615 // If Op0Op0 is an appropriate mask, try to find it's inverse in either 616 // Op1Op0, or Op1Op1. Keep track of the Cond, IfSet, and IfClr nodes, while 617 // looking. 618 // IfClr will be set if we find a valid match. 
619 if (isVSplat(Op0Op0, Mask, IsLittleEndian)) { 620 Cond = Op0Op0; 621 IfSet = Op0Op1; 622 623 if (isVSplat(Op1Op0, InvMask, IsLittleEndian) && 624 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) 625 IfClr = Op1Op1; 626 else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) && 627 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) 628 IfClr = Op1Op0; 629 630 IsConstantMask = true; 631 } 632 633 // If IfClr is not yet set, and Op0Op1 is an appropriate mask, try the same 634 // thing again using this mask. 635 // IfClr will be set if we find a valid match. 636 if (!IfClr.getNode() && isVSplat(Op0Op1, Mask, IsLittleEndian)) { 637 Cond = Op0Op1; 638 IfSet = Op0Op0; 639 640 if (isVSplat(Op1Op0, InvMask, IsLittleEndian) && 641 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) 642 IfClr = Op1Op1; 643 else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) && 644 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) 645 IfClr = Op1Op0; 646 647 IsConstantMask = true; 648 } 649 650 // If IfClr is not yet set, try looking for a non-constant match. 651 // IfClr will be set if we find a valid match amongst the eight 652 // possibilities. 
653 if (!IfClr.getNode()) { 654 if (isBitwiseInverse(Op0Op0, Op1Op0)) { 655 Cond = Op1Op0; 656 IfSet = Op1Op1; 657 IfClr = Op0Op1; 658 } else if (isBitwiseInverse(Op0Op1, Op1Op0)) { 659 Cond = Op1Op0; 660 IfSet = Op1Op1; 661 IfClr = Op0Op0; 662 } else if (isBitwiseInverse(Op0Op0, Op1Op1)) { 663 Cond = Op1Op1; 664 IfSet = Op1Op0; 665 IfClr = Op0Op1; 666 } else if (isBitwiseInverse(Op0Op1, Op1Op1)) { 667 Cond = Op1Op1; 668 IfSet = Op1Op0; 669 IfClr = Op0Op0; 670 } else if (isBitwiseInverse(Op1Op0, Op0Op0)) { 671 Cond = Op0Op0; 672 IfSet = Op0Op1; 673 IfClr = Op1Op1; 674 } else if (isBitwiseInverse(Op1Op1, Op0Op0)) { 675 Cond = Op0Op0; 676 IfSet = Op0Op1; 677 IfClr = Op1Op0; 678 } else if (isBitwiseInverse(Op1Op0, Op0Op1)) { 679 Cond = Op0Op1; 680 IfSet = Op0Op0; 681 IfClr = Op1Op1; 682 } else if (isBitwiseInverse(Op1Op1, Op0Op1)) { 683 Cond = Op0Op1; 684 IfSet = Op0Op0; 685 IfClr = Op1Op0; 686 } 687 } 688 689 // At this point, IfClr will be set if we have a valid match. 690 if (!IfClr.getNode()) 691 return SDValue(); 692 693 assert(Cond.getNode() && IfSet.getNode()); 694 695 // Fold degenerate cases. 696 if (IsConstantMask) { 697 if (Mask.isAllOnes()) 698 return IfSet; 699 else if (Mask == 0) 700 return IfClr; 701 } 702 703 // Transform the DAG into an equivalent VSELECT. 704 return DAG.getNode(ISD::VSELECT, SDLoc(N), Ty, Cond, IfSet, IfClr); 705 } 706 707 return SDValue(); 708 } 709 710 static bool shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT, 711 SelectionDAG &DAG, 712 const MipsSubtarget &Subtarget) { 713 // Estimate the number of operations the below transform will turn a 714 // constant multiply into. The number is approximately equal to the minimal 715 // number of powers of two that constant can be broken down to by adding 716 // or subtracting them. 717 // 718 // If we have taken more than 12[1] / 8[2] steps to attempt the 719 // optimization for a native sized value, it is more than likely that this 720 // optimization will make things worse. 
721 // 722 // [1] MIPS64 requires 6 instructions at most to materialize any constant, 723 // multiplication requires at least 4 cycles, but another cycle (or two) 724 // to retrieve the result from the HI/LO registers. 725 // 726 // [2] For MIPS32, more than 8 steps is expensive as the constant could be 727 // materialized in 2 instructions, multiplication requires at least 4 728 // cycles, but another cycle (or two) to retrieve the result from the 729 // HI/LO registers. 730 // 731 // TODO: 732 // - MaxSteps needs to consider the `VT` of the constant for the current 733 // target. 734 // - Consider to perform this optimization after type legalization. 735 // That allows to remove a workaround for types not supported natively. 736 // - Take in account `-Os, -Oz` flags because this optimization 737 // increases code size. 738 unsigned MaxSteps = Subtarget.isABI_O32() ? 8 : 12; 739 740 SmallVector<APInt, 16> WorkStack(1, C); 741 unsigned Steps = 0; 742 unsigned BitWidth = C.getBitWidth(); 743 744 while (!WorkStack.empty()) { 745 APInt Val = WorkStack.pop_back_val(); 746 747 if (Val == 0 || Val == 1) 748 continue; 749 750 if (Steps >= MaxSteps) 751 return false; 752 753 if (Val.isPowerOf2()) { 754 ++Steps; 755 continue; 756 } 757 758 APInt Floor = APInt(BitWidth, 1) << Val.logBase2(); 759 APInt Ceil = Val.isNegative() ? APInt(BitWidth, 0) 760 : APInt(BitWidth, 1) << C.ceilLogBase2(); 761 if ((Val - Floor).ule(Ceil - Val)) { 762 WorkStack.push_back(Floor); 763 WorkStack.push_back(Val - Floor); 764 } else { 765 WorkStack.push_back(Ceil); 766 WorkStack.push_back(Ceil - Val); 767 } 768 769 ++Steps; 770 } 771 772 // If the value being multiplied is not supported natively, we have to pay 773 // an additional legalization cost, conservatively assume an increase in the 774 // cost of 3 instructions per step. This values for this heuristic were 775 // determined experimentally. 
776 unsigned RegisterSize = DAG.getTargetLoweringInfo() 777 .getRegisterType(*DAG.getContext(), VT) 778 .getSizeInBits(); 779 Steps *= (VT.getSizeInBits() != RegisterSize) * 3; 780 if (Steps > 27) 781 return false; 782 783 return true; 784 } 785 786 static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT, 787 EVT ShiftTy, SelectionDAG &DAG) { 788 // Return 0. 789 if (C == 0) 790 return DAG.getConstant(0, DL, VT); 791 792 // Return x. 793 if (C == 1) 794 return X; 795 796 // If c is power of 2, return (shl x, log2(c)). 797 if (C.isPowerOf2()) 798 return DAG.getNode(ISD::SHL, DL, VT, X, 799 DAG.getConstant(C.logBase2(), DL, ShiftTy)); 800 801 unsigned BitWidth = C.getBitWidth(); 802 APInt Floor = APInt(BitWidth, 1) << C.logBase2(); 803 APInt Ceil = C.isNegative() ? APInt(BitWidth, 0) : 804 APInt(BitWidth, 1) << C.ceilLogBase2(); 805 806 // If |c - floor_c| <= |c - ceil_c|, 807 // where floor_c = pow(2, floor(log2(c))) and ceil_c = pow(2, ceil(log2(c))), 808 // return (add constMult(x, floor_c), constMult(x, c - floor_c)). 809 if ((C - Floor).ule(Ceil - C)) { 810 SDValue Op0 = genConstMult(X, Floor, DL, VT, ShiftTy, DAG); 811 SDValue Op1 = genConstMult(X, C - Floor, DL, VT, ShiftTy, DAG); 812 return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1); 813 } 814 815 // If |c - floor_c| > |c - ceil_c|, 816 // return (sub constMult(x, ceil_c), constMult(x, ceil_c - c)). 
817 SDValue Op0 = genConstMult(X, Ceil, DL, VT, ShiftTy, DAG); 818 SDValue Op1 = genConstMult(X, Ceil - C, DL, VT, ShiftTy, DAG); 819 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1); 820 } 821 822 static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, 823 const TargetLowering::DAGCombinerInfo &DCI, 824 const MipsSETargetLowering *TL, 825 const MipsSubtarget &Subtarget) { 826 EVT VT = N->getValueType(0); 827 828 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) 829 if (!VT.isVector() && shouldTransformMulToShiftsAddsSubs( 830 C->getAPIntValue(), VT, DAG, Subtarget)) 831 return genConstMult(N->getOperand(0), C->getAPIntValue(), SDLoc(N), VT, 832 TL->getScalarShiftAmountTy(DAG.getDataLayout(), VT), 833 DAG); 834 835 return SDValue(N, 0); 836 } 837 838 static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty, 839 SelectionDAG &DAG, 840 const MipsSubtarget &Subtarget) { 841 // See if this is a vector splat immediate node. 842 APInt SplatValue, SplatUndef; 843 unsigned SplatBitSize; 844 bool HasAnyUndefs; 845 unsigned EltSize = Ty.getScalarSizeInBits(); 846 BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); 847 848 if (!Subtarget.hasDSP()) 849 return SDValue(); 850 851 if (!BV || 852 !BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, 853 EltSize, !Subtarget.isLittle()) || 854 (SplatBitSize != EltSize) || 855 (SplatValue.getZExtValue() >= EltSize)) 856 return SDValue(); 857 858 SDLoc DL(N); 859 return DAG.getNode(Opc, DL, Ty, N->getOperand(0), 860 DAG.getConstant(SplatValue.getZExtValue(), DL, MVT::i32)); 861 } 862 863 static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG, 864 TargetLowering::DAGCombinerInfo &DCI, 865 const MipsSubtarget &Subtarget) { 866 EVT Ty = N->getValueType(0); 867 868 if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8)) 869 return SDValue(); 870 871 return performDSPShiftCombine(MipsISD::SHLL_DSP, N, Ty, DAG, Subtarget); 872 } 873 874 // Fold sign-extensions into 
MipsISD::VEXTRACT_[SZ]EXT_ELT for MSA and fold 875 // constant splats into MipsISD::SHRA_DSP for DSPr2. 876 // 877 // Performs the following transformations: 878 // - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to sign extension if its 879 // sign/zero-extension is completely overwritten by the new one performed by 880 // the ISD::SRA and ISD::SHL nodes. 881 // - Removes redundant sign extensions performed by an ISD::SRA and ISD::SHL 882 // sequence. 883 // 884 // See performDSPShiftCombine for more information about the transformation 885 // used for DSPr2. 886 static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, 887 TargetLowering::DAGCombinerInfo &DCI, 888 const MipsSubtarget &Subtarget) { 889 EVT Ty = N->getValueType(0); 890 891 if (Subtarget.hasMSA()) { 892 SDValue Op0 = N->getOperand(0); 893 SDValue Op1 = N->getOperand(1); 894 895 // (sra (shl (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d), imm:$d) 896 // where $d + sizeof($c) == 32 897 // or $d + sizeof($c) <= 32 and SExt 898 // -> (MipsVExtractSExt $a, $b, $c) 899 if (Op0->getOpcode() == ISD::SHL && Op1 == Op0->getOperand(1)) { 900 SDValue Op0Op0 = Op0->getOperand(0); 901 ConstantSDNode *ShAmount = dyn_cast<ConstantSDNode>(Op1); 902 903 if (!ShAmount) 904 return SDValue(); 905 906 if (Op0Op0->getOpcode() != MipsISD::VEXTRACT_SEXT_ELT && 907 Op0Op0->getOpcode() != MipsISD::VEXTRACT_ZEXT_ELT) 908 return SDValue(); 909 910 EVT ExtendTy = cast<VTSDNode>(Op0Op0->getOperand(2))->getVT(); 911 unsigned TotalBits = ShAmount->getZExtValue() + ExtendTy.getSizeInBits(); 912 913 if (TotalBits == 32 || 914 (Op0Op0->getOpcode() == MipsISD::VEXTRACT_SEXT_ELT && 915 TotalBits <= 32)) { 916 SDValue Ops[] = { Op0Op0->getOperand(0), Op0Op0->getOperand(1), 917 Op0Op0->getOperand(2) }; 918 return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, SDLoc(Op0Op0), 919 Op0Op0->getVTList(), 920 makeArrayRef(Ops, Op0Op0->getNumOperands())); 921 } 922 } 923 } 924 925 if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget.hasDSPR2())) 926 
return SDValue(); 927 928 return performDSPShiftCombine(MipsISD::SHRA_DSP, N, Ty, DAG, Subtarget); 929 } 930 931 932 static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, 933 TargetLowering::DAGCombinerInfo &DCI, 934 const MipsSubtarget &Subtarget) { 935 EVT Ty = N->getValueType(0); 936 937 if (((Ty != MVT::v2i16) || !Subtarget.hasDSPR2()) && (Ty != MVT::v4i8)) 938 return SDValue(); 939 940 return performDSPShiftCombine(MipsISD::SHRL_DSP, N, Ty, DAG, Subtarget); 941 } 942 943 static bool isLegalDSPCondCode(EVT Ty, ISD::CondCode CC) { 944 bool IsV216 = (Ty == MVT::v2i16); 945 946 switch (CC) { 947 case ISD::SETEQ: 948 case ISD::SETNE: return true; 949 case ISD::SETLT: 950 case ISD::SETLE: 951 case ISD::SETGT: 952 case ISD::SETGE: return IsV216; 953 case ISD::SETULT: 954 case ISD::SETULE: 955 case ISD::SETUGT: 956 case ISD::SETUGE: return !IsV216; 957 default: return false; 958 } 959 } 960 961 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) { 962 EVT Ty = N->getValueType(0); 963 964 if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8)) 965 return SDValue(); 966 967 if (!isLegalDSPCondCode(Ty, cast<CondCodeSDNode>(N->getOperand(2))->get())) 968 return SDValue(); 969 970 return DAG.getNode(MipsISD::SETCC_DSP, SDLoc(N), Ty, N->getOperand(0), 971 N->getOperand(1), N->getOperand(2)); 972 } 973 974 static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) { 975 EVT Ty = N->getValueType(0); 976 977 if (Ty == MVT::v2i16 || Ty == MVT::v4i8) { 978 SDValue SetCC = N->getOperand(0); 979 980 if (SetCC.getOpcode() != MipsISD::SETCC_DSP) 981 return SDValue(); 982 983 return DAG.getNode(MipsISD::SELECT_CC_DSP, SDLoc(N), Ty, 984 SetCC.getOperand(0), SetCC.getOperand(1), 985 N->getOperand(1), N->getOperand(2), SetCC.getOperand(2)); 986 } 987 988 return SDValue(); 989 } 990 991 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, 992 const MipsSubtarget &Subtarget) { 993 EVT Ty = N->getValueType(0); 994 995 if (Subtarget.hasMSA() && 
Ty.is128BitVector() && Ty.isInteger()) { 996 // Try the following combines: 997 // (xor (or $a, $b), (build_vector allones)) 998 // (xor (or $a, $b), (bitcast (build_vector allones))) 999 SDValue Op0 = N->getOperand(0); 1000 SDValue Op1 = N->getOperand(1); 1001 SDValue NotOp; 1002 1003 if (ISD::isBuildVectorAllOnes(Op0.getNode())) 1004 NotOp = Op1; 1005 else if (ISD::isBuildVectorAllOnes(Op1.getNode())) 1006 NotOp = Op0; 1007 else 1008 return SDValue(); 1009 1010 if (NotOp->getOpcode() == ISD::OR) 1011 return DAG.getNode(MipsISD::VNOR, SDLoc(N), Ty, NotOp->getOperand(0), 1012 NotOp->getOperand(1)); 1013 } 1014 1015 return SDValue(); 1016 } 1017 1018 SDValue 1019 MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { 1020 SelectionDAG &DAG = DCI.DAG; 1021 SDValue Val; 1022 1023 switch (N->getOpcode()) { 1024 case ISD::AND: 1025 Val = performANDCombine(N, DAG, DCI, Subtarget); 1026 break; 1027 case ISD::OR: 1028 Val = performORCombine(N, DAG, DCI, Subtarget); 1029 break; 1030 case ISD::MUL: 1031 return performMULCombine(N, DAG, DCI, this, Subtarget); 1032 case ISD::SHL: 1033 Val = performSHLCombine(N, DAG, DCI, Subtarget); 1034 break; 1035 case ISD::SRA: 1036 return performSRACombine(N, DAG, DCI, Subtarget); 1037 case ISD::SRL: 1038 return performSRLCombine(N, DAG, DCI, Subtarget); 1039 case ISD::VSELECT: 1040 return performVSELECTCombine(N, DAG); 1041 case ISD::XOR: 1042 Val = performXORCombine(N, DAG, Subtarget); 1043 break; 1044 case ISD::SETCC: 1045 Val = performSETCCCombine(N, DAG); 1046 break; 1047 } 1048 1049 if (Val.getNode()) { 1050 LLVM_DEBUG(dbgs() << "\nMipsSE DAG Combine:\n"; 1051 N->printrWithDepth(dbgs(), &DAG); dbgs() << "\n=> \n"; 1052 Val.getNode()->printrWithDepth(dbgs(), &DAG); dbgs() << "\n"); 1053 return Val; 1054 } 1055 1056 return MipsTargetLowering::PerformDAGCombine(N, DCI); 1057 } 1058 1059 MachineBasicBlock * 1060 MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, 1061 MachineBasicBlock *BB) const 
{ 1062 switch (MI.getOpcode()) { 1063 default: 1064 return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB); 1065 case Mips::BPOSGE32_PSEUDO: 1066 return emitBPOSGE32(MI, BB); 1067 case Mips::SNZ_B_PSEUDO: 1068 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_B); 1069 case Mips::SNZ_H_PSEUDO: 1070 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_H); 1071 case Mips::SNZ_W_PSEUDO: 1072 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_W); 1073 case Mips::SNZ_D_PSEUDO: 1074 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_D); 1075 case Mips::SNZ_V_PSEUDO: 1076 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_V); 1077 case Mips::SZ_B_PSEUDO: 1078 return emitMSACBranchPseudo(MI, BB, Mips::BZ_B); 1079 case Mips::SZ_H_PSEUDO: 1080 return emitMSACBranchPseudo(MI, BB, Mips::BZ_H); 1081 case Mips::SZ_W_PSEUDO: 1082 return emitMSACBranchPseudo(MI, BB, Mips::BZ_W); 1083 case Mips::SZ_D_PSEUDO: 1084 return emitMSACBranchPseudo(MI, BB, Mips::BZ_D); 1085 case Mips::SZ_V_PSEUDO: 1086 return emitMSACBranchPseudo(MI, BB, Mips::BZ_V); 1087 case Mips::COPY_FW_PSEUDO: 1088 return emitCOPY_FW(MI, BB); 1089 case Mips::COPY_FD_PSEUDO: 1090 return emitCOPY_FD(MI, BB); 1091 case Mips::INSERT_FW_PSEUDO: 1092 return emitINSERT_FW(MI, BB); 1093 case Mips::INSERT_FD_PSEUDO: 1094 return emitINSERT_FD(MI, BB); 1095 case Mips::INSERT_B_VIDX_PSEUDO: 1096 case Mips::INSERT_B_VIDX64_PSEUDO: 1097 return emitINSERT_DF_VIDX(MI, BB, 1, false); 1098 case Mips::INSERT_H_VIDX_PSEUDO: 1099 case Mips::INSERT_H_VIDX64_PSEUDO: 1100 return emitINSERT_DF_VIDX(MI, BB, 2, false); 1101 case Mips::INSERT_W_VIDX_PSEUDO: 1102 case Mips::INSERT_W_VIDX64_PSEUDO: 1103 return emitINSERT_DF_VIDX(MI, BB, 4, false); 1104 case Mips::INSERT_D_VIDX_PSEUDO: 1105 case Mips::INSERT_D_VIDX64_PSEUDO: 1106 return emitINSERT_DF_VIDX(MI, BB, 8, false); 1107 case Mips::INSERT_FW_VIDX_PSEUDO: 1108 case Mips::INSERT_FW_VIDX64_PSEUDO: 1109 return emitINSERT_DF_VIDX(MI, BB, 4, true); 1110 case Mips::INSERT_FD_VIDX_PSEUDO: 1111 case 
Mips::INSERT_FD_VIDX64_PSEUDO: 1112 return emitINSERT_DF_VIDX(MI, BB, 8, true); 1113 case Mips::FILL_FW_PSEUDO: 1114 return emitFILL_FW(MI, BB); 1115 case Mips::FILL_FD_PSEUDO: 1116 return emitFILL_FD(MI, BB); 1117 case Mips::FEXP2_W_1_PSEUDO: 1118 return emitFEXP2_W_1(MI, BB); 1119 case Mips::FEXP2_D_1_PSEUDO: 1120 return emitFEXP2_D_1(MI, BB); 1121 case Mips::ST_F16: 1122 return emitST_F16_PSEUDO(MI, BB); 1123 case Mips::LD_F16: 1124 return emitLD_F16_PSEUDO(MI, BB); 1125 case Mips::MSA_FP_EXTEND_W_PSEUDO: 1126 return emitFPEXTEND_PSEUDO(MI, BB, false); 1127 case Mips::MSA_FP_ROUND_W_PSEUDO: 1128 return emitFPROUND_PSEUDO(MI, BB, false); 1129 case Mips::MSA_FP_EXTEND_D_PSEUDO: 1130 return emitFPEXTEND_PSEUDO(MI, BB, true); 1131 case Mips::MSA_FP_ROUND_D_PSEUDO: 1132 return emitFPROUND_PSEUDO(MI, BB, true); 1133 } 1134 } 1135 1136 bool MipsSETargetLowering::isEligibleForTailCallOptimization( 1137 const CCState &CCInfo, unsigned NextStackOffset, 1138 const MipsFunctionInfo &FI) const { 1139 if (!UseMipsTailCalls) 1140 return false; 1141 1142 // Exception has to be cleared with eret. 1143 if (FI.isISR()) 1144 return false; 1145 1146 // Return false if either the callee or caller has a byval argument. 1147 if (CCInfo.getInRegsParamsCount() > 0 || FI.hasByvalArg()) 1148 return false; 1149 1150 // Return true if the callee's argument area is no larger than the 1151 // caller's. 
1152 return NextStackOffset <= FI.getIncomingArgSize(); 1153 } 1154 1155 void MipsSETargetLowering:: 1156 getOpndList(SmallVectorImpl<SDValue> &Ops, 1157 std::deque<std::pair<unsigned, SDValue>> &RegsToPass, 1158 bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, 1159 bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee, 1160 SDValue Chain) const { 1161 Ops.push_back(Callee); 1162 MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal, 1163 InternalLinkage, IsCallReloc, CLI, Callee, 1164 Chain); 1165 } 1166 1167 SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const { 1168 LoadSDNode &Nd = *cast<LoadSDNode>(Op); 1169 1170 if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore) 1171 return MipsTargetLowering::lowerLOAD(Op, DAG); 1172 1173 // Replace a double precision load with two i32 loads and a buildpair64. 1174 SDLoc DL(Op); 1175 SDValue Ptr = Nd.getBasePtr(), Chain = Nd.getChain(); 1176 EVT PtrVT = Ptr.getValueType(); 1177 1178 // i32 load from lower address. 1179 SDValue Lo = DAG.getLoad(MVT::i32, DL, Chain, Ptr, MachinePointerInfo(), 1180 Nd.getAlignment(), Nd.getMemOperand()->getFlags()); 1181 1182 // i32 load from higher address. 
1183 Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT)); 1184 SDValue Hi = DAG.getLoad( 1185 MVT::i32, DL, Lo.getValue(1), Ptr, MachinePointerInfo(), 1186 std::min(Nd.getAlignment(), 4U), Nd.getMemOperand()->getFlags()); 1187 1188 if (!Subtarget.isLittle()) 1189 std::swap(Lo, Hi); 1190 1191 SDValue BP = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi); 1192 SDValue Ops[2] = {BP, Hi.getValue(1)}; 1193 return DAG.getMergeValues(Ops, DL); 1194 } 1195 1196 SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const { 1197 StoreSDNode &Nd = *cast<StoreSDNode>(Op); 1198 1199 if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore) 1200 return MipsTargetLowering::lowerSTORE(Op, DAG); 1201 1202 // Replace a double precision store with two extractelement64s and i32 stores. 1203 SDLoc DL(Op); 1204 SDValue Val = Nd.getValue(), Ptr = Nd.getBasePtr(), Chain = Nd.getChain(); 1205 EVT PtrVT = Ptr.getValueType(); 1206 SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, 1207 Val, DAG.getConstant(0, DL, MVT::i32)); 1208 SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, 1209 Val, DAG.getConstant(1, DL, MVT::i32)); 1210 1211 if (!Subtarget.isLittle()) 1212 std::swap(Lo, Hi); 1213 1214 // i32 store to lower address. 1215 Chain = 1216 DAG.getStore(Chain, DL, Lo, Ptr, MachinePointerInfo(), Nd.getAlignment(), 1217 Nd.getMemOperand()->getFlags(), Nd.getAAInfo()); 1218 1219 // i32 store to higher address. 1220 Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT)); 1221 return DAG.getStore(Chain, DL, Hi, Ptr, MachinePointerInfo(), 1222 std::min(Nd.getAlignment(), 4U), 1223 Nd.getMemOperand()->getFlags(), Nd.getAAInfo()); 1224 } 1225 1226 SDValue MipsSETargetLowering::lowerBITCAST(SDValue Op, 1227 SelectionDAG &DAG) const { 1228 SDLoc DL(Op); 1229 MVT Src = Op.getOperand(0).getValueType().getSimpleVT(); 1230 MVT Dest = Op.getValueType().getSimpleVT(); 1231 1232 // Bitcast i64 to double. 
1233 if (Src == MVT::i64 && Dest == MVT::f64) { 1234 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, 1235 Op.getOperand(0), DAG.getIntPtrConstant(0, DL)); 1236 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, 1237 Op.getOperand(0), DAG.getIntPtrConstant(1, DL)); 1238 return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi); 1239 } 1240 1241 // Bitcast double to i64. 1242 if (Src == MVT::f64 && Dest == MVT::i64) { 1243 SDValue Lo = 1244 DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0), 1245 DAG.getConstant(0, DL, MVT::i32)); 1246 SDValue Hi = 1247 DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0), 1248 DAG.getConstant(1, DL, MVT::i32)); 1249 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi); 1250 } 1251 1252 // Skip other cases of bitcast and use default lowering. 1253 return SDValue(); 1254 } 1255 1256 SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc, 1257 bool HasLo, bool HasHi, 1258 SelectionDAG &DAG) const { 1259 // MIPS32r6/MIPS64r6 removed accumulator based multiplies. 1260 assert(!Subtarget.hasMips32r6()); 1261 1262 EVT Ty = Op.getOperand(0).getValueType(); 1263 SDLoc DL(Op); 1264 SDValue Mult = DAG.getNode(NewOpc, DL, MVT::Untyped, 1265 Op.getOperand(0), Op.getOperand(1)); 1266 SDValue Lo, Hi; 1267 1268 if (HasLo) 1269 Lo = DAG.getNode(MipsISD::MFLO, DL, Ty, Mult); 1270 if (HasHi) 1271 Hi = DAG.getNode(MipsISD::MFHI, DL, Ty, Mult); 1272 1273 if (!HasLo || !HasHi) 1274 return HasLo ? 
Lo : Hi; 1275 1276 SDValue Vals[] = { Lo, Hi }; 1277 return DAG.getMergeValues(Vals, DL); 1278 } 1279 1280 static SDValue initAccumulator(SDValue In, const SDLoc &DL, SelectionDAG &DAG) { 1281 SDValue InLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In, 1282 DAG.getConstant(0, DL, MVT::i32)); 1283 SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In, 1284 DAG.getConstant(1, DL, MVT::i32)); 1285 return DAG.getNode(MipsISD::MTLOHI, DL, MVT::Untyped, InLo, InHi); 1286 } 1287 1288 static SDValue extractLOHI(SDValue Op, const SDLoc &DL, SelectionDAG &DAG) { 1289 SDValue Lo = DAG.getNode(MipsISD::MFLO, DL, MVT::i32, Op); 1290 SDValue Hi = DAG.getNode(MipsISD::MFHI, DL, MVT::i32, Op); 1291 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi); 1292 } 1293 1294 // This function expands mips intrinsic nodes which have 64-bit input operands 1295 // or output values. 1296 // 1297 // out64 = intrinsic-node in64 1298 // => 1299 // lo = copy (extract-element (in64, 0)) 1300 // hi = copy (extract-element (in64, 1)) 1301 // mips-specific-node 1302 // v0 = copy lo 1303 // v1 = copy hi 1304 // out64 = merge-values (v0, v1) 1305 // 1306 static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { 1307 SDLoc DL(Op); 1308 bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other; 1309 SmallVector<SDValue, 3> Ops; 1310 unsigned OpNo = 0; 1311 1312 // See if Op has a chain input. 1313 if (HasChainIn) 1314 Ops.push_back(Op->getOperand(OpNo++)); 1315 1316 // The next operand is the intrinsic opcode. 1317 assert(Op->getOperand(OpNo).getOpcode() == ISD::TargetConstant); 1318 1319 // See if the next operand has type i64. 1320 SDValue Opnd = Op->getOperand(++OpNo), In64; 1321 1322 if (Opnd.getValueType() == MVT::i64) 1323 In64 = initAccumulator(Opnd, DL, DAG); 1324 else 1325 Ops.push_back(Opnd); 1326 1327 // Push the remaining operands. 
1328 for (++OpNo ; OpNo < Op->getNumOperands(); ++OpNo) 1329 Ops.push_back(Op->getOperand(OpNo)); 1330 1331 // Add In64 to the end of the list. 1332 if (In64.getNode()) 1333 Ops.push_back(In64); 1334 1335 // Scan output. 1336 SmallVector<EVT, 2> ResTys; 1337 1338 for (EVT Ty : Op->values()) 1339 ResTys.push_back((Ty == MVT::i64) ? MVT::Untyped : Ty); 1340 1341 // Create node. 1342 SDValue Val = DAG.getNode(Opc, DL, ResTys, Ops); 1343 SDValue Out = (ResTys[0] == MVT::Untyped) ? extractLOHI(Val, DL, DAG) : Val; 1344 1345 if (!HasChainIn) 1346 return Out; 1347 1348 assert(Val->getValueType(1) == MVT::Other); 1349 SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) }; 1350 return DAG.getMergeValues(Vals, DL); 1351 } 1352 1353 // Lower an MSA copy intrinsic into the specified SelectionDAG node 1354 static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { 1355 SDLoc DL(Op); 1356 SDValue Vec = Op->getOperand(1); 1357 SDValue Idx = Op->getOperand(2); 1358 EVT ResTy = Op->getValueType(0); 1359 EVT EltTy = Vec->getValueType(0).getVectorElementType(); 1360 1361 SDValue Result = DAG.getNode(Opc, DL, ResTy, Vec, Idx, 1362 DAG.getValueType(EltTy)); 1363 1364 return Result; 1365 } 1366 1367 static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) { 1368 EVT ResVecTy = Op->getValueType(0); 1369 EVT ViaVecTy = ResVecTy; 1370 bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian(); 1371 SDLoc DL(Op); 1372 1373 // When ResVecTy == MVT::v2i64, LaneA is the upper 32 bits of the lane and 1374 // LaneB is the lower 32-bits. Otherwise LaneA and LaneB are alternating 1375 // lanes. 1376 SDValue LaneA = Op->getOperand(OpNr); 1377 SDValue LaneB; 1378 1379 if (ResVecTy == MVT::v2i64) { 1380 // In case of the index being passed as an immediate value, set the upper 1381 // lane to 0 so that the splati.d instruction can be matched. 
1382 if (isa<ConstantSDNode>(LaneA)) 1383 LaneB = DAG.getConstant(0, DL, MVT::i32); 1384 // Having the index passed in a register, set the upper lane to the same 1385 // value as the lower - this results in the BUILD_VECTOR node not being 1386 // expanded through stack. This way we are able to pattern match the set of 1387 // nodes created here to splat.d. 1388 else 1389 LaneB = LaneA; 1390 ViaVecTy = MVT::v4i32; 1391 if(BigEndian) 1392 std::swap(LaneA, LaneB); 1393 } else 1394 LaneB = LaneA; 1395 1396 SDValue Ops[16] = { LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, 1397 LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB }; 1398 1399 SDValue Result = DAG.getBuildVector( 1400 ViaVecTy, DL, makeArrayRef(Ops, ViaVecTy.getVectorNumElements())); 1401 1402 if (ViaVecTy != ResVecTy) { 1403 SDValue One = DAG.getConstant(1, DL, ViaVecTy); 1404 Result = DAG.getNode(ISD::BITCAST, DL, ResVecTy, 1405 DAG.getNode(ISD::AND, DL, ViaVecTy, Result, One)); 1406 } 1407 1408 return Result; 1409 } 1410 1411 static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, 1412 bool IsSigned = false) { 1413 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp)); 1414 return DAG.getConstant( 1415 APInt(Op->getValueType(0).getScalarType().getSizeInBits(), 1416 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned), 1417 SDLoc(Op), Op->getValueType(0)); 1418 } 1419 1420 static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue, 1421 bool BigEndian, SelectionDAG &DAG) { 1422 EVT ViaVecTy = VecTy; 1423 SDValue SplatValueA = SplatValue; 1424 SDValue SplatValueB = SplatValue; 1425 SDLoc DL(SplatValue); 1426 1427 if (VecTy == MVT::v2i64) { 1428 // v2i64 BUILD_VECTOR must be performed via v4i32 so split into i32's. 
1429 ViaVecTy = MVT::v4i32; 1430 1431 SplatValueA = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValue); 1432 SplatValueB = DAG.getNode(ISD::SRL, DL, MVT::i64, SplatValue, 1433 DAG.getConstant(32, DL, MVT::i32)); 1434 SplatValueB = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValueB); 1435 } 1436 1437 // We currently hold the parts in little endian order. Swap them if 1438 // necessary. 1439 if (BigEndian) 1440 std::swap(SplatValueA, SplatValueB); 1441 1442 SDValue Ops[16] = { SplatValueA, SplatValueB, SplatValueA, SplatValueB, 1443 SplatValueA, SplatValueB, SplatValueA, SplatValueB, 1444 SplatValueA, SplatValueB, SplatValueA, SplatValueB, 1445 SplatValueA, SplatValueB, SplatValueA, SplatValueB }; 1446 1447 SDValue Result = DAG.getBuildVector( 1448 ViaVecTy, DL, makeArrayRef(Ops, ViaVecTy.getVectorNumElements())); 1449 1450 if (VecTy != ViaVecTy) 1451 Result = DAG.getNode(ISD::BITCAST, DL, VecTy, Result); 1452 1453 return Result; 1454 } 1455 1456 static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG, 1457 unsigned Opc, SDValue Imm, 1458 bool BigEndian) { 1459 EVT VecTy = Op->getValueType(0); 1460 SDValue Exp2Imm; 1461 SDLoc DL(Op); 1462 1463 // The DAG Combiner can't constant fold bitcasted vectors yet so we must do it 1464 // here for now. 1465 if (VecTy == MVT::v2i64) { 1466 if (ConstantSDNode *CImm = dyn_cast<ConstantSDNode>(Imm)) { 1467 APInt BitImm = APInt(64, 1) << CImm->getAPIntValue(); 1468 1469 SDValue BitImmHiOp = DAG.getConstant(BitImm.lshr(32).trunc(32), DL, 1470 MVT::i32); 1471 SDValue BitImmLoOp = DAG.getConstant(BitImm.trunc(32), DL, MVT::i32); 1472 1473 if (BigEndian) 1474 std::swap(BitImmLoOp, BitImmHiOp); 1475 1476 Exp2Imm = DAG.getNode( 1477 ISD::BITCAST, DL, MVT::v2i64, 1478 DAG.getBuildVector(MVT::v4i32, DL, 1479 {BitImmLoOp, BitImmHiOp, BitImmLoOp, BitImmHiOp})); 1480 } 1481 } 1482 1483 if (!Exp2Imm.getNode()) { 1484 // We couldnt constant fold, do a vector shift instead 1485 1486 // Extend i32 to i64 if necessary. 
Sign or zero extend doesn't matter since 1487 // only values 0-63 are valid. 1488 if (VecTy == MVT::v2i64) 1489 Imm = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Imm); 1490 1491 Exp2Imm = getBuildVectorSplat(VecTy, Imm, BigEndian, DAG); 1492 1493 Exp2Imm = DAG.getNode(ISD::SHL, DL, VecTy, DAG.getConstant(1, DL, VecTy), 1494 Exp2Imm); 1495 } 1496 1497 return DAG.getNode(Opc, DL, VecTy, Op->getOperand(1), Exp2Imm); 1498 } 1499 1500 static SDValue truncateVecElts(SDValue Op, SelectionDAG &DAG) { 1501 SDLoc DL(Op); 1502 EVT ResTy = Op->getValueType(0); 1503 SDValue Vec = Op->getOperand(2); 1504 bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian(); 1505 MVT ResEltTy = ResTy == MVT::v2i64 ? MVT::i64 : MVT::i32; 1506 SDValue ConstValue = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, 1507 DL, ResEltTy); 1508 SDValue SplatVec = getBuildVectorSplat(ResTy, ConstValue, BigEndian, DAG); 1509 1510 return DAG.getNode(ISD::AND, DL, ResTy, Vec, SplatVec); 1511 } 1512 1513 static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) { 1514 EVT ResTy = Op->getValueType(0); 1515 SDLoc DL(Op); 1516 SDValue One = DAG.getConstant(1, DL, ResTy); 1517 SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Op, DAG)); 1518 1519 return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), 1520 DAG.getNOT(DL, Bit, ResTy)); 1521 } 1522 1523 static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) { 1524 SDLoc DL(Op); 1525 EVT ResTy = Op->getValueType(0); 1526 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) 1527 << cast<ConstantSDNode>(Op->getOperand(2))->getAPIntValue(); 1528 SDValue BitMask = DAG.getConstant(~BitImm, DL, ResTy); 1529 1530 return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), BitMask); 1531 } 1532 1533 SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, 1534 SelectionDAG &DAG) const { 1535 SDLoc DL(Op); 1536 unsigned Intrinsic = cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue(); 1537 switch (Intrinsic) 
{ 1538 default: 1539 return SDValue(); 1540 case Intrinsic::mips_shilo: 1541 return lowerDSPIntr(Op, DAG, MipsISD::SHILO); 1542 case Intrinsic::mips_dpau_h_qbl: 1543 return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL); 1544 case Intrinsic::mips_dpau_h_qbr: 1545 return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR); 1546 case Intrinsic::mips_dpsu_h_qbl: 1547 return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL); 1548 case Intrinsic::mips_dpsu_h_qbr: 1549 return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR); 1550 case Intrinsic::mips_dpa_w_ph: 1551 return lowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH); 1552 case Intrinsic::mips_dps_w_ph: 1553 return lowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH); 1554 case Intrinsic::mips_dpax_w_ph: 1555 return lowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH); 1556 case Intrinsic::mips_dpsx_w_ph: 1557 return lowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH); 1558 case Intrinsic::mips_mulsa_w_ph: 1559 return lowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH); 1560 case Intrinsic::mips_mult: 1561 return lowerDSPIntr(Op, DAG, MipsISD::Mult); 1562 case Intrinsic::mips_multu: 1563 return lowerDSPIntr(Op, DAG, MipsISD::Multu); 1564 case Intrinsic::mips_madd: 1565 return lowerDSPIntr(Op, DAG, MipsISD::MAdd); 1566 case Intrinsic::mips_maddu: 1567 return lowerDSPIntr(Op, DAG, MipsISD::MAddu); 1568 case Intrinsic::mips_msub: 1569 return lowerDSPIntr(Op, DAG, MipsISD::MSub); 1570 case Intrinsic::mips_msubu: 1571 return lowerDSPIntr(Op, DAG, MipsISD::MSubu); 1572 case Intrinsic::mips_addv_b: 1573 case Intrinsic::mips_addv_h: 1574 case Intrinsic::mips_addv_w: 1575 case Intrinsic::mips_addv_d: 1576 return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1), 1577 Op->getOperand(2)); 1578 case Intrinsic::mips_addvi_b: 1579 case Intrinsic::mips_addvi_h: 1580 case Intrinsic::mips_addvi_w: 1581 case Intrinsic::mips_addvi_d: 1582 return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1), 1583 lowerMSASplatImm(Op, 2, DAG)); 1584 case Intrinsic::mips_and_v: 1585 return 
DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1), 1586 Op->getOperand(2)); 1587 case Intrinsic::mips_andi_b: 1588 return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1), 1589 lowerMSASplatImm(Op, 2, DAG)); 1590 case Intrinsic::mips_bclr_b: 1591 case Intrinsic::mips_bclr_h: 1592 case Intrinsic::mips_bclr_w: 1593 case Intrinsic::mips_bclr_d: 1594 return lowerMSABitClear(Op, DAG); 1595 case Intrinsic::mips_bclri_b: 1596 case Intrinsic::mips_bclri_h: 1597 case Intrinsic::mips_bclri_w: 1598 case Intrinsic::mips_bclri_d: 1599 return lowerMSABitClearImm(Op, DAG); 1600 case Intrinsic::mips_binsli_b: 1601 case Intrinsic::mips_binsli_h: 1602 case Intrinsic::mips_binsli_w: 1603 case Intrinsic::mips_binsli_d: { 1604 // binsli_x(IfClear, IfSet, nbits) -> (vselect LBitsMask, IfSet, IfClear) 1605 EVT VecTy = Op->getValueType(0); 1606 EVT EltTy = VecTy.getVectorElementType(); 1607 if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits()) 1608 report_fatal_error("Immediate out of range"); 1609 APInt Mask = APInt::getHighBitsSet(EltTy.getSizeInBits(), 1610 Op->getConstantOperandVal(3) + 1); 1611 return DAG.getNode(ISD::VSELECT, DL, VecTy, 1612 DAG.getConstant(Mask, DL, VecTy, true), 1613 Op->getOperand(2), Op->getOperand(1)); 1614 } 1615 case Intrinsic::mips_binsri_b: 1616 case Intrinsic::mips_binsri_h: 1617 case Intrinsic::mips_binsri_w: 1618 case Intrinsic::mips_binsri_d: { 1619 // binsri_x(IfClear, IfSet, nbits) -> (vselect RBitsMask, IfSet, IfClear) 1620 EVT VecTy = Op->getValueType(0); 1621 EVT EltTy = VecTy.getVectorElementType(); 1622 if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits()) 1623 report_fatal_error("Immediate out of range"); 1624 APInt Mask = APInt::getLowBitsSet(EltTy.getSizeInBits(), 1625 Op->getConstantOperandVal(3) + 1); 1626 return DAG.getNode(ISD::VSELECT, DL, VecTy, 1627 DAG.getConstant(Mask, DL, VecTy, true), 1628 Op->getOperand(2), Op->getOperand(1)); 1629 } 1630 case Intrinsic::mips_bmnz_v: 1631 return 
DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3), 1632 Op->getOperand(2), Op->getOperand(1)); 1633 case Intrinsic::mips_bmnzi_b: 1634 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1635 lowerMSASplatImm(Op, 3, DAG), Op->getOperand(2), 1636 Op->getOperand(1)); 1637 case Intrinsic::mips_bmz_v: 1638 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3), 1639 Op->getOperand(1), Op->getOperand(2)); 1640 case Intrinsic::mips_bmzi_b: 1641 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1642 lowerMSASplatImm(Op, 3, DAG), Op->getOperand(1), 1643 Op->getOperand(2)); 1644 case Intrinsic::mips_bneg_b: 1645 case Intrinsic::mips_bneg_h: 1646 case Intrinsic::mips_bneg_w: 1647 case Intrinsic::mips_bneg_d: { 1648 EVT VecTy = Op->getValueType(0); 1649 SDValue One = DAG.getConstant(1, DL, VecTy); 1650 1651 return DAG.getNode(ISD::XOR, DL, VecTy, Op->getOperand(1), 1652 DAG.getNode(ISD::SHL, DL, VecTy, One, 1653 truncateVecElts(Op, DAG))); 1654 } 1655 case Intrinsic::mips_bnegi_b: 1656 case Intrinsic::mips_bnegi_h: 1657 case Intrinsic::mips_bnegi_w: 1658 case Intrinsic::mips_bnegi_d: 1659 return lowerMSABinaryBitImmIntr(Op, DAG, ISD::XOR, Op->getOperand(2), 1660 !Subtarget.isLittle()); 1661 case Intrinsic::mips_bnz_b: 1662 case Intrinsic::mips_bnz_h: 1663 case Intrinsic::mips_bnz_w: 1664 case Intrinsic::mips_bnz_d: 1665 return DAG.getNode(MipsISD::VALL_NONZERO, DL, Op->getValueType(0), 1666 Op->getOperand(1)); 1667 case Intrinsic::mips_bnz_v: 1668 return DAG.getNode(MipsISD::VANY_NONZERO, DL, Op->getValueType(0), 1669 Op->getOperand(1)); 1670 case Intrinsic::mips_bsel_v: 1671 // bsel_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear) 1672 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1673 Op->getOperand(1), Op->getOperand(3), 1674 Op->getOperand(2)); 1675 case Intrinsic::mips_bseli_b: 1676 // bseli_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear) 1677 return DAG.getNode(ISD::VSELECT, DL, 
Op->getValueType(0), 1678 Op->getOperand(1), lowerMSASplatImm(Op, 3, DAG), 1679 Op->getOperand(2)); 1680 case Intrinsic::mips_bset_b: 1681 case Intrinsic::mips_bset_h: 1682 case Intrinsic::mips_bset_w: 1683 case Intrinsic::mips_bset_d: { 1684 EVT VecTy = Op->getValueType(0); 1685 SDValue One = DAG.getConstant(1, DL, VecTy); 1686 1687 return DAG.getNode(ISD::OR, DL, VecTy, Op->getOperand(1), 1688 DAG.getNode(ISD::SHL, DL, VecTy, One, 1689 truncateVecElts(Op, DAG))); 1690 } 1691 case Intrinsic::mips_bseti_b: 1692 case Intrinsic::mips_bseti_h: 1693 case Intrinsic::mips_bseti_w: 1694 case Intrinsic::mips_bseti_d: 1695 return lowerMSABinaryBitImmIntr(Op, DAG, ISD::OR, Op->getOperand(2), 1696 !Subtarget.isLittle()); 1697 case Intrinsic::mips_bz_b: 1698 case Intrinsic::mips_bz_h: 1699 case Intrinsic::mips_bz_w: 1700 case Intrinsic::mips_bz_d: 1701 return DAG.getNode(MipsISD::VALL_ZERO, DL, Op->getValueType(0), 1702 Op->getOperand(1)); 1703 case Intrinsic::mips_bz_v: 1704 return DAG.getNode(MipsISD::VANY_ZERO, DL, Op->getValueType(0), 1705 Op->getOperand(1)); 1706 case Intrinsic::mips_ceq_b: 1707 case Intrinsic::mips_ceq_h: 1708 case Intrinsic::mips_ceq_w: 1709 case Intrinsic::mips_ceq_d: 1710 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1711 Op->getOperand(2), ISD::SETEQ); 1712 case Intrinsic::mips_ceqi_b: 1713 case Intrinsic::mips_ceqi_h: 1714 case Intrinsic::mips_ceqi_w: 1715 case Intrinsic::mips_ceqi_d: 1716 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1717 lowerMSASplatImm(Op, 2, DAG, true), ISD::SETEQ); 1718 case Intrinsic::mips_cle_s_b: 1719 case Intrinsic::mips_cle_s_h: 1720 case Intrinsic::mips_cle_s_w: 1721 case Intrinsic::mips_cle_s_d: 1722 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1723 Op->getOperand(2), ISD::SETLE); 1724 case Intrinsic::mips_clei_s_b: 1725 case Intrinsic::mips_clei_s_h: 1726 case Intrinsic::mips_clei_s_w: 1727 case Intrinsic::mips_clei_s_d: 1728 return DAG.getSetCC(DL, 
Op->getValueType(0), Op->getOperand(1), 1729 lowerMSASplatImm(Op, 2, DAG, true), ISD::SETLE); 1730 case Intrinsic::mips_cle_u_b: 1731 case Intrinsic::mips_cle_u_h: 1732 case Intrinsic::mips_cle_u_w: 1733 case Intrinsic::mips_cle_u_d: 1734 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1735 Op->getOperand(2), ISD::SETULE); 1736 case Intrinsic::mips_clei_u_b: 1737 case Intrinsic::mips_clei_u_h: 1738 case Intrinsic::mips_clei_u_w: 1739 case Intrinsic::mips_clei_u_d: 1740 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1741 lowerMSASplatImm(Op, 2, DAG), ISD::SETULE); 1742 case Intrinsic::mips_clt_s_b: 1743 case Intrinsic::mips_clt_s_h: 1744 case Intrinsic::mips_clt_s_w: 1745 case Intrinsic::mips_clt_s_d: 1746 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1747 Op->getOperand(2), ISD::SETLT); 1748 case Intrinsic::mips_clti_s_b: 1749 case Intrinsic::mips_clti_s_h: 1750 case Intrinsic::mips_clti_s_w: 1751 case Intrinsic::mips_clti_s_d: 1752 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1753 lowerMSASplatImm(Op, 2, DAG, true), ISD::SETLT); 1754 case Intrinsic::mips_clt_u_b: 1755 case Intrinsic::mips_clt_u_h: 1756 case Intrinsic::mips_clt_u_w: 1757 case Intrinsic::mips_clt_u_d: 1758 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1759 Op->getOperand(2), ISD::SETULT); 1760 case Intrinsic::mips_clti_u_b: 1761 case Intrinsic::mips_clti_u_h: 1762 case Intrinsic::mips_clti_u_w: 1763 case Intrinsic::mips_clti_u_d: 1764 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1765 lowerMSASplatImm(Op, 2, DAG), ISD::SETULT); 1766 case Intrinsic::mips_copy_s_b: 1767 case Intrinsic::mips_copy_s_h: 1768 case Intrinsic::mips_copy_s_w: 1769 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT); 1770 case Intrinsic::mips_copy_s_d: 1771 if (Subtarget.hasMips64()) 1772 // Lower directly into VEXTRACT_SEXT_ELT since i64 is legal on Mips64. 
1773 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT); 1774 else { 1775 // Lower into the generic EXTRACT_VECTOR_ELT node and let the type 1776 // legalizer and EXTRACT_VECTOR_ELT lowering sort it out. 1777 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), 1778 Op->getValueType(0), Op->getOperand(1), 1779 Op->getOperand(2)); 1780 } 1781 case Intrinsic::mips_copy_u_b: 1782 case Intrinsic::mips_copy_u_h: 1783 case Intrinsic::mips_copy_u_w: 1784 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT); 1785 case Intrinsic::mips_copy_u_d: 1786 if (Subtarget.hasMips64()) 1787 // Lower directly into VEXTRACT_ZEXT_ELT since i64 is legal on Mips64. 1788 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT); 1789 else { 1790 // Lower into the generic EXTRACT_VECTOR_ELT node and let the type 1791 // legalizer and EXTRACT_VECTOR_ELT lowering sort it out. 1792 // Note: When i64 is illegal, this results in copy_s.w instructions 1793 // instead of copy_u.w instructions. This makes no difference to the 1794 // behaviour since i64 is only illegal when the register file is 32-bit. 1795 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), 1796 Op->getValueType(0), Op->getOperand(1), 1797 Op->getOperand(2)); 1798 } 1799 case Intrinsic::mips_div_s_b: 1800 case Intrinsic::mips_div_s_h: 1801 case Intrinsic::mips_div_s_w: 1802 case Intrinsic::mips_div_s_d: 1803 return DAG.getNode(ISD::SDIV, DL, Op->getValueType(0), Op->getOperand(1), 1804 Op->getOperand(2)); 1805 case Intrinsic::mips_div_u_b: 1806 case Intrinsic::mips_div_u_h: 1807 case Intrinsic::mips_div_u_w: 1808 case Intrinsic::mips_div_u_d: 1809 return DAG.getNode(ISD::UDIV, DL, Op->getValueType(0), Op->getOperand(1), 1810 Op->getOperand(2)); 1811 case Intrinsic::mips_fadd_w: 1812 case Intrinsic::mips_fadd_d: 1813 // TODO: If intrinsics have fast-math-flags, propagate them. 
1814 return DAG.getNode(ISD::FADD, DL, Op->getValueType(0), Op->getOperand(1), 1815 Op->getOperand(2)); 1816 // Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away 1817 case Intrinsic::mips_fceq_w: 1818 case Intrinsic::mips_fceq_d: 1819 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1820 Op->getOperand(2), ISD::SETOEQ); 1821 case Intrinsic::mips_fcle_w: 1822 case Intrinsic::mips_fcle_d: 1823 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1824 Op->getOperand(2), ISD::SETOLE); 1825 case Intrinsic::mips_fclt_w: 1826 case Intrinsic::mips_fclt_d: 1827 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1828 Op->getOperand(2), ISD::SETOLT); 1829 case Intrinsic::mips_fcne_w: 1830 case Intrinsic::mips_fcne_d: 1831 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1832 Op->getOperand(2), ISD::SETONE); 1833 case Intrinsic::mips_fcor_w: 1834 case Intrinsic::mips_fcor_d: 1835 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1836 Op->getOperand(2), ISD::SETO); 1837 case Intrinsic::mips_fcueq_w: 1838 case Intrinsic::mips_fcueq_d: 1839 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1840 Op->getOperand(2), ISD::SETUEQ); 1841 case Intrinsic::mips_fcule_w: 1842 case Intrinsic::mips_fcule_d: 1843 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1844 Op->getOperand(2), ISD::SETULE); 1845 case Intrinsic::mips_fcult_w: 1846 case Intrinsic::mips_fcult_d: 1847 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1848 Op->getOperand(2), ISD::SETULT); 1849 case Intrinsic::mips_fcun_w: 1850 case Intrinsic::mips_fcun_d: 1851 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1852 Op->getOperand(2), ISD::SETUO); 1853 case Intrinsic::mips_fcune_w: 1854 case Intrinsic::mips_fcune_d: 1855 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1856 Op->getOperand(2), ISD::SETUNE); 1857 case Intrinsic::mips_fdiv_w: 1858 case 
Intrinsic::mips_fdiv_d: 1859 // TODO: If intrinsics have fast-math-flags, propagate them. 1860 return DAG.getNode(ISD::FDIV, DL, Op->getValueType(0), Op->getOperand(1), 1861 Op->getOperand(2)); 1862 case Intrinsic::mips_ffint_u_w: 1863 case Intrinsic::mips_ffint_u_d: 1864 return DAG.getNode(ISD::UINT_TO_FP, DL, Op->getValueType(0), 1865 Op->getOperand(1)); 1866 case Intrinsic::mips_ffint_s_w: 1867 case Intrinsic::mips_ffint_s_d: 1868 return DAG.getNode(ISD::SINT_TO_FP, DL, Op->getValueType(0), 1869 Op->getOperand(1)); 1870 case Intrinsic::mips_fill_b: 1871 case Intrinsic::mips_fill_h: 1872 case Intrinsic::mips_fill_w: 1873 case Intrinsic::mips_fill_d: { 1874 EVT ResTy = Op->getValueType(0); 1875 SmallVector<SDValue, 16> Ops(ResTy.getVectorNumElements(), 1876 Op->getOperand(1)); 1877 1878 // If ResTy is v2i64 then the type legalizer will break this node down into 1879 // an equivalent v4i32. 1880 return DAG.getBuildVector(ResTy, DL, Ops); 1881 } 1882 case Intrinsic::mips_fexp2_w: 1883 case Intrinsic::mips_fexp2_d: { 1884 // TODO: If intrinsics have fast-math-flags, propagate them. 1885 EVT ResTy = Op->getValueType(0); 1886 return DAG.getNode( 1887 ISD::FMUL, SDLoc(Op), ResTy, Op->getOperand(1), 1888 DAG.getNode(ISD::FEXP2, SDLoc(Op), ResTy, Op->getOperand(2))); 1889 } 1890 case Intrinsic::mips_flog2_w: 1891 case Intrinsic::mips_flog2_d: 1892 return DAG.getNode(ISD::FLOG2, DL, Op->getValueType(0), Op->getOperand(1)); 1893 case Intrinsic::mips_fmadd_w: 1894 case Intrinsic::mips_fmadd_d: 1895 return DAG.getNode(ISD::FMA, SDLoc(Op), Op->getValueType(0), 1896 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); 1897 case Intrinsic::mips_fmul_w: 1898 case Intrinsic::mips_fmul_d: 1899 // TODO: If intrinsics have fast-math-flags, propagate them. 
1900 return DAG.getNode(ISD::FMUL, DL, Op->getValueType(0), Op->getOperand(1), 1901 Op->getOperand(2)); 1902 case Intrinsic::mips_fmsub_w: 1903 case Intrinsic::mips_fmsub_d: { 1904 // TODO: If intrinsics have fast-math-flags, propagate them. 1905 return DAG.getNode(MipsISD::FMS, SDLoc(Op), Op->getValueType(0), 1906 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); 1907 } 1908 case Intrinsic::mips_frint_w: 1909 case Intrinsic::mips_frint_d: 1910 return DAG.getNode(ISD::FRINT, DL, Op->getValueType(0), Op->getOperand(1)); 1911 case Intrinsic::mips_fsqrt_w: 1912 case Intrinsic::mips_fsqrt_d: 1913 return DAG.getNode(ISD::FSQRT, DL, Op->getValueType(0), Op->getOperand(1)); 1914 case Intrinsic::mips_fsub_w: 1915 case Intrinsic::mips_fsub_d: 1916 // TODO: If intrinsics have fast-math-flags, propagate them. 1917 return DAG.getNode(ISD::FSUB, DL, Op->getValueType(0), Op->getOperand(1), 1918 Op->getOperand(2)); 1919 case Intrinsic::mips_ftrunc_u_w: 1920 case Intrinsic::mips_ftrunc_u_d: 1921 return DAG.getNode(ISD::FP_TO_UINT, DL, Op->getValueType(0), 1922 Op->getOperand(1)); 1923 case Intrinsic::mips_ftrunc_s_w: 1924 case Intrinsic::mips_ftrunc_s_d: 1925 return DAG.getNode(ISD::FP_TO_SINT, DL, Op->getValueType(0), 1926 Op->getOperand(1)); 1927 case Intrinsic::mips_ilvev_b: 1928 case Intrinsic::mips_ilvev_h: 1929 case Intrinsic::mips_ilvev_w: 1930 case Intrinsic::mips_ilvev_d: 1931 return DAG.getNode(MipsISD::ILVEV, DL, Op->getValueType(0), 1932 Op->getOperand(1), Op->getOperand(2)); 1933 case Intrinsic::mips_ilvl_b: 1934 case Intrinsic::mips_ilvl_h: 1935 case Intrinsic::mips_ilvl_w: 1936 case Intrinsic::mips_ilvl_d: 1937 return DAG.getNode(MipsISD::ILVL, DL, Op->getValueType(0), 1938 Op->getOperand(1), Op->getOperand(2)); 1939 case Intrinsic::mips_ilvod_b: 1940 case Intrinsic::mips_ilvod_h: 1941 case Intrinsic::mips_ilvod_w: 1942 case Intrinsic::mips_ilvod_d: 1943 return DAG.getNode(MipsISD::ILVOD, DL, Op->getValueType(0), 1944 Op->getOperand(1), Op->getOperand(2)); 
1945 case Intrinsic::mips_ilvr_b: 1946 case Intrinsic::mips_ilvr_h: 1947 case Intrinsic::mips_ilvr_w: 1948 case Intrinsic::mips_ilvr_d: 1949 return DAG.getNode(MipsISD::ILVR, DL, Op->getValueType(0), 1950 Op->getOperand(1), Op->getOperand(2)); 1951 case Intrinsic::mips_insert_b: 1952 case Intrinsic::mips_insert_h: 1953 case Intrinsic::mips_insert_w: 1954 case Intrinsic::mips_insert_d: 1955 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), Op->getValueType(0), 1956 Op->getOperand(1), Op->getOperand(3), Op->getOperand(2)); 1957 case Intrinsic::mips_insve_b: 1958 case Intrinsic::mips_insve_h: 1959 case Intrinsic::mips_insve_w: 1960 case Intrinsic::mips_insve_d: { 1961 // Report an error for out of range values. 1962 int64_t Max; 1963 switch (Intrinsic) { 1964 case Intrinsic::mips_insve_b: Max = 15; break; 1965 case Intrinsic::mips_insve_h: Max = 7; break; 1966 case Intrinsic::mips_insve_w: Max = 3; break; 1967 case Intrinsic::mips_insve_d: Max = 1; break; 1968 default: llvm_unreachable("Unmatched intrinsic"); 1969 } 1970 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 1971 if (Value < 0 || Value > Max) 1972 report_fatal_error("Immediate out of range"); 1973 return DAG.getNode(MipsISD::INSVE, DL, Op->getValueType(0), 1974 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3), 1975 DAG.getConstant(0, DL, MVT::i32)); 1976 } 1977 case Intrinsic::mips_ldi_b: 1978 case Intrinsic::mips_ldi_h: 1979 case Intrinsic::mips_ldi_w: 1980 case Intrinsic::mips_ldi_d: 1981 return lowerMSASplatImm(Op, 1, DAG, true); 1982 case Intrinsic::mips_lsa: 1983 case Intrinsic::mips_dlsa: { 1984 EVT ResTy = Op->getValueType(0); 1985 return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1), 1986 DAG.getNode(ISD::SHL, SDLoc(Op), ResTy, 1987 Op->getOperand(2), Op->getOperand(3))); 1988 } 1989 case Intrinsic::mips_maddv_b: 1990 case Intrinsic::mips_maddv_h: 1991 case Intrinsic::mips_maddv_w: 1992 case Intrinsic::mips_maddv_d: { 1993 EVT ResTy = 
Op->getValueType(0); 1994 return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1), 1995 DAG.getNode(ISD::MUL, SDLoc(Op), ResTy, 1996 Op->getOperand(2), Op->getOperand(3))); 1997 } 1998 case Intrinsic::mips_max_s_b: 1999 case Intrinsic::mips_max_s_h: 2000 case Intrinsic::mips_max_s_w: 2001 case Intrinsic::mips_max_s_d: 2002 return DAG.getNode(ISD::SMAX, DL, Op->getValueType(0), 2003 Op->getOperand(1), Op->getOperand(2)); 2004 case Intrinsic::mips_max_u_b: 2005 case Intrinsic::mips_max_u_h: 2006 case Intrinsic::mips_max_u_w: 2007 case Intrinsic::mips_max_u_d: 2008 return DAG.getNode(ISD::UMAX, DL, Op->getValueType(0), 2009 Op->getOperand(1), Op->getOperand(2)); 2010 case Intrinsic::mips_maxi_s_b: 2011 case Intrinsic::mips_maxi_s_h: 2012 case Intrinsic::mips_maxi_s_w: 2013 case Intrinsic::mips_maxi_s_d: 2014 return DAG.getNode(ISD::SMAX, DL, Op->getValueType(0), 2015 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG, true)); 2016 case Intrinsic::mips_maxi_u_b: 2017 case Intrinsic::mips_maxi_u_h: 2018 case Intrinsic::mips_maxi_u_w: 2019 case Intrinsic::mips_maxi_u_d: 2020 return DAG.getNode(ISD::UMAX, DL, Op->getValueType(0), 2021 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2022 case Intrinsic::mips_min_s_b: 2023 case Intrinsic::mips_min_s_h: 2024 case Intrinsic::mips_min_s_w: 2025 case Intrinsic::mips_min_s_d: 2026 return DAG.getNode(ISD::SMIN, DL, Op->getValueType(0), 2027 Op->getOperand(1), Op->getOperand(2)); 2028 case Intrinsic::mips_min_u_b: 2029 case Intrinsic::mips_min_u_h: 2030 case Intrinsic::mips_min_u_w: 2031 case Intrinsic::mips_min_u_d: 2032 return DAG.getNode(ISD::UMIN, DL, Op->getValueType(0), 2033 Op->getOperand(1), Op->getOperand(2)); 2034 case Intrinsic::mips_mini_s_b: 2035 case Intrinsic::mips_mini_s_h: 2036 case Intrinsic::mips_mini_s_w: 2037 case Intrinsic::mips_mini_s_d: 2038 return DAG.getNode(ISD::SMIN, DL, Op->getValueType(0), 2039 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG, true)); 2040 case Intrinsic::mips_mini_u_b: 2041 case 
Intrinsic::mips_mini_u_h: 2042 case Intrinsic::mips_mini_u_w: 2043 case Intrinsic::mips_mini_u_d: 2044 return DAG.getNode(ISD::UMIN, DL, Op->getValueType(0), 2045 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2046 case Intrinsic::mips_mod_s_b: 2047 case Intrinsic::mips_mod_s_h: 2048 case Intrinsic::mips_mod_s_w: 2049 case Intrinsic::mips_mod_s_d: 2050 return DAG.getNode(ISD::SREM, DL, Op->getValueType(0), Op->getOperand(1), 2051 Op->getOperand(2)); 2052 case Intrinsic::mips_mod_u_b: 2053 case Intrinsic::mips_mod_u_h: 2054 case Intrinsic::mips_mod_u_w: 2055 case Intrinsic::mips_mod_u_d: 2056 return DAG.getNode(ISD::UREM, DL, Op->getValueType(0), Op->getOperand(1), 2057 Op->getOperand(2)); 2058 case Intrinsic::mips_mulv_b: 2059 case Intrinsic::mips_mulv_h: 2060 case Intrinsic::mips_mulv_w: 2061 case Intrinsic::mips_mulv_d: 2062 return DAG.getNode(ISD::MUL, DL, Op->getValueType(0), Op->getOperand(1), 2063 Op->getOperand(2)); 2064 case Intrinsic::mips_msubv_b: 2065 case Intrinsic::mips_msubv_h: 2066 case Intrinsic::mips_msubv_w: 2067 case Intrinsic::mips_msubv_d: { 2068 EVT ResTy = Op->getValueType(0); 2069 return DAG.getNode(ISD::SUB, SDLoc(Op), ResTy, Op->getOperand(1), 2070 DAG.getNode(ISD::MUL, SDLoc(Op), ResTy, 2071 Op->getOperand(2), Op->getOperand(3))); 2072 } 2073 case Intrinsic::mips_nlzc_b: 2074 case Intrinsic::mips_nlzc_h: 2075 case Intrinsic::mips_nlzc_w: 2076 case Intrinsic::mips_nlzc_d: 2077 return DAG.getNode(ISD::CTLZ, DL, Op->getValueType(0), Op->getOperand(1)); 2078 case Intrinsic::mips_nor_v: { 2079 SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0), 2080 Op->getOperand(1), Op->getOperand(2)); 2081 return DAG.getNOT(DL, Res, Res->getValueType(0)); 2082 } 2083 case Intrinsic::mips_nori_b: { 2084 SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0), 2085 Op->getOperand(1), 2086 lowerMSASplatImm(Op, 2, DAG)); 2087 return DAG.getNOT(DL, Res, Res->getValueType(0)); 2088 } 2089 case Intrinsic::mips_or_v: 2090 return DAG.getNode(ISD::OR, 
DL, Op->getValueType(0), Op->getOperand(1), 2091 Op->getOperand(2)); 2092 case Intrinsic::mips_ori_b: 2093 return DAG.getNode(ISD::OR, DL, Op->getValueType(0), 2094 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2095 case Intrinsic::mips_pckev_b: 2096 case Intrinsic::mips_pckev_h: 2097 case Intrinsic::mips_pckev_w: 2098 case Intrinsic::mips_pckev_d: 2099 return DAG.getNode(MipsISD::PCKEV, DL, Op->getValueType(0), 2100 Op->getOperand(1), Op->getOperand(2)); 2101 case Intrinsic::mips_pckod_b: 2102 case Intrinsic::mips_pckod_h: 2103 case Intrinsic::mips_pckod_w: 2104 case Intrinsic::mips_pckod_d: 2105 return DAG.getNode(MipsISD::PCKOD, DL, Op->getValueType(0), 2106 Op->getOperand(1), Op->getOperand(2)); 2107 case Intrinsic::mips_pcnt_b: 2108 case Intrinsic::mips_pcnt_h: 2109 case Intrinsic::mips_pcnt_w: 2110 case Intrinsic::mips_pcnt_d: 2111 return DAG.getNode(ISD::CTPOP, DL, Op->getValueType(0), Op->getOperand(1)); 2112 case Intrinsic::mips_sat_s_b: 2113 case Intrinsic::mips_sat_s_h: 2114 case Intrinsic::mips_sat_s_w: 2115 case Intrinsic::mips_sat_s_d: 2116 case Intrinsic::mips_sat_u_b: 2117 case Intrinsic::mips_sat_u_h: 2118 case Intrinsic::mips_sat_u_w: 2119 case Intrinsic::mips_sat_u_d: { 2120 // Report an error for out of range values. 
2121 int64_t Max; 2122 switch (Intrinsic) { 2123 case Intrinsic::mips_sat_s_b: 2124 case Intrinsic::mips_sat_u_b: Max = 7; break; 2125 case Intrinsic::mips_sat_s_h: 2126 case Intrinsic::mips_sat_u_h: Max = 15; break; 2127 case Intrinsic::mips_sat_s_w: 2128 case Intrinsic::mips_sat_u_w: Max = 31; break; 2129 case Intrinsic::mips_sat_s_d: 2130 case Intrinsic::mips_sat_u_d: Max = 63; break; 2131 default: llvm_unreachable("Unmatched intrinsic"); 2132 } 2133 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 2134 if (Value < 0 || Value > Max) 2135 report_fatal_error("Immediate out of range"); 2136 return SDValue(); 2137 } 2138 case Intrinsic::mips_shf_b: 2139 case Intrinsic::mips_shf_h: 2140 case Intrinsic::mips_shf_w: { 2141 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 2142 if (Value < 0 || Value > 255) 2143 report_fatal_error("Immediate out of range"); 2144 return DAG.getNode(MipsISD::SHF, DL, Op->getValueType(0), 2145 Op->getOperand(2), Op->getOperand(1)); 2146 } 2147 case Intrinsic::mips_sldi_b: 2148 case Intrinsic::mips_sldi_h: 2149 case Intrinsic::mips_sldi_w: 2150 case Intrinsic::mips_sldi_d: { 2151 // Report an error for out of range values. 
2152 int64_t Max; 2153 switch (Intrinsic) { 2154 case Intrinsic::mips_sldi_b: Max = 15; break; 2155 case Intrinsic::mips_sldi_h: Max = 7; break; 2156 case Intrinsic::mips_sldi_w: Max = 3; break; 2157 case Intrinsic::mips_sldi_d: Max = 1; break; 2158 default: llvm_unreachable("Unmatched intrinsic"); 2159 } 2160 int64_t Value = cast<ConstantSDNode>(Op->getOperand(3))->getSExtValue(); 2161 if (Value < 0 || Value > Max) 2162 report_fatal_error("Immediate out of range"); 2163 return SDValue(); 2164 } 2165 case Intrinsic::mips_sll_b: 2166 case Intrinsic::mips_sll_h: 2167 case Intrinsic::mips_sll_w: 2168 case Intrinsic::mips_sll_d: 2169 return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), Op->getOperand(1), 2170 truncateVecElts(Op, DAG)); 2171 case Intrinsic::mips_slli_b: 2172 case Intrinsic::mips_slli_h: 2173 case Intrinsic::mips_slli_w: 2174 case Intrinsic::mips_slli_d: 2175 return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), 2176 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2177 case Intrinsic::mips_splat_b: 2178 case Intrinsic::mips_splat_h: 2179 case Intrinsic::mips_splat_w: 2180 case Intrinsic::mips_splat_d: 2181 // We can't lower via VECTOR_SHUFFLE because it requires constant shuffle 2182 // masks, nor can we lower via BUILD_VECTOR & EXTRACT_VECTOR_ELT because 2183 // EXTRACT_VECTOR_ELT can't extract i64's on MIPS32. 2184 // Instead we lower to MipsISD::VSHF and match from there. 
2185 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0), 2186 lowerMSASplatZExt(Op, 2, DAG), Op->getOperand(1), 2187 Op->getOperand(1)); 2188 case Intrinsic::mips_splati_b: 2189 case Intrinsic::mips_splati_h: 2190 case Intrinsic::mips_splati_w: 2191 case Intrinsic::mips_splati_d: 2192 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0), 2193 lowerMSASplatImm(Op, 2, DAG), Op->getOperand(1), 2194 Op->getOperand(1)); 2195 case Intrinsic::mips_sra_b: 2196 case Intrinsic::mips_sra_h: 2197 case Intrinsic::mips_sra_w: 2198 case Intrinsic::mips_sra_d: 2199 return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), Op->getOperand(1), 2200 truncateVecElts(Op, DAG)); 2201 case Intrinsic::mips_srai_b: 2202 case Intrinsic::mips_srai_h: 2203 case Intrinsic::mips_srai_w: 2204 case Intrinsic::mips_srai_d: 2205 return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), 2206 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2207 case Intrinsic::mips_srari_b: 2208 case Intrinsic::mips_srari_h: 2209 case Intrinsic::mips_srari_w: 2210 case Intrinsic::mips_srari_d: { 2211 // Report an error for out of range values. 
2212 int64_t Max; 2213 switch (Intrinsic) { 2214 case Intrinsic::mips_srari_b: Max = 7; break; 2215 case Intrinsic::mips_srari_h: Max = 15; break; 2216 case Intrinsic::mips_srari_w: Max = 31; break; 2217 case Intrinsic::mips_srari_d: Max = 63; break; 2218 default: llvm_unreachable("Unmatched intrinsic"); 2219 } 2220 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 2221 if (Value < 0 || Value > Max) 2222 report_fatal_error("Immediate out of range"); 2223 return SDValue(); 2224 } 2225 case Intrinsic::mips_srl_b: 2226 case Intrinsic::mips_srl_h: 2227 case Intrinsic::mips_srl_w: 2228 case Intrinsic::mips_srl_d: 2229 return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), Op->getOperand(1), 2230 truncateVecElts(Op, DAG)); 2231 case Intrinsic::mips_srli_b: 2232 case Intrinsic::mips_srli_h: 2233 case Intrinsic::mips_srli_w: 2234 case Intrinsic::mips_srli_d: 2235 return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), 2236 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2237 case Intrinsic::mips_srlri_b: 2238 case Intrinsic::mips_srlri_h: 2239 case Intrinsic::mips_srlri_w: 2240 case Intrinsic::mips_srlri_d: { 2241 // Report an error for out of range values. 
2242 int64_t Max; 2243 switch (Intrinsic) { 2244 case Intrinsic::mips_srlri_b: Max = 7; break; 2245 case Intrinsic::mips_srlri_h: Max = 15; break; 2246 case Intrinsic::mips_srlri_w: Max = 31; break; 2247 case Intrinsic::mips_srlri_d: Max = 63; break; 2248 default: llvm_unreachable("Unmatched intrinsic"); 2249 } 2250 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 2251 if (Value < 0 || Value > Max) 2252 report_fatal_error("Immediate out of range"); 2253 return SDValue(); 2254 } 2255 case Intrinsic::mips_subv_b: 2256 case Intrinsic::mips_subv_h: 2257 case Intrinsic::mips_subv_w: 2258 case Intrinsic::mips_subv_d: 2259 return DAG.getNode(ISD::SUB, DL, Op->getValueType(0), Op->getOperand(1), 2260 Op->getOperand(2)); 2261 case Intrinsic::mips_subvi_b: 2262 case Intrinsic::mips_subvi_h: 2263 case Intrinsic::mips_subvi_w: 2264 case Intrinsic::mips_subvi_d: 2265 return DAG.getNode(ISD::SUB, DL, Op->getValueType(0), 2266 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2267 case Intrinsic::mips_vshf_b: 2268 case Intrinsic::mips_vshf_h: 2269 case Intrinsic::mips_vshf_w: 2270 case Intrinsic::mips_vshf_d: 2271 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0), 2272 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); 2273 case Intrinsic::mips_xor_v: 2274 return DAG.getNode(ISD::XOR, DL, Op->getValueType(0), Op->getOperand(1), 2275 Op->getOperand(2)); 2276 case Intrinsic::mips_xori_b: 2277 return DAG.getNode(ISD::XOR, DL, Op->getValueType(0), 2278 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2279 case Intrinsic::thread_pointer: { 2280 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2281 return DAG.getNode(MipsISD::ThreadPointer, DL, PtrVT); 2282 } 2283 } 2284 } 2285 2286 static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, 2287 const MipsSubtarget &Subtarget) { 2288 SDLoc DL(Op); 2289 SDValue ChainIn = Op->getOperand(0); 2290 SDValue Address = Op->getOperand(2); 2291 SDValue Offset = Op->getOperand(3); 2292 EVT 
ResTy = Op->getValueType(0); 2293 EVT PtrTy = Address->getValueType(0); 2294 2295 // For N64 addresses have the underlying type MVT::i64. This intrinsic 2296 // however takes an i32 signed constant offset. The actual type of the 2297 // intrinsic is a scaled signed i10. 2298 if (Subtarget.isABI_N64()) 2299 Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); 2300 2301 Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); 2302 return DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(), 2303 Align(16)); 2304 } 2305 2306 SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, 2307 SelectionDAG &DAG) const { 2308 unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue(); 2309 switch (Intr) { 2310 default: 2311 return SDValue(); 2312 case Intrinsic::mips_extp: 2313 return lowerDSPIntr(Op, DAG, MipsISD::EXTP); 2314 case Intrinsic::mips_extpdp: 2315 return lowerDSPIntr(Op, DAG, MipsISD::EXTPDP); 2316 case Intrinsic::mips_extr_w: 2317 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_W); 2318 case Intrinsic::mips_extr_r_w: 2319 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W); 2320 case Intrinsic::mips_extr_rs_w: 2321 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W); 2322 case Intrinsic::mips_extr_s_h: 2323 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H); 2324 case Intrinsic::mips_mthlip: 2325 return lowerDSPIntr(Op, DAG, MipsISD::MTHLIP); 2326 case Intrinsic::mips_mulsaq_s_w_ph: 2327 return lowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH); 2328 case Intrinsic::mips_maq_s_w_phl: 2329 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL); 2330 case Intrinsic::mips_maq_s_w_phr: 2331 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR); 2332 case Intrinsic::mips_maq_sa_w_phl: 2333 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL); 2334 case Intrinsic::mips_maq_sa_w_phr: 2335 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR); 2336 case Intrinsic::mips_dpaq_s_w_ph: 2337 return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH); 2338 case 
Intrinsic::mips_dpsq_s_w_ph: 2339 return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH); 2340 case Intrinsic::mips_dpaq_sa_l_w: 2341 return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W); 2342 case Intrinsic::mips_dpsq_sa_l_w: 2343 return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W); 2344 case Intrinsic::mips_dpaqx_s_w_ph: 2345 return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH); 2346 case Intrinsic::mips_dpaqx_sa_w_ph: 2347 return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH); 2348 case Intrinsic::mips_dpsqx_s_w_ph: 2349 return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH); 2350 case Intrinsic::mips_dpsqx_sa_w_ph: 2351 return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH); 2352 case Intrinsic::mips_ld_b: 2353 case Intrinsic::mips_ld_h: 2354 case Intrinsic::mips_ld_w: 2355 case Intrinsic::mips_ld_d: 2356 return lowerMSALoadIntr(Op, DAG, Intr, Subtarget); 2357 } 2358 } 2359 2360 static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, 2361 const MipsSubtarget &Subtarget) { 2362 SDLoc DL(Op); 2363 SDValue ChainIn = Op->getOperand(0); 2364 SDValue Value = Op->getOperand(2); 2365 SDValue Address = Op->getOperand(3); 2366 SDValue Offset = Op->getOperand(4); 2367 EVT PtrTy = Address->getValueType(0); 2368 2369 // For N64 addresses have the underlying type MVT::i64. This intrinsic 2370 // however takes an i32 signed constant offset. The actual type of the 2371 // intrinsic is a scaled signed i10. 
2372 if (Subtarget.isABI_N64()) 2373 Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); 2374 2375 Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); 2376 2377 return DAG.getStore(ChainIn, DL, Value, Address, MachinePointerInfo(), 2378 Align(16)); 2379 } 2380 2381 SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op, 2382 SelectionDAG &DAG) const { 2383 unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue(); 2384 switch (Intr) { 2385 default: 2386 return SDValue(); 2387 case Intrinsic::mips_st_b: 2388 case Intrinsic::mips_st_h: 2389 case Intrinsic::mips_st_w: 2390 case Intrinsic::mips_st_d: 2391 return lowerMSAStoreIntr(Op, DAG, Intr, Subtarget); 2392 } 2393 } 2394 2395 // Lower ISD::EXTRACT_VECTOR_ELT into MipsISD::VEXTRACT_SEXT_ELT. 2396 // 2397 // The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We 2398 // choose to sign-extend but we could have equally chosen zero-extend. The 2399 // DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT 2400 // result into this node later (possibly changing it to a zero-extend in the 2401 // process). 
2402 SDValue MipsSETargetLowering:: 2403 lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { 2404 SDLoc DL(Op); 2405 EVT ResTy = Op->getValueType(0); 2406 SDValue Op0 = Op->getOperand(0); 2407 EVT VecTy = Op0->getValueType(0); 2408 2409 if (!VecTy.is128BitVector()) 2410 return SDValue(); 2411 2412 if (ResTy.isInteger()) { 2413 SDValue Op1 = Op->getOperand(1); 2414 EVT EltTy = VecTy.getVectorElementType(); 2415 return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, DL, ResTy, Op0, Op1, 2416 DAG.getValueType(EltTy)); 2417 } 2418 2419 return Op; 2420 } 2421 2422 static bool isConstantOrUndef(const SDValue Op) { 2423 if (Op->isUndef()) 2424 return true; 2425 if (isa<ConstantSDNode>(Op)) 2426 return true; 2427 if (isa<ConstantFPSDNode>(Op)) 2428 return true; 2429 return false; 2430 } 2431 2432 static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) { 2433 for (unsigned i = 0; i < Op->getNumOperands(); ++i) 2434 if (isConstantOrUndef(Op->getOperand(i))) 2435 return true; 2436 return false; 2437 } 2438 2439 // Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the 2440 // backend. 2441 // 2442 // Lowers according to the following rules: 2443 // - Constant splats are legal as-is as long as the SplatBitSize is a power of 2444 // 2 less than or equal to 64 and the value fits into a signed 10-bit 2445 // immediate 2446 // - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize 2447 // is a power of 2 less than or equal to 64 and the value does not fit into a 2448 // signed 10-bit immediate 2449 // - Non-constant splats are legal as-is. 2450 // - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT. 2451 // - All others are illegal and must be expanded. 
2452 SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op, 2453 SelectionDAG &DAG) const { 2454 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op); 2455 EVT ResTy = Op->getValueType(0); 2456 SDLoc DL(Op); 2457 APInt SplatValue, SplatUndef; 2458 unsigned SplatBitSize; 2459 bool HasAnyUndefs; 2460 2461 if (!Subtarget.hasMSA() || !ResTy.is128BitVector()) 2462 return SDValue(); 2463 2464 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, 2465 HasAnyUndefs, 8, 2466 !Subtarget.isLittle()) && SplatBitSize <= 64) { 2467 // We can only cope with 8, 16, 32, or 64-bit elements 2468 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 && 2469 SplatBitSize != 64) 2470 return SDValue(); 2471 2472 // If the value isn't an integer type we will have to bitcast 2473 // from an integer type first. Also, if there are any undefs, we must 2474 // lower them to defined values first. 2475 if (ResTy.isInteger() && !HasAnyUndefs) 2476 return Op; 2477 2478 EVT ViaVecTy; 2479 2480 switch (SplatBitSize) { 2481 default: 2482 return SDValue(); 2483 case 8: 2484 ViaVecTy = MVT::v16i8; 2485 break; 2486 case 16: 2487 ViaVecTy = MVT::v8i16; 2488 break; 2489 case 32: 2490 ViaVecTy = MVT::v4i32; 2491 break; 2492 case 64: 2493 // There's no fill.d to fall back on for 64-bit values 2494 return SDValue(); 2495 } 2496 2497 // SelectionDAG::getConstant will promote SplatValue appropriately. 2498 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy); 2499 2500 // Bitcast to the type we originally wanted 2501 if (ViaVecTy != ResTy) 2502 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result); 2503 2504 return Result; 2505 } else if (DAG.isSplatValue(Op, /* AllowUndefs */ false)) 2506 return Op; 2507 else if (!isConstantOrUndefBUILD_VECTOR(Node)) { 2508 // Use INSERT_VECTOR_ELT operations rather than expand to stores. 
2509 // The resulting code is the same length as the expansion, but it doesn't 2510 // use memory operations 2511 EVT ResTy = Node->getValueType(0); 2512 2513 assert(ResTy.isVector()); 2514 2515 unsigned NumElts = ResTy.getVectorNumElements(); 2516 SDValue Vector = DAG.getUNDEF(ResTy); 2517 for (unsigned i = 0; i < NumElts; ++i) { 2518 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, 2519 Node->getOperand(i), 2520 DAG.getConstant(i, DL, MVT::i32)); 2521 } 2522 return Vector; 2523 } 2524 2525 return SDValue(); 2526 } 2527 2528 // Lower VECTOR_SHUFFLE into SHF (if possible). 2529 // 2530 // SHF splits the vector into blocks of four elements, then shuffles these 2531 // elements according to a <4 x i2> constant (encoded as an integer immediate). 2532 // 2533 // It is therefore possible to lower into SHF when the mask takes the form: 2534 // <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...> 2535 // When undef's appear they are treated as if they were whatever value is 2536 // necessary in order to fit the above forms. 
2537 // 2538 // For example: 2539 // %2 = shufflevector <8 x i16> %0, <8 x i16> undef, 2540 // <8 x i32> <i32 3, i32 2, i32 1, i32 0, 2541 // i32 7, i32 6, i32 5, i32 4> 2542 // is lowered to: 2543 // (SHF_H $w0, $w1, 27) 2544 // where the 27 comes from: 2545 // 3 + (2 << 2) + (1 << 4) + (0 << 6) 2546 static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy, 2547 SmallVector<int, 16> Indices, 2548 SelectionDAG &DAG) { 2549 int SHFIndices[4] = { -1, -1, -1, -1 }; 2550 2551 if (Indices.size() < 4) 2552 return SDValue(); 2553 2554 for (unsigned i = 0; i < 4; ++i) { 2555 for (unsigned j = i; j < Indices.size(); j += 4) { 2556 int Idx = Indices[j]; 2557 2558 // Convert from vector index to 4-element subvector index 2559 // If an index refers to an element outside of the subvector then give up 2560 if (Idx != -1) { 2561 Idx -= 4 * (j / 4); 2562 if (Idx < 0 || Idx >= 4) 2563 return SDValue(); 2564 } 2565 2566 // If the mask has an undef, replace it with the current index. 2567 // Note that it might still be undef if the current index is also undef 2568 if (SHFIndices[i] == -1) 2569 SHFIndices[i] = Idx; 2570 2571 // Check that non-undef values are the same as in the mask. If they 2572 // aren't then give up 2573 if (!(Idx == -1 || Idx == SHFIndices[i])) 2574 return SDValue(); 2575 } 2576 } 2577 2578 // Calculate the immediate. Replace any remaining undefs with zero 2579 APInt Imm(32, 0); 2580 for (int i = 3; i >= 0; --i) { 2581 int Idx = SHFIndices[i]; 2582 2583 if (Idx == -1) 2584 Idx = 0; 2585 2586 Imm <<= 2; 2587 Imm |= Idx & 0x3; 2588 } 2589 2590 SDLoc DL(Op); 2591 return DAG.getNode(MipsISD::SHF, DL, ResTy, 2592 DAG.getTargetConstant(Imm, DL, MVT::i32), 2593 Op->getOperand(0)); 2594 } 2595 2596 /// Determine whether a range fits a regular pattern of values. 2597 /// This function accounts for the possibility of jumping over the End iterator. 
2598 template <typename ValType> 2599 static bool 2600 fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin, 2601 unsigned CheckStride, 2602 typename SmallVectorImpl<ValType>::const_iterator End, 2603 ValType ExpectedIndex, unsigned ExpectedIndexStride) { 2604 auto &I = Begin; 2605 2606 while (I != End) { 2607 if (*I != -1 && *I != ExpectedIndex) 2608 return false; 2609 ExpectedIndex += ExpectedIndexStride; 2610 2611 // Incrementing past End is undefined behaviour so we must increment one 2612 // step at a time and check for End at each step. 2613 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I) 2614 ; // Empty loop body. 2615 } 2616 return true; 2617 } 2618 2619 // Determine whether VECTOR_SHUFFLE is a SPLATI. 2620 // 2621 // It is a SPLATI when the mask is: 2622 // <x, x, x, ...> 2623 // where x is any valid index. 2624 // 2625 // When undef's appear in the mask they are treated as if they were whatever 2626 // value is necessary in order to fit the above form. 2627 static bool isVECTOR_SHUFFLE_SPLATI(SDValue Op, EVT ResTy, 2628 SmallVector<int, 16> Indices, 2629 SelectionDAG &DAG) { 2630 assert((Indices.size() % 2) == 0); 2631 2632 int SplatIndex = -1; 2633 for (const auto &V : Indices) { 2634 if (V != -1) { 2635 SplatIndex = V; 2636 break; 2637 } 2638 } 2639 2640 return fitsRegularPattern<int>(Indices.begin(), 1, Indices.end(), SplatIndex, 2641 0); 2642 } 2643 2644 // Lower VECTOR_SHUFFLE into ILVEV (if possible). 2645 // 2646 // ILVEV interleaves the even elements from each vector. 2647 // 2648 // It is possible to lower into ILVEV when the mask consists of two of the 2649 // following forms interleaved: 2650 // <0, 2, 4, ...> 2651 // <n, n+2, n+4, ...> 2652 // where n is the number of elements in the vector. 
2653 // For example: 2654 // <0, 0, 2, 2, 4, 4, ...> 2655 // <0, n, 2, n+2, 4, n+4, ...> 2656 // 2657 // When undef's appear in the mask they are treated as if they were whatever 2658 // value is necessary in order to fit the above forms. 2659 static SDValue lowerVECTOR_SHUFFLE_ILVEV(SDValue Op, EVT ResTy, 2660 SmallVector<int, 16> Indices, 2661 SelectionDAG &DAG) { 2662 assert((Indices.size() % 2) == 0); 2663 2664 SDValue Wt; 2665 SDValue Ws; 2666 const auto &Begin = Indices.begin(); 2667 const auto &End = Indices.end(); 2668 2669 // Check even elements are taken from the even elements of one half or the 2670 // other and pick an operand accordingly. 2671 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2)) 2672 Wt = Op->getOperand(0); 2673 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 2)) 2674 Wt = Op->getOperand(1); 2675 else 2676 return SDValue(); 2677 2678 // Check odd elements are taken from the even elements of one half or the 2679 // other and pick an operand accordingly. 2680 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2)) 2681 Ws = Op->getOperand(0); 2682 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 2)) 2683 Ws = Op->getOperand(1); 2684 else 2685 return SDValue(); 2686 2687 return DAG.getNode(MipsISD::ILVEV, SDLoc(Op), ResTy, Ws, Wt); 2688 } 2689 2690 // Lower VECTOR_SHUFFLE into ILVOD (if possible). 2691 // 2692 // ILVOD interleaves the odd elements from each vector. 2693 // 2694 // It is possible to lower into ILVOD when the mask consists of two of the 2695 // following forms interleaved: 2696 // <1, 3, 5, ...> 2697 // <n+1, n+3, n+5, ...> 2698 // where n is the number of elements in the vector. 2699 // For example: 2700 // <1, 1, 3, 3, 5, 5, ...> 2701 // <1, n+1, 3, n+3, 5, n+5, ...> 2702 // 2703 // When undef's appear in the mask they are treated as if they were whatever 2704 // value is necessary in order to fit the above forms. 
2705 static SDValue lowerVECTOR_SHUFFLE_ILVOD(SDValue Op, EVT ResTy, 2706 SmallVector<int, 16> Indices, 2707 SelectionDAG &DAG) { 2708 assert((Indices.size() % 2) == 0); 2709 2710 SDValue Wt; 2711 SDValue Ws; 2712 const auto &Begin = Indices.begin(); 2713 const auto &End = Indices.end(); 2714 2715 // Check even elements are taken from the odd elements of one half or the 2716 // other and pick an operand accordingly. 2717 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2)) 2718 Wt = Op->getOperand(0); 2719 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + 1, 2)) 2720 Wt = Op->getOperand(1); 2721 else 2722 return SDValue(); 2723 2724 // Check odd elements are taken from the odd elements of one half or the 2725 // other and pick an operand accordingly. 2726 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2)) 2727 Ws = Op->getOperand(0); 2728 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + 1, 2)) 2729 Ws = Op->getOperand(1); 2730 else 2731 return SDValue(); 2732 2733 return DAG.getNode(MipsISD::ILVOD, SDLoc(Op), ResTy, Wt, Ws); 2734 } 2735 2736 // Lower VECTOR_SHUFFLE into ILVR (if possible). 2737 // 2738 // ILVR interleaves consecutive elements from the right (lowest-indexed) half of 2739 // each vector. 2740 // 2741 // It is possible to lower into ILVR when the mask consists of two of the 2742 // following forms interleaved: 2743 // <0, 1, 2, ...> 2744 // <n, n+1, n+2, ...> 2745 // where n is the number of elements in the vector. 2746 // For example: 2747 // <0, 0, 1, 1, 2, 2, ...> 2748 // <0, n, 1, n+1, 2, n+2, ...> 2749 // 2750 // When undef's appear in the mask they are treated as if they were whatever 2751 // value is necessary in order to fit the above forms. 
2752 static SDValue lowerVECTOR_SHUFFLE_ILVR(SDValue Op, EVT ResTy, 2753 SmallVector<int, 16> Indices, 2754 SelectionDAG &DAG) { 2755 assert((Indices.size() % 2) == 0); 2756 2757 SDValue Wt; 2758 SDValue Ws; 2759 const auto &Begin = Indices.begin(); 2760 const auto &End = Indices.end(); 2761 2762 // Check even elements are taken from the right (lowest-indexed) elements of 2763 // one half or the other and pick an operand accordingly. 2764 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1)) 2765 Wt = Op->getOperand(0); 2766 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 1)) 2767 Wt = Op->getOperand(1); 2768 else 2769 return SDValue(); 2770 2771 // Check odd elements are taken from the right (lowest-indexed) elements of 2772 // one half or the other and pick an operand accordingly. 2773 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1)) 2774 Ws = Op->getOperand(0); 2775 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 1)) 2776 Ws = Op->getOperand(1); 2777 else 2778 return SDValue(); 2779 2780 return DAG.getNode(MipsISD::ILVR, SDLoc(Op), ResTy, Ws, Wt); 2781 } 2782 2783 // Lower VECTOR_SHUFFLE into ILVL (if possible). 2784 // 2785 // ILVL interleaves consecutive elements from the left (highest-indexed) half 2786 // of each vector. 2787 // 2788 // It is possible to lower into ILVL when the mask consists of two of the 2789 // following forms interleaved: 2790 // <x, x+1, x+2, ...> 2791 // <n+x, n+x+1, n+x+2, ...> 2792 // where n is the number of elements in the vector and x is half n. 2793 // For example: 2794 // <x, x, x+1, x+1, x+2, x+2, ...> 2795 // <x, n+x, x+1, n+x+1, x+2, n+x+2, ...> 2796 // 2797 // When undef's appear in the mask they are treated as if they were whatever 2798 // value is necessary in order to fit the above forms. 
2799 static SDValue lowerVECTOR_SHUFFLE_ILVL(SDValue Op, EVT ResTy, 2800 SmallVector<int, 16> Indices, 2801 SelectionDAG &DAG) { 2802 assert((Indices.size() % 2) == 0); 2803 2804 unsigned HalfSize = Indices.size() / 2; 2805 SDValue Wt; 2806 SDValue Ws; 2807 const auto &Begin = Indices.begin(); 2808 const auto &End = Indices.end(); 2809 2810 // Check even elements are taken from the left (highest-indexed) elements of 2811 // one half or the other and pick an operand accordingly. 2812 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1)) 2813 Wt = Op->getOperand(0); 2814 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + HalfSize, 1)) 2815 Wt = Op->getOperand(1); 2816 else 2817 return SDValue(); 2818 2819 // Check odd elements are taken from the left (highest-indexed) elements of 2820 // one half or the other and pick an operand accordingly. 2821 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1)) 2822 Ws = Op->getOperand(0); 2823 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + HalfSize, 2824 1)) 2825 Ws = Op->getOperand(1); 2826 else 2827 return SDValue(); 2828 2829 return DAG.getNode(MipsISD::ILVL, SDLoc(Op), ResTy, Ws, Wt); 2830 } 2831 2832 // Lower VECTOR_SHUFFLE into PCKEV (if possible). 2833 // 2834 // PCKEV copies the even elements of each vector into the result vector. 2835 // 2836 // It is possible to lower into PCKEV when the mask consists of two of the 2837 // following forms concatenated: 2838 // <0, 2, 4, ...> 2839 // <n, n+2, n+4, ...> 2840 // where n is the number of elements in the vector. 2841 // For example: 2842 // <0, 2, 4, ..., 0, 2, 4, ...> 2843 // <0, 2, 4, ..., n, n+2, n+4, ...> 2844 // 2845 // When undef's appear in the mask they are treated as if they were whatever 2846 // value is necessary in order to fit the above forms. 
2847 static SDValue lowerVECTOR_SHUFFLE_PCKEV(SDValue Op, EVT ResTy, 2848 SmallVector<int, 16> Indices, 2849 SelectionDAG &DAG) { 2850 assert((Indices.size() % 2) == 0); 2851 2852 SDValue Wt; 2853 SDValue Ws; 2854 const auto &Begin = Indices.begin(); 2855 const auto &Mid = Indices.begin() + Indices.size() / 2; 2856 const auto &End = Indices.end(); 2857 2858 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2)) 2859 Wt = Op->getOperand(0); 2860 else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size(), 2)) 2861 Wt = Op->getOperand(1); 2862 else 2863 return SDValue(); 2864 2865 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2)) 2866 Ws = Op->getOperand(0); 2867 else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size(), 2)) 2868 Ws = Op->getOperand(1); 2869 else 2870 return SDValue(); 2871 2872 return DAG.getNode(MipsISD::PCKEV, SDLoc(Op), ResTy, Ws, Wt); 2873 } 2874 2875 // Lower VECTOR_SHUFFLE into PCKOD (if possible). 2876 // 2877 // PCKOD copies the odd elements of each vector into the result vector. 2878 // 2879 // It is possible to lower into PCKOD when the mask consists of two of the 2880 // following forms concatenated: 2881 // <1, 3, 5, ...> 2882 // <n+1, n+3, n+5, ...> 2883 // where n is the number of elements in the vector. 2884 // For example: 2885 // <1, 3, 5, ..., 1, 3, 5, ...> 2886 // <1, 3, 5, ..., n+1, n+3, n+5, ...> 2887 // 2888 // When undef's appear in the mask they are treated as if they were whatever 2889 // value is necessary in order to fit the above forms. 
2890 static SDValue lowerVECTOR_SHUFFLE_PCKOD(SDValue Op, EVT ResTy, 2891 SmallVector<int, 16> Indices, 2892 SelectionDAG &DAG) { 2893 assert((Indices.size() % 2) == 0); 2894 2895 SDValue Wt; 2896 SDValue Ws; 2897 const auto &Begin = Indices.begin(); 2898 const auto &Mid = Indices.begin() + Indices.size() / 2; 2899 const auto &End = Indices.end(); 2900 2901 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2)) 2902 Wt = Op->getOperand(0); 2903 else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size() + 1, 2)) 2904 Wt = Op->getOperand(1); 2905 else 2906 return SDValue(); 2907 2908 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2)) 2909 Ws = Op->getOperand(0); 2910 else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size() + 1, 2)) 2911 Ws = Op->getOperand(1); 2912 else 2913 return SDValue(); 2914 2915 return DAG.getNode(MipsISD::PCKOD, SDLoc(Op), ResTy, Ws, Wt); 2916 } 2917 2918 // Lower VECTOR_SHUFFLE into VSHF. 2919 // 2920 // This mostly consists of converting the shuffle indices in Indices into a 2921 // BUILD_VECTOR and adding it as an operand to the resulting VSHF. There is 2922 // also code to eliminate unused operands of the VECTOR_SHUFFLE. For example, 2923 // if the type is v8i16 and all the indices are less than 8 then the second 2924 // operand is unused and can be replaced with anything. We choose to replace it 2925 // with the used operand since this reduces the number of instructions overall. 
2926 static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy, 2927 const SmallVector<int, 16> &Indices, 2928 SelectionDAG &DAG) { 2929 SmallVector<SDValue, 16> Ops; 2930 SDValue Op0; 2931 SDValue Op1; 2932 EVT MaskVecTy = ResTy.changeVectorElementTypeToInteger(); 2933 EVT MaskEltTy = MaskVecTy.getVectorElementType(); 2934 bool Using1stVec = false; 2935 bool Using2ndVec = false; 2936 SDLoc DL(Op); 2937 int ResTyNumElts = ResTy.getVectorNumElements(); 2938 2939 for (int i = 0; i < ResTyNumElts; ++i) { 2940 // Idx == -1 means UNDEF 2941 int Idx = Indices[i]; 2942 2943 if (0 <= Idx && Idx < ResTyNumElts) 2944 Using1stVec = true; 2945 if (ResTyNumElts <= Idx && Idx < ResTyNumElts * 2) 2946 Using2ndVec = true; 2947 } 2948 2949 for (int Idx : Indices) 2950 Ops.push_back(DAG.getTargetConstant(Idx, DL, MaskEltTy)); 2951 2952 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops); 2953 2954 if (Using1stVec && Using2ndVec) { 2955 Op0 = Op->getOperand(0); 2956 Op1 = Op->getOperand(1); 2957 } else if (Using1stVec) 2958 Op0 = Op1 = Op->getOperand(0); 2959 else if (Using2ndVec) 2960 Op0 = Op1 = Op->getOperand(1); 2961 else 2962 llvm_unreachable("shuffle vector mask references neither vector operand?"); 2963 2964 // VECTOR_SHUFFLE concatenates the vectors in an vectorwise fashion. 2965 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11> 2966 // VSHF concatenates the vectors in a bitwise fashion: 2967 // <0b00, 0b01> + <0b10, 0b11> -> 2968 // 0b0100 + 0b1110 -> 0b01001110 2969 // <0b10, 0b11, 0b00, 0b01> 2970 // We must therefore swap the operands to get the correct result. 2971 return DAG.getNode(MipsISD::VSHF, DL, ResTy, MaskVec, Op1, Op0); 2972 } 2973 2974 // Lower VECTOR_SHUFFLE into one of a number of instructions depending on the 2975 // indices in the shuffle. 
2976 SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, 2977 SelectionDAG &DAG) const { 2978 ShuffleVectorSDNode *Node = cast<ShuffleVectorSDNode>(Op); 2979 EVT ResTy = Op->getValueType(0); 2980 2981 if (!ResTy.is128BitVector()) 2982 return SDValue(); 2983 2984 int ResTyNumElts = ResTy.getVectorNumElements(); 2985 SmallVector<int, 16> Indices; 2986 2987 for (int i = 0; i < ResTyNumElts; ++i) 2988 Indices.push_back(Node->getMaskElt(i)); 2989 2990 // splati.[bhwd] is preferable to the others but is matched from 2991 // MipsISD::VSHF. 2992 if (isVECTOR_SHUFFLE_SPLATI(Op, ResTy, Indices, DAG)) 2993 return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG); 2994 SDValue Result; 2995 if ((Result = lowerVECTOR_SHUFFLE_ILVEV(Op, ResTy, Indices, DAG))) 2996 return Result; 2997 if ((Result = lowerVECTOR_SHUFFLE_ILVOD(Op, ResTy, Indices, DAG))) 2998 return Result; 2999 if ((Result = lowerVECTOR_SHUFFLE_ILVL(Op, ResTy, Indices, DAG))) 3000 return Result; 3001 if ((Result = lowerVECTOR_SHUFFLE_ILVR(Op, ResTy, Indices, DAG))) 3002 return Result; 3003 if ((Result = lowerVECTOR_SHUFFLE_PCKEV(Op, ResTy, Indices, DAG))) 3004 return Result; 3005 if ((Result = lowerVECTOR_SHUFFLE_PCKOD(Op, ResTy, Indices, DAG))) 3006 return Result; 3007 if ((Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG))) 3008 return Result; 3009 return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG); 3010 } 3011 3012 MachineBasicBlock * 3013 MipsSETargetLowering::emitBPOSGE32(MachineInstr &MI, 3014 MachineBasicBlock *BB) const { 3015 // $bb: 3016 // bposge32_pseudo $vr0 3017 // => 3018 // $bb: 3019 // bposge32 $tbb 3020 // $fbb: 3021 // li $vr2, 0 3022 // b $sink 3023 // $tbb: 3024 // li $vr1, 1 3025 // $sink: 3026 // $vr0 = phi($vr2, $fbb, $vr1, $tbb) 3027 3028 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3029 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3030 const TargetRegisterClass *RC = &Mips::GPR32RegClass; 3031 DebugLoc DL = MI.getDebugLoc(); 3032 const 
BasicBlock *LLVM_BB = BB->getBasicBlock(); 3033 MachineFunction::iterator It = std::next(MachineFunction::iterator(BB)); 3034 MachineFunction *F = BB->getParent(); 3035 MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB); 3036 MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB); 3037 MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB); 3038 F->insert(It, FBB); 3039 F->insert(It, TBB); 3040 F->insert(It, Sink); 3041 3042 // Transfer the remainder of BB and its successor edges to Sink. 3043 Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), 3044 BB->end()); 3045 Sink->transferSuccessorsAndUpdatePHIs(BB); 3046 3047 // Add successors. 3048 BB->addSuccessor(FBB); 3049 BB->addSuccessor(TBB); 3050 FBB->addSuccessor(Sink); 3051 TBB->addSuccessor(Sink); 3052 3053 // Insert the real bposge32 instruction to $BB. 3054 BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB); 3055 // Insert the real bposge32c instruction to $BB. 3056 BuildMI(BB, DL, TII->get(Mips::BPOSGE32C_MMR3)).addMBB(TBB); 3057 3058 // Fill $FBB. 3059 Register VR2 = RegInfo.createVirtualRegister(RC); 3060 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2) 3061 .addReg(Mips::ZERO).addImm(0); 3062 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink); 3063 3064 // Fill $TBB. 3065 Register VR1 = RegInfo.createVirtualRegister(RC); 3066 BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1) 3067 .addReg(Mips::ZERO).addImm(1); 3068 3069 // Insert phi function to $Sink. 3070 BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI), 3071 MI.getOperand(0).getReg()) 3072 .addReg(VR2) 3073 .addMBB(FBB) 3074 .addReg(VR1) 3075 .addMBB(TBB); 3076 3077 MI.eraseFromParent(); // The pseudo instruction is gone now. 
3078 return Sink; 3079 } 3080 3081 MachineBasicBlock *MipsSETargetLowering::emitMSACBranchPseudo( 3082 MachineInstr &MI, MachineBasicBlock *BB, unsigned BranchOp) const { 3083 // $bb: 3084 // vany_nonzero $rd, $ws 3085 // => 3086 // $bb: 3087 // bnz.b $ws, $tbb 3088 // b $fbb 3089 // $fbb: 3090 // li $rd1, 0 3091 // b $sink 3092 // $tbb: 3093 // li $rd2, 1 3094 // $sink: 3095 // $rd = phi($rd1, $fbb, $rd2, $tbb) 3096 3097 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3098 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3099 const TargetRegisterClass *RC = &Mips::GPR32RegClass; 3100 DebugLoc DL = MI.getDebugLoc(); 3101 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 3102 MachineFunction::iterator It = std::next(MachineFunction::iterator(BB)); 3103 MachineFunction *F = BB->getParent(); 3104 MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB); 3105 MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB); 3106 MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB); 3107 F->insert(It, FBB); 3108 F->insert(It, TBB); 3109 F->insert(It, Sink); 3110 3111 // Transfer the remainder of BB and its successor edges to Sink. 3112 Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), 3113 BB->end()); 3114 Sink->transferSuccessorsAndUpdatePHIs(BB); 3115 3116 // Add successors. 3117 BB->addSuccessor(FBB); 3118 BB->addSuccessor(TBB); 3119 FBB->addSuccessor(Sink); 3120 TBB->addSuccessor(Sink); 3121 3122 // Insert the real bnz.b instruction to $BB. 3123 BuildMI(BB, DL, TII->get(BranchOp)) 3124 .addReg(MI.getOperand(1).getReg()) 3125 .addMBB(TBB); 3126 3127 // Fill $FBB. 3128 Register RD1 = RegInfo.createVirtualRegister(RC); 3129 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), RD1) 3130 .addReg(Mips::ZERO).addImm(0); 3131 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink); 3132 3133 // Fill $TBB. 
3134 Register RD2 = RegInfo.createVirtualRegister(RC); 3135 BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), RD2) 3136 .addReg(Mips::ZERO).addImm(1); 3137 3138 // Insert phi function to $Sink. 3139 BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI), 3140 MI.getOperand(0).getReg()) 3141 .addReg(RD1) 3142 .addMBB(FBB) 3143 .addReg(RD2) 3144 .addMBB(TBB); 3145 3146 MI.eraseFromParent(); // The pseudo instruction is gone now. 3147 return Sink; 3148 } 3149 3150 // Emit the COPY_FW pseudo instruction. 3151 // 3152 // copy_fw_pseudo $fd, $ws, n 3153 // => 3154 // copy_u_w $rt, $ws, $n 3155 // mtc1 $rt, $fd 3156 // 3157 // When n is zero, the equivalent operation can be performed with (potentially) 3158 // zero instructions due to register overlaps. This optimization is never valid 3159 // for lane 1 because it would require FR=0 mode which isn't supported by MSA. 3160 MachineBasicBlock * 3161 MipsSETargetLowering::emitCOPY_FW(MachineInstr &MI, 3162 MachineBasicBlock *BB) const { 3163 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3164 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3165 DebugLoc DL = MI.getDebugLoc(); 3166 Register Fd = MI.getOperand(0).getReg(); 3167 Register Ws = MI.getOperand(1).getReg(); 3168 unsigned Lane = MI.getOperand(2).getImm(); 3169 3170 if (Lane == 0) { 3171 unsigned Wt = Ws; 3172 if (!Subtarget.useOddSPReg()) { 3173 // We must copy to an even-numbered MSA register so that the 3174 // single-precision sub-register is also guaranteed to be even-numbered. 3175 Wt = RegInfo.createVirtualRegister(&Mips::MSA128WEvensRegClass); 3176 3177 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Wt).addReg(Ws); 3178 } 3179 3180 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo); 3181 } else { 3182 Register Wt = RegInfo.createVirtualRegister( 3183 Subtarget.useOddSPReg() ? 
&Mips::MSA128WRegClass 3184 : &Mips::MSA128WEvensRegClass); 3185 3186 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wt).addReg(Ws).addImm(Lane); 3187 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo); 3188 } 3189 3190 MI.eraseFromParent(); // The pseudo instruction is gone now. 3191 return BB; 3192 } 3193 3194 // Emit the COPY_FD pseudo instruction. 3195 // 3196 // copy_fd_pseudo $fd, $ws, n 3197 // => 3198 // splati.d $wt, $ws, $n 3199 // copy $fd, $wt:sub_64 3200 // 3201 // When n is zero, the equivalent operation can be performed with (potentially) 3202 // zero instructions due to register overlaps. This optimization is always 3203 // valid because FR=1 mode which is the only supported mode in MSA. 3204 MachineBasicBlock * 3205 MipsSETargetLowering::emitCOPY_FD(MachineInstr &MI, 3206 MachineBasicBlock *BB) const { 3207 assert(Subtarget.isFP64bit()); 3208 3209 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3210 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3211 Register Fd = MI.getOperand(0).getReg(); 3212 Register Ws = MI.getOperand(1).getReg(); 3213 unsigned Lane = MI.getOperand(2).getImm() * 2; 3214 DebugLoc DL = MI.getDebugLoc(); 3215 3216 if (Lane == 0) 3217 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, 0, Mips::sub_64); 3218 else { 3219 Register Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); 3220 3221 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wt).addReg(Ws).addImm(1); 3222 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_64); 3223 } 3224 3225 MI.eraseFromParent(); // The pseudo instruction is gone now. 3226 return BB; 3227 } 3228 3229 // Emit the INSERT_FW pseudo instruction. 
3230 // 3231 // insert_fw_pseudo $wd, $wd_in, $n, $fs 3232 // => 3233 // subreg_to_reg $wt:sub_lo, $fs 3234 // insve_w $wd[$n], $wd_in, $wt[0] 3235 MachineBasicBlock * 3236 MipsSETargetLowering::emitINSERT_FW(MachineInstr &MI, 3237 MachineBasicBlock *BB) const { 3238 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3239 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3240 DebugLoc DL = MI.getDebugLoc(); 3241 Register Wd = MI.getOperand(0).getReg(); 3242 Register Wd_in = MI.getOperand(1).getReg(); 3243 unsigned Lane = MI.getOperand(2).getImm(); 3244 Register Fs = MI.getOperand(3).getReg(); 3245 Register Wt = RegInfo.createVirtualRegister( 3246 Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass 3247 : &Mips::MSA128WEvensRegClass); 3248 3249 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt) 3250 .addImm(0) 3251 .addReg(Fs) 3252 .addImm(Mips::sub_lo); 3253 BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_W), Wd) 3254 .addReg(Wd_in) 3255 .addImm(Lane) 3256 .addReg(Wt) 3257 .addImm(0); 3258 3259 MI.eraseFromParent(); // The pseudo instruction is gone now. 3260 return BB; 3261 } 3262 3263 // Emit the INSERT_FD pseudo instruction. 
3264 // 3265 // insert_fd_pseudo $wd, $fs, n 3266 // => 3267 // subreg_to_reg $wt:sub_64, $fs 3268 // insve_d $wd[$n], $wd_in, $wt[0] 3269 MachineBasicBlock * 3270 MipsSETargetLowering::emitINSERT_FD(MachineInstr &MI, 3271 MachineBasicBlock *BB) const { 3272 assert(Subtarget.isFP64bit()); 3273 3274 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3275 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3276 DebugLoc DL = MI.getDebugLoc(); 3277 Register Wd = MI.getOperand(0).getReg(); 3278 Register Wd_in = MI.getOperand(1).getReg(); 3279 unsigned Lane = MI.getOperand(2).getImm(); 3280 Register Fs = MI.getOperand(3).getReg(); 3281 Register Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); 3282 3283 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt) 3284 .addImm(0) 3285 .addReg(Fs) 3286 .addImm(Mips::sub_64); 3287 BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_D), Wd) 3288 .addReg(Wd_in) 3289 .addImm(Lane) 3290 .addReg(Wt) 3291 .addImm(0); 3292 3293 MI.eraseFromParent(); // The pseudo instruction is gone now. 3294 return BB; 3295 } 3296 3297 // Emit the INSERT_([BHWD]|F[WD])_VIDX pseudo instruction. 
//
// For integer:
// (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $rs)
// =>
// (SLL $lanetmp1, $lane, <log2size)
// (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
// (INSERT_[BHWD], $wdtmp2, $wdtmp1, 0, $rs)
// (NEG $lanetmp2, $lanetmp1)
// (SLD_B $wd, $wdtmp2, $wdtmp2,  $lanetmp2)
//
// For floating point:
// (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $fs)
// =>
// (SUBREG_TO_REG $wt, $fs, <subreg>)
// (SLL $lanetmp1, $lane, <log2size)
// (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
// (INSVE_[WD], $wdtmp2, 0, $wdtmp1, 0)
// (NEG $lanetmp2, $lanetmp1)
// (SLD_B $wd, $wdtmp2, $wdtmp2,  $lanetmp2)
//
// EltSizeInBytes must be 1, 2, 4 or 8; any other value is unreachable. IsFP
// selects the floating-point form, in which the scalar is first wrapped into
// a vector register. MI is erased and BB is returned.
MachineBasicBlock *MipsSETargetLowering::emitINSERT_DF_VIDX(
    MachineInstr &MI, MachineBasicBlock *BB, unsigned EltSizeInBytes,
    bool IsFP) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  // Pseudo operands: result vector, input vector, lane (in a register), and
  // the scalar to insert.
  Register Wd = MI.getOperand(0).getReg();
  Register SrcVecReg = MI.getOperand(1).getReg();
  Register LaneReg = MI.getOperand(2).getReg();
  Register SrcValReg = MI.getOperand(3).getReg();

  const TargetRegisterClass *VecRC = nullptr;
  // On N64 the lane arithmetic is done in 64-bit GPRs and the sub_32
  // sub-register is what gets handed to SLD_B.
  // FIXME: This should be true for N32 too.
  const TargetRegisterClass *GPRRC =
      Subtarget.isABI_N64() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
  unsigned SubRegIdx = Subtarget.isABI_N64() ? Mips::sub_32 : 0;
  unsigned ShiftOp = Subtarget.isABI_N64() ? Mips::DSLL : Mips::SLL;
  unsigned EltLog2Size;
  unsigned InsertOp = 0;
  unsigned InsveOp = 0;
  // Pick the per-element-size opcodes, register class and log2 of the size.
  switch (EltSizeInBytes) {
  default:
    llvm_unreachable("Unexpected size");
  case 1:
    EltLog2Size = 0;
    InsertOp = Mips::INSERT_B;
    InsveOp = Mips::INSVE_B;
    VecRC = &Mips::MSA128BRegClass;
    break;
  case 2:
    EltLog2Size = 1;
    InsertOp = Mips::INSERT_H;
    InsveOp = Mips::INSVE_H;
    VecRC = &Mips::MSA128HRegClass;
    break;
  case 4:
    EltLog2Size = 2;
    InsertOp = Mips::INSERT_W;
    InsveOp = Mips::INSVE_W;
    VecRC = &Mips::MSA128WRegClass;
    break;
  case 8:
    EltLog2Size = 3;
    InsertOp = Mips::INSERT_D;
    InsveOp = Mips::INSVE_D;
    VecRC = &Mips::MSA128DRegClass;
    break;
  }

  if (IsFP) {
    // Wrap the FP scalar into a vector register so insve.df can read it; from
    // here on SrcValReg names that vector.
    Register Wt = RegInfo.createVirtualRegister(VecRC);
    BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
        .addImm(0)
        .addReg(SrcValReg)
        .addImm(EltSizeInBytes == 8 ? Mips::sub_64 : Mips::sub_lo);
    SrcValReg = Wt;
  }

  // Convert the lane index into a byte index
  if (EltSizeInBytes != 1) {
    Register LaneTmp1 = RegInfo.createVirtualRegister(GPRRC);
    BuildMI(*BB, MI, DL, TII->get(ShiftOp), LaneTmp1)
        .addReg(LaneReg)
        .addImm(EltLog2Size);
    LaneReg = LaneTmp1;
  }

  // Rotate bytes around so that the desired lane is element zero
  Register WdTmp1 = RegInfo.createVirtualRegister(VecRC);
  BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), WdTmp1)
      .addReg(SrcVecReg)
      .addReg(SrcVecReg)
      .addReg(LaneReg, 0, SubRegIdx);

  Register WdTmp2 = RegInfo.createVirtualRegister(VecRC);
  if (IsFP) {
    // Use insve.df to insert to element zero
    BuildMI(*BB, MI, DL, TII->get(InsveOp), WdTmp2)
        .addReg(WdTmp1)
        .addImm(0)
        .addReg(SrcValReg)
        .addImm(0);
  } else {
    // Use insert.df to insert to element zero
    BuildMI(*BB, MI, DL, TII->get(InsertOp), WdTmp2)
        .addReg(WdTmp1)
        .addReg(SrcValReg)
        .addImm(0);
  }

  // Rotate elements the rest of the way for a full rotation.
  // sld.df interprets $rt modulo the number of columns so we only need to
  // negate the lane index to do this.
  Register LaneTmp2 = RegInfo.createVirtualRegister(GPRRC);
  BuildMI(*BB, MI, DL, TII->get(Subtarget.isABI_N64() ? Mips::DSUB : Mips::SUB),
          LaneTmp2)
      .addReg(Subtarget.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO)
      .addReg(LaneReg);
  BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), Wd)
      .addReg(WdTmp2)
      .addReg(WdTmp2)
      .addReg(LaneTmp2, 0, SubRegIdx);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the FILL_FW pseudo instruction.
//
// fill_fw_pseudo $wd, $fs
// =>
// implicit_def $wt1
// insert_subreg $wt2:subreg_lo, $wt1, $fs
// splati.w $wd, $wt2[0]
//
// MI is erased; BB is returned unchanged apart from the inserted instructions.
MachineBasicBlock *
MipsSETargetLowering::emitFILL_FW(MachineInstr &MI,
                                  MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Wd = MI.getOperand(0).getReg();
  Register Fs = MI.getOperand(1).getReg();
  // Both temporaries are restricted to the even-numbered class when odd
  // single-precision registers are not in use.
  Register Wt1 = RegInfo.createVirtualRegister(
      Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
                              : &Mips::MSA128WEvensRegClass);
  Register Wt2 = RegInfo.createVirtualRegister(
      Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
                              : &Mips::MSA128WEvensRegClass);

  // Start from an undefined vector, drop the scalar into its low
  // sub-register, then splat element 0 across the result.
  BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1);
  BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2)
      .addReg(Wt1)
      .addReg(Fs)
      .addImm(Mips::sub_lo);
  BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wd).addReg(Wt2).addImm(0);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the FILL_FD pseudo instruction.
//
// fill_fd_pseudo $wd, $fs
// =>
// implicit_def $wt1
// insert_subreg $wt2:subreg_64, $wt1, $fs
// splati.d $wd, $wt2[0]
//
// MI is erased; BB is returned unchanged apart from the inserted instructions.
MachineBasicBlock *
MipsSETargetLowering::emitFILL_FD(MachineInstr &MI,
                                  MachineBasicBlock *BB) const {
  assert(Subtarget.isFP64bit());

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Wd = MI.getOperand(0).getReg();
  Register Fs = MI.getOperand(1).getReg();
  Register Wt1 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
  Register Wt2 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);

  // Start from an undefined vector, drop the scalar into its 64-bit
  // sub-register, then splat element 0 across the result.
  BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1);
  BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2)
      .addReg(Wt1)
      .addReg(Fs)
      .addImm(Mips::sub_64);
  BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wd).addReg(Wt2).addImm(0);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the ST_F16_PSEUDO instruction to store a f16 value from an MSA
// register.
//
// STF16 MSA128F16:$wd, mem_simm10:$addr
// =>
//  copy_u.h $rtemp,$wd[0]
//  sh $rtemp, $addr
//
// Safety: We can't use st.h & co as they would overwrite the memory after
// the destination. It would require half floats be allocated 16 bytes(!) of
// space.
3499 MachineBasicBlock * 3500 MipsSETargetLowering::emitST_F16_PSEUDO(MachineInstr &MI, 3501 MachineBasicBlock *BB) const { 3502 3503 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3504 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3505 DebugLoc DL = MI.getDebugLoc(); 3506 Register Ws = MI.getOperand(0).getReg(); 3507 Register Rt = MI.getOperand(1).getReg(); 3508 const MachineMemOperand &MMO = **MI.memoperands_begin(); 3509 unsigned Imm = MMO.getOffset(); 3510 3511 // Caution: A load via the GOT can expand to a GPR32 operand, a load via 3512 // spill and reload can expand as a GPR64 operand. Examine the 3513 // operand in detail and default to ABI. 3514 const TargetRegisterClass *RC = 3515 MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg()) 3516 : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass 3517 : &Mips::GPR64RegClass); 3518 const bool UsingMips32 = RC == &Mips::GPR32RegClass; 3519 Register Rs = RegInfo.createVirtualRegister(&Mips::GPR32RegClass); 3520 3521 BuildMI(*BB, MI, DL, TII->get(Mips::COPY_U_H), Rs).addReg(Ws).addImm(0); 3522 if(!UsingMips32) { 3523 Register Tmp = RegInfo.createVirtualRegister(&Mips::GPR64RegClass); 3524 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Tmp) 3525 .addImm(0) 3526 .addReg(Rs) 3527 .addImm(Mips::sub_32); 3528 Rs = Tmp; 3529 } 3530 BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::SH : Mips::SH64)) 3531 .addReg(Rs) 3532 .addReg(Rt) 3533 .addImm(Imm) 3534 .addMemOperand(BB->getParent()->getMachineMemOperand( 3535 &MMO, MMO.getOffset(), MMO.getSize())); 3536 3537 MI.eraseFromParent(); 3538 return BB; 3539 } 3540 3541 // Emit the LD_F16_PSEDUO instruction to load a f16 value into an MSA register. 3542 // 3543 // LD_F16 MSA128F16:$wd, mem_simm10:$addr 3544 // => 3545 // lh $rtemp, $addr 3546 // fill.h $wd, $rtemp 3547 // 3548 // Safety: We can't use ld.h & co as they over-read from the source. 
3549 // Additionally, if the address is not modulo 16, 2 cases can occur: 3550 // a) Segmentation fault as the load instruction reads from a memory page 3551 // memory it's not supposed to. 3552 // b) The load crosses an implementation specific boundary, requiring OS 3553 // intervention. 3554 MachineBasicBlock * 3555 MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI, 3556 MachineBasicBlock *BB) const { 3557 3558 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3559 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3560 DebugLoc DL = MI.getDebugLoc(); 3561 Register Wd = MI.getOperand(0).getReg(); 3562 3563 // Caution: A load via the GOT can expand to a GPR32 operand, a load via 3564 // spill and reload can expand as a GPR64 operand. Examine the 3565 // operand in detail and default to ABI. 3566 const TargetRegisterClass *RC = 3567 MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg()) 3568 : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass 3569 : &Mips::GPR64RegClass); 3570 3571 const bool UsingMips32 = RC == &Mips::GPR32RegClass; 3572 Register Rt = RegInfo.createVirtualRegister(RC); 3573 3574 MachineInstrBuilder MIB = 3575 BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::LH : Mips::LH64), Rt); 3576 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) 3577 MIB.add(MO); 3578 3579 if(!UsingMips32) { 3580 Register Tmp = RegInfo.createVirtualRegister(&Mips::GPR32RegClass); 3581 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Tmp).addReg(Rt, 0, Mips::sub_32); 3582 Rt = Tmp; 3583 } 3584 3585 BuildMI(*BB, MI, DL, TII->get(Mips::FILL_H), Wd).addReg(Rt); 3586 3587 MI.eraseFromParent(); 3588 return BB; 3589 } 3590 3591 // Emit the FPROUND_PSEUDO instruction. 3592 // 3593 // Round an FGR64Opnd, FGR32Opnd to an f16. 3594 // 3595 // Safety: Cycle the operand through the GPRs so the result always ends up 3596 // the correct MSA register. 3597 // 3598 // FIXME: This copying is strictly unnecessary. 
If we could tie FGR32Opnd:$Fs 3599 // / FGR64Opnd:$Fs and MSA128F16:$Wd to the same physical register 3600 // (which they can be, as the MSA registers are defined to alias the 3601 // FPU's 64 bit and 32 bit registers) the result can be accessed using 3602 // the correct register class. That requires operands be tie-able across 3603 // register classes which have a sub/super register class relationship. 3604 // 3605 // For FPG32Opnd: 3606 // 3607 // FPROUND MSA128F16:$wd, FGR32Opnd:$fs 3608 // => 3609 // mfc1 $rtemp, $fs 3610 // fill.w $rtemp, $wtemp 3611 // fexdo.w $wd, $wtemp, $wtemp 3612 // 3613 // For FPG64Opnd on mips32r2+: 3614 // 3615 // FPROUND MSA128F16:$wd, FGR64Opnd:$fs 3616 // => 3617 // mfc1 $rtemp, $fs 3618 // fill.w $rtemp, $wtemp 3619 // mfhc1 $rtemp2, $fs 3620 // insert.w $wtemp[1], $rtemp2 3621 // insert.w $wtemp[3], $rtemp2 3622 // fexdo.w $wtemp2, $wtemp, $wtemp 3623 // fexdo.h $wd, $temp2, $temp2 3624 // 3625 // For FGR64Opnd on mips64r2+: 3626 // 3627 // FPROUND MSA128F16:$wd, FGR64Opnd:$fs 3628 // => 3629 // dmfc1 $rtemp, $fs 3630 // fill.d $rtemp, $wtemp 3631 // fexdo.w $wtemp2, $wtemp, $wtemp 3632 // fexdo.h $wd, $wtemp2, $wtemp2 3633 // 3634 // Safety note: As $wtemp is UNDEF, we may provoke a spurious exception if the 3635 // undef bits are "just right" and the exception enable bits are 3636 // set. By using fill.w to replicate $fs into all elements over 3637 // insert.w for one element, we avoid that potiential case. If 3638 // fexdo.[hw] causes an exception in, the exception is valid and it 3639 // occurs for all elements. 3640 MachineBasicBlock * 3641 MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI, 3642 MachineBasicBlock *BB, 3643 bool IsFGR64) const { 3644 3645 // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous 3646 // here. It's technically doable to support MIPS32 here, but the ISA forbids 3647 // it. 
3648 assert(Subtarget.hasMSA() && Subtarget.hasMips32r2()); 3649 3650 bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64; 3651 bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64; 3652 3653 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3654 DebugLoc DL = MI.getDebugLoc(); 3655 Register Wd = MI.getOperand(0).getReg(); 3656 Register Fs = MI.getOperand(1).getReg(); 3657 3658 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3659 Register Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 3660 const TargetRegisterClass *GPRRC = 3661 IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; 3662 unsigned MFC1Opc = IsFGR64onMips64 3663 ? Mips::DMFC1 3664 : (IsFGR64onMips32 ? Mips::MFC1_D64 : Mips::MFC1); 3665 unsigned FILLOpc = IsFGR64onMips64 ? Mips::FILL_D : Mips::FILL_W; 3666 3667 // Perform the register class copy as mentioned above. 3668 Register Rtemp = RegInfo.createVirtualRegister(GPRRC); 3669 BuildMI(*BB, MI, DL, TII->get(MFC1Opc), Rtemp).addReg(Fs); 3670 BuildMI(*BB, MI, DL, TII->get(FILLOpc), Wtemp).addReg(Rtemp); 3671 unsigned WPHI = Wtemp; 3672 3673 if (IsFGR64onMips32) { 3674 Register Rtemp2 = RegInfo.createVirtualRegister(GPRRC); 3675 BuildMI(*BB, MI, DL, TII->get(Mips::MFHC1_D64), Rtemp2).addReg(Fs); 3676 Register Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 3677 Register Wtemp3 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 3678 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp2) 3679 .addReg(Wtemp) 3680 .addReg(Rtemp2) 3681 .addImm(1); 3682 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp3) 3683 .addReg(Wtemp2) 3684 .addReg(Rtemp2) 3685 .addImm(3); 3686 WPHI = Wtemp3; 3687 } 3688 3689 if (IsFGR64) { 3690 Register Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 3691 BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_W), Wtemp2) 3692 .addReg(WPHI) 3693 .addReg(WPHI); 3694 WPHI = Wtemp2; 3695 } 3696 3697 BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_H), 
Wd).addReg(WPHI).addReg(WPHI); 3698 3699 MI.eraseFromParent(); 3700 return BB; 3701 } 3702 3703 // Emit the FPEXTEND_PSEUDO instruction. 3704 // 3705 // Expand an f16 to either a FGR32Opnd or FGR64Opnd. 3706 // 3707 // Safety: Cycle the result through the GPRs so the result always ends up 3708 // the correct floating point register. 3709 // 3710 // FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fd 3711 // / FGR64Opnd:$Fd and MSA128F16:$Ws to the same physical register 3712 // (which they can be, as the MSA registers are defined to alias the 3713 // FPU's 64 bit and 32 bit registers) the result can be accessed using 3714 // the correct register class. That requires operands be tie-able across 3715 // register classes which have a sub/super register class relationship. I 3716 // haven't checked. 3717 // 3718 // For FGR32Opnd: 3719 // 3720 // FPEXTEND FGR32Opnd:$fd, MSA128F16:$ws 3721 // => 3722 // fexupr.w $wtemp, $ws 3723 // copy_s.w $rtemp, $ws[0] 3724 // mtc1 $rtemp, $fd 3725 // 3726 // For FGR64Opnd on Mips64: 3727 // 3728 // FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws 3729 // => 3730 // fexupr.w $wtemp, $ws 3731 // fexupr.d $wtemp2, $wtemp 3732 // copy_s.d $rtemp, $wtemp2s[0] 3733 // dmtc1 $rtemp, $fd 3734 // 3735 // For FGR64Opnd on Mips32: 3736 // 3737 // FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws 3738 // => 3739 // fexupr.w $wtemp, $ws 3740 // fexupr.d $wtemp2, $wtemp 3741 // copy_s.w $rtemp, $wtemp2[0] 3742 // mtc1 $rtemp, $ftemp 3743 // copy_s.w $rtemp2, $wtemp2[1] 3744 // $fd = mthc1 $rtemp2, $ftemp 3745 MachineBasicBlock * 3746 MipsSETargetLowering::emitFPEXTEND_PSEUDO(MachineInstr &MI, 3747 MachineBasicBlock *BB, 3748 bool IsFGR64) const { 3749 3750 // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous 3751 // here. It's technically doable to support MIPS32 here, but the ISA forbids 3752 // it. 
3753 assert(Subtarget.hasMSA() && Subtarget.hasMips32r2()); 3754 3755 bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64; 3756 bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64; 3757 3758 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3759 DebugLoc DL = MI.getDebugLoc(); 3760 Register Fd = MI.getOperand(0).getReg(); 3761 Register Ws = MI.getOperand(1).getReg(); 3762 3763 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3764 const TargetRegisterClass *GPRRC = 3765 IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; 3766 unsigned MTC1Opc = IsFGR64onMips64 3767 ? Mips::DMTC1 3768 : (IsFGR64onMips32 ? Mips::MTC1_D64 : Mips::MTC1); 3769 Register COPYOpc = IsFGR64onMips64 ? Mips::COPY_S_D : Mips::COPY_S_W; 3770 3771 Register Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 3772 Register WPHI = Wtemp; 3773 3774 BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_W), Wtemp).addReg(Ws); 3775 if (IsFGR64) { 3776 WPHI = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); 3777 BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_D), WPHI).addReg(Wtemp); 3778 } 3779 3780 // Perform the safety regclass copy mentioned above. 3781 Register Rtemp = RegInfo.createVirtualRegister(GPRRC); 3782 Register FPRPHI = IsFGR64onMips32 3783 ? RegInfo.createVirtualRegister(&Mips::FGR64RegClass) 3784 : Fd; 3785 BuildMI(*BB, MI, DL, TII->get(COPYOpc), Rtemp).addReg(WPHI).addImm(0); 3786 BuildMI(*BB, MI, DL, TII->get(MTC1Opc), FPRPHI).addReg(Rtemp); 3787 3788 if (IsFGR64onMips32) { 3789 Register Rtemp2 = RegInfo.createVirtualRegister(GPRRC); 3790 BuildMI(*BB, MI, DL, TII->get(Mips::COPY_S_W), Rtemp2) 3791 .addReg(WPHI) 3792 .addImm(1); 3793 BuildMI(*BB, MI, DL, TII->get(Mips::MTHC1_D64), Fd) 3794 .addReg(FPRPHI) 3795 .addReg(Rtemp2); 3796 } 3797 3798 MI.eraseFromParent(); 3799 return BB; 3800 } 3801 3802 // Emit the FEXP2_W_1 pseudo instructions. 
3803 // 3804 // fexp2_w_1_pseudo $wd, $wt 3805 // => 3806 // ldi.w $ws, 1 3807 // fexp2.w $wd, $ws, $wt 3808 MachineBasicBlock * 3809 MipsSETargetLowering::emitFEXP2_W_1(MachineInstr &MI, 3810 MachineBasicBlock *BB) const { 3811 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3812 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3813 const TargetRegisterClass *RC = &Mips::MSA128WRegClass; 3814 Register Ws1 = RegInfo.createVirtualRegister(RC); 3815 Register Ws2 = RegInfo.createVirtualRegister(RC); 3816 DebugLoc DL = MI.getDebugLoc(); 3817 3818 // Splat 1.0 into a vector 3819 BuildMI(*BB, MI, DL, TII->get(Mips::LDI_W), Ws1).addImm(1); 3820 BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_W), Ws2).addReg(Ws1); 3821 3822 // Emit 1.0 * fexp2(Wt) 3823 BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_W), MI.getOperand(0).getReg()) 3824 .addReg(Ws2) 3825 .addReg(MI.getOperand(1).getReg()); 3826 3827 MI.eraseFromParent(); // The pseudo instruction is gone now. 3828 return BB; 3829 } 3830 3831 // Emit the FEXP2_D_1 pseudo instructions. 
3832 // 3833 // fexp2_d_1_pseudo $wd, $wt 3834 // => 3835 // ldi.d $ws, 1 3836 // fexp2.d $wd, $ws, $wt 3837 MachineBasicBlock * 3838 MipsSETargetLowering::emitFEXP2_D_1(MachineInstr &MI, 3839 MachineBasicBlock *BB) const { 3840 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3841 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3842 const TargetRegisterClass *RC = &Mips::MSA128DRegClass; 3843 Register Ws1 = RegInfo.createVirtualRegister(RC); 3844 Register Ws2 = RegInfo.createVirtualRegister(RC); 3845 DebugLoc DL = MI.getDebugLoc(); 3846 3847 // Splat 1.0 into a vector 3848 BuildMI(*BB, MI, DL, TII->get(Mips::LDI_D), Ws1).addImm(1); 3849 BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_D), Ws2).addReg(Ws1); 3850 3851 // Emit 1.0 * fexp2(Wt) 3852 BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_D), MI.getOperand(0).getReg()) 3853 .addReg(Ws2) 3854 .addReg(MI.getOperand(1).getReg()); 3855 3856 MI.eraseFromParent(); // The pseudo instruction is gone now. 3857 return BB; 3858 } 3859