1 //===- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Subclass of MipsTargetLowering specialized for mips32/64. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "MipsSEISelLowering.h" 15 #include "MipsMachineFunction.h" 16 #include "MipsRegisterInfo.h" 17 #include "MipsSubtarget.h" 18 #include "llvm/ADT/APInt.h" 19 #include "llvm/ADT/ArrayRef.h" 20 #include "llvm/ADT/STLExtras.h" 21 #include "llvm/ADT/SmallVector.h" 22 #include "llvm/ADT/Triple.h" 23 #include "llvm/CodeGen/CallingConvLower.h" 24 #include "llvm/CodeGen/ISDOpcodes.h" 25 #include "llvm/CodeGen/MachineBasicBlock.h" 26 #include "llvm/CodeGen/MachineFunction.h" 27 #include "llvm/CodeGen/MachineInstr.h" 28 #include "llvm/CodeGen/MachineInstrBuilder.h" 29 #include "llvm/CodeGen/MachineMemOperand.h" 30 #include "llvm/CodeGen/MachineRegisterInfo.h" 31 #include "llvm/CodeGen/SelectionDAG.h" 32 #include "llvm/CodeGen/SelectionDAGNodes.h" 33 #include "llvm/CodeGen/TargetInstrInfo.h" 34 #include "llvm/CodeGen/TargetSubtargetInfo.h" 35 #include "llvm/CodeGen/ValueTypes.h" 36 #include "llvm/IR/DebugLoc.h" 37 #include "llvm/IR/Intrinsics.h" 38 #include "llvm/Support/Casting.h" 39 #include "llvm/Support/CommandLine.h" 40 #include "llvm/Support/Debug.h" 41 #include "llvm/Support/ErrorHandling.h" 42 #include "llvm/Support/MachineValueType.h" 43 #include "llvm/Support/MathExtras.h" 44 #include "llvm/Support/raw_ostream.h" 45 #include <algorithm> 46 #include <cassert> 47 #include <cstdint> 48 #include <iterator> 49 #include <utility> 50 51 using namespace llvm; 52 53 #define DEBUG_TYPE "mips-isel" 54 55 static cl::opt<bool> 56 UseMipsTailCalls("mips-tail-calls", cl::Hidden, 57 cl::desc("MIPS: permit tail calls."), cl::init(false)); 58 59 static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false), 60 cl::desc("Expand double precision loads and " 61 "stores to their single precision " 62 "counterparts")); 63 64 MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM, 65 const MipsSubtarget &STI) 66 : MipsTargetLowering(TM, STI) { 67 // Set up the register classes 68 addRegisterClass(MVT::i32, &Mips::GPR32RegClass); 69 70 if (Subtarget.isGP64bit()) 71 addRegisterClass(MVT::i64, &Mips::GPR64RegClass); 72 73 if (Subtarget.hasDSP() || Subtarget.hasMSA()) { 74 // Expand all truncating stores and extending loads. 75 for (MVT VT0 : MVT::vector_valuetypes()) { 76 for (MVT VT1 : MVT::vector_valuetypes()) { 77 setTruncStoreAction(VT0, VT1, Expand); 78 setLoadExtAction(ISD::SEXTLOAD, VT0, VT1, Expand); 79 setLoadExtAction(ISD::ZEXTLOAD, VT0, VT1, Expand); 80 setLoadExtAction(ISD::EXTLOAD, VT0, VT1, Expand); 81 } 82 } 83 } 84 85 if (Subtarget.hasDSP()) { 86 MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8}; 87 88 for (unsigned i = 0; i < array_lengthof(VecTys); ++i) { 89 addRegisterClass(VecTys[i], &Mips::DSPRRegClass); 90 91 // Expand all builtin opcodes. 92 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) 93 setOperationAction(Opc, VecTys[i], Expand); 94 95 setOperationAction(ISD::ADD, VecTys[i], Legal); 96 setOperationAction(ISD::SUB, VecTys[i], Legal); 97 setOperationAction(ISD::LOAD, VecTys[i], Legal); 98 setOperationAction(ISD::STORE, VecTys[i], Legal); 99 setOperationAction(ISD::BITCAST, VecTys[i], Legal); 100 } 101 102 setTargetDAGCombine(ISD::SHL); 103 setTargetDAGCombine(ISD::SRA); 104 setTargetDAGCombine(ISD::SRL); 105 setTargetDAGCombine(ISD::SETCC); 106 setTargetDAGCombine(ISD::VSELECT); 107 108 if (Subtarget.hasMips32r2()) { 109 setOperationAction(ISD::ADDC, MVT::i32, Legal); 110 setOperationAction(ISD::ADDE, MVT::i32, Legal); 111 } 112 } 113 114 if (Subtarget.hasDSPR2()) 115 setOperationAction(ISD::MUL, MVT::v2i16, Legal); 116 117 if (Subtarget.hasMSA()) { 118 addMSAIntType(MVT::v16i8, &Mips::MSA128BRegClass); 119 addMSAIntType(MVT::v8i16, &Mips::MSA128HRegClass); 120 addMSAIntType(MVT::v4i32, &Mips::MSA128WRegClass); 121 addMSAIntType(MVT::v2i64, &Mips::MSA128DRegClass); 122 addMSAFloatType(MVT::v8f16, &Mips::MSA128HRegClass); 123 addMSAFloatType(MVT::v4f32, &Mips::MSA128WRegClass); 124 addMSAFloatType(MVT::v2f64, &Mips::MSA128DRegClass); 125 126 // f16 is a storage-only type, always promote it to f32. 127 addRegisterClass(MVT::f16, &Mips::MSA128HRegClass); 128 setOperationAction(ISD::SETCC, MVT::f16, Promote); 129 setOperationAction(ISD::BR_CC, MVT::f16, Promote); 130 setOperationAction(ISD::SELECT_CC, MVT::f16, Promote); 131 setOperationAction(ISD::SELECT, MVT::f16, Promote); 132 setOperationAction(ISD::FADD, MVT::f16, Promote); 133 setOperationAction(ISD::FSUB, MVT::f16, Promote); 134 setOperationAction(ISD::FMUL, MVT::f16, Promote); 135 setOperationAction(ISD::FDIV, MVT::f16, Promote); 136 setOperationAction(ISD::FREM, MVT::f16, Promote); 137 setOperationAction(ISD::FMA, MVT::f16, Promote); 138 setOperationAction(ISD::FNEG, MVT::f16, Promote); 139 setOperationAction(ISD::FABS, MVT::f16, Promote); 140 setOperationAction(ISD::FCEIL, MVT::f16, Promote); 141 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote); 142 setOperationAction(ISD::FCOS, MVT::f16, Promote); 143 setOperationAction(ISD::FP_EXTEND, MVT::f16, Promote); 144 setOperationAction(ISD::FFLOOR, MVT::f16, Promote); 145 setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote); 146 setOperationAction(ISD::FPOW, MVT::f16, Promote); 147 setOperationAction(ISD::FPOWI, MVT::f16, Promote); 148 setOperationAction(ISD::FRINT, MVT::f16, Promote); 149 setOperationAction(ISD::FSIN, MVT::f16, Promote); 150 setOperationAction(ISD::FSINCOS, MVT::f16, Promote); 151 setOperationAction(ISD::FSQRT, MVT::f16, Promote); 152 setOperationAction(ISD::FEXP, MVT::f16, Promote); 153 setOperationAction(ISD::FEXP2, MVT::f16, Promote); 154 setOperationAction(ISD::FLOG, MVT::f16, Promote); 155 setOperationAction(ISD::FLOG2, MVT::f16, Promote); 156 setOperationAction(ISD::FLOG10, MVT::f16, Promote); 157 setOperationAction(ISD::FROUND, MVT::f16, Promote); 158 setOperationAction(ISD::FTRUNC, MVT::f16, Promote); 159 setOperationAction(ISD::FMINNUM, MVT::f16, Promote); 160 setOperationAction(ISD::FMAXNUM, MVT::f16, Promote); 161 setOperationAction(ISD::FMINIMUM, MVT::f16, Promote); 162 setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote); 163 164 setTargetDAGCombine(ISD::AND); 165 setTargetDAGCombine(ISD::OR); 166 setTargetDAGCombine(ISD::SRA); 167 setTargetDAGCombine(ISD::VSELECT); 168 setTargetDAGCombine(ISD::XOR); 169 } 170 171 if (!Subtarget.useSoftFloat()) { 172 addRegisterClass(MVT::f32, &Mips::FGR32RegClass); 173 174 // When dealing with single precision only, use libcalls 175 if (!Subtarget.isSingleFloat()) { 176 if (Subtarget.isFP64bit()) 177 addRegisterClass(MVT::f64, &Mips::FGR64RegClass); 178 else 179 addRegisterClass(MVT::f64, &Mips::AFGR64RegClass); 180 } 181 } 182 183 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom); 184 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom); 185 setOperationAction(ISD::MULHS, MVT::i32, Custom); 186 setOperationAction(ISD::MULHU, MVT::i32, Custom); 187 188 if (Subtarget.hasCnMips()) 189 setOperationAction(ISD::MUL, MVT::i64, Legal); 190 else if (Subtarget.isGP64bit()) 191 setOperationAction(ISD::MUL, MVT::i64, Custom); 192 193 if (Subtarget.isGP64bit()) { 194 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Custom); 195 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom); 196 setOperationAction(ISD::MULHS, MVT::i64, Custom); 197 setOperationAction(ISD::MULHU, MVT::i64, Custom); 198 setOperationAction(ISD::SDIVREM, MVT::i64, Custom); 199 setOperationAction(ISD::UDIVREM, MVT::i64, Custom); 200 } 201 202 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); 203 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); 204 205 setOperationAction(ISD::SDIVREM, MVT::i32, Custom); 206 setOperationAction(ISD::UDIVREM, MVT::i32, Custom); 207 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); 208 setOperationAction(ISD::LOAD, MVT::i32, Custom); 209 setOperationAction(ISD::STORE, MVT::i32, Custom); 210 211 setTargetDAGCombine(ISD::MUL); 212 213 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 214 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); 215 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); 216 217 if (NoDPLoadStore) { 218 setOperationAction(ISD::LOAD, MVT::f64, Custom); 219 setOperationAction(ISD::STORE, MVT::f64, Custom); 220 } 221 222 if (Subtarget.hasMips32r6()) { 223 // MIPS32r6 replaces the accumulator-based multiplies with a three register 224 // instruction 225 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); 226 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); 227 setOperationAction(ISD::MUL, MVT::i32, Legal); 228 setOperationAction(ISD::MULHS, MVT::i32, Legal); 229 setOperationAction(ISD::MULHU, MVT::i32, Legal); 230 231 // MIPS32r6 replaces the accumulator-based division/remainder with separate 232 // three register division and remainder instructions. 233 setOperationAction(ISD::SDIVREM, MVT::i32, Expand); 234 setOperationAction(ISD::UDIVREM, MVT::i32, Expand); 235 setOperationAction(ISD::SDIV, MVT::i32, Legal); 236 setOperationAction(ISD::UDIV, MVT::i32, Legal); 237 setOperationAction(ISD::SREM, MVT::i32, Legal); 238 setOperationAction(ISD::UREM, MVT::i32, Legal); 239 240 // MIPS32r6 replaces conditional moves with an equivalent that removes the 241 // need for three GPR read ports. 242 setOperationAction(ISD::SETCC, MVT::i32, Legal); 243 setOperationAction(ISD::SELECT, MVT::i32, Legal); 244 setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); 245 246 setOperationAction(ISD::SETCC, MVT::f32, Legal); 247 setOperationAction(ISD::SELECT, MVT::f32, Legal); 248 setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); 249 250 assert(Subtarget.isFP64bit() && "FR=1 is required for MIPS32r6"); 251 setOperationAction(ISD::SETCC, MVT::f64, Legal); 252 setOperationAction(ISD::SELECT, MVT::f64, Custom); 253 setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); 254 255 setOperationAction(ISD::BRCOND, MVT::Other, Legal); 256 257 // Floating point > and >= are supported via < and <= 258 setCondCodeAction(ISD::SETOGE, MVT::f32, Expand); 259 setCondCodeAction(ISD::SETOGT, MVT::f32, Expand); 260 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand); 261 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand); 262 263 setCondCodeAction(ISD::SETOGE, MVT::f64, Expand); 264 setCondCodeAction(ISD::SETOGT, MVT::f64, Expand); 265 setCondCodeAction(ISD::SETUGE, MVT::f64, Expand); 266 setCondCodeAction(ISD::SETUGT, MVT::f64, Expand); 267 } 268 269 if (Subtarget.hasMips64r6()) { 270 // MIPS64r6 replaces the accumulator-based multiplies with a three register 271 // instruction 272 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); 273 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); 274 setOperationAction(ISD::MUL, MVT::i64, Legal); 275 setOperationAction(ISD::MULHS, MVT::i64, Legal); 276 setOperationAction(ISD::MULHU, MVT::i64, Legal); 277 278 // MIPS32r6 replaces the accumulator-based division/remainder with separate 279 // three register division and remainder instructions. 280 setOperationAction(ISD::SDIVREM, MVT::i64, Expand); 281 setOperationAction(ISD::UDIVREM, MVT::i64, Expand); 282 setOperationAction(ISD::SDIV, MVT::i64, Legal); 283 setOperationAction(ISD::UDIV, MVT::i64, Legal); 284 setOperationAction(ISD::SREM, MVT::i64, Legal); 285 setOperationAction(ISD::UREM, MVT::i64, Legal); 286 287 // MIPS64r6 replaces conditional moves with an equivalent that removes the 288 // need for three GPR read ports. 289 setOperationAction(ISD::SETCC, MVT::i64, Legal); 290 setOperationAction(ISD::SELECT, MVT::i64, Legal); 291 setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); 292 } 293 294 computeRegisterProperties(Subtarget.getRegisterInfo()); 295 } 296 297 const MipsTargetLowering * 298 llvm::createMipsSETargetLowering(const MipsTargetMachine &TM, 299 const MipsSubtarget &STI) { 300 return new MipsSETargetLowering(TM, STI); 301 } 302 303 const TargetRegisterClass * 304 MipsSETargetLowering::getRepRegClassFor(MVT VT) const { 305 if (VT == MVT::Untyped) 306 return Subtarget.hasDSP() ? &Mips::ACC64DSPRegClass : &Mips::ACC64RegClass; 307 308 return TargetLowering::getRepRegClassFor(VT); 309 } 310 311 // Enable MSA support for the given integer type and Register class. 312 void MipsSETargetLowering:: 313 addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { 314 addRegisterClass(Ty, RC); 315 316 // Expand all builtin opcodes. 317 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) 318 setOperationAction(Opc, Ty, Expand); 319 320 setOperationAction(ISD::BITCAST, Ty, Legal); 321 setOperationAction(ISD::LOAD, Ty, Legal); 322 setOperationAction(ISD::STORE, Ty, Legal); 323 setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Custom); 324 setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal); 325 setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); 326 327 setOperationAction(ISD::ADD, Ty, Legal); 328 setOperationAction(ISD::AND, Ty, Legal); 329 setOperationAction(ISD::CTLZ, Ty, Legal); 330 setOperationAction(ISD::CTPOP, Ty, Legal); 331 setOperationAction(ISD::MUL, Ty, Legal); 332 setOperationAction(ISD::OR, Ty, Legal); 333 setOperationAction(ISD::SDIV, Ty, Legal); 334 setOperationAction(ISD::SREM, Ty, Legal); 335 setOperationAction(ISD::SHL, Ty, Legal); 336 setOperationAction(ISD::SRA, Ty, Legal); 337 setOperationAction(ISD::SRL, Ty, Legal); 338 setOperationAction(ISD::SUB, Ty, Legal); 339 setOperationAction(ISD::SMAX, Ty, Legal); 340 setOperationAction(ISD::SMIN, Ty, Legal); 341 setOperationAction(ISD::UDIV, Ty, Legal); 342 setOperationAction(ISD::UREM, Ty, Legal); 343 setOperationAction(ISD::UMAX, Ty, Legal); 344 setOperationAction(ISD::UMIN, Ty, Legal); 345 setOperationAction(ISD::VECTOR_SHUFFLE, Ty, Custom); 346 setOperationAction(ISD::VSELECT, Ty, Legal); 347 setOperationAction(ISD::XOR, Ty, Legal); 348 349 if (Ty == MVT::v4i32 || Ty == MVT::v2i64) { 350 setOperationAction(ISD::FP_TO_SINT, Ty, Legal); 351 setOperationAction(ISD::FP_TO_UINT, Ty, Legal); 352 setOperationAction(ISD::SINT_TO_FP, Ty, Legal); 353 setOperationAction(ISD::UINT_TO_FP, Ty, Legal); 354 } 355 356 setOperationAction(ISD::SETCC, Ty, Legal); 357 setCondCodeAction(ISD::SETNE, Ty, Expand); 358 setCondCodeAction(ISD::SETGE, Ty, Expand); 359 setCondCodeAction(ISD::SETGT, Ty, Expand); 360 setCondCodeAction(ISD::SETUGE, Ty, Expand); 361 setCondCodeAction(ISD::SETUGT, Ty, Expand); 362 } 363 364 // Enable MSA support for the given floating-point type and Register class. 365 void MipsSETargetLowering:: 366 addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { 367 addRegisterClass(Ty, RC); 368 369 // Expand all builtin opcodes. 370 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) 371 setOperationAction(Opc, Ty, Expand); 372 373 setOperationAction(ISD::LOAD, Ty, Legal); 374 setOperationAction(ISD::STORE, Ty, Legal); 375 setOperationAction(ISD::BITCAST, Ty, Legal); 376 setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Legal); 377 setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal); 378 setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); 379 380 if (Ty != MVT::v8f16) { 381 setOperationAction(ISD::FABS, Ty, Legal); 382 setOperationAction(ISD::FADD, Ty, Legal); 383 setOperationAction(ISD::FDIV, Ty, Legal); 384 setOperationAction(ISD::FEXP2, Ty, Legal); 385 setOperationAction(ISD::FLOG2, Ty, Legal); 386 setOperationAction(ISD::FMA, Ty, Legal); 387 setOperationAction(ISD::FMUL, Ty, Legal); 388 setOperationAction(ISD::FRINT, Ty, Legal); 389 setOperationAction(ISD::FSQRT, Ty, Legal); 390 setOperationAction(ISD::FSUB, Ty, Legal); 391 setOperationAction(ISD::VSELECT, Ty, Legal); 392 393 setOperationAction(ISD::SETCC, Ty, Legal); 394 setCondCodeAction(ISD::SETOGE, Ty, Expand); 395 setCondCodeAction(ISD::SETOGT, Ty, Expand); 396 setCondCodeAction(ISD::SETUGE, Ty, Expand); 397 setCondCodeAction(ISD::SETUGT, Ty, Expand); 398 setCondCodeAction(ISD::SETGE, Ty, Expand); 399 setCondCodeAction(ISD::SETGT, Ty, Expand); 400 } 401 } 402 403 SDValue MipsSETargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { 404 if(!Subtarget.hasMips32r6()) 405 return MipsTargetLowering::LowerOperation(Op, DAG); 406 407 EVT ResTy = Op->getValueType(0); 408 SDLoc DL(Op); 409 410 // Although MTC1_D64 takes an i32 and writes an f64, the upper 32 bits of the 411 // floating point register are undefined. Not really an issue as sel.d, which 412 // is produced from an FSELECT node, only looks at bit 0. 413 SDValue Tmp = DAG.getNode(MipsISD::MTC1_D64, DL, MVT::f64, Op->getOperand(0)); 414 return DAG.getNode(MipsISD::FSELECT, DL, ResTy, Tmp, Op->getOperand(1), 415 Op->getOperand(2)); 416 } 417 418 bool 419 MipsSETargetLowering::allowsMisalignedMemoryAccesses(EVT VT, 420 unsigned, 421 unsigned, 422 bool *Fast) const { 423 MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy; 424 425 if (Subtarget.systemSupportsUnalignedAccess()) { 426 // MIPS32r6/MIPS64r6 is required to support unaligned access. It's 427 // implementation defined whether this is handled by hardware, software, or 428 // a hybrid of the two but it's expected that most implementations will 429 // handle the majority of cases in hardware. 430 if (Fast) 431 *Fast = true; 432 return true; 433 } 434 435 switch (SVT) { 436 case MVT::i64: 437 case MVT::i32: 438 if (Fast) 439 *Fast = true; 440 return true; 441 default: 442 return false; 443 } 444 } 445 446 SDValue MipsSETargetLowering::LowerOperation(SDValue Op, 447 SelectionDAG &DAG) const { 448 switch(Op.getOpcode()) { 449 case ISD::LOAD: return lowerLOAD(Op, DAG); 450 case ISD::STORE: return lowerSTORE(Op, DAG); 451 case ISD::SMUL_LOHI: return lowerMulDiv(Op, MipsISD::Mult, true, true, DAG); 452 case ISD::UMUL_LOHI: return lowerMulDiv(Op, MipsISD::Multu, true, true, DAG); 453 case ISD::MULHS: return lowerMulDiv(Op, MipsISD::Mult, false, true, DAG); 454 case ISD::MULHU: return lowerMulDiv(Op, MipsISD::Multu, false, true, DAG); 455 case ISD::MUL: return lowerMulDiv(Op, MipsISD::Mult, true, false, DAG); 456 case ISD::SDIVREM: return lowerMulDiv(Op, MipsISD::DivRem, true, true, DAG); 457 case ISD::UDIVREM: return lowerMulDiv(Op, MipsISD::DivRemU, true, true, 458 DAG); 459 case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG); 460 case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG); 461 case ISD::INTRINSIC_VOID: return lowerINTRINSIC_VOID(Op, DAG); 462 case ISD::EXTRACT_VECTOR_ELT: return lowerEXTRACT_VECTOR_ELT(Op, DAG); 463 case ISD::BUILD_VECTOR: return lowerBUILD_VECTOR(Op, DAG); 464 case ISD::VECTOR_SHUFFLE: return lowerVECTOR_SHUFFLE(Op, DAG); 465 case ISD::SELECT: return lowerSELECT(Op, DAG); 466 } 467 468 return MipsTargetLowering::LowerOperation(Op, DAG); 469 } 470 471 // Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT 472 // 473 // Performs the following transformations: 474 // - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its 475 // sign/zero-extension is completely overwritten by the new one performed by 476 // the ISD::AND. 477 // - Removes redundant zero extensions performed by an ISD::AND. 478 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, 479 TargetLowering::DAGCombinerInfo &DCI, 480 const MipsSubtarget &Subtarget) { 481 if (!Subtarget.hasMSA()) 482 return SDValue(); 483 484 SDValue Op0 = N->getOperand(0); 485 SDValue Op1 = N->getOperand(1); 486 unsigned Op0Opcode = Op0->getOpcode(); 487 488 // (and (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d) 489 // where $d + 1 == 2^n and n == 32 490 // or $d + 1 == 2^n and n <= 32 and ZExt 491 // -> (MipsVExtractZExt $a, $b, $c) 492 if (Op0Opcode == MipsISD::VEXTRACT_SEXT_ELT || 493 Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT) { 494 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Op1); 495 496 if (!Mask) 497 return SDValue(); 498 499 int32_t Log2IfPositive = (Mask->getAPIntValue() + 1).exactLogBase2(); 500 501 if (Log2IfPositive <= 0) 502 return SDValue(); // Mask+1 is not a power of 2 503 504 SDValue Op0Op2 = Op0->getOperand(2); 505 EVT ExtendTy = cast<VTSDNode>(Op0Op2)->getVT(); 506 unsigned ExtendTySize = ExtendTy.getSizeInBits(); 507 unsigned Log2 = Log2IfPositive; 508 509 if ((Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT && Log2 >= ExtendTySize) || 510 Log2 == ExtendTySize) { 511 SDValue Ops[] = { Op0->getOperand(0), Op0->getOperand(1), Op0Op2 }; 512 return DAG.getNode(MipsISD::VEXTRACT_ZEXT_ELT, SDLoc(Op0), 513 Op0->getVTList(), 514 makeArrayRef(Ops, Op0->getNumOperands())); 515 } 516 } 517 518 return SDValue(); 519 } 520 521 // Determine if the specified node is a constant vector splat. 522 // 523 // Returns true and sets Imm if: 524 // * N is a ISD::BUILD_VECTOR representing a constant splat 525 // 526 // This function is quite similar to MipsSEDAGToDAGISel::selectVSplat. The 527 // differences are that it assumes the MSA has already been checked and the 528 // arbitrary requirement for a maximum of 32-bit integers isn't applied (and 529 // must not be in order for binsri.d to be selectable). 530 static bool isVSplat(SDValue N, APInt &Imm, bool IsLittleEndian) { 531 BuildVectorSDNode *Node = dyn_cast<BuildVectorSDNode>(N.getNode()); 532 533 if (!Node) 534 return false; 535 536 APInt SplatValue, SplatUndef; 537 unsigned SplatBitSize; 538 bool HasAnyUndefs; 539 540 if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, 541 8, !IsLittleEndian)) 542 return false; 543 544 Imm = SplatValue; 545 546 return true; 547 } 548 549 // Test whether the given node is an all-ones build_vector. 550 static bool isVectorAllOnes(SDValue N) { 551 // Look through bitcasts. Endianness doesn't matter because we are looking 552 // for an all-ones value. 553 if (N->getOpcode() == ISD::BITCAST) 554 N = N->getOperand(0); 555 556 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N); 557 558 if (!BVN) 559 return false; 560 561 APInt SplatValue, SplatUndef; 562 unsigned SplatBitSize; 563 bool HasAnyUndefs; 564 565 // Endianness doesn't matter in this context because we are looking for 566 // an all-ones value. 567 if (BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs)) 568 return SplatValue.isAllOnesValue(); 569 570 return false; 571 } 572 573 // Test whether N is the bitwise inverse of OfNode. 574 static bool isBitwiseInverse(SDValue N, SDValue OfNode) { 575 if (N->getOpcode() != ISD::XOR) 576 return false; 577 578 if (isVectorAllOnes(N->getOperand(0))) 579 return N->getOperand(1) == OfNode; 580 581 if (isVectorAllOnes(N->getOperand(1))) 582 return N->getOperand(0) == OfNode; 583 584 return false; 585 } 586 587 // Perform combines where ISD::OR is the root node. 588 // 589 // Performs the following transformations: 590 // - (or (and $a, $mask), (and $b, $inv_mask)) => (vselect $mask, $a, $b) 591 // where $inv_mask is the bitwise inverse of $mask and the 'or' has a 128-bit 592 // vector type. 593 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, 594 TargetLowering::DAGCombinerInfo &DCI, 595 const MipsSubtarget &Subtarget) { 596 if (!Subtarget.hasMSA()) 597 return SDValue(); 598 599 EVT Ty = N->getValueType(0); 600 601 if (!Ty.is128BitVector()) 602 return SDValue(); 603 604 SDValue Op0 = N->getOperand(0); 605 SDValue Op1 = N->getOperand(1); 606 607 if (Op0->getOpcode() == ISD::AND && Op1->getOpcode() == ISD::AND) { 608 SDValue Op0Op0 = Op0->getOperand(0); 609 SDValue Op0Op1 = Op0->getOperand(1); 610 SDValue Op1Op0 = Op1->getOperand(0); 611 SDValue Op1Op1 = Op1->getOperand(1); 612 bool IsLittleEndian = !Subtarget.isLittle(); 613 614 SDValue IfSet, IfClr, Cond; 615 bool IsConstantMask = false; 616 APInt Mask, InvMask; 617 618 // If Op0Op0 is an appropriate mask, try to find it's inverse in either 619 // Op1Op0, or Op1Op1. Keep track of the Cond, IfSet, and IfClr nodes, while 620 // looking. 621 // IfClr will be set if we find a valid match. 622 if (isVSplat(Op0Op0, Mask, IsLittleEndian)) { 623 Cond = Op0Op0; 624 IfSet = Op0Op1; 625 626 if (isVSplat(Op1Op0, InvMask, IsLittleEndian) && 627 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) 628 IfClr = Op1Op1; 629 else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) && 630 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) 631 IfClr = Op1Op0; 632 633 IsConstantMask = true; 634 } 635 636 // If IfClr is not yet set, and Op0Op1 is an appropriate mask, try the same 637 // thing again using this mask. 638 // IfClr will be set if we find a valid match. 639 if (!IfClr.getNode() && isVSplat(Op0Op1, Mask, IsLittleEndian)) { 640 Cond = Op0Op1; 641 IfSet = Op0Op0; 642 643 if (isVSplat(Op1Op0, InvMask, IsLittleEndian) && 644 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) 645 IfClr = Op1Op1; 646 else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) && 647 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) 648 IfClr = Op1Op0; 649 650 IsConstantMask = true; 651 } 652 653 // If IfClr is not yet set, try looking for a non-constant match. 654 // IfClr will be set if we find a valid match amongst the eight 655 // possibilities. 656 if (!IfClr.getNode()) { 657 if (isBitwiseInverse(Op0Op0, Op1Op0)) { 658 Cond = Op1Op0; 659 IfSet = Op1Op1; 660 IfClr = Op0Op1; 661 } else if (isBitwiseInverse(Op0Op1, Op1Op0)) { 662 Cond = Op1Op0; 663 IfSet = Op1Op1; 664 IfClr = Op0Op0; 665 } else if (isBitwiseInverse(Op0Op0, Op1Op1)) { 666 Cond = Op1Op1; 667 IfSet = Op1Op0; 668 IfClr = Op0Op1; 669 } else if (isBitwiseInverse(Op0Op1, Op1Op1)) { 670 Cond = Op1Op1; 671 IfSet = Op1Op0; 672 IfClr = Op0Op0; 673 } else if (isBitwiseInverse(Op1Op0, Op0Op0)) { 674 Cond = Op0Op0; 675 IfSet = Op0Op1; 676 IfClr = Op1Op1; 677 } else if (isBitwiseInverse(Op1Op1, Op0Op0)) { 678 Cond = Op0Op0; 679 IfSet = Op0Op1; 680 IfClr = Op1Op0; 681 } else if (isBitwiseInverse(Op1Op0, Op0Op1)) { 682 Cond = Op0Op1; 683 IfSet = Op0Op0; 684 IfClr = Op1Op1; 685 } else if (isBitwiseInverse(Op1Op1, Op0Op1)) { 686 Cond = Op0Op1; 687 IfSet = Op0Op0; 688 IfClr = Op1Op0; 689 } 690 } 691 692 // At this point, IfClr will be set if we have a valid match. 693 if (!IfClr.getNode()) 694 return SDValue(); 695 696 assert(Cond.getNode() && IfSet.getNode()); 697 698 // Fold degenerate cases. 699 if (IsConstantMask) { 700 if (Mask.isAllOnesValue()) 701 return IfSet; 702 else if (Mask == 0) 703 return IfClr; 704 } 705 706 // Transform the DAG into an equivalent VSELECT. 707 return DAG.getNode(ISD::VSELECT, SDLoc(N), Ty, Cond, IfSet, IfClr); 708 } 709 710 return SDValue(); 711 } 712 713 static bool shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT, 714 SelectionDAG &DAG, 715 const MipsSubtarget &Subtarget) { 716 // Estimate the number of operations the below transform will turn a 717 // constant multiply into. The number is approximately how many powers 718 // of two summed together that the constant can be broken down into. 719 720 SmallVector<APInt, 16> WorkStack(1, C); 721 unsigned Steps = 0; 722 unsigned BitWidth = C.getBitWidth(); 723 724 while (!WorkStack.empty()) { 725 APInt Val = WorkStack.pop_back_val(); 726 727 if (Val == 0 || Val == 1) 728 continue; 729 730 if (Val.isPowerOf2()) { 731 ++Steps; 732 continue; 733 } 734 735 APInt Floor = APInt(BitWidth, 1) << Val.logBase2(); 736 APInt Ceil = Val.isNegative() ? APInt(BitWidth, 0) 737 : APInt(BitWidth, 1) << C.ceilLogBase2(); 738 739 if ((Val - Floor).ule(Ceil - Val)) { 740 WorkStack.push_back(Floor); 741 WorkStack.push_back(Val - Floor); 742 ++Steps; 743 continue; 744 } 745 746 WorkStack.push_back(Ceil); 747 WorkStack.push_back(Ceil - Val); 748 ++Steps; 749 750 // If we have taken more than 12[1] / 8[2] steps to attempt the 751 // optimization for a native sized value, it is more than likely that this 752 // optimization will make things worse. 753 // 754 // [1] MIPS64 requires 6 instructions at most to materialize any constant, 755 // multiplication requires at least 4 cycles, but another cycle (or two) 756 // to retrieve the result from the HI/LO registers. 757 // 758 // [2] For MIPS32, more than 8 steps is expensive as the constant could be 759 // materialized in 2 instructions, multiplication requires at least 4 760 // cycles, but another cycle (or two) to retrieve the result from the 761 // HI/LO registers. 762 763 if (Steps > 12 && (Subtarget.isABI_N32() || Subtarget.isABI_N64())) 764 return false; 765 766 if (Steps > 8 && Subtarget.isABI_O32()) 767 return false; 768 } 769 770 // If the value being multiplied is not supported natively, we have to pay 771 // an additional legalization cost, conservatively assume an increase in the 772 // cost of 3 instructions per step. This values for this heuristic were 773 // determined experimentally. 774 unsigned RegisterSize = DAG.getTargetLoweringInfo() 775 .getRegisterType(*DAG.getContext(), VT) 776 .getSizeInBits(); 777 Steps *= (VT.getSizeInBits() != RegisterSize) * 3; 778 if (Steps > 27) 779 return false; 780 781 return true; 782 } 783 784 static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT, 785 EVT ShiftTy, SelectionDAG &DAG) { 786 // Return 0. 787 if (C == 0) 788 return DAG.getConstant(0, DL, VT); 789 790 // Return x. 791 if (C == 1) 792 return X; 793 794 // If c is power of 2, return (shl x, log2(c)). 795 if (C.isPowerOf2()) 796 return DAG.getNode(ISD::SHL, DL, VT, X, 797 DAG.getConstant(C.logBase2(), DL, ShiftTy)); 798 799 unsigned BitWidth = C.getBitWidth(); 800 APInt Floor = APInt(BitWidth, 1) << C.logBase2(); 801 APInt Ceil = C.isNegative() ? APInt(BitWidth, 0) : 802 APInt(BitWidth, 1) << C.ceilLogBase2(); 803 804 // If |c - floor_c| <= |c - ceil_c|, 805 // where floor_c = pow(2, floor(log2(c))) and ceil_c = pow(2, ceil(log2(c))), 806 // return (add constMult(x, floor_c), constMult(x, c - floor_c)). 807 if ((C - Floor).ule(Ceil - C)) { 808 SDValue Op0 = genConstMult(X, Floor, DL, VT, ShiftTy, DAG); 809 SDValue Op1 = genConstMult(X, C - Floor, DL, VT, ShiftTy, DAG); 810 return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1); 811 } 812 813 // If |c - floor_c| > |c - ceil_c|, 814 // return (sub constMult(x, ceil_c), constMult(x, ceil_c - c)). 815 SDValue Op0 = genConstMult(X, Ceil, DL, VT, ShiftTy, DAG); 816 SDValue Op1 = genConstMult(X, Ceil - C, DL, VT, ShiftTy, DAG); 817 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1); 818 } 819 820 static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, 821 const TargetLowering::DAGCombinerInfo &DCI, 822 const MipsSETargetLowering *TL, 823 const MipsSubtarget &Subtarget) { 824 EVT VT = N->getValueType(0); 825 826 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) 827 if (!VT.isVector() && shouldTransformMulToShiftsAddsSubs( 828 C->getAPIntValue(), VT, DAG, Subtarget)) 829 return genConstMult(N->getOperand(0), C->getAPIntValue(), SDLoc(N), VT, 830 TL->getScalarShiftAmountTy(DAG.getDataLayout(), VT), 831 DAG); 832 833 return SDValue(N, 0); 834 } 835 836 static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty, 837 SelectionDAG &DAG, 838 const MipsSubtarget &Subtarget) { 839 // See if this is a vector splat immediate node. 840 APInt SplatValue, SplatUndef; 841 unsigned SplatBitSize; 842 bool HasAnyUndefs; 843 unsigned EltSize = Ty.getScalarSizeInBits(); 844 BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); 845 846 if (!Subtarget.hasDSP()) 847 return SDValue(); 848 849 if (!BV || 850 !BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, 851 EltSize, !Subtarget.isLittle()) || 852 (SplatBitSize != EltSize) || 853 (SplatValue.getZExtValue() >= EltSize)) 854 return SDValue(); 855 856 SDLoc DL(N); 857 return DAG.getNode(Opc, DL, Ty, N->getOperand(0), 858 DAG.getConstant(SplatValue.getZExtValue(), DL, MVT::i32)); 859 } 860 861 static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG, 862 TargetLowering::DAGCombinerInfo &DCI, 863 const MipsSubtarget &Subtarget) { 864 EVT Ty = N->getValueType(0); 865 866 if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8)) 867 return SDValue(); 868 869 return performDSPShiftCombine(MipsISD::SHLL_DSP, N, Ty, DAG, Subtarget); 870 } 871 872 // Fold sign-extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT for MSA and fold 873 // constant splats into MipsISD::SHRA_DSP for DSPr2. 874 // 875 // Performs the following transformations: 876 // - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to sign extension if its 877 // sign/zero-extension is completely overwritten by the new one performed by 878 // the ISD::SRA and ISD::SHL nodes. 879 // - Removes redundant sign extensions performed by an ISD::SRA and ISD::SHL 880 // sequence. 881 // 882 // See performDSPShiftCombine for more information about the transformation 883 // used for DSPr2. 884 static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, 885 TargetLowering::DAGCombinerInfo &DCI, 886 const MipsSubtarget &Subtarget) { 887 EVT Ty = N->getValueType(0); 888 889 if (Subtarget.hasMSA()) { 890 SDValue Op0 = N->getOperand(0); 891 SDValue Op1 = N->getOperand(1); 892 893 // (sra (shl (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d), imm:$d) 894 // where $d + sizeof($c) == 32 895 // or $d + sizeof($c) <= 32 and SExt 896 // -> (MipsVExtractSExt $a, $b, $c) 897 if (Op0->getOpcode() == ISD::SHL && Op1 == Op0->getOperand(1)) { 898 SDValue Op0Op0 = Op0->getOperand(0); 899 ConstantSDNode *ShAmount = dyn_cast<ConstantSDNode>(Op1); 900 901 if (!ShAmount) 902 return SDValue(); 903 904 if (Op0Op0->getOpcode() != MipsISD::VEXTRACT_SEXT_ELT && 905 Op0Op0->getOpcode() != MipsISD::VEXTRACT_ZEXT_ELT) 906 return SDValue(); 907 908 EVT ExtendTy = cast<VTSDNode>(Op0Op0->getOperand(2))->getVT(); 909 unsigned TotalBits = ShAmount->getZExtValue() + ExtendTy.getSizeInBits(); 910 911 if (TotalBits == 32 || 912 (Op0Op0->getOpcode() == MipsISD::VEXTRACT_SEXT_ELT && 913 TotalBits <= 32)) { 914 SDValue Ops[] = { Op0Op0->getOperand(0), Op0Op0->getOperand(1), 915 Op0Op0->getOperand(2) }; 916 return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, SDLoc(Op0Op0), 917 Op0Op0->getVTList(), 918 makeArrayRef(Ops, Op0Op0->getNumOperands())); 919 } 920 } 921 } 922 923 if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget.hasDSPR2())) 924 return SDValue(); 925 926 return performDSPShiftCombine(MipsISD::SHRA_DSP, N, Ty, DAG, Subtarget); 927 } 928 929 930 static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, 931 TargetLowering::DAGCombinerInfo &DCI, 932 const MipsSubtarget &Subtarget) { 933 EVT Ty = N->getValueType(0); 934 935 if (((Ty != MVT::v2i16) || !Subtarget.hasDSPR2()) && (Ty != MVT::v4i8)) 936 return SDValue(); 937 938 return performDSPShiftCombine(MipsISD::SHRL_DSP, N, Ty, DAG, Subtarget); 939 } 940 941 static bool isLegalDSPCondCode(EVT Ty, ISD::CondCode CC) { 942 bool IsV216 = (Ty == MVT::v2i16); 943 944 switch (CC) { 945 case ISD::SETEQ: 946 case ISD::SETNE: return true; 947 case ISD::SETLT: 948 case ISD::SETLE: 949 case ISD::SETGT: 950 case ISD::SETGE: return IsV216; 951 case ISD::SETULT: 952 case ISD::SETULE: 953 case ISD::SETUGT: 954 case ISD::SETUGE: return !IsV216; 955 default: return false; 956 } 957 } 958 959 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) { 960 EVT Ty = N->getValueType(0); 961 962 if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8)) 963 return SDValue(); 964 965 if (!isLegalDSPCondCode(Ty, cast<CondCodeSDNode>(N->getOperand(2))->get())) 966 return SDValue(); 967 968 return DAG.getNode(MipsISD::SETCC_DSP, SDLoc(N), Ty, N->getOperand(0), 969 N->getOperand(1), N->getOperand(2)); 970 } 971 972 static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) { 973 EVT Ty = N->getValueType(0); 974 975 if (Ty == MVT::v2i16 || Ty == MVT::v4i8) { 976 SDValue SetCC = N->getOperand(0); 977 978 if (SetCC.getOpcode() != MipsISD::SETCC_DSP) 979 return SDValue(); 980 981 return DAG.getNode(MipsISD::SELECT_CC_DSP, SDLoc(N), Ty, 982 SetCC.getOperand(0), SetCC.getOperand(1), 983 N->getOperand(1), N->getOperand(2), SetCC.getOperand(2)); 984 } 985 986 return SDValue(); 987 } 988 989 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, 990 const MipsSubtarget &Subtarget) { 991 EVT Ty = N->getValueType(0); 992 993 if (Subtarget.hasMSA() && Ty.is128BitVector() && Ty.isInteger()) { 994 // Try the following combines: 995 // (xor (or $a, $b), (build_vector allones)) 996 // (xor (or $a, $b), (bitcast (build_vector allones))) 997 SDValue Op0 = N->getOperand(0); 998 SDValue Op1 = N->getOperand(1); 999 SDValue NotOp; 1000 1001 if (ISD::isBuildVectorAllOnes(Op0.getNode())) 1002 NotOp = Op1; 1003 else if (ISD::isBuildVectorAllOnes(Op1.getNode())) 1004 NotOp = Op0; 1005 else 1006 return SDValue(); 1007 1008 if (NotOp->getOpcode() == ISD::OR) 1009 return DAG.getNode(MipsISD::VNOR, SDLoc(N), Ty, NotOp->getOperand(0), 1010 NotOp->getOperand(1)); 1011 } 1012 1013 return SDValue(); 1014 } 1015 1016 SDValue 1017 MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { 1018 SelectionDAG &DAG = DCI.DAG; 1019 SDValue Val; 1020 1021 switch (N->getOpcode()) { 1022 case ISD::AND: 1023 Val = performANDCombine(N, DAG, DCI, Subtarget); 1024 break; 1025 case ISD::OR: 1026 Val = performORCombine(N, DAG, DCI, Subtarget); 1027 break; 1028 case ISD::MUL: 1029 return performMULCombine(N, DAG, DCI, this, Subtarget); 1030 case ISD::SHL: 1031 Val = performSHLCombine(N, DAG, DCI, Subtarget); 1032 break; 1033 case ISD::SRA: 1034 return performSRACombine(N, DAG, DCI, Subtarget); 1035 case ISD::SRL: 1036 return performSRLCombine(N, DAG, DCI, Subtarget); 1037 case ISD::VSELECT: 1038 return performVSELECTCombine(N, DAG); 1039 case ISD::XOR: 1040 Val = performXORCombine(N, DAG, Subtarget); 1041 break; 1042 case ISD::SETCC: 1043 Val = performSETCCCombine(N, DAG); 1044 break; 1045 } 1046 1047 if (Val.getNode()) { 1048 LLVM_DEBUG(dbgs() << "\nMipsSE DAG Combine:\n"; 1049 N->printrWithDepth(dbgs(), &DAG); dbgs() << "\n=> \n"; 1050 Val.getNode()->printrWithDepth(dbgs(), &DAG); dbgs() << "\n"); 1051 return Val; 1052 } 1053 1054 return MipsTargetLowering::PerformDAGCombine(N, DCI); 1055 } 1056 1057 MachineBasicBlock * 1058 MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, 1059 MachineBasicBlock *BB) const { 1060 switch (MI.getOpcode()) { 1061 default: 1062 return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB); 1063 case Mips::BPOSGE32_PSEUDO: 1064 return emitBPOSGE32(MI, BB); 1065 case Mips::SNZ_B_PSEUDO: 1066 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_B); 1067 case Mips::SNZ_H_PSEUDO: 1068 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_H); 1069 case Mips::SNZ_W_PSEUDO: 1070 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_W); 1071 case Mips::SNZ_D_PSEUDO: 1072 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_D); 1073 case Mips::SNZ_V_PSEUDO: 1074 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_V); 1075 case Mips::SZ_B_PSEUDO: 1076 return emitMSACBranchPseudo(MI, BB, Mips::BZ_B); 1077 case Mips::SZ_H_PSEUDO: 1078 return emitMSACBranchPseudo(MI, BB, Mips::BZ_H); 1079 case Mips::SZ_W_PSEUDO: 1080 return emitMSACBranchPseudo(MI, BB, Mips::BZ_W); 1081 case Mips::SZ_D_PSEUDO: 1082 return emitMSACBranchPseudo(MI, BB, Mips::BZ_D); 1083 case Mips::SZ_V_PSEUDO: 1084 return emitMSACBranchPseudo(MI, BB, Mips::BZ_V); 1085 case Mips::COPY_FW_PSEUDO: 1086 return emitCOPY_FW(MI, BB); 1087 case Mips::COPY_FD_PSEUDO: 1088 return emitCOPY_FD(MI, BB); 1089 case Mips::INSERT_FW_PSEUDO: 1090 return emitINSERT_FW(MI, BB); 1091 case Mips::INSERT_FD_PSEUDO: 1092 return emitINSERT_FD(MI, BB); 1093 case Mips::INSERT_B_VIDX_PSEUDO: 1094 case Mips::INSERT_B_VIDX64_PSEUDO: 1095 return emitINSERT_DF_VIDX(MI, BB, 1, false); 1096 case Mips::INSERT_H_VIDX_PSEUDO: 1097 case Mips::INSERT_H_VIDX64_PSEUDO: 1098 return emitINSERT_DF_VIDX(MI, BB, 2, false); 1099 case Mips::INSERT_W_VIDX_PSEUDO: 1100 case Mips::INSERT_W_VIDX64_PSEUDO: 1101 return emitINSERT_DF_VIDX(MI, BB, 4, false); 1102 case Mips::INSERT_D_VIDX_PSEUDO: 1103 case Mips::INSERT_D_VIDX64_PSEUDO: 1104 return emitINSERT_DF_VIDX(MI, BB, 8, false); 1105 case Mips::INSERT_FW_VIDX_PSEUDO: 1106 case Mips::INSERT_FW_VIDX64_PSEUDO: 1107 return emitINSERT_DF_VIDX(MI, BB, 4, true); 1108 case Mips::INSERT_FD_VIDX_PSEUDO: 1109 case Mips::INSERT_FD_VIDX64_PSEUDO: 1110 return emitINSERT_DF_VIDX(MI, BB, 8, true); 1111 case Mips::FILL_FW_PSEUDO: 1112 return emitFILL_FW(MI, BB); 1113 case Mips::FILL_FD_PSEUDO: 1114 return emitFILL_FD(MI, BB); 1115 case Mips::FEXP2_W_1_PSEUDO: 1116 return emitFEXP2_W_1(MI, BB); 1117 case Mips::FEXP2_D_1_PSEUDO: 1118 return emitFEXP2_D_1(MI, BB); 1119 case Mips::ST_F16: 1120 return emitST_F16_PSEUDO(MI, BB); 1121 case Mips::LD_F16: 1122 return emitLD_F16_PSEUDO(MI, BB); 1123 case Mips::MSA_FP_EXTEND_W_PSEUDO: 1124 return emitFPEXTEND_PSEUDO(MI, BB, false); 1125 case Mips::MSA_FP_ROUND_W_PSEUDO: 1126 return emitFPROUND_PSEUDO(MI, BB, false); 1127 case Mips::MSA_FP_EXTEND_D_PSEUDO: 1128 return emitFPEXTEND_PSEUDO(MI, BB, true); 1129 case Mips::MSA_FP_ROUND_D_PSEUDO: 1130 return emitFPROUND_PSEUDO(MI, BB, true); 1131 } 1132 } 1133 1134 bool MipsSETargetLowering::isEligibleForTailCallOptimization( 1135 const CCState &CCInfo, unsigned NextStackOffset, 1136 const MipsFunctionInfo &FI) const { 1137 if (!UseMipsTailCalls) 1138 return false; 1139 1140 // Exception has to be cleared with eret. 1141 if (FI.isISR()) 1142 return false; 1143 1144 // Return false if either the callee or caller has a byval argument. 1145 if (CCInfo.getInRegsParamsCount() > 0 || FI.hasByvalArg()) 1146 return false; 1147 1148 // Return true if the callee's argument area is no larger than the 1149 // caller's. 1150 return NextStackOffset <= FI.getIncomingArgSize(); 1151 } 1152 1153 void MipsSETargetLowering:: 1154 getOpndList(SmallVectorImpl<SDValue> &Ops, 1155 std::deque<std::pair<unsigned, SDValue>> &RegsToPass, 1156 bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, 1157 bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee, 1158 SDValue Chain) const { 1159 Ops.push_back(Callee); 1160 MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal, 1161 InternalLinkage, IsCallReloc, CLI, Callee, 1162 Chain); 1163 } 1164 1165 SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const { 1166 LoadSDNode &Nd = *cast<LoadSDNode>(Op); 1167 1168 if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore) 1169 return MipsTargetLowering::lowerLOAD(Op, DAG); 1170 1171 // Replace a double precision load with two i32 loads and a buildpair64. 1172 SDLoc DL(Op); 1173 SDValue Ptr = Nd.getBasePtr(), Chain = Nd.getChain(); 1174 EVT PtrVT = Ptr.getValueType(); 1175 1176 // i32 load from lower address. 1177 SDValue Lo = DAG.getLoad(MVT::i32, DL, Chain, Ptr, MachinePointerInfo(), 1178 Nd.getAlignment(), Nd.getMemOperand()->getFlags()); 1179 1180 // i32 load from higher address. 1181 Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT)); 1182 SDValue Hi = DAG.getLoad( 1183 MVT::i32, DL, Lo.getValue(1), Ptr, MachinePointerInfo(), 1184 std::min(Nd.getAlignment(), 4U), Nd.getMemOperand()->getFlags()); 1185 1186 if (!Subtarget.isLittle()) 1187 std::swap(Lo, Hi); 1188 1189 SDValue BP = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi); 1190 SDValue Ops[2] = {BP, Hi.getValue(1)}; 1191 return DAG.getMergeValues(Ops, DL); 1192 } 1193 1194 SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const { 1195 StoreSDNode &Nd = *cast<StoreSDNode>(Op); 1196 1197 if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore) 1198 return MipsTargetLowering::lowerSTORE(Op, DAG); 1199 1200 // Replace a double precision store with two extractelement64s and i32 stores. 1201 SDLoc DL(Op); 1202 SDValue Val = Nd.getValue(), Ptr = Nd.getBasePtr(), Chain = Nd.getChain(); 1203 EVT PtrVT = Ptr.getValueType(); 1204 SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, 1205 Val, DAG.getConstant(0, DL, MVT::i32)); 1206 SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, 1207 Val, DAG.getConstant(1, DL, MVT::i32)); 1208 1209 if (!Subtarget.isLittle()) 1210 std::swap(Lo, Hi); 1211 1212 // i32 store to lower address. 1213 Chain = 1214 DAG.getStore(Chain, DL, Lo, Ptr, MachinePointerInfo(), Nd.getAlignment(), 1215 Nd.getMemOperand()->getFlags(), Nd.getAAInfo()); 1216 1217 // i32 store to higher address. 1218 Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT)); 1219 return DAG.getStore(Chain, DL, Hi, Ptr, MachinePointerInfo(), 1220 std::min(Nd.getAlignment(), 4U), 1221 Nd.getMemOperand()->getFlags(), Nd.getAAInfo()); 1222 } 1223 1224 SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc, 1225 bool HasLo, bool HasHi, 1226 SelectionDAG &DAG) const { 1227 // MIPS32r6/MIPS64r6 removed accumulator based multiplies. 1228 assert(!Subtarget.hasMips32r6()); 1229 1230 EVT Ty = Op.getOperand(0).getValueType(); 1231 SDLoc DL(Op); 1232 SDValue Mult = DAG.getNode(NewOpc, DL, MVT::Untyped, 1233 Op.getOperand(0), Op.getOperand(1)); 1234 SDValue Lo, Hi; 1235 1236 if (HasLo) 1237 Lo = DAG.getNode(MipsISD::MFLO, DL, Ty, Mult); 1238 if (HasHi) 1239 Hi = DAG.getNode(MipsISD::MFHI, DL, Ty, Mult); 1240 1241 if (!HasLo || !HasHi) 1242 return HasLo ? Lo : Hi; 1243 1244 SDValue Vals[] = { Lo, Hi }; 1245 return DAG.getMergeValues(Vals, DL); 1246 } 1247 1248 static SDValue initAccumulator(SDValue In, const SDLoc &DL, SelectionDAG &DAG) { 1249 SDValue InLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In, 1250 DAG.getConstant(0, DL, MVT::i32)); 1251 SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In, 1252 DAG.getConstant(1, DL, MVT::i32)); 1253 return DAG.getNode(MipsISD::MTLOHI, DL, MVT::Untyped, InLo, InHi); 1254 } 1255 1256 static SDValue extractLOHI(SDValue Op, const SDLoc &DL, SelectionDAG &DAG) { 1257 SDValue Lo = DAG.getNode(MipsISD::MFLO, DL, MVT::i32, Op); 1258 SDValue Hi = DAG.getNode(MipsISD::MFHI, DL, MVT::i32, Op); 1259 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi); 1260 } 1261 1262 // This function expands mips intrinsic nodes which have 64-bit input operands 1263 // or output values. 1264 // 1265 // out64 = intrinsic-node in64 1266 // => 1267 // lo = copy (extract-element (in64, 0)) 1268 // hi = copy (extract-element (in64, 1)) 1269 // mips-specific-node 1270 // v0 = copy lo 1271 // v1 = copy hi 1272 // out64 = merge-values (v0, v1) 1273 // 1274 static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { 1275 SDLoc DL(Op); 1276 bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other; 1277 SmallVector<SDValue, 3> Ops; 1278 unsigned OpNo = 0; 1279 1280 // See if Op has a chain input. 1281 if (HasChainIn) 1282 Ops.push_back(Op->getOperand(OpNo++)); 1283 1284 // The next operand is the intrinsic opcode. 1285 assert(Op->getOperand(OpNo).getOpcode() == ISD::TargetConstant); 1286 1287 // See if the next operand has type i64. 1288 SDValue Opnd = Op->getOperand(++OpNo), In64; 1289 1290 if (Opnd.getValueType() == MVT::i64) 1291 In64 = initAccumulator(Opnd, DL, DAG); 1292 else 1293 Ops.push_back(Opnd); 1294 1295 // Push the remaining operands. 1296 for (++OpNo ; OpNo < Op->getNumOperands(); ++OpNo) 1297 Ops.push_back(Op->getOperand(OpNo)); 1298 1299 // Add In64 to the end of the list. 1300 if (In64.getNode()) 1301 Ops.push_back(In64); 1302 1303 // Scan output. 1304 SmallVector<EVT, 2> ResTys; 1305 1306 for (SDNode::value_iterator I = Op->value_begin(), E = Op->value_end(); 1307 I != E; ++I) 1308 ResTys.push_back((*I == MVT::i64) ? MVT::Untyped : *I); 1309 1310 // Create node. 1311 SDValue Val = DAG.getNode(Opc, DL, ResTys, Ops); 1312 SDValue Out = (ResTys[0] == MVT::Untyped) ? extractLOHI(Val, DL, DAG) : Val; 1313 1314 if (!HasChainIn) 1315 return Out; 1316 1317 assert(Val->getValueType(1) == MVT::Other); 1318 SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) }; 1319 return DAG.getMergeValues(Vals, DL); 1320 } 1321 1322 // Lower an MSA copy intrinsic into the specified SelectionDAG node 1323 static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { 1324 SDLoc DL(Op); 1325 SDValue Vec = Op->getOperand(1); 1326 SDValue Idx = Op->getOperand(2); 1327 EVT ResTy = Op->getValueType(0); 1328 EVT EltTy = Vec->getValueType(0).getVectorElementType(); 1329 1330 SDValue Result = DAG.getNode(Opc, DL, ResTy, Vec, Idx, 1331 DAG.getValueType(EltTy)); 1332 1333 return Result; 1334 } 1335 1336 static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) { 1337 EVT ResVecTy = Op->getValueType(0); 1338 EVT ViaVecTy = ResVecTy; 1339 bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian(); 1340 SDLoc DL(Op); 1341 1342 // When ResVecTy == MVT::v2i64, LaneA is the upper 32 bits of the lane and 1343 // LaneB is the lower 32-bits. Otherwise LaneA and LaneB are alternating 1344 // lanes. 1345 SDValue LaneA = Op->getOperand(OpNr); 1346 SDValue LaneB; 1347 1348 if (ResVecTy == MVT::v2i64) { 1349 // In case of the index being passed as an immediate value, set the upper 1350 // lane to 0 so that the splati.d instruction can be matched. 1351 if (isa<ConstantSDNode>(LaneA)) 1352 LaneB = DAG.getConstant(0, DL, MVT::i32); 1353 // Having the index passed in a register, set the upper lane to the same 1354 // value as the lower - this results in the BUILD_VECTOR node not being 1355 // expanded through stack. This way we are able to pattern match the set of 1356 // nodes created here to splat.d. 1357 else 1358 LaneB = LaneA; 1359 ViaVecTy = MVT::v4i32; 1360 if(BigEndian) 1361 std::swap(LaneA, LaneB); 1362 } else 1363 LaneB = LaneA; 1364 1365 SDValue Ops[16] = { LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, 1366 LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB }; 1367 1368 SDValue Result = DAG.getBuildVector( 1369 ViaVecTy, DL, makeArrayRef(Ops, ViaVecTy.getVectorNumElements())); 1370 1371 if (ViaVecTy != ResVecTy) { 1372 SDValue One = DAG.getConstant(1, DL, ViaVecTy); 1373 Result = DAG.getNode(ISD::BITCAST, DL, ResVecTy, 1374 DAG.getNode(ISD::AND, DL, ViaVecTy, Result, One)); 1375 } 1376 1377 return Result; 1378 } 1379 1380 static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, 1381 bool IsSigned = false) { 1382 return DAG.getConstant( 1383 APInt(Op->getValueType(0).getScalarType().getSizeInBits(), 1384 Op->getConstantOperandVal(ImmOp), IsSigned), 1385 SDLoc(Op), Op->getValueType(0)); 1386 } 1387 1388 static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue, 1389 bool BigEndian, SelectionDAG &DAG) { 1390 EVT ViaVecTy = VecTy; 1391 SDValue SplatValueA = SplatValue; 1392 SDValue SplatValueB = SplatValue; 1393 SDLoc DL(SplatValue); 1394 1395 if (VecTy == MVT::v2i64) { 1396 // v2i64 BUILD_VECTOR must be performed via v4i32 so split into i32's. 1397 ViaVecTy = MVT::v4i32; 1398 1399 SplatValueA = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValue); 1400 SplatValueB = DAG.getNode(ISD::SRL, DL, MVT::i64, SplatValue, 1401 DAG.getConstant(32, DL, MVT::i32)); 1402 SplatValueB = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValueB); 1403 } 1404 1405 // We currently hold the parts in little endian order. Swap them if 1406 // necessary. 1407 if (BigEndian) 1408 std::swap(SplatValueA, SplatValueB); 1409 1410 SDValue Ops[16] = { SplatValueA, SplatValueB, SplatValueA, SplatValueB, 1411 SplatValueA, SplatValueB, SplatValueA, SplatValueB, 1412 SplatValueA, SplatValueB, SplatValueA, SplatValueB, 1413 SplatValueA, SplatValueB, SplatValueA, SplatValueB }; 1414 1415 SDValue Result = DAG.getBuildVector( 1416 ViaVecTy, DL, makeArrayRef(Ops, ViaVecTy.getVectorNumElements())); 1417 1418 if (VecTy != ViaVecTy) 1419 Result = DAG.getNode(ISD::BITCAST, DL, VecTy, Result); 1420 1421 return Result; 1422 } 1423 1424 static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG, 1425 unsigned Opc, SDValue Imm, 1426 bool BigEndian) { 1427 EVT VecTy = Op->getValueType(0); 1428 SDValue Exp2Imm; 1429 SDLoc DL(Op); 1430 1431 // The DAG Combiner can't constant fold bitcasted vectors yet so we must do it 1432 // here for now. 1433 if (VecTy == MVT::v2i64) { 1434 if (ConstantSDNode *CImm = dyn_cast<ConstantSDNode>(Imm)) { 1435 APInt BitImm = APInt(64, 1) << CImm->getAPIntValue(); 1436 1437 SDValue BitImmHiOp = DAG.getConstant(BitImm.lshr(32).trunc(32), DL, 1438 MVT::i32); 1439 SDValue BitImmLoOp = DAG.getConstant(BitImm.trunc(32), DL, MVT::i32); 1440 1441 if (BigEndian) 1442 std::swap(BitImmLoOp, BitImmHiOp); 1443 1444 Exp2Imm = DAG.getNode( 1445 ISD::BITCAST, DL, MVT::v2i64, 1446 DAG.getBuildVector(MVT::v4i32, DL, 1447 {BitImmLoOp, BitImmHiOp, BitImmLoOp, BitImmHiOp})); 1448 } 1449 } 1450 1451 if (!Exp2Imm.getNode()) { 1452 // We couldnt constant fold, do a vector shift instead 1453 1454 // Extend i32 to i64 if necessary. Sign or zero extend doesn't matter since 1455 // only values 0-63 are valid. 1456 if (VecTy == MVT::v2i64) 1457 Imm = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Imm); 1458 1459 Exp2Imm = getBuildVectorSplat(VecTy, Imm, BigEndian, DAG); 1460 1461 Exp2Imm = DAG.getNode(ISD::SHL, DL, VecTy, DAG.getConstant(1, DL, VecTy), 1462 Exp2Imm); 1463 } 1464 1465 return DAG.getNode(Opc, DL, VecTy, Op->getOperand(1), Exp2Imm); 1466 } 1467 1468 static SDValue truncateVecElts(SDValue Op, SelectionDAG &DAG) { 1469 SDLoc DL(Op); 1470 EVT ResTy = Op->getValueType(0); 1471 SDValue Vec = Op->getOperand(2); 1472 bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian(); 1473 MVT ResEltTy = ResTy == MVT::v2i64 ? MVT::i64 : MVT::i32; 1474 SDValue ConstValue = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, 1475 DL, ResEltTy); 1476 SDValue SplatVec = getBuildVectorSplat(ResTy, ConstValue, BigEndian, DAG); 1477 1478 return DAG.getNode(ISD::AND, DL, ResTy, Vec, SplatVec); 1479 } 1480 1481 static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) { 1482 EVT ResTy = Op->getValueType(0); 1483 SDLoc DL(Op); 1484 SDValue One = DAG.getConstant(1, DL, ResTy); 1485 SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Op, DAG)); 1486 1487 return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), 1488 DAG.getNOT(DL, Bit, ResTy)); 1489 } 1490 1491 static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) { 1492 SDLoc DL(Op); 1493 EVT ResTy = Op->getValueType(0); 1494 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) 1495 << cast<ConstantSDNode>(Op->getOperand(2))->getAPIntValue(); 1496 SDValue BitMask = DAG.getConstant(~BitImm, DL, ResTy); 1497 1498 return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), BitMask); 1499 } 1500 1501 SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, 1502 SelectionDAG &DAG) const { 1503 SDLoc DL(Op); 1504 unsigned Intrinsic = cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue(); 1505 switch (Intrinsic) { 1506 default: 1507 return SDValue(); 1508 case Intrinsic::mips_shilo: 1509 return lowerDSPIntr(Op, DAG, MipsISD::SHILO); 1510 case Intrinsic::mips_dpau_h_qbl: 1511 return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL); 1512 case Intrinsic::mips_dpau_h_qbr: 1513 return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR); 1514 case Intrinsic::mips_dpsu_h_qbl: 1515 return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL); 1516 case Intrinsic::mips_dpsu_h_qbr: 1517 return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR); 1518 case Intrinsic::mips_dpa_w_ph: 1519 return lowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH); 1520 case Intrinsic::mips_dps_w_ph: 1521 return lowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH); 1522 case Intrinsic::mips_dpax_w_ph: 1523 return lowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH); 1524 case Intrinsic::mips_dpsx_w_ph: 1525 return lowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH); 1526 case Intrinsic::mips_mulsa_w_ph: 1527 return lowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH); 1528 case Intrinsic::mips_mult: 1529 return lowerDSPIntr(Op, DAG, MipsISD::Mult); 1530 case Intrinsic::mips_multu: 1531 return lowerDSPIntr(Op, DAG, MipsISD::Multu); 1532 case Intrinsic::mips_madd: 1533 return lowerDSPIntr(Op, DAG, MipsISD::MAdd); 1534 case Intrinsic::mips_maddu: 1535 return lowerDSPIntr(Op, DAG, MipsISD::MAddu); 1536 case Intrinsic::mips_msub: 1537 return lowerDSPIntr(Op, DAG, MipsISD::MSub); 1538 case Intrinsic::mips_msubu: 1539 return lowerDSPIntr(Op, DAG, MipsISD::MSubu); 1540 case Intrinsic::mips_addv_b: 1541 case Intrinsic::mips_addv_h: 1542 case Intrinsic::mips_addv_w: 1543 case Intrinsic::mips_addv_d: 1544 return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1), 1545 Op->getOperand(2)); 1546 case Intrinsic::mips_addvi_b: 1547 case Intrinsic::mips_addvi_h: 1548 case Intrinsic::mips_addvi_w: 1549 case Intrinsic::mips_addvi_d: 1550 return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1), 1551 lowerMSASplatImm(Op, 2, DAG)); 1552 case Intrinsic::mips_and_v: 1553 return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1), 1554 Op->getOperand(2)); 1555 case Intrinsic::mips_andi_b: 1556 return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1), 1557 lowerMSASplatImm(Op, 2, DAG)); 1558 case Intrinsic::mips_bclr_b: 1559 case Intrinsic::mips_bclr_h: 1560 case Intrinsic::mips_bclr_w: 1561 case Intrinsic::mips_bclr_d: 1562 return lowerMSABitClear(Op, DAG); 1563 case Intrinsic::mips_bclri_b: 1564 case Intrinsic::mips_bclri_h: 1565 case Intrinsic::mips_bclri_w: 1566 case Intrinsic::mips_bclri_d: 1567 return lowerMSABitClearImm(Op, DAG); 1568 case Intrinsic::mips_binsli_b: 1569 case Intrinsic::mips_binsli_h: 1570 case Intrinsic::mips_binsli_w: 1571 case Intrinsic::mips_binsli_d: { 1572 // binsli_x(IfClear, IfSet, nbits) -> (vselect LBitsMask, IfSet, IfClear) 1573 EVT VecTy = Op->getValueType(0); 1574 EVT EltTy = VecTy.getVectorElementType(); 1575 if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits()) 1576 report_fatal_error("Immediate out of range"); 1577 APInt Mask = APInt::getHighBitsSet(EltTy.getSizeInBits(), 1578 Op->getConstantOperandVal(3) + 1); 1579 return DAG.getNode(ISD::VSELECT, DL, VecTy, 1580 DAG.getConstant(Mask, DL, VecTy, true), 1581 Op->getOperand(2), Op->getOperand(1)); 1582 } 1583 case Intrinsic::mips_binsri_b: 1584 case Intrinsic::mips_binsri_h: 1585 case Intrinsic::mips_binsri_w: 1586 case Intrinsic::mips_binsri_d: { 1587 // binsri_x(IfClear, IfSet, nbits) -> (vselect RBitsMask, IfSet, IfClear) 1588 EVT VecTy = Op->getValueType(0); 1589 EVT EltTy = VecTy.getVectorElementType(); 1590 if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits()) 1591 report_fatal_error("Immediate out of range"); 1592 APInt Mask = APInt::getLowBitsSet(EltTy.getSizeInBits(), 1593 Op->getConstantOperandVal(3) + 1); 1594 return DAG.getNode(ISD::VSELECT, DL, VecTy, 1595 DAG.getConstant(Mask, DL, VecTy, true), 1596 Op->getOperand(2), Op->getOperand(1)); 1597 } 1598 case Intrinsic::mips_bmnz_v: 1599 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3), 1600 Op->getOperand(2), Op->getOperand(1)); 1601 case Intrinsic::mips_bmnzi_b: 1602 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1603 lowerMSASplatImm(Op, 3, DAG), Op->getOperand(2), 1604 Op->getOperand(1)); 1605 case Intrinsic::mips_bmz_v: 1606 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3), 1607 Op->getOperand(1), Op->getOperand(2)); 1608 case Intrinsic::mips_bmzi_b: 1609 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1610 lowerMSASplatImm(Op, 3, DAG), Op->getOperand(1), 1611 Op->getOperand(2)); 1612 case Intrinsic::mips_bneg_b: 1613 case Intrinsic::mips_bneg_h: 1614 case Intrinsic::mips_bneg_w: 1615 case Intrinsic::mips_bneg_d: { 1616 EVT VecTy = Op->getValueType(0); 1617 SDValue One = DAG.getConstant(1, DL, VecTy); 1618 1619 return DAG.getNode(ISD::XOR, DL, VecTy, Op->getOperand(1), 1620 DAG.getNode(ISD::SHL, DL, VecTy, One, 1621 truncateVecElts(Op, DAG))); 1622 } 1623 case Intrinsic::mips_bnegi_b: 1624 case Intrinsic::mips_bnegi_h: 1625 case Intrinsic::mips_bnegi_w: 1626 case Intrinsic::mips_bnegi_d: 1627 return lowerMSABinaryBitImmIntr(Op, DAG, ISD::XOR, Op->getOperand(2), 1628 !Subtarget.isLittle()); 1629 case Intrinsic::mips_bnz_b: 1630 case Intrinsic::mips_bnz_h: 1631 case Intrinsic::mips_bnz_w: 1632 case Intrinsic::mips_bnz_d: 1633 return DAG.getNode(MipsISD::VALL_NONZERO, DL, Op->getValueType(0), 1634 Op->getOperand(1)); 1635 case Intrinsic::mips_bnz_v: 1636 return DAG.getNode(MipsISD::VANY_NONZERO, DL, Op->getValueType(0), 1637 Op->getOperand(1)); 1638 case Intrinsic::mips_bsel_v: 1639 // bsel_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear) 1640 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1641 Op->getOperand(1), Op->getOperand(3), 1642 Op->getOperand(2)); 1643 case Intrinsic::mips_bseli_b: 1644 // bseli_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear) 1645 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1646 Op->getOperand(1), lowerMSASplatImm(Op, 3, DAG), 1647 Op->getOperand(2)); 1648 case Intrinsic::mips_bset_b: 1649 case Intrinsic::mips_bset_h: 1650 case Intrinsic::mips_bset_w: 1651 case Intrinsic::mips_bset_d: { 1652 EVT VecTy = Op->getValueType(0); 1653 SDValue One = DAG.getConstant(1, DL, VecTy); 1654 1655 return DAG.getNode(ISD::OR, DL, VecTy, Op->getOperand(1), 1656 DAG.getNode(ISD::SHL, DL, VecTy, One, 1657 truncateVecElts(Op, DAG))); 1658 } 1659 case Intrinsic::mips_bseti_b: 1660 case Intrinsic::mips_bseti_h: 1661 case Intrinsic::mips_bseti_w: 1662 case Intrinsic::mips_bseti_d: 1663 return lowerMSABinaryBitImmIntr(Op, DAG, ISD::OR, Op->getOperand(2), 1664 !Subtarget.isLittle()); 1665 case Intrinsic::mips_bz_b: 1666 case Intrinsic::mips_bz_h: 1667 case Intrinsic::mips_bz_w: 1668 case Intrinsic::mips_bz_d: 1669 return DAG.getNode(MipsISD::VALL_ZERO, DL, Op->getValueType(0), 1670 Op->getOperand(1)); 1671 case Intrinsic::mips_bz_v: 1672 return DAG.getNode(MipsISD::VANY_ZERO, DL, Op->getValueType(0), 1673 Op->getOperand(1)); 1674 case Intrinsic::mips_ceq_b: 1675 case Intrinsic::mips_ceq_h: 1676 case Intrinsic::mips_ceq_w: 1677 case Intrinsic::mips_ceq_d: 1678 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1679 Op->getOperand(2), ISD::SETEQ); 1680 case Intrinsic::mips_ceqi_b: 1681 case Intrinsic::mips_ceqi_h: 1682 case Intrinsic::mips_ceqi_w: 1683 case Intrinsic::mips_ceqi_d: 1684 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1685 lowerMSASplatImm(Op, 2, DAG, true), ISD::SETEQ); 1686 case Intrinsic::mips_cle_s_b: 1687 case Intrinsic::mips_cle_s_h: 1688 case Intrinsic::mips_cle_s_w: 1689 case Intrinsic::mips_cle_s_d: 1690 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1691 Op->getOperand(2), ISD::SETLE); 1692 case Intrinsic::mips_clei_s_b: 1693 case Intrinsic::mips_clei_s_h: 1694 case Intrinsic::mips_clei_s_w: 1695 case Intrinsic::mips_clei_s_d: 1696 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1697 lowerMSASplatImm(Op, 2, DAG, true), ISD::SETLE); 1698 case Intrinsic::mips_cle_u_b: 1699 case Intrinsic::mips_cle_u_h: 1700 case Intrinsic::mips_cle_u_w: 1701 case Intrinsic::mips_cle_u_d: 1702 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1703 Op->getOperand(2), ISD::SETULE); 1704 case Intrinsic::mips_clei_u_b: 1705 case Intrinsic::mips_clei_u_h: 1706 case Intrinsic::mips_clei_u_w: 1707 case Intrinsic::mips_clei_u_d: 1708 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1709 lowerMSASplatImm(Op, 2, DAG), ISD::SETULE); 1710 case Intrinsic::mips_clt_s_b: 1711 case Intrinsic::mips_clt_s_h: 1712 case Intrinsic::mips_clt_s_w: 1713 case Intrinsic::mips_clt_s_d: 1714 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1715 Op->getOperand(2), ISD::SETLT); 1716 case Intrinsic::mips_clti_s_b: 1717 case Intrinsic::mips_clti_s_h: 1718 case Intrinsic::mips_clti_s_w: 1719 case Intrinsic::mips_clti_s_d: 1720 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1721 lowerMSASplatImm(Op, 2, DAG, true), ISD::SETLT); 1722 case Intrinsic::mips_clt_u_b: 1723 case Intrinsic::mips_clt_u_h: 1724 case Intrinsic::mips_clt_u_w: 1725 case Intrinsic::mips_clt_u_d: 1726 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1727 Op->getOperand(2), ISD::SETULT); 1728 case Intrinsic::mips_clti_u_b: 1729 case Intrinsic::mips_clti_u_h: 1730 case Intrinsic::mips_clti_u_w: 1731 case Intrinsic::mips_clti_u_d: 1732 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1733 lowerMSASplatImm(Op, 2, DAG), ISD::SETULT); 1734 case Intrinsic::mips_copy_s_b: 1735 case Intrinsic::mips_copy_s_h: 1736 case Intrinsic::mips_copy_s_w: 1737 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT); 1738 case Intrinsic::mips_copy_s_d: 1739 if (Subtarget.hasMips64()) 1740 // Lower directly into VEXTRACT_SEXT_ELT since i64 is legal on Mips64. 1741 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT); 1742 else { 1743 // Lower into the generic EXTRACT_VECTOR_ELT node and let the type 1744 // legalizer and EXTRACT_VECTOR_ELT lowering sort it out. 1745 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), 1746 Op->getValueType(0), Op->getOperand(1), 1747 Op->getOperand(2)); 1748 } 1749 case Intrinsic::mips_copy_u_b: 1750 case Intrinsic::mips_copy_u_h: 1751 case Intrinsic::mips_copy_u_w: 1752 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT); 1753 case Intrinsic::mips_copy_u_d: 1754 if (Subtarget.hasMips64()) 1755 // Lower directly into VEXTRACT_ZEXT_ELT since i64 is legal on Mips64. 1756 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT); 1757 else { 1758 // Lower into the generic EXTRACT_VECTOR_ELT node and let the type 1759 // legalizer and EXTRACT_VECTOR_ELT lowering sort it out. 1760 // Note: When i64 is illegal, this results in copy_s.w instructions 1761 // instead of copy_u.w instructions. This makes no difference to the 1762 // behaviour since i64 is only illegal when the register file is 32-bit. 1763 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), 1764 Op->getValueType(0), Op->getOperand(1), 1765 Op->getOperand(2)); 1766 } 1767 case Intrinsic::mips_div_s_b: 1768 case Intrinsic::mips_div_s_h: 1769 case Intrinsic::mips_div_s_w: 1770 case Intrinsic::mips_div_s_d: 1771 return DAG.getNode(ISD::SDIV, DL, Op->getValueType(0), Op->getOperand(1), 1772 Op->getOperand(2)); 1773 case Intrinsic::mips_div_u_b: 1774 case Intrinsic::mips_div_u_h: 1775 case Intrinsic::mips_div_u_w: 1776 case Intrinsic::mips_div_u_d: 1777 return DAG.getNode(ISD::UDIV, DL, Op->getValueType(0), Op->getOperand(1), 1778 Op->getOperand(2)); 1779 case Intrinsic::mips_fadd_w: 1780 case Intrinsic::mips_fadd_d: 1781 // TODO: If intrinsics have fast-math-flags, propagate them. 1782 return DAG.getNode(ISD::FADD, DL, Op->getValueType(0), Op->getOperand(1), 1783 Op->getOperand(2)); 1784 // Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away 1785 case Intrinsic::mips_fceq_w: 1786 case Intrinsic::mips_fceq_d: 1787 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1788 Op->getOperand(2), ISD::SETOEQ); 1789 case Intrinsic::mips_fcle_w: 1790 case Intrinsic::mips_fcle_d: 1791 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1792 Op->getOperand(2), ISD::SETOLE); 1793 case Intrinsic::mips_fclt_w: 1794 case Intrinsic::mips_fclt_d: 1795 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1796 Op->getOperand(2), ISD::SETOLT); 1797 case Intrinsic::mips_fcne_w: 1798 case Intrinsic::mips_fcne_d: 1799 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1800 Op->getOperand(2), ISD::SETONE); 1801 case Intrinsic::mips_fcor_w: 1802 case Intrinsic::mips_fcor_d: 1803 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1804 Op->getOperand(2), ISD::SETO); 1805 case Intrinsic::mips_fcueq_w: 1806 case Intrinsic::mips_fcueq_d: 1807 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1808 Op->getOperand(2), ISD::SETUEQ); 1809 case Intrinsic::mips_fcule_w: 1810 case Intrinsic::mips_fcule_d: 1811 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1812 Op->getOperand(2), ISD::SETULE); 1813 case Intrinsic::mips_fcult_w: 1814 case Intrinsic::mips_fcult_d: 1815 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1816 Op->getOperand(2), ISD::SETULT); 1817 case Intrinsic::mips_fcun_w: 1818 case Intrinsic::mips_fcun_d: 1819 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1820 Op->getOperand(2), ISD::SETUO); 1821 case Intrinsic::mips_fcune_w: 1822 case Intrinsic::mips_fcune_d: 1823 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1824 Op->getOperand(2), ISD::SETUNE); 1825 case Intrinsic::mips_fdiv_w: 1826 case Intrinsic::mips_fdiv_d: 1827 // TODO: If intrinsics have fast-math-flags, propagate them. 1828 return DAG.getNode(ISD::FDIV, DL, Op->getValueType(0), Op->getOperand(1), 1829 Op->getOperand(2)); 1830 case Intrinsic::mips_ffint_u_w: 1831 case Intrinsic::mips_ffint_u_d: 1832 return DAG.getNode(ISD::UINT_TO_FP, DL, Op->getValueType(0), 1833 Op->getOperand(1)); 1834 case Intrinsic::mips_ffint_s_w: 1835 case Intrinsic::mips_ffint_s_d: 1836 return DAG.getNode(ISD::SINT_TO_FP, DL, Op->getValueType(0), 1837 Op->getOperand(1)); 1838 case Intrinsic::mips_fill_b: 1839 case Intrinsic::mips_fill_h: 1840 case Intrinsic::mips_fill_w: 1841 case Intrinsic::mips_fill_d: { 1842 EVT ResTy = Op->getValueType(0); 1843 SmallVector<SDValue, 16> Ops(ResTy.getVectorNumElements(), 1844 Op->getOperand(1)); 1845 1846 // If ResTy is v2i64 then the type legalizer will break this node down into 1847 // an equivalent v4i32. 1848 return DAG.getBuildVector(ResTy, DL, Ops); 1849 } 1850 case Intrinsic::mips_fexp2_w: 1851 case Intrinsic::mips_fexp2_d: { 1852 // TODO: If intrinsics have fast-math-flags, propagate them. 1853 EVT ResTy = Op->getValueType(0); 1854 return DAG.getNode( 1855 ISD::FMUL, SDLoc(Op), ResTy, Op->getOperand(1), 1856 DAG.getNode(ISD::FEXP2, SDLoc(Op), ResTy, Op->getOperand(2))); 1857 } 1858 case Intrinsic::mips_flog2_w: 1859 case Intrinsic::mips_flog2_d: 1860 return DAG.getNode(ISD::FLOG2, DL, Op->getValueType(0), Op->getOperand(1)); 1861 case Intrinsic::mips_fmadd_w: 1862 case Intrinsic::mips_fmadd_d: 1863 return DAG.getNode(ISD::FMA, SDLoc(Op), Op->getValueType(0), 1864 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); 1865 case Intrinsic::mips_fmul_w: 1866 case Intrinsic::mips_fmul_d: 1867 // TODO: If intrinsics have fast-math-flags, propagate them. 1868 return DAG.getNode(ISD::FMUL, DL, Op->getValueType(0), Op->getOperand(1), 1869 Op->getOperand(2)); 1870 case Intrinsic::mips_fmsub_w: 1871 case Intrinsic::mips_fmsub_d: { 1872 // TODO: If intrinsics have fast-math-flags, propagate them. 1873 return DAG.getNode(MipsISD::FMS, SDLoc(Op), Op->getValueType(0), 1874 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); 1875 } 1876 case Intrinsic::mips_frint_w: 1877 case Intrinsic::mips_frint_d: 1878 return DAG.getNode(ISD::FRINT, DL, Op->getValueType(0), Op->getOperand(1)); 1879 case Intrinsic::mips_fsqrt_w: 1880 case Intrinsic::mips_fsqrt_d: 1881 return DAG.getNode(ISD::FSQRT, DL, Op->getValueType(0), Op->getOperand(1)); 1882 case Intrinsic::mips_fsub_w: 1883 case Intrinsic::mips_fsub_d: 1884 // TODO: If intrinsics have fast-math-flags, propagate them. 1885 return DAG.getNode(ISD::FSUB, DL, Op->getValueType(0), Op->getOperand(1), 1886 Op->getOperand(2)); 1887 case Intrinsic::mips_ftrunc_u_w: 1888 case Intrinsic::mips_ftrunc_u_d: 1889 return DAG.getNode(ISD::FP_TO_UINT, DL, Op->getValueType(0), 1890 Op->getOperand(1)); 1891 case Intrinsic::mips_ftrunc_s_w: 1892 case Intrinsic::mips_ftrunc_s_d: 1893 return DAG.getNode(ISD::FP_TO_SINT, DL, Op->getValueType(0), 1894 Op->getOperand(1)); 1895 case Intrinsic::mips_ilvev_b: 1896 case Intrinsic::mips_ilvev_h: 1897 case Intrinsic::mips_ilvev_w: 1898 case Intrinsic::mips_ilvev_d: 1899 return DAG.getNode(MipsISD::ILVEV, DL, Op->getValueType(0), 1900 Op->getOperand(1), Op->getOperand(2)); 1901 case Intrinsic::mips_ilvl_b: 1902 case Intrinsic::mips_ilvl_h: 1903 case Intrinsic::mips_ilvl_w: 1904 case Intrinsic::mips_ilvl_d: 1905 return DAG.getNode(MipsISD::ILVL, DL, Op->getValueType(0), 1906 Op->getOperand(1), Op->getOperand(2)); 1907 case Intrinsic::mips_ilvod_b: 1908 case Intrinsic::mips_ilvod_h: 1909 case Intrinsic::mips_ilvod_w: 1910 case Intrinsic::mips_ilvod_d: 1911 return DAG.getNode(MipsISD::ILVOD, DL, Op->getValueType(0), 1912 Op->getOperand(1), Op->getOperand(2)); 1913 case Intrinsic::mips_ilvr_b: 1914 case Intrinsic::mips_ilvr_h: 1915 case Intrinsic::mips_ilvr_w: 1916 case Intrinsic::mips_ilvr_d: 1917 return DAG.getNode(MipsISD::ILVR, DL, Op->getValueType(0), 1918 Op->getOperand(1), Op->getOperand(2)); 1919 case Intrinsic::mips_insert_b: 1920 case Intrinsic::mips_insert_h: 1921 case Intrinsic::mips_insert_w: 1922 case Intrinsic::mips_insert_d: 1923 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), Op->getValueType(0), 1924 Op->getOperand(1), Op->getOperand(3), Op->getOperand(2)); 1925 case Intrinsic::mips_insve_b: 1926 case Intrinsic::mips_insve_h: 1927 case Intrinsic::mips_insve_w: 1928 case Intrinsic::mips_insve_d: { 1929 // Report an error for out of range values. 1930 int64_t Max; 1931 switch (Intrinsic) { 1932 case Intrinsic::mips_insve_b: Max = 15; break; 1933 case Intrinsic::mips_insve_h: Max = 7; break; 1934 case Intrinsic::mips_insve_w: Max = 3; break; 1935 case Intrinsic::mips_insve_d: Max = 1; break; 1936 default: llvm_unreachable("Unmatched intrinsic"); 1937 } 1938 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 1939 if (Value < 0 || Value > Max) 1940 report_fatal_error("Immediate out of range"); 1941 return DAG.getNode(MipsISD::INSVE, DL, Op->getValueType(0), 1942 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3), 1943 DAG.getConstant(0, DL, MVT::i32)); 1944 } 1945 case Intrinsic::mips_ldi_b: 1946 case Intrinsic::mips_ldi_h: 1947 case Intrinsic::mips_ldi_w: 1948 case Intrinsic::mips_ldi_d: 1949 return lowerMSASplatImm(Op, 1, DAG, true); 1950 case Intrinsic::mips_lsa: 1951 case Intrinsic::mips_dlsa: { 1952 EVT ResTy = Op->getValueType(0); 1953 return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1), 1954 DAG.getNode(ISD::SHL, SDLoc(Op), ResTy, 1955 Op->getOperand(2), Op->getOperand(3))); 1956 } 1957 case Intrinsic::mips_maddv_b: 1958 case Intrinsic::mips_maddv_h: 1959 case Intrinsic::mips_maddv_w: 1960 case Intrinsic::mips_maddv_d: { 1961 EVT ResTy = Op->getValueType(0); 1962 return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1), 1963 DAG.getNode(ISD::MUL, SDLoc(Op), ResTy, 1964 Op->getOperand(2), Op->getOperand(3))); 1965 } 1966 case Intrinsic::mips_max_s_b: 1967 case Intrinsic::mips_max_s_h: 1968 case Intrinsic::mips_max_s_w: 1969 case Intrinsic::mips_max_s_d: 1970 return DAG.getNode(ISD::SMAX, DL, Op->getValueType(0), 1971 Op->getOperand(1), Op->getOperand(2)); 1972 case Intrinsic::mips_max_u_b: 1973 case Intrinsic::mips_max_u_h: 1974 case Intrinsic::mips_max_u_w: 1975 case Intrinsic::mips_max_u_d: 1976 return DAG.getNode(ISD::UMAX, DL, Op->getValueType(0), 1977 Op->getOperand(1), Op->getOperand(2)); 1978 case Intrinsic::mips_maxi_s_b: 1979 case Intrinsic::mips_maxi_s_h: 1980 case Intrinsic::mips_maxi_s_w: 1981 case Intrinsic::mips_maxi_s_d: 1982 return DAG.getNode(ISD::SMAX, DL, Op->getValueType(0), 1983 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG, true)); 1984 case Intrinsic::mips_maxi_u_b: 1985 case Intrinsic::mips_maxi_u_h: 1986 case Intrinsic::mips_maxi_u_w: 1987 case Intrinsic::mips_maxi_u_d: 1988 return DAG.getNode(ISD::UMAX, DL, Op->getValueType(0), 1989 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 1990 case Intrinsic::mips_min_s_b: 1991 case Intrinsic::mips_min_s_h: 1992 case Intrinsic::mips_min_s_w: 1993 case Intrinsic::mips_min_s_d: 1994 return DAG.getNode(ISD::SMIN, DL, Op->getValueType(0), 1995 Op->getOperand(1), Op->getOperand(2)); 1996 case Intrinsic::mips_min_u_b: 1997 case Intrinsic::mips_min_u_h: 1998 case Intrinsic::mips_min_u_w: 1999 case Intrinsic::mips_min_u_d: 2000 return DAG.getNode(ISD::UMIN, DL, Op->getValueType(0), 2001 Op->getOperand(1), Op->getOperand(2)); 2002 case Intrinsic::mips_mini_s_b: 2003 case Intrinsic::mips_mini_s_h: 2004 case Intrinsic::mips_mini_s_w: 2005 case Intrinsic::mips_mini_s_d: 2006 return DAG.getNode(ISD::SMIN, DL, Op->getValueType(0), 2007 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG, true)); 2008 case Intrinsic::mips_mini_u_b: 2009 case Intrinsic::mips_mini_u_h: 2010 case Intrinsic::mips_mini_u_w: 2011 case Intrinsic::mips_mini_u_d: 2012 return DAG.getNode(ISD::UMIN, DL, Op->getValueType(0), 2013 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2014 case Intrinsic::mips_mod_s_b: 2015 case Intrinsic::mips_mod_s_h: 2016 case Intrinsic::mips_mod_s_w: 2017 case Intrinsic::mips_mod_s_d: 2018 return DAG.getNode(ISD::SREM, DL, Op->getValueType(0), Op->getOperand(1), 2019 Op->getOperand(2)); 2020 case Intrinsic::mips_mod_u_b: 2021 case Intrinsic::mips_mod_u_h: 2022 case Intrinsic::mips_mod_u_w: 2023 case Intrinsic::mips_mod_u_d: 2024 return DAG.getNode(ISD::UREM, DL, Op->getValueType(0), Op->getOperand(1), 2025 Op->getOperand(2)); 2026 case Intrinsic::mips_mulv_b: 2027 case Intrinsic::mips_mulv_h: 2028 case Intrinsic::mips_mulv_w: 2029 case Intrinsic::mips_mulv_d: 2030 return DAG.getNode(ISD::MUL, DL, Op->getValueType(0), Op->getOperand(1), 2031 Op->getOperand(2)); 2032 case Intrinsic::mips_msubv_b: 2033 case Intrinsic::mips_msubv_h: 2034 case Intrinsic::mips_msubv_w: 2035 case Intrinsic::mips_msubv_d: { 2036 EVT ResTy = Op->getValueType(0); 2037 return DAG.getNode(ISD::SUB, SDLoc(Op), ResTy, Op->getOperand(1), 2038 DAG.getNode(ISD::MUL, SDLoc(Op), ResTy, 2039 Op->getOperand(2), Op->getOperand(3))); 2040 } 2041 case Intrinsic::mips_nlzc_b: 2042 case Intrinsic::mips_nlzc_h: 2043 case Intrinsic::mips_nlzc_w: 2044 case Intrinsic::mips_nlzc_d: 2045 return DAG.getNode(ISD::CTLZ, DL, Op->getValueType(0), Op->getOperand(1)); 2046 case Intrinsic::mips_nor_v: { 2047 SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0), 2048 Op->getOperand(1), Op->getOperand(2)); 2049 return DAG.getNOT(DL, Res, Res->getValueType(0)); 2050 } 2051 case Intrinsic::mips_nori_b: { 2052 SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0), 2053 Op->getOperand(1), 2054 lowerMSASplatImm(Op, 2, DAG)); 2055 return DAG.getNOT(DL, Res, Res->getValueType(0)); 2056 } 2057 case Intrinsic::mips_or_v: 2058 return DAG.getNode(ISD::OR, DL, Op->getValueType(0), Op->getOperand(1), 2059 Op->getOperand(2)); 2060 case Intrinsic::mips_ori_b: 2061 return DAG.getNode(ISD::OR, DL, Op->getValueType(0), 2062 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2063 case Intrinsic::mips_pckev_b: 2064 case Intrinsic::mips_pckev_h: 2065 case Intrinsic::mips_pckev_w: 2066 case Intrinsic::mips_pckev_d: 2067 return DAG.getNode(MipsISD::PCKEV, DL, Op->getValueType(0), 2068 Op->getOperand(1), Op->getOperand(2)); 2069 case Intrinsic::mips_pckod_b: 2070 case Intrinsic::mips_pckod_h: 2071 case Intrinsic::mips_pckod_w: 2072 case Intrinsic::mips_pckod_d: 2073 return DAG.getNode(MipsISD::PCKOD, DL, Op->getValueType(0), 2074 Op->getOperand(1), Op->getOperand(2)); 2075 case Intrinsic::mips_pcnt_b: 2076 case Intrinsic::mips_pcnt_h: 2077 case Intrinsic::mips_pcnt_w: 2078 case Intrinsic::mips_pcnt_d: 2079 return DAG.getNode(ISD::CTPOP, DL, Op->getValueType(0), Op->getOperand(1)); 2080 case Intrinsic::mips_sat_s_b: 2081 case Intrinsic::mips_sat_s_h: 2082 case Intrinsic::mips_sat_s_w: 2083 case Intrinsic::mips_sat_s_d: 2084 case Intrinsic::mips_sat_u_b: 2085 case Intrinsic::mips_sat_u_h: 2086 case Intrinsic::mips_sat_u_w: 2087 case Intrinsic::mips_sat_u_d: { 2088 // Report an error for out of range values. 2089 int64_t Max; 2090 switch (Intrinsic) { 2091 case Intrinsic::mips_sat_s_b: 2092 case Intrinsic::mips_sat_u_b: Max = 7; break; 2093 case Intrinsic::mips_sat_s_h: 2094 case Intrinsic::mips_sat_u_h: Max = 15; break; 2095 case Intrinsic::mips_sat_s_w: 2096 case Intrinsic::mips_sat_u_w: Max = 31; break; 2097 case Intrinsic::mips_sat_s_d: 2098 case Intrinsic::mips_sat_u_d: Max = 63; break; 2099 default: llvm_unreachable("Unmatched intrinsic"); 2100 } 2101 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 2102 if (Value < 0 || Value > Max) 2103 report_fatal_error("Immediate out of range"); 2104 return SDValue(); 2105 } 2106 case Intrinsic::mips_shf_b: 2107 case Intrinsic::mips_shf_h: 2108 case Intrinsic::mips_shf_w: { 2109 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 2110 if (Value < 0 || Value > 255) 2111 report_fatal_error("Immediate out of range"); 2112 return DAG.getNode(MipsISD::SHF, DL, Op->getValueType(0), 2113 Op->getOperand(2), Op->getOperand(1)); 2114 } 2115 case Intrinsic::mips_sldi_b: 2116 case Intrinsic::mips_sldi_h: 2117 case Intrinsic::mips_sldi_w: 2118 case Intrinsic::mips_sldi_d: { 2119 // Report an error for out of range values. 2120 int64_t Max; 2121 switch (Intrinsic) { 2122 case Intrinsic::mips_sldi_b: Max = 15; break; 2123 case Intrinsic::mips_sldi_h: Max = 7; break; 2124 case Intrinsic::mips_sldi_w: Max = 3; break; 2125 case Intrinsic::mips_sldi_d: Max = 1; break; 2126 default: llvm_unreachable("Unmatched intrinsic"); 2127 } 2128 int64_t Value = cast<ConstantSDNode>(Op->getOperand(3))->getSExtValue(); 2129 if (Value < 0 || Value > Max) 2130 report_fatal_error("Immediate out of range"); 2131 return SDValue(); 2132 } 2133 case Intrinsic::mips_sll_b: 2134 case Intrinsic::mips_sll_h: 2135 case Intrinsic::mips_sll_w: 2136 case Intrinsic::mips_sll_d: 2137 return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), Op->getOperand(1), 2138 truncateVecElts(Op, DAG)); 2139 case Intrinsic::mips_slli_b: 2140 case Intrinsic::mips_slli_h: 2141 case Intrinsic::mips_slli_w: 2142 case Intrinsic::mips_slli_d: 2143 return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), 2144 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2145 case Intrinsic::mips_splat_b: 2146 case Intrinsic::mips_splat_h: 2147 case Intrinsic::mips_splat_w: 2148 case Intrinsic::mips_splat_d: 2149 // We can't lower via VECTOR_SHUFFLE because it requires constant shuffle 2150 // masks, nor can we lower via BUILD_VECTOR & EXTRACT_VECTOR_ELT because 2151 // EXTRACT_VECTOR_ELT can't extract i64's on MIPS32. 2152 // Instead we lower to MipsISD::VSHF and match from there. 2153 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0), 2154 lowerMSASplatZExt(Op, 2, DAG), Op->getOperand(1), 2155 Op->getOperand(1)); 2156 case Intrinsic::mips_splati_b: 2157 case Intrinsic::mips_splati_h: 2158 case Intrinsic::mips_splati_w: 2159 case Intrinsic::mips_splati_d: 2160 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0), 2161 lowerMSASplatImm(Op, 2, DAG), Op->getOperand(1), 2162 Op->getOperand(1)); 2163 case Intrinsic::mips_sra_b: 2164 case Intrinsic::mips_sra_h: 2165 case Intrinsic::mips_sra_w: 2166 case Intrinsic::mips_sra_d: 2167 return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), Op->getOperand(1), 2168 truncateVecElts(Op, DAG)); 2169 case Intrinsic::mips_srai_b: 2170 case Intrinsic::mips_srai_h: 2171 case Intrinsic::mips_srai_w: 2172 case Intrinsic::mips_srai_d: 2173 return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), 2174 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2175 case Intrinsic::mips_srari_b: 2176 case Intrinsic::mips_srari_h: 2177 case Intrinsic::mips_srari_w: 2178 case Intrinsic::mips_srari_d: { 2179 // Report an error for out of range values. 2180 int64_t Max; 2181 switch (Intrinsic) { 2182 case Intrinsic::mips_srari_b: Max = 7; break; 2183 case Intrinsic::mips_srari_h: Max = 15; break; 2184 case Intrinsic::mips_srari_w: Max = 31; break; 2185 case Intrinsic::mips_srari_d: Max = 63; break; 2186 default: llvm_unreachable("Unmatched intrinsic"); 2187 } 2188 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 2189 if (Value < 0 || Value > Max) 2190 report_fatal_error("Immediate out of range"); 2191 return SDValue(); 2192 } 2193 case Intrinsic::mips_srl_b: 2194 case Intrinsic::mips_srl_h: 2195 case Intrinsic::mips_srl_w: 2196 case Intrinsic::mips_srl_d: 2197 return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), Op->getOperand(1), 2198 truncateVecElts(Op, DAG)); 2199 case Intrinsic::mips_srli_b: 2200 case Intrinsic::mips_srli_h: 2201 case Intrinsic::mips_srli_w: 2202 case Intrinsic::mips_srli_d: 2203 return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), 2204 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2205 case Intrinsic::mips_srlri_b: 2206 case Intrinsic::mips_srlri_h: 2207 case Intrinsic::mips_srlri_w: 2208 case Intrinsic::mips_srlri_d: { 2209 // Report an error for out of range values. 2210 int64_t Max; 2211 switch (Intrinsic) { 2212 case Intrinsic::mips_srlri_b: Max = 7; break; 2213 case Intrinsic::mips_srlri_h: Max = 15; break; 2214 case Intrinsic::mips_srlri_w: Max = 31; break; 2215 case Intrinsic::mips_srlri_d: Max = 63; break; 2216 default: llvm_unreachable("Unmatched intrinsic"); 2217 } 2218 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 2219 if (Value < 0 || Value > Max) 2220 report_fatal_error("Immediate out of range"); 2221 return SDValue(); 2222 } 2223 case Intrinsic::mips_subv_b: 2224 case Intrinsic::mips_subv_h: 2225 case Intrinsic::mips_subv_w: 2226 case Intrinsic::mips_subv_d: 2227 return DAG.getNode(ISD::SUB, DL, Op->getValueType(0), Op->getOperand(1), 2228 Op->getOperand(2)); 2229 case Intrinsic::mips_subvi_b: 2230 case Intrinsic::mips_subvi_h: 2231 case Intrinsic::mips_subvi_w: 2232 case Intrinsic::mips_subvi_d: 2233 return DAG.getNode(ISD::SUB, DL, Op->getValueType(0), 2234 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2235 case Intrinsic::mips_vshf_b: 2236 case Intrinsic::mips_vshf_h: 2237 case Intrinsic::mips_vshf_w: 2238 case Intrinsic::mips_vshf_d: 2239 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0), 2240 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); 2241 case Intrinsic::mips_xor_v: 2242 return DAG.getNode(ISD::XOR, DL, Op->getValueType(0), Op->getOperand(1), 2243 Op->getOperand(2)); 2244 case Intrinsic::mips_xori_b: 2245 return DAG.getNode(ISD::XOR, DL, Op->getValueType(0), 2246 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2247 case Intrinsic::thread_pointer: { 2248 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2249 return DAG.getNode(MipsISD::ThreadPointer, DL, PtrVT); 2250 } 2251 } 2252 } 2253 2254 static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, 2255 const MipsSubtarget &Subtarget) { 2256 SDLoc DL(Op); 2257 SDValue ChainIn = Op->getOperand(0); 2258 SDValue Address = Op->getOperand(2); 2259 SDValue Offset = Op->getOperand(3); 2260 EVT ResTy = Op->getValueType(0); 2261 EVT PtrTy = Address->getValueType(0); 2262 2263 // For N64 addresses have the underlying type MVT::i64. This intrinsic 2264 // however takes an i32 signed constant offset. The actual type of the 2265 // intrinsic is a scaled signed i10. 2266 if (Subtarget.isABI_N64()) 2267 Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); 2268 2269 Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); 2270 return DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(), 2271 /* Alignment = */ 16); 2272 } 2273 2274 SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, 2275 SelectionDAG &DAG) const { 2276 unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue(); 2277 switch (Intr) { 2278 default: 2279 return SDValue(); 2280 case Intrinsic::mips_extp: 2281 return lowerDSPIntr(Op, DAG, MipsISD::EXTP); 2282 case Intrinsic::mips_extpdp: 2283 return lowerDSPIntr(Op, DAG, MipsISD::EXTPDP); 2284 case Intrinsic::mips_extr_w: 2285 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_W); 2286 case Intrinsic::mips_extr_r_w: 2287 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W); 2288 case Intrinsic::mips_extr_rs_w: 2289 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W); 2290 case Intrinsic::mips_extr_s_h: 2291 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H); 2292 case Intrinsic::mips_mthlip: 2293 return lowerDSPIntr(Op, DAG, MipsISD::MTHLIP); 2294 case Intrinsic::mips_mulsaq_s_w_ph: 2295 return lowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH); 2296 case Intrinsic::mips_maq_s_w_phl: 2297 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL); 2298 case Intrinsic::mips_maq_s_w_phr: 2299 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR); 2300 case Intrinsic::mips_maq_sa_w_phl: 2301 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL); 2302 case Intrinsic::mips_maq_sa_w_phr: 2303 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR); 2304 case Intrinsic::mips_dpaq_s_w_ph: 2305 return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH); 2306 case Intrinsic::mips_dpsq_s_w_ph: 2307 return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH); 2308 case Intrinsic::mips_dpaq_sa_l_w: 2309 return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W); 2310 case Intrinsic::mips_dpsq_sa_l_w: 2311 return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W); 2312 case Intrinsic::mips_dpaqx_s_w_ph: 2313 return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH); 2314 case Intrinsic::mips_dpaqx_sa_w_ph: 2315 return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH); 2316 case Intrinsic::mips_dpsqx_s_w_ph: 2317 return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH); 2318 case Intrinsic::mips_dpsqx_sa_w_ph: 2319 return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH); 2320 case Intrinsic::mips_ld_b: 2321 case Intrinsic::mips_ld_h: 2322 case Intrinsic::mips_ld_w: 2323 case Intrinsic::mips_ld_d: 2324 return lowerMSALoadIntr(Op, DAG, Intr, Subtarget); 2325 } 2326 } 2327 2328 static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, 2329 const MipsSubtarget &Subtarget) { 2330 SDLoc DL(Op); 2331 SDValue ChainIn = Op->getOperand(0); 2332 SDValue Value = Op->getOperand(2); 2333 SDValue Address = Op->getOperand(3); 2334 SDValue Offset = Op->getOperand(4); 2335 EVT PtrTy = Address->getValueType(0); 2336 2337 // For N64 addresses have the underlying type MVT::i64. This intrinsic 2338 // however takes an i32 signed constant offset. The actual type of the 2339 // intrinsic is a scaled signed i10. 2340 if (Subtarget.isABI_N64()) 2341 Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); 2342 2343 Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); 2344 2345 return DAG.getStore(ChainIn, DL, Value, Address, MachinePointerInfo(), 2346 /* Alignment = */ 16); 2347 } 2348 2349 SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op, 2350 SelectionDAG &DAG) const { 2351 unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue(); 2352 switch (Intr) { 2353 default: 2354 return SDValue(); 2355 case Intrinsic::mips_st_b: 2356 case Intrinsic::mips_st_h: 2357 case Intrinsic::mips_st_w: 2358 case Intrinsic::mips_st_d: 2359 return lowerMSAStoreIntr(Op, DAG, Intr, Subtarget); 2360 } 2361 } 2362 2363 /// Check if the given BuildVectorSDNode is a splat. 2364 /// This method currently relies on DAG nodes being reused when equivalent, 2365 /// so it's possible for this to return false even when isConstantSplat returns 2366 /// true. 2367 static bool isSplatVector(const BuildVectorSDNode *N) { 2368 unsigned int nOps = N->getNumOperands(); 2369 assert(nOps > 1 && "isSplatVector has 0 or 1 sized build vector"); 2370 2371 SDValue Operand0 = N->getOperand(0); 2372 2373 for (unsigned int i = 1; i < nOps; ++i) { 2374 if (N->getOperand(i) != Operand0) 2375 return false; 2376 } 2377 2378 return true; 2379 } 2380 2381 // Lower ISD::EXTRACT_VECTOR_ELT into MipsISD::VEXTRACT_SEXT_ELT. 2382 // 2383 // The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We 2384 // choose to sign-extend but we could have equally chosen zero-extend. The 2385 // DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT 2386 // result into this node later (possibly changing it to a zero-extend in the 2387 // process). 2388 SDValue MipsSETargetLowering:: 2389 lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { 2390 SDLoc DL(Op); 2391 EVT ResTy = Op->getValueType(0); 2392 SDValue Op0 = Op->getOperand(0); 2393 EVT VecTy = Op0->getValueType(0); 2394 2395 if (!VecTy.is128BitVector()) 2396 return SDValue(); 2397 2398 if (ResTy.isInteger()) { 2399 SDValue Op1 = Op->getOperand(1); 2400 EVT EltTy = VecTy.getVectorElementType(); 2401 return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, DL, ResTy, Op0, Op1, 2402 DAG.getValueType(EltTy)); 2403 } 2404 2405 return Op; 2406 } 2407 2408 static bool isConstantOrUndef(const SDValue Op) { 2409 if (Op->isUndef()) 2410 return true; 2411 if (isa<ConstantSDNode>(Op)) 2412 return true; 2413 if (isa<ConstantFPSDNode>(Op)) 2414 return true; 2415 return false; 2416 } 2417 2418 static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) { 2419 for (unsigned i = 0; i < Op->getNumOperands(); ++i) 2420 if (isConstantOrUndef(Op->getOperand(i))) 2421 return true; 2422 return false; 2423 } 2424 2425 // Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the 2426 // backend. 2427 // 2428 // Lowers according to the following rules: 2429 // - Constant splats are legal as-is as long as the SplatBitSize is a power of 2430 // 2 less than or equal to 64 and the value fits into a signed 10-bit 2431 // immediate 2432 // - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize 2433 // is a power of 2 less than or equal to 64 and the value does not fit into a 2434 // signed 10-bit immediate 2435 // - Non-constant splats are legal as-is. 2436 // - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT. 2437 // - All others are illegal and must be expanded. 2438 SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op, 2439 SelectionDAG &DAG) const { 2440 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op); 2441 EVT ResTy = Op->getValueType(0); 2442 SDLoc DL(Op); 2443 APInt SplatValue, SplatUndef; 2444 unsigned SplatBitSize; 2445 bool HasAnyUndefs; 2446 2447 if (!Subtarget.hasMSA() || !ResTy.is128BitVector()) 2448 return SDValue(); 2449 2450 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, 2451 HasAnyUndefs, 8, 2452 !Subtarget.isLittle()) && SplatBitSize <= 64) { 2453 // We can only cope with 8, 16, 32, or 64-bit elements 2454 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 && 2455 SplatBitSize != 64) 2456 return SDValue(); 2457 2458 // If the value isn't an integer type we will have to bitcast 2459 // from an integer type first. Also, if there are any undefs, we must 2460 // lower them to defined values first. 2461 if (ResTy.isInteger() && !HasAnyUndefs) 2462 return Op; 2463 2464 EVT ViaVecTy; 2465 2466 switch (SplatBitSize) { 2467 default: 2468 return SDValue(); 2469 case 8: 2470 ViaVecTy = MVT::v16i8; 2471 break; 2472 case 16: 2473 ViaVecTy = MVT::v8i16; 2474 break; 2475 case 32: 2476 ViaVecTy = MVT::v4i32; 2477 break; 2478 case 64: 2479 // There's no fill.d to fall back on for 64-bit values 2480 return SDValue(); 2481 } 2482 2483 // SelectionDAG::getConstant will promote SplatValue appropriately. 2484 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy); 2485 2486 // Bitcast to the type we originally wanted 2487 if (ViaVecTy != ResTy) 2488 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result); 2489 2490 return Result; 2491 } else if (isSplatVector(Node)) 2492 return Op; 2493 else if (!isConstantOrUndefBUILD_VECTOR(Node)) { 2494 // Use INSERT_VECTOR_ELT operations rather than expand to stores. 2495 // The resulting code is the same length as the expansion, but it doesn't 2496 // use memory operations 2497 EVT ResTy = Node->getValueType(0); 2498 2499 assert(ResTy.isVector()); 2500 2501 unsigned NumElts = ResTy.getVectorNumElements(); 2502 SDValue Vector = DAG.getUNDEF(ResTy); 2503 for (unsigned i = 0; i < NumElts; ++i) { 2504 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, 2505 Node->getOperand(i), 2506 DAG.getConstant(i, DL, MVT::i32)); 2507 } 2508 return Vector; 2509 } 2510 2511 return SDValue(); 2512 } 2513 2514 // Lower VECTOR_SHUFFLE into SHF (if possible). 2515 // 2516 // SHF splits the vector into blocks of four elements, then shuffles these 2517 // elements according to a <4 x i2> constant (encoded as an integer immediate). 2518 // 2519 // It is therefore possible to lower into SHF when the mask takes the form: 2520 // <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...> 2521 // When undef's appear they are treated as if they were whatever value is 2522 // necessary in order to fit the above forms. 2523 // 2524 // For example: 2525 // %2 = shufflevector <8 x i16> %0, <8 x i16> undef, 2526 // <8 x i32> <i32 3, i32 2, i32 1, i32 0, 2527 // i32 7, i32 6, i32 5, i32 4> 2528 // is lowered to: 2529 // (SHF_H $w0, $w1, 27) 2530 // where the 27 comes from: 2531 // 3 + (2 << 2) + (1 << 4) + (0 << 6) 2532 static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy, 2533 SmallVector<int, 16> Indices, 2534 SelectionDAG &DAG) { 2535 int SHFIndices[4] = { -1, -1, -1, -1 }; 2536 2537 if (Indices.size() < 4) 2538 return SDValue(); 2539 2540 for (unsigned i = 0; i < 4; ++i) { 2541 for (unsigned j = i; j < Indices.size(); j += 4) { 2542 int Idx = Indices[j]; 2543 2544 // Convert from vector index to 4-element subvector index 2545 // If an index refers to an element outside of the subvector then give up 2546 if (Idx != -1) { 2547 Idx -= 4 * (j / 4); 2548 if (Idx < 0 || Idx >= 4) 2549 return SDValue(); 2550 } 2551 2552 // If the mask has an undef, replace it with the current index. 2553 // Note that it might still be undef if the current index is also undef 2554 if (SHFIndices[i] == -1) 2555 SHFIndices[i] = Idx; 2556 2557 // Check that non-undef values are the same as in the mask. If they 2558 // aren't then give up 2559 if (!(Idx == -1 || Idx == SHFIndices[i])) 2560 return SDValue(); 2561 } 2562 } 2563 2564 // Calculate the immediate. Replace any remaining undefs with zero 2565 APInt Imm(32, 0); 2566 for (int i = 3; i >= 0; --i) { 2567 int Idx = SHFIndices[i]; 2568 2569 if (Idx == -1) 2570 Idx = 0; 2571 2572 Imm <<= 2; 2573 Imm |= Idx & 0x3; 2574 } 2575 2576 SDLoc DL(Op); 2577 return DAG.getNode(MipsISD::SHF, DL, ResTy, 2578 DAG.getConstant(Imm, DL, MVT::i32), Op->getOperand(0)); 2579 } 2580 2581 /// Determine whether a range fits a regular pattern of values. 2582 /// This function accounts for the possibility of jumping over the End iterator. 2583 template <typename ValType> 2584 static bool 2585 fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin, 2586 unsigned CheckStride, 2587 typename SmallVectorImpl<ValType>::const_iterator End, 2588 ValType ExpectedIndex, unsigned ExpectedIndexStride) { 2589 auto &I = Begin; 2590 2591 while (I != End) { 2592 if (*I != -1 && *I != ExpectedIndex) 2593 return false; 2594 ExpectedIndex += ExpectedIndexStride; 2595 2596 // Incrementing past End is undefined behaviour so we must increment one 2597 // step at a time and check for End at each step. 2598 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I) 2599 ; // Empty loop body. 2600 } 2601 return true; 2602 } 2603 2604 // Determine whether VECTOR_SHUFFLE is a SPLATI. 2605 // 2606 // It is a SPLATI when the mask is: 2607 // <x, x, x, ...> 2608 // where x is any valid index. 2609 // 2610 // When undef's appear in the mask they are treated as if they were whatever 2611 // value is necessary in order to fit the above form. 2612 static bool isVECTOR_SHUFFLE_SPLATI(SDValue Op, EVT ResTy, 2613 SmallVector<int, 16> Indices, 2614 SelectionDAG &DAG) { 2615 assert((Indices.size() % 2) == 0); 2616 2617 int SplatIndex = -1; 2618 for (const auto &V : Indices) { 2619 if (V != -1) { 2620 SplatIndex = V; 2621 break; 2622 } 2623 } 2624 2625 return fitsRegularPattern<int>(Indices.begin(), 1, Indices.end(), SplatIndex, 2626 0); 2627 } 2628 2629 // Lower VECTOR_SHUFFLE into ILVEV (if possible). 2630 // 2631 // ILVEV interleaves the even elements from each vector. 2632 // 2633 // It is possible to lower into ILVEV when the mask consists of two of the 2634 // following forms interleaved: 2635 // <0, 2, 4, ...> 2636 // <n, n+2, n+4, ...> 2637 // where n is the number of elements in the vector. 2638 // For example: 2639 // <0, 0, 2, 2, 4, 4, ...> 2640 // <0, n, 2, n+2, 4, n+4, ...> 2641 // 2642 // When undef's appear in the mask they are treated as if they were whatever 2643 // value is necessary in order to fit the above forms. 2644 static SDValue lowerVECTOR_SHUFFLE_ILVEV(SDValue Op, EVT ResTy, 2645 SmallVector<int, 16> Indices, 2646 SelectionDAG &DAG) { 2647 assert((Indices.size() % 2) == 0); 2648 2649 SDValue Wt; 2650 SDValue Ws; 2651 const auto &Begin = Indices.begin(); 2652 const auto &End = Indices.end(); 2653 2654 // Check even elements are taken from the even elements of one half or the 2655 // other and pick an operand accordingly. 2656 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2)) 2657 Wt = Op->getOperand(0); 2658 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 2)) 2659 Wt = Op->getOperand(1); 2660 else 2661 return SDValue(); 2662 2663 // Check odd elements are taken from the even elements of one half or the 2664 // other and pick an operand accordingly. 2665 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2)) 2666 Ws = Op->getOperand(0); 2667 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 2)) 2668 Ws = Op->getOperand(1); 2669 else 2670 return SDValue(); 2671 2672 return DAG.getNode(MipsISD::ILVEV, SDLoc(Op), ResTy, Ws, Wt); 2673 } 2674 2675 // Lower VECTOR_SHUFFLE into ILVOD (if possible). 2676 // 2677 // ILVOD interleaves the odd elements from each vector. 2678 // 2679 // It is possible to lower into ILVOD when the mask consists of two of the 2680 // following forms interleaved: 2681 // <1, 3, 5, ...> 2682 // <n+1, n+3, n+5, ...> 2683 // where n is the number of elements in the vector. 2684 // For example: 2685 // <1, 1, 3, 3, 5, 5, ...> 2686 // <1, n+1, 3, n+3, 5, n+5, ...> 2687 // 2688 // When undef's appear in the mask they are treated as if they were whatever 2689 // value is necessary in order to fit the above forms. 2690 static SDValue lowerVECTOR_SHUFFLE_ILVOD(SDValue Op, EVT ResTy, 2691 SmallVector<int, 16> Indices, 2692 SelectionDAG &DAG) { 2693 assert((Indices.size() % 2) == 0); 2694 2695 SDValue Wt; 2696 SDValue Ws; 2697 const auto &Begin = Indices.begin(); 2698 const auto &End = Indices.end(); 2699 2700 // Check even elements are taken from the odd elements of one half or the 2701 // other and pick an operand accordingly. 2702 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2)) 2703 Wt = Op->getOperand(0); 2704 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + 1, 2)) 2705 Wt = Op->getOperand(1); 2706 else 2707 return SDValue(); 2708 2709 // Check odd elements are taken from the odd elements of one half or the 2710 // other and pick an operand accordingly. 2711 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2)) 2712 Ws = Op->getOperand(0); 2713 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + 1, 2)) 2714 Ws = Op->getOperand(1); 2715 else 2716 return SDValue(); 2717 2718 return DAG.getNode(MipsISD::ILVOD, SDLoc(Op), ResTy, Wt, Ws); 2719 } 2720 2721 // Lower VECTOR_SHUFFLE into ILVR (if possible). 2722 // 2723 // ILVR interleaves consecutive elements from the right (lowest-indexed) half of 2724 // each vector. 2725 // 2726 // It is possible to lower into ILVR when the mask consists of two of the 2727 // following forms interleaved: 2728 // <0, 1, 2, ...> 2729 // <n, n+1, n+2, ...> 2730 // where n is the number of elements in the vector. 2731 // For example: 2732 // <0, 0, 1, 1, 2, 2, ...> 2733 // <0, n, 1, n+1, 2, n+2, ...> 2734 // 2735 // When undef's appear in the mask they are treated as if they were whatever 2736 // value is necessary in order to fit the above forms. 2737 static SDValue lowerVECTOR_SHUFFLE_ILVR(SDValue Op, EVT ResTy, 2738 SmallVector<int, 16> Indices, 2739 SelectionDAG &DAG) { 2740 assert((Indices.size() % 2) == 0); 2741 2742 SDValue Wt; 2743 SDValue Ws; 2744 const auto &Begin = Indices.begin(); 2745 const auto &End = Indices.end(); 2746 2747 // Check even elements are taken from the right (lowest-indexed) elements of 2748 // one half or the other and pick an operand accordingly. 2749 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1)) 2750 Wt = Op->getOperand(0); 2751 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 1)) 2752 Wt = Op->getOperand(1); 2753 else 2754 return SDValue(); 2755 2756 // Check odd elements are taken from the right (lowest-indexed) elements of 2757 // one half or the other and pick an operand accordingly. 2758 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1)) 2759 Ws = Op->getOperand(0); 2760 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 1)) 2761 Ws = Op->getOperand(1); 2762 else 2763 return SDValue(); 2764 2765 return DAG.getNode(MipsISD::ILVR, SDLoc(Op), ResTy, Ws, Wt); 2766 } 2767 2768 // Lower VECTOR_SHUFFLE into ILVL (if possible). 2769 // 2770 // ILVL interleaves consecutive elements from the left (highest-indexed) half 2771 // of each vector. 2772 // 2773 // It is possible to lower into ILVL when the mask consists of two of the 2774 // following forms interleaved: 2775 // <x, x+1, x+2, ...> 2776 // <n+x, n+x+1, n+x+2, ...> 2777 // where n is the number of elements in the vector and x is half n. 2778 // For example: 2779 // <x, x, x+1, x+1, x+2, x+2, ...> 2780 // <x, n+x, x+1, n+x+1, x+2, n+x+2, ...> 2781 // 2782 // When undef's appear in the mask they are treated as if they were whatever 2783 // value is necessary in order to fit the above forms. 2784 static SDValue lowerVECTOR_SHUFFLE_ILVL(SDValue Op, EVT ResTy, 2785 SmallVector<int, 16> Indices, 2786 SelectionDAG &DAG) { 2787 assert((Indices.size() % 2) == 0); 2788 2789 unsigned HalfSize = Indices.size() / 2; 2790 SDValue Wt; 2791 SDValue Ws; 2792 const auto &Begin = Indices.begin(); 2793 const auto &End = Indices.end(); 2794 2795 // Check even elements are taken from the left (highest-indexed) elements of 2796 // one half or the other and pick an operand accordingly. 2797 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1)) 2798 Wt = Op->getOperand(0); 2799 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + HalfSize, 1)) 2800 Wt = Op->getOperand(1); 2801 else 2802 return SDValue(); 2803 2804 // Check odd elements are taken from the left (highest-indexed) elements of 2805 // one half or the other and pick an operand accordingly. 2806 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1)) 2807 Ws = Op->getOperand(0); 2808 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + HalfSize, 2809 1)) 2810 Ws = Op->getOperand(1); 2811 else 2812 return SDValue(); 2813 2814 return DAG.getNode(MipsISD::ILVL, SDLoc(Op), ResTy, Ws, Wt); 2815 } 2816 2817 // Lower VECTOR_SHUFFLE into PCKEV (if possible). 2818 // 2819 // PCKEV copies the even elements of each vector into the result vector. 2820 // 2821 // It is possible to lower into PCKEV when the mask consists of two of the 2822 // following forms concatenated: 2823 // <0, 2, 4, ...> 2824 // <n, n+2, n+4, ...> 2825 // where n is the number of elements in the vector. 2826 // For example: 2827 // <0, 2, 4, ..., 0, 2, 4, ...> 2828 // <0, 2, 4, ..., n, n+2, n+4, ...> 2829 // 2830 // When undef's appear in the mask they are treated as if they were whatever 2831 // value is necessary in order to fit the above forms. 2832 static SDValue lowerVECTOR_SHUFFLE_PCKEV(SDValue Op, EVT ResTy, 2833 SmallVector<int, 16> Indices, 2834 SelectionDAG &DAG) { 2835 assert((Indices.size() % 2) == 0); 2836 2837 SDValue Wt; 2838 SDValue Ws; 2839 const auto &Begin = Indices.begin(); 2840 const auto &Mid = Indices.begin() + Indices.size() / 2; 2841 const auto &End = Indices.end(); 2842 2843 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2)) 2844 Wt = Op->getOperand(0); 2845 else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size(), 2)) 2846 Wt = Op->getOperand(1); 2847 else 2848 return SDValue(); 2849 2850 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2)) 2851 Ws = Op->getOperand(0); 2852 else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size(), 2)) 2853 Ws = Op->getOperand(1); 2854 else 2855 return SDValue(); 2856 2857 return DAG.getNode(MipsISD::PCKEV, SDLoc(Op), ResTy, Ws, Wt); 2858 } 2859 2860 // Lower VECTOR_SHUFFLE into PCKOD (if possible). 2861 // 2862 // PCKOD copies the odd elements of each vector into the result vector. 2863 // 2864 // It is possible to lower into PCKOD when the mask consists of two of the 2865 // following forms concatenated: 2866 // <1, 3, 5, ...> 2867 // <n+1, n+3, n+5, ...> 2868 // where n is the number of elements in the vector. 2869 // For example: 2870 // <1, 3, 5, ..., 1, 3, 5, ...> 2871 // <1, 3, 5, ..., n+1, n+3, n+5, ...> 2872 // 2873 // When undef's appear in the mask they are treated as if they were whatever 2874 // value is necessary in order to fit the above forms. 2875 static SDValue lowerVECTOR_SHUFFLE_PCKOD(SDValue Op, EVT ResTy, 2876 SmallVector<int, 16> Indices, 2877 SelectionDAG &DAG) { 2878 assert((Indices.size() % 2) == 0); 2879 2880 SDValue Wt; 2881 SDValue Ws; 2882 const auto &Begin = Indices.begin(); 2883 const auto &Mid = Indices.begin() + Indices.size() / 2; 2884 const auto &End = Indices.end(); 2885 2886 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2)) 2887 Wt = Op->getOperand(0); 2888 else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size() + 1, 2)) 2889 Wt = Op->getOperand(1); 2890 else 2891 return SDValue(); 2892 2893 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2)) 2894 Ws = Op->getOperand(0); 2895 else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size() + 1, 2)) 2896 Ws = Op->getOperand(1); 2897 else 2898 return SDValue(); 2899 2900 return DAG.getNode(MipsISD::PCKOD, SDLoc(Op), ResTy, Ws, Wt); 2901 } 2902 2903 // Lower VECTOR_SHUFFLE into VSHF. 2904 // 2905 // This mostly consists of converting the shuffle indices in Indices into a 2906 // BUILD_VECTOR and adding it as an operand to the resulting VSHF. There is 2907 // also code to eliminate unused operands of the VECTOR_SHUFFLE. For example, 2908 // if the type is v8i16 and all the indices are less than 8 then the second 2909 // operand is unused and can be replaced with anything. We choose to replace it 2910 // with the used operand since this reduces the number of instructions overall. 2911 static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy, 2912 SmallVector<int, 16> Indices, 2913 SelectionDAG &DAG) { 2914 SmallVector<SDValue, 16> Ops; 2915 SDValue Op0; 2916 SDValue Op1; 2917 EVT MaskVecTy = ResTy.changeVectorElementTypeToInteger(); 2918 EVT MaskEltTy = MaskVecTy.getVectorElementType(); 2919 bool Using1stVec = false; 2920 bool Using2ndVec = false; 2921 SDLoc DL(Op); 2922 int ResTyNumElts = ResTy.getVectorNumElements(); 2923 2924 for (int i = 0; i < ResTyNumElts; ++i) { 2925 // Idx == -1 means UNDEF 2926 int Idx = Indices[i]; 2927 2928 if (0 <= Idx && Idx < ResTyNumElts) 2929 Using1stVec = true; 2930 if (ResTyNumElts <= Idx && Idx < ResTyNumElts * 2) 2931 Using2ndVec = true; 2932 } 2933 2934 for (SmallVector<int, 16>::iterator I = Indices.begin(); I != Indices.end(); 2935 ++I) 2936 Ops.push_back(DAG.getTargetConstant(*I, DL, MaskEltTy)); 2937 2938 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops); 2939 2940 if (Using1stVec && Using2ndVec) { 2941 Op0 = Op->getOperand(0); 2942 Op1 = Op->getOperand(1); 2943 } else if (Using1stVec) 2944 Op0 = Op1 = Op->getOperand(0); 2945 else if (Using2ndVec) 2946 Op0 = Op1 = Op->getOperand(1); 2947 else 2948 llvm_unreachable("shuffle vector mask references neither vector operand?"); 2949 2950 // VECTOR_SHUFFLE concatenates the vectors in an vectorwise fashion. 2951 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11> 2952 // VSHF concatenates the vectors in a bitwise fashion: 2953 // <0b00, 0b01> + <0b10, 0b11> -> 2954 // 0b0100 + 0b1110 -> 0b01001110 2955 // <0b10, 0b11, 0b00, 0b01> 2956 // We must therefore swap the operands to get the correct result. 2957 return DAG.getNode(MipsISD::VSHF, DL, ResTy, MaskVec, Op1, Op0); 2958 } 2959 2960 // Lower VECTOR_SHUFFLE into one of a number of instructions depending on the 2961 // indices in the shuffle. 2962 SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, 2963 SelectionDAG &DAG) const { 2964 ShuffleVectorSDNode *Node = cast<ShuffleVectorSDNode>(Op); 2965 EVT ResTy = Op->getValueType(0); 2966 2967 if (!ResTy.is128BitVector()) 2968 return SDValue(); 2969 2970 int ResTyNumElts = ResTy.getVectorNumElements(); 2971 SmallVector<int, 16> Indices; 2972 2973 for (int i = 0; i < ResTyNumElts; ++i) 2974 Indices.push_back(Node->getMaskElt(i)); 2975 2976 // splati.[bhwd] is preferable to the others but is matched from 2977 // MipsISD::VSHF. 2978 if (isVECTOR_SHUFFLE_SPLATI(Op, ResTy, Indices, DAG)) 2979 return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG); 2980 SDValue Result; 2981 if ((Result = lowerVECTOR_SHUFFLE_ILVEV(Op, ResTy, Indices, DAG))) 2982 return Result; 2983 if ((Result = lowerVECTOR_SHUFFLE_ILVOD(Op, ResTy, Indices, DAG))) 2984 return Result; 2985 if ((Result = lowerVECTOR_SHUFFLE_ILVL(Op, ResTy, Indices, DAG))) 2986 return Result; 2987 if ((Result = lowerVECTOR_SHUFFLE_ILVR(Op, ResTy, Indices, DAG))) 2988 return Result; 2989 if ((Result = lowerVECTOR_SHUFFLE_PCKEV(Op, ResTy, Indices, DAG))) 2990 return Result; 2991 if ((Result = lowerVECTOR_SHUFFLE_PCKOD(Op, ResTy, Indices, DAG))) 2992 return Result; 2993 if ((Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG))) 2994 return Result; 2995 return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG); 2996 } 2997 2998 MachineBasicBlock * 2999 MipsSETargetLowering::emitBPOSGE32(MachineInstr &MI, 3000 MachineBasicBlock *BB) const { 3001 // $bb: 3002 // bposge32_pseudo $vr0 3003 // => 3004 // $bb: 3005 // bposge32 $tbb 3006 // $fbb: 3007 // li $vr2, 0 3008 // b $sink 3009 // $tbb: 3010 // li $vr1, 1 3011 // $sink: 3012 // $vr0 = phi($vr2, $fbb, $vr1, $tbb) 3013 3014 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3015 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3016 const TargetRegisterClass *RC = &Mips::GPR32RegClass; 3017 DebugLoc DL = MI.getDebugLoc(); 3018 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 3019 MachineFunction::iterator It = std::next(MachineFunction::iterator(BB)); 3020 MachineFunction *F = BB->getParent(); 3021 MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB); 3022 MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB); 3023 MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB); 3024 F->insert(It, FBB); 3025 F->insert(It, TBB); 3026 F->insert(It, Sink); 3027 3028 // Transfer the remainder of BB and its successor edges to Sink. 3029 Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), 3030 BB->end()); 3031 Sink->transferSuccessorsAndUpdatePHIs(BB); 3032 3033 // Add successors. 3034 BB->addSuccessor(FBB); 3035 BB->addSuccessor(TBB); 3036 FBB->addSuccessor(Sink); 3037 TBB->addSuccessor(Sink); 3038 3039 // Insert the real bposge32 instruction to $BB. 3040 BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB); 3041 // Insert the real bposge32c instruction to $BB. 3042 BuildMI(BB, DL, TII->get(Mips::BPOSGE32C_MMR3)).addMBB(TBB); 3043 3044 // Fill $FBB. 3045 unsigned VR2 = RegInfo.createVirtualRegister(RC); 3046 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2) 3047 .addReg(Mips::ZERO).addImm(0); 3048 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink); 3049 3050 // Fill $TBB. 3051 unsigned VR1 = RegInfo.createVirtualRegister(RC); 3052 BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1) 3053 .addReg(Mips::ZERO).addImm(1); 3054 3055 // Insert phi function to $Sink. 3056 BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI), 3057 MI.getOperand(0).getReg()) 3058 .addReg(VR2) 3059 .addMBB(FBB) 3060 .addReg(VR1) 3061 .addMBB(TBB); 3062 3063 MI.eraseFromParent(); // The pseudo instruction is gone now. 3064 return Sink; 3065 } 3066 3067 MachineBasicBlock *MipsSETargetLowering::emitMSACBranchPseudo( 3068 MachineInstr &MI, MachineBasicBlock *BB, unsigned BranchOp) const { 3069 // $bb: 3070 // vany_nonzero $rd, $ws 3071 // => 3072 // $bb: 3073 // bnz.b $ws, $tbb 3074 // b $fbb 3075 // $fbb: 3076 // li $rd1, 0 3077 // b $sink 3078 // $tbb: 3079 // li $rd2, 1 3080 // $sink: 3081 // $rd = phi($rd1, $fbb, $rd2, $tbb) 3082 3083 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3084 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3085 const TargetRegisterClass *RC = &Mips::GPR32RegClass; 3086 DebugLoc DL = MI.getDebugLoc(); 3087 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 3088 MachineFunction::iterator It = std::next(MachineFunction::iterator(BB)); 3089 MachineFunction *F = BB->getParent(); 3090 MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB); 3091 MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB); 3092 MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB); 3093 F->insert(It, FBB); 3094 F->insert(It, TBB); 3095 F->insert(It, Sink); 3096 3097 // Transfer the remainder of BB and its successor edges to Sink. 3098 Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), 3099 BB->end()); 3100 Sink->transferSuccessorsAndUpdatePHIs(BB); 3101 3102 // Add successors. 3103 BB->addSuccessor(FBB); 3104 BB->addSuccessor(TBB); 3105 FBB->addSuccessor(Sink); 3106 TBB->addSuccessor(Sink); 3107 3108 // Insert the real bnz.b instruction to $BB. 3109 BuildMI(BB, DL, TII->get(BranchOp)) 3110 .addReg(MI.getOperand(1).getReg()) 3111 .addMBB(TBB); 3112 3113 // Fill $FBB. 3114 unsigned RD1 = RegInfo.createVirtualRegister(RC); 3115 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), RD1) 3116 .addReg(Mips::ZERO).addImm(0); 3117 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink); 3118 3119 // Fill $TBB. 3120 unsigned RD2 = RegInfo.createVirtualRegister(RC); 3121 BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), RD2) 3122 .addReg(Mips::ZERO).addImm(1); 3123 3124 // Insert phi function to $Sink. 3125 BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI), 3126 MI.getOperand(0).getReg()) 3127 .addReg(RD1) 3128 .addMBB(FBB) 3129 .addReg(RD2) 3130 .addMBB(TBB); 3131 3132 MI.eraseFromParent(); // The pseudo instruction is gone now. 3133 return Sink; 3134 } 3135 3136 // Emit the COPY_FW pseudo instruction. 3137 // 3138 // copy_fw_pseudo $fd, $ws, n 3139 // => 3140 // copy_u_w $rt, $ws, $n 3141 // mtc1 $rt, $fd 3142 // 3143 // When n is zero, the equivalent operation can be performed with (potentially) 3144 // zero instructions due to register overlaps. This optimization is never valid 3145 // for lane 1 because it would require FR=0 mode which isn't supported by MSA. 3146 MachineBasicBlock * 3147 MipsSETargetLowering::emitCOPY_FW(MachineInstr &MI, 3148 MachineBasicBlock *BB) const { 3149 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3150 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3151 DebugLoc DL = MI.getDebugLoc(); 3152 unsigned Fd = MI.getOperand(0).getReg(); 3153 unsigned Ws = MI.getOperand(1).getReg(); 3154 unsigned Lane = MI.getOperand(2).getImm(); 3155 3156 if (Lane == 0) { 3157 unsigned Wt = Ws; 3158 if (!Subtarget.useOddSPReg()) { 3159 // We must copy to an even-numbered MSA register so that the 3160 // single-precision sub-register is also guaranteed to be even-numbered. 3161 Wt = RegInfo.createVirtualRegister(&Mips::MSA128WEvensRegClass); 3162 3163 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Wt).addReg(Ws); 3164 } 3165 3166 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo); 3167 } else { 3168 unsigned Wt = RegInfo.createVirtualRegister( 3169 Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass : 3170 &Mips::MSA128WEvensRegClass); 3171 3172 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wt).addReg(Ws).addImm(Lane); 3173 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo); 3174 } 3175 3176 MI.eraseFromParent(); // The pseudo instruction is gone now. 3177 return BB; 3178 } 3179 3180 // Emit the COPY_FD pseudo instruction. 3181 // 3182 // copy_fd_pseudo $fd, $ws, n 3183 // => 3184 // splati.d $wt, $ws, $n 3185 // copy $fd, $wt:sub_64 3186 // 3187 // When n is zero, the equivalent operation can be performed with (potentially) 3188 // zero instructions due to register overlaps. This optimization is always 3189 // valid because FR=1 mode which is the only supported mode in MSA. 3190 MachineBasicBlock * 3191 MipsSETargetLowering::emitCOPY_FD(MachineInstr &MI, 3192 MachineBasicBlock *BB) const { 3193 assert(Subtarget.isFP64bit()); 3194 3195 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3196 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3197 unsigned Fd = MI.getOperand(0).getReg(); 3198 unsigned Ws = MI.getOperand(1).getReg(); 3199 unsigned Lane = MI.getOperand(2).getImm() * 2; 3200 DebugLoc DL = MI.getDebugLoc(); 3201 3202 if (Lane == 0) 3203 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, 0, Mips::sub_64); 3204 else { 3205 unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); 3206 3207 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wt).addReg(Ws).addImm(1); 3208 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_64); 3209 } 3210 3211 MI.eraseFromParent(); // The pseudo instruction is gone now. 3212 return BB; 3213 } 3214 3215 // Emit the INSERT_FW pseudo instruction. 3216 // 3217 // insert_fw_pseudo $wd, $wd_in, $n, $fs 3218 // => 3219 // subreg_to_reg $wt:sub_lo, $fs 3220 // insve_w $wd[$n], $wd_in, $wt[0] 3221 MachineBasicBlock * 3222 MipsSETargetLowering::emitINSERT_FW(MachineInstr &MI, 3223 MachineBasicBlock *BB) const { 3224 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3225 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3226 DebugLoc DL = MI.getDebugLoc(); 3227 unsigned Wd = MI.getOperand(0).getReg(); 3228 unsigned Wd_in = MI.getOperand(1).getReg(); 3229 unsigned Lane = MI.getOperand(2).getImm(); 3230 unsigned Fs = MI.getOperand(3).getReg(); 3231 unsigned Wt = RegInfo.createVirtualRegister( 3232 Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass : 3233 &Mips::MSA128WEvensRegClass); 3234 3235 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt) 3236 .addImm(0) 3237 .addReg(Fs) 3238 .addImm(Mips::sub_lo); 3239 BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_W), Wd) 3240 .addReg(Wd_in) 3241 .addImm(Lane) 3242 .addReg(Wt) 3243 .addImm(0); 3244 3245 MI.eraseFromParent(); // The pseudo instruction is gone now. 3246 return BB; 3247 } 3248 3249 // Emit the INSERT_FD pseudo instruction. 3250 // 3251 // insert_fd_pseudo $wd, $fs, n 3252 // => 3253 // subreg_to_reg $wt:sub_64, $fs 3254 // insve_d $wd[$n], $wd_in, $wt[0] 3255 MachineBasicBlock * 3256 MipsSETargetLowering::emitINSERT_FD(MachineInstr &MI, 3257 MachineBasicBlock *BB) const { 3258 assert(Subtarget.isFP64bit()); 3259 3260 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3261 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3262 DebugLoc DL = MI.getDebugLoc(); 3263 unsigned Wd = MI.getOperand(0).getReg(); 3264 unsigned Wd_in = MI.getOperand(1).getReg(); 3265 unsigned Lane = MI.getOperand(2).getImm(); 3266 unsigned Fs = MI.getOperand(3).getReg(); 3267 unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); 3268 3269 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt) 3270 .addImm(0) 3271 .addReg(Fs) 3272 .addImm(Mips::sub_64); 3273 BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_D), Wd) 3274 .addReg(Wd_in) 3275 .addImm(Lane) 3276 .addReg(Wt) 3277 .addImm(0); 3278 3279 MI.eraseFromParent(); // The pseudo instruction is gone now. 3280 return BB; 3281 } 3282 3283 // Emit the INSERT_([BHWD]|F[WD])_VIDX pseudo instruction. 3284 // 3285 // For integer: 3286 // (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $rs) 3287 // => 3288 // (SLL $lanetmp1, $lane, <log2size) 3289 // (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1) 3290 // (INSERT_[BHWD], $wdtmp2, $wdtmp1, 0, $rs) 3291 // (NEG $lanetmp2, $lanetmp1) 3292 // (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2) 3293 // 3294 // For floating point: 3295 // (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $fs) 3296 // => 3297 // (SUBREG_TO_REG $wt, $fs, <subreg>) 3298 // (SLL $lanetmp1, $lane, <log2size) 3299 // (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1) 3300 // (INSVE_[WD], $wdtmp2, 0, $wdtmp1, 0) 3301 // (NEG $lanetmp2, $lanetmp1) 3302 // (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2) 3303 MachineBasicBlock *MipsSETargetLowering::emitINSERT_DF_VIDX( 3304 MachineInstr &MI, MachineBasicBlock *BB, unsigned EltSizeInBytes, 3305 bool IsFP) const { 3306 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3307 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3308 DebugLoc DL = MI.getDebugLoc(); 3309 unsigned Wd = MI.getOperand(0).getReg(); 3310 unsigned SrcVecReg = MI.getOperand(1).getReg(); 3311 unsigned LaneReg = MI.getOperand(2).getReg(); 3312 unsigned SrcValReg = MI.getOperand(3).getReg(); 3313 3314 const TargetRegisterClass *VecRC = nullptr; 3315 // FIXME: This should be true for N32 too. 3316 const TargetRegisterClass *GPRRC = 3317 Subtarget.isABI_N64() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; 3318 unsigned SubRegIdx = Subtarget.isABI_N64() ? Mips::sub_32 : 0; 3319 unsigned ShiftOp = Subtarget.isABI_N64() ? Mips::DSLL : Mips::SLL; 3320 unsigned EltLog2Size; 3321 unsigned InsertOp = 0; 3322 unsigned InsveOp = 0; 3323 switch (EltSizeInBytes) { 3324 default: 3325 llvm_unreachable("Unexpected size"); 3326 case 1: 3327 EltLog2Size = 0; 3328 InsertOp = Mips::INSERT_B; 3329 InsveOp = Mips::INSVE_B; 3330 VecRC = &Mips::MSA128BRegClass; 3331 break; 3332 case 2: 3333 EltLog2Size = 1; 3334 InsertOp = Mips::INSERT_H; 3335 InsveOp = Mips::INSVE_H; 3336 VecRC = &Mips::MSA128HRegClass; 3337 break; 3338 case 4: 3339 EltLog2Size = 2; 3340 InsertOp = Mips::INSERT_W; 3341 InsveOp = Mips::INSVE_W; 3342 VecRC = &Mips::MSA128WRegClass; 3343 break; 3344 case 8: 3345 EltLog2Size = 3; 3346 InsertOp = Mips::INSERT_D; 3347 InsveOp = Mips::INSVE_D; 3348 VecRC = &Mips::MSA128DRegClass; 3349 break; 3350 } 3351 3352 if (IsFP) { 3353 unsigned Wt = RegInfo.createVirtualRegister(VecRC); 3354 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt) 3355 .addImm(0) 3356 .addReg(SrcValReg) 3357 .addImm(EltSizeInBytes == 8 ? Mips::sub_64 : Mips::sub_lo); 3358 SrcValReg = Wt; 3359 } 3360 3361 // Convert the lane index into a byte index 3362 if (EltSizeInBytes != 1) { 3363 unsigned LaneTmp1 = RegInfo.createVirtualRegister(GPRRC); 3364 BuildMI(*BB, MI, DL, TII->get(ShiftOp), LaneTmp1) 3365 .addReg(LaneReg) 3366 .addImm(EltLog2Size); 3367 LaneReg = LaneTmp1; 3368 } 3369 3370 // Rotate bytes around so that the desired lane is element zero 3371 unsigned WdTmp1 = RegInfo.createVirtualRegister(VecRC); 3372 BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), WdTmp1) 3373 .addReg(SrcVecReg) 3374 .addReg(SrcVecReg) 3375 .addReg(LaneReg, 0, SubRegIdx); 3376 3377 unsigned WdTmp2 = RegInfo.createVirtualRegister(VecRC); 3378 if (IsFP) { 3379 // Use insve.df to insert to element zero 3380 BuildMI(*BB, MI, DL, TII->get(InsveOp), WdTmp2) 3381 .addReg(WdTmp1) 3382 .addImm(0) 3383 .addReg(SrcValReg) 3384 .addImm(0); 3385 } else { 3386 // Use insert.df to insert to element zero 3387 BuildMI(*BB, MI, DL, TII->get(InsertOp), WdTmp2) 3388 .addReg(WdTmp1) 3389 .addReg(SrcValReg) 3390 .addImm(0); 3391 } 3392 3393 // Rotate elements the rest of the way for a full rotation. 3394 // sld.df inteprets $rt modulo the number of columns so we only need to negate 3395 // the lane index to do this. 3396 unsigned LaneTmp2 = RegInfo.createVirtualRegister(GPRRC); 3397 BuildMI(*BB, MI, DL, TII->get(Subtarget.isABI_N64() ? Mips::DSUB : Mips::SUB), 3398 LaneTmp2) 3399 .addReg(Subtarget.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO) 3400 .addReg(LaneReg); 3401 BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), Wd) 3402 .addReg(WdTmp2) 3403 .addReg(WdTmp2) 3404 .addReg(LaneTmp2, 0, SubRegIdx); 3405 3406 MI.eraseFromParent(); // The pseudo instruction is gone now. 3407 return BB; 3408 } 3409 3410 // Emit the FILL_FW pseudo instruction. 3411 // 3412 // fill_fw_pseudo $wd, $fs 3413 // => 3414 // implicit_def $wt1 3415 // insert_subreg $wt2:subreg_lo, $wt1, $fs 3416 // splati.w $wd, $wt2[0] 3417 MachineBasicBlock * 3418 MipsSETargetLowering::emitFILL_FW(MachineInstr &MI, 3419 MachineBasicBlock *BB) const { 3420 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3421 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3422 DebugLoc DL = MI.getDebugLoc(); 3423 unsigned Wd = MI.getOperand(0).getReg(); 3424 unsigned Fs = MI.getOperand(1).getReg(); 3425 unsigned Wt1 = RegInfo.createVirtualRegister( 3426 Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass 3427 : &Mips::MSA128WEvensRegClass); 3428 unsigned Wt2 = RegInfo.createVirtualRegister( 3429 Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass 3430 : &Mips::MSA128WEvensRegClass); 3431 3432 BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1); 3433 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2) 3434 .addReg(Wt1) 3435 .addReg(Fs) 3436 .addImm(Mips::sub_lo); 3437 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wd).addReg(Wt2).addImm(0); 3438 3439 MI.eraseFromParent(); // The pseudo instruction is gone now. 3440 return BB; 3441 } 3442 3443 // Emit the FILL_FD pseudo instruction. 3444 // 3445 // fill_fd_pseudo $wd, $fs 3446 // => 3447 // implicit_def $wt1 3448 // insert_subreg $wt2:subreg_64, $wt1, $fs 3449 // splati.d $wd, $wt2[0] 3450 MachineBasicBlock * 3451 MipsSETargetLowering::emitFILL_FD(MachineInstr &MI, 3452 MachineBasicBlock *BB) const { 3453 assert(Subtarget.isFP64bit()); 3454 3455 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3456 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3457 DebugLoc DL = MI.getDebugLoc(); 3458 unsigned Wd = MI.getOperand(0).getReg(); 3459 unsigned Fs = MI.getOperand(1).getReg(); 3460 unsigned Wt1 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); 3461 unsigned Wt2 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); 3462 3463 BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1); 3464 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2) 3465 .addReg(Wt1) 3466 .addReg(Fs) 3467 .addImm(Mips::sub_64); 3468 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wd).addReg(Wt2).addImm(0); 3469 3470 MI.eraseFromParent(); // The pseudo instruction is gone now. 3471 return BB; 3472 } 3473 3474 // Emit the ST_F16_PSEDUO instruction to store a f16 value from an MSA 3475 // register. 3476 // 3477 // STF16 MSA128F16:$wd, mem_simm10:$addr 3478 // => 3479 // copy_u.h $rtemp,$wd[0] 3480 // sh $rtemp, $addr 3481 // 3482 // Safety: We can't use st.h & co as they would over write the memory after 3483 // the destination. It would require half floats be allocated 16 bytes(!) of 3484 // space. 3485 MachineBasicBlock * 3486 MipsSETargetLowering::emitST_F16_PSEUDO(MachineInstr &MI, 3487 MachineBasicBlock *BB) const { 3488 3489 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3490 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3491 DebugLoc DL = MI.getDebugLoc(); 3492 unsigned Ws = MI.getOperand(0).getReg(); 3493 unsigned Rt = MI.getOperand(1).getReg(); 3494 const MachineMemOperand &MMO = **MI.memoperands_begin(); 3495 unsigned Imm = MMO.getOffset(); 3496 3497 // Caution: A load via the GOT can expand to a GPR32 operand, a load via 3498 // spill and reload can expand as a GPR64 operand. Examine the 3499 // operand in detail and default to ABI. 3500 const TargetRegisterClass *RC = 3501 MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg()) 3502 : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass 3503 : &Mips::GPR64RegClass); 3504 const bool UsingMips32 = RC == &Mips::GPR32RegClass; 3505 unsigned Rs = RegInfo.createVirtualRegister(&Mips::GPR32RegClass); 3506 3507 BuildMI(*BB, MI, DL, TII->get(Mips::COPY_U_H), Rs).addReg(Ws).addImm(0); 3508 if(!UsingMips32) { 3509 unsigned Tmp = RegInfo.createVirtualRegister(&Mips::GPR64RegClass); 3510 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Tmp) 3511 .addImm(0) 3512 .addReg(Rs) 3513 .addImm(Mips::sub_32); 3514 Rs = Tmp; 3515 } 3516 BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::SH : Mips::SH64)) 3517 .addReg(Rs) 3518 .addReg(Rt) 3519 .addImm(Imm) 3520 .addMemOperand(BB->getParent()->getMachineMemOperand( 3521 &MMO, MMO.getOffset(), MMO.getSize())); 3522 3523 MI.eraseFromParent(); 3524 return BB; 3525 } 3526 3527 // Emit the LD_F16_PSEDUO instruction to load a f16 value into an MSA register. 3528 // 3529 // LD_F16 MSA128F16:$wd, mem_simm10:$addr 3530 // => 3531 // lh $rtemp, $addr 3532 // fill.h $wd, $rtemp 3533 // 3534 // Safety: We can't use ld.h & co as they over-read from the source. 3535 // Additionally, if the address is not modulo 16, 2 cases can occur: 3536 // a) Segmentation fault as the load instruction reads from a memory page 3537 // memory it's not supposed to. 3538 // b) The load crosses an implementation specific boundary, requiring OS 3539 // intervention. 3540 MachineBasicBlock * 3541 MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI, 3542 MachineBasicBlock *BB) const { 3543 3544 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3545 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3546 DebugLoc DL = MI.getDebugLoc(); 3547 unsigned Wd = MI.getOperand(0).getReg(); 3548 3549 // Caution: A load via the GOT can expand to a GPR32 operand, a load via 3550 // spill and reload can expand as a GPR64 operand. Examine the 3551 // operand in detail and default to ABI. 3552 const TargetRegisterClass *RC = 3553 MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg()) 3554 : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass 3555 : &Mips::GPR64RegClass); 3556 3557 const bool UsingMips32 = RC == &Mips::GPR32RegClass; 3558 unsigned Rt = RegInfo.createVirtualRegister(RC); 3559 3560 MachineInstrBuilder MIB = 3561 BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::LH : Mips::LH64), Rt); 3562 for (unsigned i = 1; i < MI.getNumOperands(); i++) 3563 MIB.add(MI.getOperand(i)); 3564 3565 if(!UsingMips32) { 3566 unsigned Tmp = RegInfo.createVirtualRegister(&Mips::GPR32RegClass); 3567 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Tmp).addReg(Rt, 0, Mips::sub_32); 3568 Rt = Tmp; 3569 } 3570 3571 BuildMI(*BB, MI, DL, TII->get(Mips::FILL_H), Wd).addReg(Rt); 3572 3573 MI.eraseFromParent(); 3574 return BB; 3575 } 3576 3577 // Emit the FPROUND_PSEUDO instruction. 3578 // 3579 // Round an FGR64Opnd, FGR32Opnd to an f16. 3580 // 3581 // Safety: Cycle the operand through the GPRs so the result always ends up 3582 // the correct MSA register. 3583 // 3584 // FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fs 3585 // / FGR64Opnd:$Fs and MSA128F16:$Wd to the same physical register 3586 // (which they can be, as the MSA registers are defined to alias the 3587 // FPU's 64 bit and 32 bit registers) the result can be accessed using 3588 // the correct register class. That requires operands be tie-able across 3589 // register classes which have a sub/super register class relationship. 3590 // 3591 // For FPG32Opnd: 3592 // 3593 // FPROUND MSA128F16:$wd, FGR32Opnd:$fs 3594 // => 3595 // mfc1 $rtemp, $fs 3596 // fill.w $rtemp, $wtemp 3597 // fexdo.w $wd, $wtemp, $wtemp 3598 // 3599 // For FPG64Opnd on mips32r2+: 3600 // 3601 // FPROUND MSA128F16:$wd, FGR64Opnd:$fs 3602 // => 3603 // mfc1 $rtemp, $fs 3604 // fill.w $rtemp, $wtemp 3605 // mfhc1 $rtemp2, $fs 3606 // insert.w $wtemp[1], $rtemp2 3607 // insert.w $wtemp[3], $rtemp2 3608 // fexdo.w $wtemp2, $wtemp, $wtemp 3609 // fexdo.h $wd, $temp2, $temp2 3610 // 3611 // For FGR64Opnd on mips64r2+: 3612 // 3613 // FPROUND MSA128F16:$wd, FGR64Opnd:$fs 3614 // => 3615 // dmfc1 $rtemp, $fs 3616 // fill.d $rtemp, $wtemp 3617 // fexdo.w $wtemp2, $wtemp, $wtemp 3618 // fexdo.h $wd, $wtemp2, $wtemp2 3619 // 3620 // Safety note: As $wtemp is UNDEF, we may provoke a spurious exception if the 3621 // undef bits are "just right" and the exception enable bits are 3622 // set. By using fill.w to replicate $fs into all elements over 3623 // insert.w for one element, we avoid that potiential case. If 3624 // fexdo.[hw] causes an exception in, the exception is valid and it 3625 // occurs for all elements. 3626 MachineBasicBlock * 3627 MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI, 3628 MachineBasicBlock *BB, 3629 bool IsFGR64) const { 3630 3631 // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous 3632 // here. It's technically doable to support MIPS32 here, but the ISA forbids 3633 // it. 3634 assert(Subtarget.hasMSA() && Subtarget.hasMips32r2()); 3635 3636 bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64; 3637 bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64; 3638 3639 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3640 DebugLoc DL = MI.getDebugLoc(); 3641 unsigned Wd = MI.getOperand(0).getReg(); 3642 unsigned Fs = MI.getOperand(1).getReg(); 3643 3644 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3645 unsigned Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 3646 const TargetRegisterClass *GPRRC = 3647 IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; 3648 unsigned MFC1Opc = IsFGR64onMips64 3649 ? Mips::DMFC1 3650 : (IsFGR64onMips32 ? Mips::MFC1_D64 : Mips::MFC1); 3651 unsigned FILLOpc = IsFGR64onMips64 ? Mips::FILL_D : Mips::FILL_W; 3652 3653 // Perform the register class copy as mentioned above. 3654 unsigned Rtemp = RegInfo.createVirtualRegister(GPRRC); 3655 BuildMI(*BB, MI, DL, TII->get(MFC1Opc), Rtemp).addReg(Fs); 3656 BuildMI(*BB, MI, DL, TII->get(FILLOpc), Wtemp).addReg(Rtemp); 3657 unsigned WPHI = Wtemp; 3658 3659 if (IsFGR64onMips32) { 3660 unsigned Rtemp2 = RegInfo.createVirtualRegister(GPRRC); 3661 BuildMI(*BB, MI, DL, TII->get(Mips::MFHC1_D64), Rtemp2).addReg(Fs); 3662 unsigned Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 3663 unsigned Wtemp3 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 3664 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp2) 3665 .addReg(Wtemp) 3666 .addReg(Rtemp2) 3667 .addImm(1); 3668 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp3) 3669 .addReg(Wtemp2) 3670 .addReg(Rtemp2) 3671 .addImm(3); 3672 WPHI = Wtemp3; 3673 } 3674 3675 if (IsFGR64) { 3676 unsigned Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 3677 BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_W), Wtemp2) 3678 .addReg(WPHI) 3679 .addReg(WPHI); 3680 WPHI = Wtemp2; 3681 } 3682 3683 BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_H), Wd).addReg(WPHI).addReg(WPHI); 3684 3685 MI.eraseFromParent(); 3686 return BB; 3687 } 3688 3689 // Emit the FPEXTEND_PSEUDO instruction. 3690 // 3691 // Expand an f16 to either a FGR32Opnd or FGR64Opnd. 3692 // 3693 // Safety: Cycle the result through the GPRs so the result always ends up 3694 // the correct floating point register. 3695 // 3696 // FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fd 3697 // / FGR64Opnd:$Fd and MSA128F16:$Ws to the same physical register 3698 // (which they can be, as the MSA registers are defined to alias the 3699 // FPU's 64 bit and 32 bit registers) the result can be accessed using 3700 // the correct register class. That requires operands be tie-able across 3701 // register classes which have a sub/super register class relationship. I 3702 // haven't checked. 3703 // 3704 // For FGR32Opnd: 3705 // 3706 // FPEXTEND FGR32Opnd:$fd, MSA128F16:$ws 3707 // => 3708 // fexupr.w $wtemp, $ws 3709 // copy_s.w $rtemp, $ws[0] 3710 // mtc1 $rtemp, $fd 3711 // 3712 // For FGR64Opnd on Mips64: 3713 // 3714 // FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws 3715 // => 3716 // fexupr.w $wtemp, $ws 3717 // fexupr.d $wtemp2, $wtemp 3718 // copy_s.d $rtemp, $wtemp2s[0] 3719 // dmtc1 $rtemp, $fd 3720 // 3721 // For FGR64Opnd on Mips32: 3722 // 3723 // FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws 3724 // => 3725 // fexupr.w $wtemp, $ws 3726 // fexupr.d $wtemp2, $wtemp 3727 // copy_s.w $rtemp, $wtemp2[0] 3728 // mtc1 $rtemp, $ftemp 3729 // copy_s.w $rtemp2, $wtemp2[1] 3730 // $fd = mthc1 $rtemp2, $ftemp 3731 MachineBasicBlock * 3732 MipsSETargetLowering::emitFPEXTEND_PSEUDO(MachineInstr &MI, 3733 MachineBasicBlock *BB, 3734 bool IsFGR64) const { 3735 3736 // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous 3737 // here. It's technically doable to support MIPS32 here, but the ISA forbids 3738 // it. 3739 assert(Subtarget.hasMSA() && Subtarget.hasMips32r2()); 3740 3741 bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64; 3742 bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64; 3743 3744 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3745 DebugLoc DL = MI.getDebugLoc(); 3746 unsigned Fd = MI.getOperand(0).getReg(); 3747 unsigned Ws = MI.getOperand(1).getReg(); 3748 3749 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3750 const TargetRegisterClass *GPRRC = 3751 IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; 3752 unsigned MTC1Opc = IsFGR64onMips64 3753 ? Mips::DMTC1 3754 : (IsFGR64onMips32 ? Mips::MTC1_D64 : Mips::MTC1); 3755 unsigned COPYOpc = IsFGR64onMips64 ? Mips::COPY_S_D : Mips::COPY_S_W; 3756 3757 unsigned Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 3758 unsigned WPHI = Wtemp; 3759 3760 BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_W), Wtemp).addReg(Ws); 3761 if (IsFGR64) { 3762 WPHI = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); 3763 BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_D), WPHI).addReg(Wtemp); 3764 } 3765 3766 // Perform the safety regclass copy mentioned above. 3767 unsigned Rtemp = RegInfo.createVirtualRegister(GPRRC); 3768 unsigned FPRPHI = IsFGR64onMips32 3769 ? RegInfo.createVirtualRegister(&Mips::FGR64RegClass) 3770 : Fd; 3771 BuildMI(*BB, MI, DL, TII->get(COPYOpc), Rtemp).addReg(WPHI).addImm(0); 3772 BuildMI(*BB, MI, DL, TII->get(MTC1Opc), FPRPHI).addReg(Rtemp); 3773 3774 if (IsFGR64onMips32) { 3775 unsigned Rtemp2 = RegInfo.createVirtualRegister(GPRRC); 3776 BuildMI(*BB, MI, DL, TII->get(Mips::COPY_S_W), Rtemp2) 3777 .addReg(WPHI) 3778 .addImm(1); 3779 BuildMI(*BB, MI, DL, TII->get(Mips::MTHC1_D64), Fd) 3780 .addReg(FPRPHI) 3781 .addReg(Rtemp2); 3782 } 3783 3784 MI.eraseFromParent(); 3785 return BB; 3786 } 3787 3788 // Emit the FEXP2_W_1 pseudo instructions. 3789 // 3790 // fexp2_w_1_pseudo $wd, $wt 3791 // => 3792 // ldi.w $ws, 1 3793 // fexp2.w $wd, $ws, $wt 3794 MachineBasicBlock * 3795 MipsSETargetLowering::emitFEXP2_W_1(MachineInstr &MI, 3796 MachineBasicBlock *BB) const { 3797 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3798 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3799 const TargetRegisterClass *RC = &Mips::MSA128WRegClass; 3800 unsigned Ws1 = RegInfo.createVirtualRegister(RC); 3801 unsigned Ws2 = RegInfo.createVirtualRegister(RC); 3802 DebugLoc DL = MI.getDebugLoc(); 3803 3804 // Splat 1.0 into a vector 3805 BuildMI(*BB, MI, DL, TII->get(Mips::LDI_W), Ws1).addImm(1); 3806 BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_W), Ws2).addReg(Ws1); 3807 3808 // Emit 1.0 * fexp2(Wt) 3809 BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_W), MI.getOperand(0).getReg()) 3810 .addReg(Ws2) 3811 .addReg(MI.getOperand(1).getReg()); 3812 3813 MI.eraseFromParent(); // The pseudo instruction is gone now. 3814 return BB; 3815 } 3816 3817 // Emit the FEXP2_D_1 pseudo instructions. 3818 // 3819 // fexp2_d_1_pseudo $wd, $wt 3820 // => 3821 // ldi.d $ws, 1 3822 // fexp2.d $wd, $ws, $wt 3823 MachineBasicBlock * 3824 MipsSETargetLowering::emitFEXP2_D_1(MachineInstr &MI, 3825 MachineBasicBlock *BB) const { 3826 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3827 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3828 const TargetRegisterClass *RC = &Mips::MSA128DRegClass; 3829 unsigned Ws1 = RegInfo.createVirtualRegister(RC); 3830 unsigned Ws2 = RegInfo.createVirtualRegister(RC); 3831 DebugLoc DL = MI.getDebugLoc(); 3832 3833 // Splat 1.0 into a vector 3834 BuildMI(*BB, MI, DL, TII->get(Mips::LDI_D), Ws1).addImm(1); 3835 BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_D), Ws2).addReg(Ws1); 3836 3837 // Emit 1.0 * fexp2(Wt) 3838 BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_D), MI.getOperand(0).getReg()) 3839 .addReg(Ws2) 3840 .addReg(MI.getOperand(1).getReg()); 3841 3842 MI.eraseFromParent(); // The pseudo instruction is gone now. 3843 return BB; 3844 } 3845