1 //===-- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface --*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Subclass of MipsTargetLowering specialized for mips32/64. 11 // 12 //===----------------------------------------------------------------------===// 13 #include "MipsSEISelLowering.h" 14 #include "MipsMachineFunction.h" 15 #include "MipsRegisterInfo.h" 16 #include "MipsTargetMachine.h" 17 #include "llvm/ADT/APInt.h" 18 #include "llvm/CodeGen/MachineInstrBuilder.h" 19 #include "llvm/CodeGen/MachineRegisterInfo.h" 20 #include "llvm/IR/Intrinsics.h" 21 #include "llvm/Support/CommandLine.h" 22 #include "llvm/Support/Debug.h" 23 #include "llvm/Support/ErrorHandling.h" 24 #include "llvm/Support/raw_ostream.h" 25 #include "llvm/Target/TargetInstrInfo.h" 26 27 using namespace llvm; 28 29 #define DEBUG_TYPE "mips-isel" 30 31 static cl::opt<bool> 32 UseMipsTailCalls("mips-tail-calls", cl::Hidden, 33 cl::desc("MIPS: permit tail calls."), cl::init(false)); 34 35 static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false), 36 cl::desc("Expand double precision loads and " 37 "stores to their single precision " 38 "counterparts")); 39 40 MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM, 41 const MipsSubtarget &STI) 42 : MipsTargetLowering(TM, STI) { 43 // Set up the register classes 44 addRegisterClass(MVT::i32, &Mips::GPR32RegClass); 45 46 if (Subtarget.isGP64bit()) 47 addRegisterClass(MVT::i64, &Mips::GPR64RegClass); 48 49 if (Subtarget.hasDSP() || Subtarget.hasMSA()) { 50 // Expand all truncating stores and extending loads. 
51 for (MVT VT0 : MVT::vector_valuetypes()) { 52 for (MVT VT1 : MVT::vector_valuetypes()) { 53 setTruncStoreAction(VT0, VT1, Expand); 54 setLoadExtAction(ISD::SEXTLOAD, VT0, VT1, Expand); 55 setLoadExtAction(ISD::ZEXTLOAD, VT0, VT1, Expand); 56 setLoadExtAction(ISD::EXTLOAD, VT0, VT1, Expand); 57 } 58 } 59 } 60 61 if (Subtarget.hasDSP()) { 62 MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8}; 63 64 for (unsigned i = 0; i < array_lengthof(VecTys); ++i) { 65 addRegisterClass(VecTys[i], &Mips::DSPRRegClass); 66 67 // Expand all builtin opcodes. 68 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) 69 setOperationAction(Opc, VecTys[i], Expand); 70 71 setOperationAction(ISD::ADD, VecTys[i], Legal); 72 setOperationAction(ISD::SUB, VecTys[i], Legal); 73 setOperationAction(ISD::LOAD, VecTys[i], Legal); 74 setOperationAction(ISD::STORE, VecTys[i], Legal); 75 setOperationAction(ISD::BITCAST, VecTys[i], Legal); 76 } 77 78 setTargetDAGCombine(ISD::SHL); 79 setTargetDAGCombine(ISD::SRA); 80 setTargetDAGCombine(ISD::SRL); 81 setTargetDAGCombine(ISD::SETCC); 82 setTargetDAGCombine(ISD::VSELECT); 83 } 84 85 if (Subtarget.hasDSPR2()) 86 setOperationAction(ISD::MUL, MVT::v2i16, Legal); 87 88 if (Subtarget.hasMSA()) { 89 addMSAIntType(MVT::v16i8, &Mips::MSA128BRegClass); 90 addMSAIntType(MVT::v8i16, &Mips::MSA128HRegClass); 91 addMSAIntType(MVT::v4i32, &Mips::MSA128WRegClass); 92 addMSAIntType(MVT::v2i64, &Mips::MSA128DRegClass); 93 addMSAFloatType(MVT::v8f16, &Mips::MSA128HRegClass); 94 addMSAFloatType(MVT::v4f32, &Mips::MSA128WRegClass); 95 addMSAFloatType(MVT::v2f64, &Mips::MSA128DRegClass); 96 97 // f16 is a storage-only type, always promote it to f32. 
98 addRegisterClass(MVT::f16, &Mips::MSA128HRegClass); 99 setOperationAction(ISD::SETCC, MVT::f16, Promote); 100 setOperationAction(ISD::BR_CC, MVT::f16, Promote); 101 setOperationAction(ISD::SELECT_CC, MVT::f16, Promote); 102 setOperationAction(ISD::SELECT, MVT::f16, Promote); 103 setOperationAction(ISD::FADD, MVT::f16, Promote); 104 setOperationAction(ISD::FSUB, MVT::f16, Promote); 105 setOperationAction(ISD::FMUL, MVT::f16, Promote); 106 setOperationAction(ISD::FDIV, MVT::f16, Promote); 107 setOperationAction(ISD::FREM, MVT::f16, Promote); 108 setOperationAction(ISD::FMA, MVT::f16, Promote); 109 setOperationAction(ISD::FNEG, MVT::f16, Promote); 110 setOperationAction(ISD::FABS, MVT::f16, Promote); 111 setOperationAction(ISD::FCEIL, MVT::f16, Promote); 112 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote); 113 setOperationAction(ISD::FCOS, MVT::f16, Promote); 114 setOperationAction(ISD::FP_EXTEND, MVT::f16, Promote); 115 setOperationAction(ISD::FFLOOR, MVT::f16, Promote); 116 setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote); 117 setOperationAction(ISD::FPOW, MVT::f16, Promote); 118 setOperationAction(ISD::FPOWI, MVT::f16, Promote); 119 setOperationAction(ISD::FRINT, MVT::f16, Promote); 120 setOperationAction(ISD::FSIN, MVT::f16, Promote); 121 setOperationAction(ISD::FSINCOS, MVT::f16, Promote); 122 setOperationAction(ISD::FSQRT, MVT::f16, Promote); 123 setOperationAction(ISD::FEXP, MVT::f16, Promote); 124 setOperationAction(ISD::FEXP2, MVT::f16, Promote); 125 setOperationAction(ISD::FLOG, MVT::f16, Promote); 126 setOperationAction(ISD::FLOG2, MVT::f16, Promote); 127 setOperationAction(ISD::FLOG10, MVT::f16, Promote); 128 setOperationAction(ISD::FROUND, MVT::f16, Promote); 129 setOperationAction(ISD::FTRUNC, MVT::f16, Promote); 130 setOperationAction(ISD::FMINNUM, MVT::f16, Promote); 131 setOperationAction(ISD::FMAXNUM, MVT::f16, Promote); 132 setOperationAction(ISD::FMINNAN, MVT::f16, Promote); 133 setOperationAction(ISD::FMAXNAN, MVT::f16, Promote); 
134 135 setTargetDAGCombine(ISD::AND); 136 setTargetDAGCombine(ISD::OR); 137 setTargetDAGCombine(ISD::SRA); 138 setTargetDAGCombine(ISD::VSELECT); 139 setTargetDAGCombine(ISD::XOR); 140 } 141 142 if (!Subtarget.useSoftFloat()) { 143 addRegisterClass(MVT::f32, &Mips::FGR32RegClass); 144 145 // When dealing with single precision only, use libcalls 146 if (!Subtarget.isSingleFloat()) { 147 if (Subtarget.isFP64bit()) 148 addRegisterClass(MVT::f64, &Mips::FGR64RegClass); 149 else 150 addRegisterClass(MVT::f64, &Mips::AFGR64RegClass); 151 } 152 } 153 154 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom); 155 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom); 156 setOperationAction(ISD::MULHS, MVT::i32, Custom); 157 setOperationAction(ISD::MULHU, MVT::i32, Custom); 158 159 if (Subtarget.hasCnMips()) 160 setOperationAction(ISD::MUL, MVT::i64, Legal); 161 else if (Subtarget.isGP64bit()) 162 setOperationAction(ISD::MUL, MVT::i64, Custom); 163 164 if (Subtarget.isGP64bit()) { 165 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Custom); 166 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom); 167 setOperationAction(ISD::MULHS, MVT::i64, Custom); 168 setOperationAction(ISD::MULHU, MVT::i64, Custom); 169 setOperationAction(ISD::SDIVREM, MVT::i64, Custom); 170 setOperationAction(ISD::UDIVREM, MVT::i64, Custom); 171 } 172 173 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); 174 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); 175 176 setOperationAction(ISD::SDIVREM, MVT::i32, Custom); 177 setOperationAction(ISD::UDIVREM, MVT::i32, Custom); 178 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); 179 setOperationAction(ISD::LOAD, MVT::i32, Custom); 180 setOperationAction(ISD::STORE, MVT::i32, Custom); 181 182 setTargetDAGCombine(ISD::ADDE); 183 setTargetDAGCombine(ISD::SUBE); 184 setTargetDAGCombine(ISD::MUL); 185 186 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 187 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, 
Custom); 188 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); 189 190 if (NoDPLoadStore) { 191 setOperationAction(ISD::LOAD, MVT::f64, Custom); 192 setOperationAction(ISD::STORE, MVT::f64, Custom); 193 } 194 195 if (Subtarget.hasMips32r6()) { 196 // MIPS32r6 replaces the accumulator-based multiplies with a three register 197 // instruction 198 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); 199 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); 200 setOperationAction(ISD::MUL, MVT::i32, Legal); 201 setOperationAction(ISD::MULHS, MVT::i32, Legal); 202 setOperationAction(ISD::MULHU, MVT::i32, Legal); 203 204 // MIPS32r6 replaces the accumulator-based division/remainder with separate 205 // three register division and remainder instructions. 206 setOperationAction(ISD::SDIVREM, MVT::i32, Expand); 207 setOperationAction(ISD::UDIVREM, MVT::i32, Expand); 208 setOperationAction(ISD::SDIV, MVT::i32, Legal); 209 setOperationAction(ISD::UDIV, MVT::i32, Legal); 210 setOperationAction(ISD::SREM, MVT::i32, Legal); 211 setOperationAction(ISD::UREM, MVT::i32, Legal); 212 213 // MIPS32r6 replaces conditional moves with an equivalent that removes the 214 // need for three GPR read ports. 
215 setOperationAction(ISD::SETCC, MVT::i32, Legal); 216 setOperationAction(ISD::SELECT, MVT::i32, Legal); 217 setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); 218 219 setOperationAction(ISD::SETCC, MVT::f32, Legal); 220 setOperationAction(ISD::SELECT, MVT::f32, Legal); 221 setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); 222 223 assert(Subtarget.isFP64bit() && "FR=1 is required for MIPS32r6"); 224 setOperationAction(ISD::SETCC, MVT::f64, Legal); 225 setOperationAction(ISD::SELECT, MVT::f64, Legal); 226 setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); 227 228 setOperationAction(ISD::BRCOND, MVT::Other, Legal); 229 230 // Floating point > and >= are supported via < and <= 231 setCondCodeAction(ISD::SETOGE, MVT::f32, Expand); 232 setCondCodeAction(ISD::SETOGT, MVT::f32, Expand); 233 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand); 234 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand); 235 236 setCondCodeAction(ISD::SETOGE, MVT::f64, Expand); 237 setCondCodeAction(ISD::SETOGT, MVT::f64, Expand); 238 setCondCodeAction(ISD::SETUGE, MVT::f64, Expand); 239 setCondCodeAction(ISD::SETUGT, MVT::f64, Expand); 240 } 241 242 if (Subtarget.hasMips64r6()) { 243 // MIPS64r6 replaces the accumulator-based multiplies with a three register 244 // instruction 245 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); 246 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); 247 setOperationAction(ISD::MUL, MVT::i64, Legal); 248 setOperationAction(ISD::MULHS, MVT::i64, Legal); 249 setOperationAction(ISD::MULHU, MVT::i64, Legal); 250 251 // MIPS32r6 replaces the accumulator-based division/remainder with separate 252 // three register division and remainder instructions. 
253 setOperationAction(ISD::SDIVREM, MVT::i64, Expand); 254 setOperationAction(ISD::UDIVREM, MVT::i64, Expand); 255 setOperationAction(ISD::SDIV, MVT::i64, Legal); 256 setOperationAction(ISD::UDIV, MVT::i64, Legal); 257 setOperationAction(ISD::SREM, MVT::i64, Legal); 258 setOperationAction(ISD::UREM, MVT::i64, Legal); 259 260 // MIPS64r6 replaces conditional moves with an equivalent that removes the 261 // need for three GPR read ports. 262 setOperationAction(ISD::SETCC, MVT::i64, Legal); 263 setOperationAction(ISD::SELECT, MVT::i64, Legal); 264 setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); 265 } 266 267 computeRegisterProperties(Subtarget.getRegisterInfo()); 268 } 269 270 const MipsTargetLowering * 271 llvm::createMipsSETargetLowering(const MipsTargetMachine &TM, 272 const MipsSubtarget &STI) { 273 return new MipsSETargetLowering(TM, STI); 274 } 275 276 const TargetRegisterClass * 277 MipsSETargetLowering::getRepRegClassFor(MVT VT) const { 278 if (VT == MVT::Untyped) 279 return Subtarget.hasDSP() ? &Mips::ACC64DSPRegClass : &Mips::ACC64RegClass; 280 281 return TargetLowering::getRepRegClassFor(VT); 282 } 283 284 // Enable MSA support for the given integer type and Register class. 285 void MipsSETargetLowering:: 286 addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { 287 addRegisterClass(Ty, RC); 288 289 // Expand all builtin opcodes. 
290 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) 291 setOperationAction(Opc, Ty, Expand); 292 293 setOperationAction(ISD::BITCAST, Ty, Legal); 294 setOperationAction(ISD::LOAD, Ty, Legal); 295 setOperationAction(ISD::STORE, Ty, Legal); 296 setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Custom); 297 setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal); 298 setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); 299 300 setOperationAction(ISD::ADD, Ty, Legal); 301 setOperationAction(ISD::AND, Ty, Legal); 302 setOperationAction(ISD::CTLZ, Ty, Legal); 303 setOperationAction(ISD::CTPOP, Ty, Legal); 304 setOperationAction(ISD::MUL, Ty, Legal); 305 setOperationAction(ISD::OR, Ty, Legal); 306 setOperationAction(ISD::SDIV, Ty, Legal); 307 setOperationAction(ISD::SREM, Ty, Legal); 308 setOperationAction(ISD::SHL, Ty, Legal); 309 setOperationAction(ISD::SRA, Ty, Legal); 310 setOperationAction(ISD::SRL, Ty, Legal); 311 setOperationAction(ISD::SUB, Ty, Legal); 312 setOperationAction(ISD::UDIV, Ty, Legal); 313 setOperationAction(ISD::UREM, Ty, Legal); 314 setOperationAction(ISD::VECTOR_SHUFFLE, Ty, Custom); 315 setOperationAction(ISD::VSELECT, Ty, Legal); 316 setOperationAction(ISD::XOR, Ty, Legal); 317 318 if (Ty == MVT::v4i32 || Ty == MVT::v2i64) { 319 setOperationAction(ISD::FP_TO_SINT, Ty, Legal); 320 setOperationAction(ISD::FP_TO_UINT, Ty, Legal); 321 setOperationAction(ISD::SINT_TO_FP, Ty, Legal); 322 setOperationAction(ISD::UINT_TO_FP, Ty, Legal); 323 } 324 325 setOperationAction(ISD::SETCC, Ty, Legal); 326 setCondCodeAction(ISD::SETNE, Ty, Expand); 327 setCondCodeAction(ISD::SETGE, Ty, Expand); 328 setCondCodeAction(ISD::SETGT, Ty, Expand); 329 setCondCodeAction(ISD::SETUGE, Ty, Expand); 330 setCondCodeAction(ISD::SETUGT, Ty, Expand); 331 } 332 333 // Enable MSA support for the given floating-point type and Register class. 
334 void MipsSETargetLowering:: 335 addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { 336 addRegisterClass(Ty, RC); 337 338 // Expand all builtin opcodes. 339 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) 340 setOperationAction(Opc, Ty, Expand); 341 342 setOperationAction(ISD::LOAD, Ty, Legal); 343 setOperationAction(ISD::STORE, Ty, Legal); 344 setOperationAction(ISD::BITCAST, Ty, Legal); 345 setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Legal); 346 setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal); 347 setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); 348 349 if (Ty != MVT::v8f16) { 350 setOperationAction(ISD::FABS, Ty, Legal); 351 setOperationAction(ISD::FADD, Ty, Legal); 352 setOperationAction(ISD::FDIV, Ty, Legal); 353 setOperationAction(ISD::FEXP2, Ty, Legal); 354 setOperationAction(ISD::FLOG2, Ty, Legal); 355 setOperationAction(ISD::FMA, Ty, Legal); 356 setOperationAction(ISD::FMUL, Ty, Legal); 357 setOperationAction(ISD::FRINT, Ty, Legal); 358 setOperationAction(ISD::FSQRT, Ty, Legal); 359 setOperationAction(ISD::FSUB, Ty, Legal); 360 setOperationAction(ISD::VSELECT, Ty, Legal); 361 362 setOperationAction(ISD::SETCC, Ty, Legal); 363 setCondCodeAction(ISD::SETOGE, Ty, Expand); 364 setCondCodeAction(ISD::SETOGT, Ty, Expand); 365 setCondCodeAction(ISD::SETUGE, Ty, Expand); 366 setCondCodeAction(ISD::SETUGT, Ty, Expand); 367 setCondCodeAction(ISD::SETGE, Ty, Expand); 368 setCondCodeAction(ISD::SETGT, Ty, Expand); 369 } 370 } 371 372 bool 373 MipsSETargetLowering::allowsMisalignedMemoryAccesses(EVT VT, 374 unsigned, 375 unsigned, 376 bool *Fast) const { 377 MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy; 378 379 if (Subtarget.systemSupportsUnalignedAccess()) { 380 // MIPS32r6/MIPS64r6 is required to support unaligned access. 
It's 381 // implementation defined whether this is handled by hardware, software, or 382 // a hybrid of the two but it's expected that most implementations will 383 // handle the majority of cases in hardware. 384 if (Fast) 385 *Fast = true; 386 return true; 387 } 388 389 switch (SVT) { 390 case MVT::i64: 391 case MVT::i32: 392 if (Fast) 393 *Fast = true; 394 return true; 395 default: 396 return false; 397 } 398 } 399 400 SDValue MipsSETargetLowering::LowerOperation(SDValue Op, 401 SelectionDAG &DAG) const { 402 switch(Op.getOpcode()) { 403 case ISD::LOAD: return lowerLOAD(Op, DAG); 404 case ISD::STORE: return lowerSTORE(Op, DAG); 405 case ISD::SMUL_LOHI: return lowerMulDiv(Op, MipsISD::Mult, true, true, DAG); 406 case ISD::UMUL_LOHI: return lowerMulDiv(Op, MipsISD::Multu, true, true, DAG); 407 case ISD::MULHS: return lowerMulDiv(Op, MipsISD::Mult, false, true, DAG); 408 case ISD::MULHU: return lowerMulDiv(Op, MipsISD::Multu, false, true, DAG); 409 case ISD::MUL: return lowerMulDiv(Op, MipsISD::Mult, true, false, DAG); 410 case ISD::SDIVREM: return lowerMulDiv(Op, MipsISD::DivRem, true, true, DAG); 411 case ISD::UDIVREM: return lowerMulDiv(Op, MipsISD::DivRemU, true, true, 412 DAG); 413 case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG); 414 case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG); 415 case ISD::INTRINSIC_VOID: return lowerINTRINSIC_VOID(Op, DAG); 416 case ISD::EXTRACT_VECTOR_ELT: return lowerEXTRACT_VECTOR_ELT(Op, DAG); 417 case ISD::BUILD_VECTOR: return lowerBUILD_VECTOR(Op, DAG); 418 case ISD::VECTOR_SHUFFLE: return lowerVECTOR_SHUFFLE(Op, DAG); 419 } 420 421 return MipsTargetLowering::LowerOperation(Op, DAG); 422 } 423 424 // selectMADD - 425 // Transforms a subgraph in CurDAG if the following pattern is found: 426 // (addc multLo, Lo0), (adde multHi, Hi0), 427 // where, 428 // multHi/Lo: product of multiplication 429 // Lo0: initial value of Lo register 430 // Hi0: initial value of Hi register 431 // Return true 
if pattern matching was successful. 432 static bool selectMADD(SDNode *ADDENode, SelectionDAG *CurDAG) { 433 // ADDENode's second operand must be a flag output of an ADDC node in order 434 // for the matching to be successful. 435 SDNode *ADDCNode = ADDENode->getOperand(2).getNode(); 436 437 if (ADDCNode->getOpcode() != ISD::ADDC) 438 return false; 439 440 SDValue MultHi = ADDENode->getOperand(0); 441 SDValue MultLo = ADDCNode->getOperand(0); 442 SDNode *MultNode = MultHi.getNode(); 443 unsigned MultOpc = MultHi.getOpcode(); 444 445 // MultHi and MultLo must be generated by the same node, 446 if (MultLo.getNode() != MultNode) 447 return false; 448 449 // and it must be a multiplication. 450 if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI) 451 return false; 452 453 // MultLo amd MultHi must be the first and second output of MultNode 454 // respectively. 455 if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0) 456 return false; 457 458 // Transform this to a MADD only if ADDENode and ADDCNode are the only users 459 // of the values of MultNode, in which case MultNode will be removed in later 460 // phases. 461 // If there exist users other than ADDENode or ADDCNode, this function returns 462 // here, which will result in MultNode being mapped to a single MULT 463 // instruction node rather than a pair of MULT and MADD instructions being 464 // produced. 465 if (!MultHi.hasOneUse() || !MultLo.hasOneUse()) 466 return false; 467 468 SDLoc DL(ADDENode); 469 470 // Initialize accumulator. 471 SDValue ACCIn = CurDAG->getNode(MipsISD::MTLOHI, DL, MVT::Untyped, 472 ADDCNode->getOperand(1), 473 ADDENode->getOperand(1)); 474 475 // create MipsMAdd(u) node 476 MultOpc = MultOpc == ISD::UMUL_LOHI ? 
MipsISD::MAddu : MipsISD::MAdd; 477 478 SDValue MAdd = CurDAG->getNode(MultOpc, DL, MVT::Untyped, 479 MultNode->getOperand(0),// Factor 0 480 MultNode->getOperand(1),// Factor 1 481 ACCIn); 482 483 // replace uses of adde and addc here 484 if (!SDValue(ADDCNode, 0).use_empty()) { 485 SDValue LoOut = CurDAG->getNode(MipsISD::MFLO, DL, MVT::i32, MAdd); 486 CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDCNode, 0), LoOut); 487 } 488 if (!SDValue(ADDENode, 0).use_empty()) { 489 SDValue HiOut = CurDAG->getNode(MipsISD::MFHI, DL, MVT::i32, MAdd); 490 CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDENode, 0), HiOut); 491 } 492 493 return true; 494 } 495 496 // selectMSUB - 497 // Transforms a subgraph in CurDAG if the following pattern is found: 498 // (addc Lo0, multLo), (sube Hi0, multHi), 499 // where, 500 // multHi/Lo: product of multiplication 501 // Lo0: initial value of Lo register 502 // Hi0: initial value of Hi register 503 // Return true if pattern matching was successful. 504 static bool selectMSUB(SDNode *SUBENode, SelectionDAG *CurDAG) { 505 // SUBENode's second operand must be a flag output of an SUBC node in order 506 // for the matching to be successful. 507 SDNode *SUBCNode = SUBENode->getOperand(2).getNode(); 508 509 if (SUBCNode->getOpcode() != ISD::SUBC) 510 return false; 511 512 SDValue MultHi = SUBENode->getOperand(1); 513 SDValue MultLo = SUBCNode->getOperand(1); 514 SDNode *MultNode = MultHi.getNode(); 515 unsigned MultOpc = MultHi.getOpcode(); 516 517 // MultHi and MultLo must be generated by the same node, 518 if (MultLo.getNode() != MultNode) 519 return false; 520 521 // and it must be a multiplication. 522 if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI) 523 return false; 524 525 // MultLo amd MultHi must be the first and second output of MultNode 526 // respectively. 
527 if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0) 528 return false; 529 530 // Transform this to a MSUB only if SUBENode and SUBCNode are the only users 531 // of the values of MultNode, in which case MultNode will be removed in later 532 // phases. 533 // If there exist users other than SUBENode or SUBCNode, this function returns 534 // here, which will result in MultNode being mapped to a single MULT 535 // instruction node rather than a pair of MULT and MSUB instructions being 536 // produced. 537 if (!MultHi.hasOneUse() || !MultLo.hasOneUse()) 538 return false; 539 540 SDLoc DL(SUBENode); 541 542 // Initialize accumulator. 543 SDValue ACCIn = CurDAG->getNode(MipsISD::MTLOHI, DL, MVT::Untyped, 544 SUBCNode->getOperand(0), 545 SUBENode->getOperand(0)); 546 547 // create MipsSub(u) node 548 MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MSubu : MipsISD::MSub; 549 550 SDValue MSub = CurDAG->getNode(MultOpc, DL, MVT::Glue, 551 MultNode->getOperand(0),// Factor 0 552 MultNode->getOperand(1),// Factor 1 553 ACCIn); 554 555 // replace uses of sube and subc here 556 if (!SDValue(SUBCNode, 0).use_empty()) { 557 SDValue LoOut = CurDAG->getNode(MipsISD::MFLO, DL, MVT::i32, MSub); 558 CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBCNode, 0), LoOut); 559 } 560 if (!SDValue(SUBENode, 0).use_empty()) { 561 SDValue HiOut = CurDAG->getNode(MipsISD::MFHI, DL, MVT::i32, MSub); 562 CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBENode, 0), HiOut); 563 } 564 565 return true; 566 } 567 568 static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG, 569 TargetLowering::DAGCombinerInfo &DCI, 570 const MipsSubtarget &Subtarget) { 571 if (DCI.isBeforeLegalize()) 572 return SDValue(); 573 574 if (Subtarget.hasMips32() && !Subtarget.hasMips32r6() && 575 N->getValueType(0) == MVT::i32 && selectMADD(N, &DAG)) 576 return SDValue(N, 0); 577 578 return SDValue(); 579 } 580 581 // Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT 582 // 583 // Performs the following transformations: 
584 // - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its 585 // sign/zero-extension is completely overwritten by the new one performed by 586 // the ISD::AND. 587 // - Removes redundant zero extensions performed by an ISD::AND. 588 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, 589 TargetLowering::DAGCombinerInfo &DCI, 590 const MipsSubtarget &Subtarget) { 591 if (!Subtarget.hasMSA()) 592 return SDValue(); 593 594 SDValue Op0 = N->getOperand(0); 595 SDValue Op1 = N->getOperand(1); 596 unsigned Op0Opcode = Op0->getOpcode(); 597 598 // (and (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d) 599 // where $d + 1 == 2^n and n == 32 600 // or $d + 1 == 2^n and n <= 32 and ZExt 601 // -> (MipsVExtractZExt $a, $b, $c) 602 if (Op0Opcode == MipsISD::VEXTRACT_SEXT_ELT || 603 Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT) { 604 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Op1); 605 606 if (!Mask) 607 return SDValue(); 608 609 int32_t Log2IfPositive = (Mask->getAPIntValue() + 1).exactLogBase2(); 610 611 if (Log2IfPositive <= 0) 612 return SDValue(); // Mask+1 is not a power of 2 613 614 SDValue Op0Op2 = Op0->getOperand(2); 615 EVT ExtendTy = cast<VTSDNode>(Op0Op2)->getVT(); 616 unsigned ExtendTySize = ExtendTy.getSizeInBits(); 617 unsigned Log2 = Log2IfPositive; 618 619 if ((Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT && Log2 >= ExtendTySize) || 620 Log2 == ExtendTySize) { 621 SDValue Ops[] = { Op0->getOperand(0), Op0->getOperand(1), Op0Op2 }; 622 return DAG.getNode(MipsISD::VEXTRACT_ZEXT_ELT, SDLoc(Op0), 623 Op0->getVTList(), 624 makeArrayRef(Ops, Op0->getNumOperands())); 625 } 626 } 627 628 return SDValue(); 629 } 630 631 // Determine if the specified node is a constant vector splat. 632 // 633 // Returns true and sets Imm if: 634 // * N is a ISD::BUILD_VECTOR representing a constant splat 635 // 636 // This function is quite similar to MipsSEDAGToDAGISel::selectVSplat. 
The 637 // differences are that it assumes the MSA has already been checked and the 638 // arbitrary requirement for a maximum of 32-bit integers isn't applied (and 639 // must not be in order for binsri.d to be selectable). 640 static bool isVSplat(SDValue N, APInt &Imm, bool IsLittleEndian) { 641 BuildVectorSDNode *Node = dyn_cast<BuildVectorSDNode>(N.getNode()); 642 643 if (!Node) 644 return false; 645 646 APInt SplatValue, SplatUndef; 647 unsigned SplatBitSize; 648 bool HasAnyUndefs; 649 650 if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, 651 8, !IsLittleEndian)) 652 return false; 653 654 Imm = SplatValue; 655 656 return true; 657 } 658 659 // Test whether the given node is an all-ones build_vector. 660 static bool isVectorAllOnes(SDValue N) { 661 // Look through bitcasts. Endianness doesn't matter because we are looking 662 // for an all-ones value. 663 if (N->getOpcode() == ISD::BITCAST) 664 N = N->getOperand(0); 665 666 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N); 667 668 if (!BVN) 669 return false; 670 671 APInt SplatValue, SplatUndef; 672 unsigned SplatBitSize; 673 bool HasAnyUndefs; 674 675 // Endianness doesn't matter in this context because we are looking for 676 // an all-ones value. 677 if (BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs)) 678 return SplatValue.isAllOnesValue(); 679 680 return false; 681 } 682 683 // Test whether N is the bitwise inverse of OfNode. 684 static bool isBitwiseInverse(SDValue N, SDValue OfNode) { 685 if (N->getOpcode() != ISD::XOR) 686 return false; 687 688 if (isVectorAllOnes(N->getOperand(0))) 689 return N->getOperand(1) == OfNode; 690 691 if (isVectorAllOnes(N->getOperand(1))) 692 return N->getOperand(0) == OfNode; 693 694 return false; 695 } 696 697 // Perform combines where ISD::OR is the root node. 
698 // 699 // Performs the following transformations: 700 // - (or (and $a, $mask), (and $b, $inv_mask)) => (vselect $mask, $a, $b) 701 // where $inv_mask is the bitwise inverse of $mask and the 'or' has a 128-bit 702 // vector type. 703 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, 704 TargetLowering::DAGCombinerInfo &DCI, 705 const MipsSubtarget &Subtarget) { 706 if (!Subtarget.hasMSA()) 707 return SDValue(); 708 709 EVT Ty = N->getValueType(0); 710 711 if (!Ty.is128BitVector()) 712 return SDValue(); 713 714 SDValue Op0 = N->getOperand(0); 715 SDValue Op1 = N->getOperand(1); 716 717 if (Op0->getOpcode() == ISD::AND && Op1->getOpcode() == ISD::AND) { 718 SDValue Op0Op0 = Op0->getOperand(0); 719 SDValue Op0Op1 = Op0->getOperand(1); 720 SDValue Op1Op0 = Op1->getOperand(0); 721 SDValue Op1Op1 = Op1->getOperand(1); 722 bool IsLittleEndian = !Subtarget.isLittle(); 723 724 SDValue IfSet, IfClr, Cond; 725 bool IsConstantMask = false; 726 APInt Mask, InvMask; 727 728 // If Op0Op0 is an appropriate mask, try to find it's inverse in either 729 // Op1Op0, or Op1Op1. Keep track of the Cond, IfSet, and IfClr nodes, while 730 // looking. 731 // IfClr will be set if we find a valid match. 732 if (isVSplat(Op0Op0, Mask, IsLittleEndian)) { 733 Cond = Op0Op0; 734 IfSet = Op0Op1; 735 736 if (isVSplat(Op1Op0, InvMask, IsLittleEndian) && 737 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) 738 IfClr = Op1Op1; 739 else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) && 740 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) 741 IfClr = Op1Op0; 742 743 IsConstantMask = true; 744 } 745 746 // If IfClr is not yet set, and Op0Op1 is an appropriate mask, try the same 747 // thing again using this mask. 748 // IfClr will be set if we find a valid match. 
749 if (!IfClr.getNode() && isVSplat(Op0Op1, Mask, IsLittleEndian)) { 750 Cond = Op0Op1; 751 IfSet = Op0Op0; 752 753 if (isVSplat(Op1Op0, InvMask, IsLittleEndian) && 754 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) 755 IfClr = Op1Op1; 756 else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) && 757 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) 758 IfClr = Op1Op0; 759 760 IsConstantMask = true; 761 } 762 763 // If IfClr is not yet set, try looking for a non-constant match. 764 // IfClr will be set if we find a valid match amongst the eight 765 // possibilities. 766 if (!IfClr.getNode()) { 767 if (isBitwiseInverse(Op0Op0, Op1Op0)) { 768 Cond = Op1Op0; 769 IfSet = Op1Op1; 770 IfClr = Op0Op1; 771 } else if (isBitwiseInverse(Op0Op1, Op1Op0)) { 772 Cond = Op1Op0; 773 IfSet = Op1Op1; 774 IfClr = Op0Op0; 775 } else if (isBitwiseInverse(Op0Op0, Op1Op1)) { 776 Cond = Op1Op1; 777 IfSet = Op1Op0; 778 IfClr = Op0Op1; 779 } else if (isBitwiseInverse(Op0Op1, Op1Op1)) { 780 Cond = Op1Op1; 781 IfSet = Op1Op0; 782 IfClr = Op0Op0; 783 } else if (isBitwiseInverse(Op1Op0, Op0Op0)) { 784 Cond = Op0Op0; 785 IfSet = Op0Op1; 786 IfClr = Op1Op1; 787 } else if (isBitwiseInverse(Op1Op1, Op0Op0)) { 788 Cond = Op0Op0; 789 IfSet = Op0Op1; 790 IfClr = Op1Op0; 791 } else if (isBitwiseInverse(Op1Op0, Op0Op1)) { 792 Cond = Op0Op1; 793 IfSet = Op0Op0; 794 IfClr = Op1Op1; 795 } else if (isBitwiseInverse(Op1Op1, Op0Op1)) { 796 Cond = Op0Op1; 797 IfSet = Op0Op0; 798 IfClr = Op1Op0; 799 } 800 } 801 802 // At this point, IfClr will be set if we have a valid match. 803 if (!IfClr.getNode()) 804 return SDValue(); 805 806 assert(Cond.getNode() && IfSet.getNode()); 807 808 // Fold degenerate cases. 809 if (IsConstantMask) { 810 if (Mask.isAllOnesValue()) 811 return IfSet; 812 else if (Mask == 0) 813 return IfClr; 814 } 815 816 // Transform the DAG into an equivalent VSELECT. 
817 return DAG.getNode(ISD::VSELECT, SDLoc(N), Ty, Cond, IfSet, IfClr); 818 } 819 820 return SDValue(); 821 } 822 823 static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG, 824 TargetLowering::DAGCombinerInfo &DCI, 825 const MipsSubtarget &Subtarget) { 826 if (DCI.isBeforeLegalize()) 827 return SDValue(); 828 829 if (Subtarget.hasMips32() && N->getValueType(0) == MVT::i32 && 830 selectMSUB(N, &DAG)) 831 return SDValue(N, 0); 832 833 return SDValue(); 834 } 835 836 static SDValue genConstMult(SDValue X, uint64_t C, const SDLoc &DL, EVT VT, 837 EVT ShiftTy, SelectionDAG &DAG) { 838 // Clear the upper (64 - VT.sizeInBits) bits. 839 C &= ((uint64_t)-1) >> (64 - VT.getSizeInBits()); 840 841 // Return 0. 842 if (C == 0) 843 return DAG.getConstant(0, DL, VT); 844 845 // Return x. 846 if (C == 1) 847 return X; 848 849 // If c is power of 2, return (shl x, log2(c)). 850 if (isPowerOf2_64(C)) 851 return DAG.getNode(ISD::SHL, DL, VT, X, 852 DAG.getConstant(Log2_64(C), DL, ShiftTy)); 853 854 unsigned Log2Ceil = Log2_64_Ceil(C); 855 uint64_t Floor = 1LL << Log2_64(C); 856 uint64_t Ceil = Log2Ceil == 64 ? 0LL : 1LL << Log2Ceil; 857 858 // If |c - floor_c| <= |c - ceil_c|, 859 // where floor_c = pow(2, floor(log2(c))) and ceil_c = pow(2, ceil(log2(c))), 860 // return (add constMult(x, floor_c), constMult(x, c - floor_c)). 861 if (C - Floor <= Ceil - C) { 862 SDValue Op0 = genConstMult(X, Floor, DL, VT, ShiftTy, DAG); 863 SDValue Op1 = genConstMult(X, C - Floor, DL, VT, ShiftTy, DAG); 864 return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1); 865 } 866 867 // If |c - floor_c| > |c - ceil_c|, 868 // return (sub constMult(x, ceil_c), constMult(x, ceil_c - c)). 
869 SDValue Op0 = genConstMult(X, Ceil, DL, VT, ShiftTy, DAG); 870 SDValue Op1 = genConstMult(X, Ceil - C, DL, VT, ShiftTy, DAG); 871 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1); 872 } 873 874 static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, 875 const TargetLowering::DAGCombinerInfo &DCI, 876 const MipsSETargetLowering *TL) { 877 EVT VT = N->getValueType(0); 878 879 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) 880 if (!VT.isVector()) 881 return genConstMult(N->getOperand(0), C->getZExtValue(), SDLoc(N), VT, 882 TL->getScalarShiftAmountTy(DAG.getDataLayout(), VT), 883 DAG); 884 885 return SDValue(N, 0); 886 } 887 888 static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty, 889 SelectionDAG &DAG, 890 const MipsSubtarget &Subtarget) { 891 // See if this is a vector splat immediate node. 892 APInt SplatValue, SplatUndef; 893 unsigned SplatBitSize; 894 bool HasAnyUndefs; 895 unsigned EltSize = Ty.getScalarSizeInBits(); 896 BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); 897 898 if (!Subtarget.hasDSP()) 899 return SDValue(); 900 901 if (!BV || 902 !BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, 903 EltSize, !Subtarget.isLittle()) || 904 (SplatBitSize != EltSize) || 905 (SplatValue.getZExtValue() >= EltSize)) 906 return SDValue(); 907 908 SDLoc DL(N); 909 return DAG.getNode(Opc, DL, Ty, N->getOperand(0), 910 DAG.getConstant(SplatValue.getZExtValue(), DL, MVT::i32)); 911 } 912 913 static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG, 914 TargetLowering::DAGCombinerInfo &DCI, 915 const MipsSubtarget &Subtarget) { 916 EVT Ty = N->getValueType(0); 917 918 if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8)) 919 return SDValue(); 920 921 return performDSPShiftCombine(MipsISD::SHLL_DSP, N, Ty, DAG, Subtarget); 922 } 923 924 // Fold sign-extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT for MSA and fold 925 // constant splats into MipsISD::SHRA_DSP for DSPr2. 
926 // 927 // Performs the following transformations: 928 // - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to sign extension if its 929 // sign/zero-extension is completely overwritten by the new one performed by 930 // the ISD::SRA and ISD::SHL nodes. 931 // - Removes redundant sign extensions performed by an ISD::SRA and ISD::SHL 932 // sequence. 933 // 934 // See performDSPShiftCombine for more information about the transformation 935 // used for DSPr2. 936 static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, 937 TargetLowering::DAGCombinerInfo &DCI, 938 const MipsSubtarget &Subtarget) { 939 EVT Ty = N->getValueType(0); 940 941 if (Subtarget.hasMSA()) { 942 SDValue Op0 = N->getOperand(0); 943 SDValue Op1 = N->getOperand(1); 944 945 // (sra (shl (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d), imm:$d) 946 // where $d + sizeof($c) == 32 947 // or $d + sizeof($c) <= 32 and SExt 948 // -> (MipsVExtractSExt $a, $b, $c) 949 if (Op0->getOpcode() == ISD::SHL && Op1 == Op0->getOperand(1)) { 950 SDValue Op0Op0 = Op0->getOperand(0); 951 ConstantSDNode *ShAmount = dyn_cast<ConstantSDNode>(Op1); 952 953 if (!ShAmount) 954 return SDValue(); 955 956 if (Op0Op0->getOpcode() != MipsISD::VEXTRACT_SEXT_ELT && 957 Op0Op0->getOpcode() != MipsISD::VEXTRACT_ZEXT_ELT) 958 return SDValue(); 959 960 EVT ExtendTy = cast<VTSDNode>(Op0Op0->getOperand(2))->getVT(); 961 unsigned TotalBits = ShAmount->getZExtValue() + ExtendTy.getSizeInBits(); 962 963 if (TotalBits == 32 || 964 (Op0Op0->getOpcode() == MipsISD::VEXTRACT_SEXT_ELT && 965 TotalBits <= 32)) { 966 SDValue Ops[] = { Op0Op0->getOperand(0), Op0Op0->getOperand(1), 967 Op0Op0->getOperand(2) }; 968 return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, SDLoc(Op0Op0), 969 Op0Op0->getVTList(), 970 makeArrayRef(Ops, Op0Op0->getNumOperands())); 971 } 972 } 973 } 974 975 if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget.hasDSPR2())) 976 return SDValue(); 977 978 return performDSPShiftCombine(MipsISD::SHRA_DSP, N, Ty, DAG, Subtarget); 979 } 980 
981 982 static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, 983 TargetLowering::DAGCombinerInfo &DCI, 984 const MipsSubtarget &Subtarget) { 985 EVT Ty = N->getValueType(0); 986 987 if (((Ty != MVT::v2i16) || !Subtarget.hasDSPR2()) && (Ty != MVT::v4i8)) 988 return SDValue(); 989 990 return performDSPShiftCombine(MipsISD::SHRL_DSP, N, Ty, DAG, Subtarget); 991 } 992 993 static bool isLegalDSPCondCode(EVT Ty, ISD::CondCode CC) { 994 bool IsV216 = (Ty == MVT::v2i16); 995 996 switch (CC) { 997 case ISD::SETEQ: 998 case ISD::SETNE: return true; 999 case ISD::SETLT: 1000 case ISD::SETLE: 1001 case ISD::SETGT: 1002 case ISD::SETGE: return IsV216; 1003 case ISD::SETULT: 1004 case ISD::SETULE: 1005 case ISD::SETUGT: 1006 case ISD::SETUGE: return !IsV216; 1007 default: return false; 1008 } 1009 } 1010 1011 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) { 1012 EVT Ty = N->getValueType(0); 1013 1014 if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8)) 1015 return SDValue(); 1016 1017 if (!isLegalDSPCondCode(Ty, cast<CondCodeSDNode>(N->getOperand(2))->get())) 1018 return SDValue(); 1019 1020 return DAG.getNode(MipsISD::SETCC_DSP, SDLoc(N), Ty, N->getOperand(0), 1021 N->getOperand(1), N->getOperand(2)); 1022 } 1023 1024 static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) { 1025 EVT Ty = N->getValueType(0); 1026 1027 if (Ty.is128BitVector() && Ty.isInteger()) { 1028 // Try the following combines: 1029 // (vselect (setcc $a, $b, SETLT), $b, $a)) -> (vsmax $a, $b) 1030 // (vselect (setcc $a, $b, SETLE), $b, $a)) -> (vsmax $a, $b) 1031 // (vselect (setcc $a, $b, SETLT), $a, $b)) -> (vsmin $a, $b) 1032 // (vselect (setcc $a, $b, SETLE), $a, $b)) -> (vsmin $a, $b) 1033 // (vselect (setcc $a, $b, SETULT), $b, $a)) -> (vumax $a, $b) 1034 // (vselect (setcc $a, $b, SETULE), $b, $a)) -> (vumax $a, $b) 1035 // (vselect (setcc $a, $b, SETULT), $a, $b)) -> (vumin $a, $b) 1036 // (vselect (setcc $a, $b, SETULE), $a, $b)) -> (vumin $a, $b) 1037 // 
SETGT/SETGE/SETUGT/SETUGE variants of these will show up initially but 1038 // will be expanded to equivalent SETLT/SETLE/SETULT/SETULE versions by the 1039 // legalizer. 1040 SDValue Op0 = N->getOperand(0); 1041 1042 if (Op0->getOpcode() != ISD::SETCC) 1043 return SDValue(); 1044 1045 ISD::CondCode CondCode = cast<CondCodeSDNode>(Op0->getOperand(2))->get(); 1046 bool Signed; 1047 1048 if (CondCode == ISD::SETLT || CondCode == ISD::SETLE) 1049 Signed = true; 1050 else if (CondCode == ISD::SETULT || CondCode == ISD::SETULE) 1051 Signed = false; 1052 else 1053 return SDValue(); 1054 1055 SDValue Op1 = N->getOperand(1); 1056 SDValue Op2 = N->getOperand(2); 1057 SDValue Op0Op0 = Op0->getOperand(0); 1058 SDValue Op0Op1 = Op0->getOperand(1); 1059 1060 if (Op1 == Op0Op0 && Op2 == Op0Op1) 1061 return DAG.getNode(Signed ? MipsISD::VSMIN : MipsISD::VUMIN, SDLoc(N), 1062 Ty, Op1, Op2); 1063 else if (Op1 == Op0Op1 && Op2 == Op0Op0) 1064 return DAG.getNode(Signed ? MipsISD::VSMAX : MipsISD::VUMAX, SDLoc(N), 1065 Ty, Op1, Op2); 1066 } else if ((Ty == MVT::v2i16) || (Ty == MVT::v4i8)) { 1067 SDValue SetCC = N->getOperand(0); 1068 1069 if (SetCC.getOpcode() != MipsISD::SETCC_DSP) 1070 return SDValue(); 1071 1072 return DAG.getNode(MipsISD::SELECT_CC_DSP, SDLoc(N), Ty, 1073 SetCC.getOperand(0), SetCC.getOperand(1), 1074 N->getOperand(1), N->getOperand(2), SetCC.getOperand(2)); 1075 } 1076 1077 return SDValue(); 1078 } 1079 1080 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, 1081 const MipsSubtarget &Subtarget) { 1082 EVT Ty = N->getValueType(0); 1083 1084 if (Subtarget.hasMSA() && Ty.is128BitVector() && Ty.isInteger()) { 1085 // Try the following combines: 1086 // (xor (or $a, $b), (build_vector allones)) 1087 // (xor (or $a, $b), (bitcast (build_vector allones))) 1088 SDValue Op0 = N->getOperand(0); 1089 SDValue Op1 = N->getOperand(1); 1090 SDValue NotOp; 1091 1092 if (ISD::isBuildVectorAllOnes(Op0.getNode())) 1093 NotOp = Op1; 1094 else if 
(ISD::isBuildVectorAllOnes(Op1.getNode())) 1095 NotOp = Op0; 1096 else 1097 return SDValue(); 1098 1099 if (NotOp->getOpcode() == ISD::OR) 1100 return DAG.getNode(MipsISD::VNOR, SDLoc(N), Ty, NotOp->getOperand(0), 1101 NotOp->getOperand(1)); 1102 } 1103 1104 return SDValue(); 1105 } 1106 1107 SDValue 1108 MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { 1109 SelectionDAG &DAG = DCI.DAG; 1110 SDValue Val; 1111 1112 switch (N->getOpcode()) { 1113 case ISD::ADDE: 1114 return performADDECombine(N, DAG, DCI, Subtarget); 1115 case ISD::AND: 1116 Val = performANDCombine(N, DAG, DCI, Subtarget); 1117 break; 1118 case ISD::OR: 1119 Val = performORCombine(N, DAG, DCI, Subtarget); 1120 break; 1121 case ISD::SUBE: 1122 return performSUBECombine(N, DAG, DCI, Subtarget); 1123 case ISD::MUL: 1124 return performMULCombine(N, DAG, DCI, this); 1125 case ISD::SHL: 1126 Val = performSHLCombine(N, DAG, DCI, Subtarget); 1127 break; 1128 case ISD::SRA: 1129 return performSRACombine(N, DAG, DCI, Subtarget); 1130 case ISD::SRL: 1131 return performSRLCombine(N, DAG, DCI, Subtarget); 1132 case ISD::VSELECT: 1133 return performVSELECTCombine(N, DAG); 1134 case ISD::XOR: 1135 Val = performXORCombine(N, DAG, Subtarget); 1136 break; 1137 case ISD::SETCC: 1138 Val = performSETCCCombine(N, DAG); 1139 break; 1140 } 1141 1142 if (Val.getNode()) { 1143 DEBUG(dbgs() << "\nMipsSE DAG Combine:\n"; 1144 N->printrWithDepth(dbgs(), &DAG); 1145 dbgs() << "\n=> \n"; 1146 Val.getNode()->printrWithDepth(dbgs(), &DAG); 1147 dbgs() << "\n"); 1148 return Val; 1149 } 1150 1151 return MipsTargetLowering::PerformDAGCombine(N, DCI); 1152 } 1153 1154 MachineBasicBlock * 1155 MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, 1156 MachineBasicBlock *BB) const { 1157 switch (MI.getOpcode()) { 1158 default: 1159 return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB); 1160 case Mips::BPOSGE32_PSEUDO: 1161 return emitBPOSGE32(MI, BB); 1162 case Mips::SNZ_B_PSEUDO: 
1163 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_B); 1164 case Mips::SNZ_H_PSEUDO: 1165 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_H); 1166 case Mips::SNZ_W_PSEUDO: 1167 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_W); 1168 case Mips::SNZ_D_PSEUDO: 1169 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_D); 1170 case Mips::SNZ_V_PSEUDO: 1171 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_V); 1172 case Mips::SZ_B_PSEUDO: 1173 return emitMSACBranchPseudo(MI, BB, Mips::BZ_B); 1174 case Mips::SZ_H_PSEUDO: 1175 return emitMSACBranchPseudo(MI, BB, Mips::BZ_H); 1176 case Mips::SZ_W_PSEUDO: 1177 return emitMSACBranchPseudo(MI, BB, Mips::BZ_W); 1178 case Mips::SZ_D_PSEUDO: 1179 return emitMSACBranchPseudo(MI, BB, Mips::BZ_D); 1180 case Mips::SZ_V_PSEUDO: 1181 return emitMSACBranchPseudo(MI, BB, Mips::BZ_V); 1182 case Mips::COPY_FW_PSEUDO: 1183 return emitCOPY_FW(MI, BB); 1184 case Mips::COPY_FD_PSEUDO: 1185 return emitCOPY_FD(MI, BB); 1186 case Mips::INSERT_FW_PSEUDO: 1187 return emitINSERT_FW(MI, BB); 1188 case Mips::INSERT_FD_PSEUDO: 1189 return emitINSERT_FD(MI, BB); 1190 case Mips::INSERT_B_VIDX_PSEUDO: 1191 case Mips::INSERT_B_VIDX64_PSEUDO: 1192 return emitINSERT_DF_VIDX(MI, BB, 1, false); 1193 case Mips::INSERT_H_VIDX_PSEUDO: 1194 case Mips::INSERT_H_VIDX64_PSEUDO: 1195 return emitINSERT_DF_VIDX(MI, BB, 2, false); 1196 case Mips::INSERT_W_VIDX_PSEUDO: 1197 case Mips::INSERT_W_VIDX64_PSEUDO: 1198 return emitINSERT_DF_VIDX(MI, BB, 4, false); 1199 case Mips::INSERT_D_VIDX_PSEUDO: 1200 case Mips::INSERT_D_VIDX64_PSEUDO: 1201 return emitINSERT_DF_VIDX(MI, BB, 8, false); 1202 case Mips::INSERT_FW_VIDX_PSEUDO: 1203 case Mips::INSERT_FW_VIDX64_PSEUDO: 1204 return emitINSERT_DF_VIDX(MI, BB, 4, true); 1205 case Mips::INSERT_FD_VIDX_PSEUDO: 1206 case Mips::INSERT_FD_VIDX64_PSEUDO: 1207 return emitINSERT_DF_VIDX(MI, BB, 8, true); 1208 case Mips::FILL_FW_PSEUDO: 1209 return emitFILL_FW(MI, BB); 1210 case Mips::FILL_FD_PSEUDO: 1211 return emitFILL_FD(MI, BB); 1212 case 
Mips::FEXP2_W_1_PSEUDO: 1213 return emitFEXP2_W_1(MI, BB); 1214 case Mips::FEXP2_D_1_PSEUDO: 1215 return emitFEXP2_D_1(MI, BB); 1216 case Mips::ST_F16: 1217 return emitST_F16_PSEUDO(MI, BB); 1218 case Mips::LD_F16: 1219 return emitLD_F16_PSEUDO(MI, BB); 1220 case Mips::MSA_FP_EXTEND_W_PSEUDO: 1221 return emitFPEXTEND_PSEUDO(MI, BB, false); 1222 case Mips::MSA_FP_ROUND_W_PSEUDO: 1223 return emitFPROUND_PSEUDO(MI, BB, false); 1224 case Mips::MSA_FP_EXTEND_D_PSEUDO: 1225 return emitFPEXTEND_PSEUDO(MI, BB, true); 1226 case Mips::MSA_FP_ROUND_D_PSEUDO: 1227 return emitFPROUND_PSEUDO(MI, BB, true); 1228 } 1229 } 1230 1231 bool MipsSETargetLowering::isEligibleForTailCallOptimization( 1232 const CCState &CCInfo, unsigned NextStackOffset, 1233 const MipsFunctionInfo &FI) const { 1234 if (!UseMipsTailCalls) 1235 return false; 1236 1237 // Exception has to be cleared with eret. 1238 if (FI.isISR()) 1239 return false; 1240 1241 // Return false if either the callee or caller has a byval argument. 1242 if (CCInfo.getInRegsParamsCount() > 0 || FI.hasByvalArg()) 1243 return false; 1244 1245 // Return true if the callee's argument area is no larger than the 1246 // caller's. 
1247 return NextStackOffset <= FI.getIncomingArgSize(); 1248 } 1249 1250 void MipsSETargetLowering:: 1251 getOpndList(SmallVectorImpl<SDValue> &Ops, 1252 std::deque< std::pair<unsigned, SDValue> > &RegsToPass, 1253 bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, 1254 bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee, 1255 SDValue Chain) const { 1256 Ops.push_back(Callee); 1257 MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal, 1258 InternalLinkage, IsCallReloc, CLI, Callee, 1259 Chain); 1260 } 1261 1262 SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const { 1263 LoadSDNode &Nd = *cast<LoadSDNode>(Op); 1264 1265 if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore) 1266 return MipsTargetLowering::lowerLOAD(Op, DAG); 1267 1268 // Replace a double precision load with two i32 loads and a buildpair64. 1269 SDLoc DL(Op); 1270 SDValue Ptr = Nd.getBasePtr(), Chain = Nd.getChain(); 1271 EVT PtrVT = Ptr.getValueType(); 1272 1273 // i32 load from lower address. 1274 SDValue Lo = DAG.getLoad(MVT::i32, DL, Chain, Ptr, MachinePointerInfo(), 1275 Nd.getAlignment(), Nd.getMemOperand()->getFlags()); 1276 1277 // i32 load from higher address. 
1278 Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT)); 1279 SDValue Hi = DAG.getLoad( 1280 MVT::i32, DL, Lo.getValue(1), Ptr, MachinePointerInfo(), 1281 std::min(Nd.getAlignment(), 4U), Nd.getMemOperand()->getFlags()); 1282 1283 if (!Subtarget.isLittle()) 1284 std::swap(Lo, Hi); 1285 1286 SDValue BP = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi); 1287 SDValue Ops[2] = {BP, Hi.getValue(1)}; 1288 return DAG.getMergeValues(Ops, DL); 1289 } 1290 1291 SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const { 1292 StoreSDNode &Nd = *cast<StoreSDNode>(Op); 1293 1294 if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore) 1295 return MipsTargetLowering::lowerSTORE(Op, DAG); 1296 1297 // Replace a double precision store with two extractelement64s and i32 stores. 1298 SDLoc DL(Op); 1299 SDValue Val = Nd.getValue(), Ptr = Nd.getBasePtr(), Chain = Nd.getChain(); 1300 EVT PtrVT = Ptr.getValueType(); 1301 SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, 1302 Val, DAG.getConstant(0, DL, MVT::i32)); 1303 SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, 1304 Val, DAG.getConstant(1, DL, MVT::i32)); 1305 1306 if (!Subtarget.isLittle()) 1307 std::swap(Lo, Hi); 1308 1309 // i32 store to lower address. 1310 Chain = 1311 DAG.getStore(Chain, DL, Lo, Ptr, MachinePointerInfo(), Nd.getAlignment(), 1312 Nd.getMemOperand()->getFlags(), Nd.getAAInfo()); 1313 1314 // i32 store to higher address. 1315 Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT)); 1316 return DAG.getStore(Chain, DL, Hi, Ptr, MachinePointerInfo(), 1317 std::min(Nd.getAlignment(), 4U), 1318 Nd.getMemOperand()->getFlags(), Nd.getAAInfo()); 1319 } 1320 1321 SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc, 1322 bool HasLo, bool HasHi, 1323 SelectionDAG &DAG) const { 1324 // MIPS32r6/MIPS64r6 removed accumulator based multiplies. 
1325 assert(!Subtarget.hasMips32r6()); 1326 1327 EVT Ty = Op.getOperand(0).getValueType(); 1328 SDLoc DL(Op); 1329 SDValue Mult = DAG.getNode(NewOpc, DL, MVT::Untyped, 1330 Op.getOperand(0), Op.getOperand(1)); 1331 SDValue Lo, Hi; 1332 1333 if (HasLo) 1334 Lo = DAG.getNode(MipsISD::MFLO, DL, Ty, Mult); 1335 if (HasHi) 1336 Hi = DAG.getNode(MipsISD::MFHI, DL, Ty, Mult); 1337 1338 if (!HasLo || !HasHi) 1339 return HasLo ? Lo : Hi; 1340 1341 SDValue Vals[] = { Lo, Hi }; 1342 return DAG.getMergeValues(Vals, DL); 1343 } 1344 1345 static SDValue initAccumulator(SDValue In, const SDLoc &DL, SelectionDAG &DAG) { 1346 SDValue InLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In, 1347 DAG.getConstant(0, DL, MVT::i32)); 1348 SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In, 1349 DAG.getConstant(1, DL, MVT::i32)); 1350 return DAG.getNode(MipsISD::MTLOHI, DL, MVT::Untyped, InLo, InHi); 1351 } 1352 1353 static SDValue extractLOHI(SDValue Op, const SDLoc &DL, SelectionDAG &DAG) { 1354 SDValue Lo = DAG.getNode(MipsISD::MFLO, DL, MVT::i32, Op); 1355 SDValue Hi = DAG.getNode(MipsISD::MFHI, DL, MVT::i32, Op); 1356 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi); 1357 } 1358 1359 // This function expands mips intrinsic nodes which have 64-bit input operands 1360 // or output values. 1361 // 1362 // out64 = intrinsic-node in64 1363 // => 1364 // lo = copy (extract-element (in64, 0)) 1365 // hi = copy (extract-element (in64, 1)) 1366 // mips-specific-node 1367 // v0 = copy lo 1368 // v1 = copy hi 1369 // out64 = merge-values (v0, v1) 1370 // 1371 static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { 1372 SDLoc DL(Op); 1373 bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other; 1374 SmallVector<SDValue, 3> Ops; 1375 unsigned OpNo = 0; 1376 1377 // See if Op has a chain input. 1378 if (HasChainIn) 1379 Ops.push_back(Op->getOperand(OpNo++)); 1380 1381 // The next operand is the intrinsic opcode. 
1382 assert(Op->getOperand(OpNo).getOpcode() == ISD::TargetConstant); 1383 1384 // See if the next operand has type i64. 1385 SDValue Opnd = Op->getOperand(++OpNo), In64; 1386 1387 if (Opnd.getValueType() == MVT::i64) 1388 In64 = initAccumulator(Opnd, DL, DAG); 1389 else 1390 Ops.push_back(Opnd); 1391 1392 // Push the remaining operands. 1393 for (++OpNo ; OpNo < Op->getNumOperands(); ++OpNo) 1394 Ops.push_back(Op->getOperand(OpNo)); 1395 1396 // Add In64 to the end of the list. 1397 if (In64.getNode()) 1398 Ops.push_back(In64); 1399 1400 // Scan output. 1401 SmallVector<EVT, 2> ResTys; 1402 1403 for (SDNode::value_iterator I = Op->value_begin(), E = Op->value_end(); 1404 I != E; ++I) 1405 ResTys.push_back((*I == MVT::i64) ? MVT::Untyped : *I); 1406 1407 // Create node. 1408 SDValue Val = DAG.getNode(Opc, DL, ResTys, Ops); 1409 SDValue Out = (ResTys[0] == MVT::Untyped) ? extractLOHI(Val, DL, DAG) : Val; 1410 1411 if (!HasChainIn) 1412 return Out; 1413 1414 assert(Val->getValueType(1) == MVT::Other); 1415 SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) }; 1416 return DAG.getMergeValues(Vals, DL); 1417 } 1418 1419 // Lower an MSA copy intrinsic into the specified SelectionDAG node 1420 static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { 1421 SDLoc DL(Op); 1422 SDValue Vec = Op->getOperand(1); 1423 SDValue Idx = Op->getOperand(2); 1424 EVT ResTy = Op->getValueType(0); 1425 EVT EltTy = Vec->getValueType(0).getVectorElementType(); 1426 1427 SDValue Result = DAG.getNode(Opc, DL, ResTy, Vec, Idx, 1428 DAG.getValueType(EltTy)); 1429 1430 return Result; 1431 } 1432 1433 static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) { 1434 EVT ResVecTy = Op->getValueType(0); 1435 EVT ViaVecTy = ResVecTy; 1436 SDLoc DL(Op); 1437 1438 // When ResVecTy == MVT::v2i64, LaneA is the upper 32 bits of the lane and 1439 // LaneB is the lower 32-bits. Otherwise LaneA and LaneB are alternating 1440 // lanes. 
1441 SDValue LaneA; 1442 SDValue LaneB = Op->getOperand(2); 1443 1444 if (ResVecTy == MVT::v2i64) { 1445 LaneA = DAG.getConstant(0, DL, MVT::i32); 1446 ViaVecTy = MVT::v4i32; 1447 } else 1448 LaneA = LaneB; 1449 1450 SDValue Ops[16] = { LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, 1451 LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB }; 1452 1453 SDValue Result = DAG.getBuildVector( 1454 ViaVecTy, DL, makeArrayRef(Ops, ViaVecTy.getVectorNumElements())); 1455 1456 if (ViaVecTy != ResVecTy) 1457 Result = DAG.getNode(ISD::BITCAST, DL, ResVecTy, Result); 1458 1459 return Result; 1460 } 1461 1462 static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, 1463 bool IsSigned = false) { 1464 return DAG.getConstant( 1465 APInt(Op->getValueType(0).getScalarType().getSizeInBits(), 1466 Op->getConstantOperandVal(ImmOp), IsSigned), 1467 SDLoc(Op), Op->getValueType(0)); 1468 } 1469 1470 static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue, 1471 bool BigEndian, SelectionDAG &DAG) { 1472 EVT ViaVecTy = VecTy; 1473 SDValue SplatValueA = SplatValue; 1474 SDValue SplatValueB = SplatValue; 1475 SDLoc DL(SplatValue); 1476 1477 if (VecTy == MVT::v2i64) { 1478 // v2i64 BUILD_VECTOR must be performed via v4i32 so split into i32's. 1479 ViaVecTy = MVT::v4i32; 1480 1481 SplatValueA = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValue); 1482 SplatValueB = DAG.getNode(ISD::SRL, DL, MVT::i64, SplatValue, 1483 DAG.getConstant(32, DL, MVT::i32)); 1484 SplatValueB = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValueB); 1485 } 1486 1487 // We currently hold the parts in little endian order. Swap them if 1488 // necessary. 
1489 if (BigEndian) 1490 std::swap(SplatValueA, SplatValueB); 1491 1492 SDValue Ops[16] = { SplatValueA, SplatValueB, SplatValueA, SplatValueB, 1493 SplatValueA, SplatValueB, SplatValueA, SplatValueB, 1494 SplatValueA, SplatValueB, SplatValueA, SplatValueB, 1495 SplatValueA, SplatValueB, SplatValueA, SplatValueB }; 1496 1497 SDValue Result = DAG.getBuildVector( 1498 ViaVecTy, DL, makeArrayRef(Ops, ViaVecTy.getVectorNumElements())); 1499 1500 if (VecTy != ViaVecTy) 1501 Result = DAG.getNode(ISD::BITCAST, DL, VecTy, Result); 1502 1503 return Result; 1504 } 1505 1506 static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG, 1507 unsigned Opc, SDValue Imm, 1508 bool BigEndian) { 1509 EVT VecTy = Op->getValueType(0); 1510 SDValue Exp2Imm; 1511 SDLoc DL(Op); 1512 1513 // The DAG Combiner can't constant fold bitcasted vectors yet so we must do it 1514 // here for now. 1515 if (VecTy == MVT::v2i64) { 1516 if (ConstantSDNode *CImm = dyn_cast<ConstantSDNode>(Imm)) { 1517 APInt BitImm = APInt(64, 1) << CImm->getAPIntValue(); 1518 1519 SDValue BitImmHiOp = DAG.getConstant(BitImm.lshr(32).trunc(32), DL, 1520 MVT::i32); 1521 SDValue BitImmLoOp = DAG.getConstant(BitImm.trunc(32), DL, MVT::i32); 1522 1523 if (BigEndian) 1524 std::swap(BitImmLoOp, BitImmHiOp); 1525 1526 Exp2Imm = DAG.getNode( 1527 ISD::BITCAST, DL, MVT::v2i64, 1528 DAG.getBuildVector(MVT::v4i32, DL, 1529 {BitImmLoOp, BitImmHiOp, BitImmLoOp, BitImmHiOp})); 1530 } 1531 } 1532 1533 if (!Exp2Imm.getNode()) { 1534 // We couldnt constant fold, do a vector shift instead 1535 1536 // Extend i32 to i64 if necessary. Sign or zero extend doesn't matter since 1537 // only values 0-63 are valid. 
1538 if (VecTy == MVT::v2i64) 1539 Imm = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Imm); 1540 1541 Exp2Imm = getBuildVectorSplat(VecTy, Imm, BigEndian, DAG); 1542 1543 Exp2Imm = DAG.getNode(ISD::SHL, DL, VecTy, DAG.getConstant(1, DL, VecTy), 1544 Exp2Imm); 1545 } 1546 1547 return DAG.getNode(Opc, DL, VecTy, Op->getOperand(1), Exp2Imm); 1548 } 1549 1550 static SDValue truncateVecElts(SDValue Op, SelectionDAG &DAG) { 1551 SDLoc DL(Op); 1552 EVT ResTy = Op->getValueType(0); 1553 SDValue Vec = Op->getOperand(2); 1554 bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian(); 1555 MVT ResEltTy = ResTy == MVT::v2i64 ? MVT::i64 : MVT::i32; 1556 SDValue ConstValue = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, 1557 DL, ResEltTy); 1558 SDValue SplatVec = getBuildVectorSplat(ResTy, ConstValue, BigEndian, DAG); 1559 1560 return DAG.getNode(ISD::AND, DL, ResTy, Vec, SplatVec); 1561 } 1562 1563 static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) { 1564 EVT ResTy = Op->getValueType(0); 1565 SDLoc DL(Op); 1566 SDValue One = DAG.getConstant(1, DL, ResTy); 1567 SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Op, DAG)); 1568 1569 return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), 1570 DAG.getNOT(DL, Bit, ResTy)); 1571 } 1572 1573 static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) { 1574 SDLoc DL(Op); 1575 EVT ResTy = Op->getValueType(0); 1576 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) 1577 << cast<ConstantSDNode>(Op->getOperand(2))->getAPIntValue(); 1578 SDValue BitMask = DAG.getConstant(~BitImm, DL, ResTy); 1579 1580 return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), BitMask); 1581 } 1582 1583 SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, 1584 SelectionDAG &DAG) const { 1585 SDLoc DL(Op); 1586 unsigned Intrinsic = cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue(); 1587 switch (Intrinsic) { 1588 default: 1589 return SDValue(); 1590 case Intrinsic::mips_shilo: 1591 
return lowerDSPIntr(Op, DAG, MipsISD::SHILO); 1592 case Intrinsic::mips_dpau_h_qbl: 1593 return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL); 1594 case Intrinsic::mips_dpau_h_qbr: 1595 return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR); 1596 case Intrinsic::mips_dpsu_h_qbl: 1597 return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL); 1598 case Intrinsic::mips_dpsu_h_qbr: 1599 return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR); 1600 case Intrinsic::mips_dpa_w_ph: 1601 return lowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH); 1602 case Intrinsic::mips_dps_w_ph: 1603 return lowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH); 1604 case Intrinsic::mips_dpax_w_ph: 1605 return lowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH); 1606 case Intrinsic::mips_dpsx_w_ph: 1607 return lowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH); 1608 case Intrinsic::mips_mulsa_w_ph: 1609 return lowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH); 1610 case Intrinsic::mips_mult: 1611 return lowerDSPIntr(Op, DAG, MipsISD::Mult); 1612 case Intrinsic::mips_multu: 1613 return lowerDSPIntr(Op, DAG, MipsISD::Multu); 1614 case Intrinsic::mips_madd: 1615 return lowerDSPIntr(Op, DAG, MipsISD::MAdd); 1616 case Intrinsic::mips_maddu: 1617 return lowerDSPIntr(Op, DAG, MipsISD::MAddu); 1618 case Intrinsic::mips_msub: 1619 return lowerDSPIntr(Op, DAG, MipsISD::MSub); 1620 case Intrinsic::mips_msubu: 1621 return lowerDSPIntr(Op, DAG, MipsISD::MSubu); 1622 case Intrinsic::mips_addv_b: 1623 case Intrinsic::mips_addv_h: 1624 case Intrinsic::mips_addv_w: 1625 case Intrinsic::mips_addv_d: 1626 return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1), 1627 Op->getOperand(2)); 1628 case Intrinsic::mips_addvi_b: 1629 case Intrinsic::mips_addvi_h: 1630 case Intrinsic::mips_addvi_w: 1631 case Intrinsic::mips_addvi_d: 1632 return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1), 1633 lowerMSASplatImm(Op, 2, DAG)); 1634 case Intrinsic::mips_and_v: 1635 return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1), 1636 
Op->getOperand(2)); 1637 case Intrinsic::mips_andi_b: 1638 return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1), 1639 lowerMSASplatImm(Op, 2, DAG)); 1640 case Intrinsic::mips_bclr_b: 1641 case Intrinsic::mips_bclr_h: 1642 case Intrinsic::mips_bclr_w: 1643 case Intrinsic::mips_bclr_d: 1644 return lowerMSABitClear(Op, DAG); 1645 case Intrinsic::mips_bclri_b: 1646 case Intrinsic::mips_bclri_h: 1647 case Intrinsic::mips_bclri_w: 1648 case Intrinsic::mips_bclri_d: 1649 return lowerMSABitClearImm(Op, DAG); 1650 case Intrinsic::mips_binsli_b: 1651 case Intrinsic::mips_binsli_h: 1652 case Intrinsic::mips_binsli_w: 1653 case Intrinsic::mips_binsli_d: { 1654 // binsli_x(IfClear, IfSet, nbits) -> (vselect LBitsMask, IfSet, IfClear) 1655 EVT VecTy = Op->getValueType(0); 1656 EVT EltTy = VecTy.getVectorElementType(); 1657 if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits()) 1658 report_fatal_error("Immediate out of range"); 1659 APInt Mask = APInt::getHighBitsSet(EltTy.getSizeInBits(), 1660 Op->getConstantOperandVal(3) + 1); 1661 return DAG.getNode(ISD::VSELECT, DL, VecTy, 1662 DAG.getConstant(Mask, DL, VecTy, true), 1663 Op->getOperand(2), Op->getOperand(1)); 1664 } 1665 case Intrinsic::mips_binsri_b: 1666 case Intrinsic::mips_binsri_h: 1667 case Intrinsic::mips_binsri_w: 1668 case Intrinsic::mips_binsri_d: { 1669 // binsri_x(IfClear, IfSet, nbits) -> (vselect RBitsMask, IfSet, IfClear) 1670 EVT VecTy = Op->getValueType(0); 1671 EVT EltTy = VecTy.getVectorElementType(); 1672 if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits()) 1673 report_fatal_error("Immediate out of range"); 1674 APInt Mask = APInt::getLowBitsSet(EltTy.getSizeInBits(), 1675 Op->getConstantOperandVal(3) + 1); 1676 return DAG.getNode(ISD::VSELECT, DL, VecTy, 1677 DAG.getConstant(Mask, DL, VecTy, true), 1678 Op->getOperand(2), Op->getOperand(1)); 1679 } 1680 case Intrinsic::mips_bmnz_v: 1681 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3), 1682 
Op->getOperand(2), Op->getOperand(1)); 1683 case Intrinsic::mips_bmnzi_b: 1684 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1685 lowerMSASplatImm(Op, 3, DAG), Op->getOperand(2), 1686 Op->getOperand(1)); 1687 case Intrinsic::mips_bmz_v: 1688 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3), 1689 Op->getOperand(1), Op->getOperand(2)); 1690 case Intrinsic::mips_bmzi_b: 1691 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1692 lowerMSASplatImm(Op, 3, DAG), Op->getOperand(1), 1693 Op->getOperand(2)); 1694 case Intrinsic::mips_bneg_b: 1695 case Intrinsic::mips_bneg_h: 1696 case Intrinsic::mips_bneg_w: 1697 case Intrinsic::mips_bneg_d: { 1698 EVT VecTy = Op->getValueType(0); 1699 SDValue One = DAG.getConstant(1, DL, VecTy); 1700 1701 return DAG.getNode(ISD::XOR, DL, VecTy, Op->getOperand(1), 1702 DAG.getNode(ISD::SHL, DL, VecTy, One, 1703 truncateVecElts(Op, DAG))); 1704 } 1705 case Intrinsic::mips_bnegi_b: 1706 case Intrinsic::mips_bnegi_h: 1707 case Intrinsic::mips_bnegi_w: 1708 case Intrinsic::mips_bnegi_d: 1709 return lowerMSABinaryBitImmIntr(Op, DAG, ISD::XOR, Op->getOperand(2), 1710 !Subtarget.isLittle()); 1711 case Intrinsic::mips_bnz_b: 1712 case Intrinsic::mips_bnz_h: 1713 case Intrinsic::mips_bnz_w: 1714 case Intrinsic::mips_bnz_d: 1715 return DAG.getNode(MipsISD::VALL_NONZERO, DL, Op->getValueType(0), 1716 Op->getOperand(1)); 1717 case Intrinsic::mips_bnz_v: 1718 return DAG.getNode(MipsISD::VANY_NONZERO, DL, Op->getValueType(0), 1719 Op->getOperand(1)); 1720 case Intrinsic::mips_bsel_v: 1721 // bsel_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear) 1722 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1723 Op->getOperand(1), Op->getOperand(3), 1724 Op->getOperand(2)); 1725 case Intrinsic::mips_bseli_b: 1726 // bseli_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear) 1727 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1728 Op->getOperand(1), lowerMSASplatImm(Op, 3, DAG), 1729 
Op->getOperand(2)); 1730 case Intrinsic::mips_bset_b: 1731 case Intrinsic::mips_bset_h: 1732 case Intrinsic::mips_bset_w: 1733 case Intrinsic::mips_bset_d: { 1734 EVT VecTy = Op->getValueType(0); 1735 SDValue One = DAG.getConstant(1, DL, VecTy); 1736 1737 return DAG.getNode(ISD::OR, DL, VecTy, Op->getOperand(1), 1738 DAG.getNode(ISD::SHL, DL, VecTy, One, 1739 truncateVecElts(Op, DAG))); 1740 } 1741 case Intrinsic::mips_bseti_b: 1742 case Intrinsic::mips_bseti_h: 1743 case Intrinsic::mips_bseti_w: 1744 case Intrinsic::mips_bseti_d: 1745 return lowerMSABinaryBitImmIntr(Op, DAG, ISD::OR, Op->getOperand(2), 1746 !Subtarget.isLittle()); 1747 case Intrinsic::mips_bz_b: 1748 case Intrinsic::mips_bz_h: 1749 case Intrinsic::mips_bz_w: 1750 case Intrinsic::mips_bz_d: 1751 return DAG.getNode(MipsISD::VALL_ZERO, DL, Op->getValueType(0), 1752 Op->getOperand(1)); 1753 case Intrinsic::mips_bz_v: 1754 return DAG.getNode(MipsISD::VANY_ZERO, DL, Op->getValueType(0), 1755 Op->getOperand(1)); 1756 case Intrinsic::mips_ceq_b: 1757 case Intrinsic::mips_ceq_h: 1758 case Intrinsic::mips_ceq_w: 1759 case Intrinsic::mips_ceq_d: 1760 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1761 Op->getOperand(2), ISD::SETEQ); 1762 case Intrinsic::mips_ceqi_b: 1763 case Intrinsic::mips_ceqi_h: 1764 case Intrinsic::mips_ceqi_w: 1765 case Intrinsic::mips_ceqi_d: 1766 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1767 lowerMSASplatImm(Op, 2, DAG, true), ISD::SETEQ); 1768 case Intrinsic::mips_cle_s_b: 1769 case Intrinsic::mips_cle_s_h: 1770 case Intrinsic::mips_cle_s_w: 1771 case Intrinsic::mips_cle_s_d: 1772 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1773 Op->getOperand(2), ISD::SETLE); 1774 case Intrinsic::mips_clei_s_b: 1775 case Intrinsic::mips_clei_s_h: 1776 case Intrinsic::mips_clei_s_w: 1777 case Intrinsic::mips_clei_s_d: 1778 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1779 lowerMSASplatImm(Op, 2, DAG, true), ISD::SETLE); 
1780 case Intrinsic::mips_cle_u_b: 1781 case Intrinsic::mips_cle_u_h: 1782 case Intrinsic::mips_cle_u_w: 1783 case Intrinsic::mips_cle_u_d: 1784 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1785 Op->getOperand(2), ISD::SETULE); 1786 case Intrinsic::mips_clei_u_b: 1787 case Intrinsic::mips_clei_u_h: 1788 case Intrinsic::mips_clei_u_w: 1789 case Intrinsic::mips_clei_u_d: 1790 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1791 lowerMSASplatImm(Op, 2, DAG), ISD::SETULE); 1792 case Intrinsic::mips_clt_s_b: 1793 case Intrinsic::mips_clt_s_h: 1794 case Intrinsic::mips_clt_s_w: 1795 case Intrinsic::mips_clt_s_d: 1796 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1797 Op->getOperand(2), ISD::SETLT); 1798 case Intrinsic::mips_clti_s_b: 1799 case Intrinsic::mips_clti_s_h: 1800 case Intrinsic::mips_clti_s_w: 1801 case Intrinsic::mips_clti_s_d: 1802 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1803 lowerMSASplatImm(Op, 2, DAG, true), ISD::SETLT); 1804 case Intrinsic::mips_clt_u_b: 1805 case Intrinsic::mips_clt_u_h: 1806 case Intrinsic::mips_clt_u_w: 1807 case Intrinsic::mips_clt_u_d: 1808 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1809 Op->getOperand(2), ISD::SETULT); 1810 case Intrinsic::mips_clti_u_b: 1811 case Intrinsic::mips_clti_u_h: 1812 case Intrinsic::mips_clti_u_w: 1813 case Intrinsic::mips_clti_u_d: 1814 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1815 lowerMSASplatImm(Op, 2, DAG), ISD::SETULT); 1816 case Intrinsic::mips_copy_s_b: 1817 case Intrinsic::mips_copy_s_h: 1818 case Intrinsic::mips_copy_s_w: 1819 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT); 1820 case Intrinsic::mips_copy_s_d: 1821 if (Subtarget.hasMips64()) 1822 // Lower directly into VEXTRACT_SEXT_ELT since i64 is legal on Mips64. 
1823 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT); 1824 else { 1825 // Lower into the generic EXTRACT_VECTOR_ELT node and let the type 1826 // legalizer and EXTRACT_VECTOR_ELT lowering sort it out. 1827 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), 1828 Op->getValueType(0), Op->getOperand(1), 1829 Op->getOperand(2)); 1830 } 1831 case Intrinsic::mips_copy_u_b: 1832 case Intrinsic::mips_copy_u_h: 1833 case Intrinsic::mips_copy_u_w: 1834 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT); 1835 case Intrinsic::mips_copy_u_d: 1836 if (Subtarget.hasMips64()) 1837 // Lower directly into VEXTRACT_ZEXT_ELT since i64 is legal on Mips64. 1838 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT); 1839 else { 1840 // Lower into the generic EXTRACT_VECTOR_ELT node and let the type 1841 // legalizer and EXTRACT_VECTOR_ELT lowering sort it out. 1842 // Note: When i64 is illegal, this results in copy_s.w instructions 1843 // instead of copy_u.w instructions. This makes no difference to the 1844 // behaviour since i64 is only illegal when the register file is 32-bit. 1845 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), 1846 Op->getValueType(0), Op->getOperand(1), 1847 Op->getOperand(2)); 1848 } 1849 case Intrinsic::mips_div_s_b: 1850 case Intrinsic::mips_div_s_h: 1851 case Intrinsic::mips_div_s_w: 1852 case Intrinsic::mips_div_s_d: 1853 return DAG.getNode(ISD::SDIV, DL, Op->getValueType(0), Op->getOperand(1), 1854 Op->getOperand(2)); 1855 case Intrinsic::mips_div_u_b: 1856 case Intrinsic::mips_div_u_h: 1857 case Intrinsic::mips_div_u_w: 1858 case Intrinsic::mips_div_u_d: 1859 return DAG.getNode(ISD::UDIV, DL, Op->getValueType(0), Op->getOperand(1), 1860 Op->getOperand(2)); 1861 case Intrinsic::mips_fadd_w: 1862 case Intrinsic::mips_fadd_d: { 1863 // TODO: If intrinsics have fast-math-flags, propagate them. 
1864 return DAG.getNode(ISD::FADD, DL, Op->getValueType(0), Op->getOperand(1), 1865 Op->getOperand(2)); 1866 } 1867 // Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away 1868 case Intrinsic::mips_fceq_w: 1869 case Intrinsic::mips_fceq_d: 1870 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1871 Op->getOperand(2), ISD::SETOEQ); 1872 case Intrinsic::mips_fcle_w: 1873 case Intrinsic::mips_fcle_d: 1874 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1875 Op->getOperand(2), ISD::SETOLE); 1876 case Intrinsic::mips_fclt_w: 1877 case Intrinsic::mips_fclt_d: 1878 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1879 Op->getOperand(2), ISD::SETOLT); 1880 case Intrinsic::mips_fcne_w: 1881 case Intrinsic::mips_fcne_d: 1882 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1883 Op->getOperand(2), ISD::SETONE); 1884 case Intrinsic::mips_fcor_w: 1885 case Intrinsic::mips_fcor_d: 1886 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1887 Op->getOperand(2), ISD::SETO); 1888 case Intrinsic::mips_fcueq_w: 1889 case Intrinsic::mips_fcueq_d: 1890 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1891 Op->getOperand(2), ISD::SETUEQ); 1892 case Intrinsic::mips_fcule_w: 1893 case Intrinsic::mips_fcule_d: 1894 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1895 Op->getOperand(2), ISD::SETULE); 1896 case Intrinsic::mips_fcult_w: 1897 case Intrinsic::mips_fcult_d: 1898 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1899 Op->getOperand(2), ISD::SETULT); 1900 case Intrinsic::mips_fcun_w: 1901 case Intrinsic::mips_fcun_d: 1902 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1903 Op->getOperand(2), ISD::SETUO); 1904 case Intrinsic::mips_fcune_w: 1905 case Intrinsic::mips_fcune_d: 1906 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1907 Op->getOperand(2), ISD::SETUNE); 1908 case Intrinsic::mips_fdiv_w: 1909 case 
Intrinsic::mips_fdiv_d: { 1910 // TODO: If intrinsics have fast-math-flags, propagate them. 1911 return DAG.getNode(ISD::FDIV, DL, Op->getValueType(0), Op->getOperand(1), 1912 Op->getOperand(2)); 1913 } 1914 case Intrinsic::mips_ffint_u_w: 1915 case Intrinsic::mips_ffint_u_d: 1916 return DAG.getNode(ISD::UINT_TO_FP, DL, Op->getValueType(0), 1917 Op->getOperand(1)); 1918 case Intrinsic::mips_ffint_s_w: 1919 case Intrinsic::mips_ffint_s_d: 1920 return DAG.getNode(ISD::SINT_TO_FP, DL, Op->getValueType(0), 1921 Op->getOperand(1)); 1922 case Intrinsic::mips_fill_b: 1923 case Intrinsic::mips_fill_h: 1924 case Intrinsic::mips_fill_w: 1925 case Intrinsic::mips_fill_d: { 1926 EVT ResTy = Op->getValueType(0); 1927 SmallVector<SDValue, 16> Ops(ResTy.getVectorNumElements(), 1928 Op->getOperand(1)); 1929 1930 // If ResTy is v2i64 then the type legalizer will break this node down into 1931 // an equivalent v4i32. 1932 return DAG.getBuildVector(ResTy, DL, Ops); 1933 } 1934 case Intrinsic::mips_fexp2_w: 1935 case Intrinsic::mips_fexp2_d: { 1936 // TODO: If intrinsics have fast-math-flags, propagate them. 1937 EVT ResTy = Op->getValueType(0); 1938 return DAG.getNode( 1939 ISD::FMUL, SDLoc(Op), ResTy, Op->getOperand(1), 1940 DAG.getNode(ISD::FEXP2, SDLoc(Op), ResTy, Op->getOperand(2))); 1941 } 1942 case Intrinsic::mips_flog2_w: 1943 case Intrinsic::mips_flog2_d: 1944 return DAG.getNode(ISD::FLOG2, DL, Op->getValueType(0), Op->getOperand(1)); 1945 case Intrinsic::mips_fmadd_w: 1946 case Intrinsic::mips_fmadd_d: 1947 return DAG.getNode(ISD::FMA, SDLoc(Op), Op->getValueType(0), 1948 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); 1949 case Intrinsic::mips_fmul_w: 1950 case Intrinsic::mips_fmul_d: { 1951 // TODO: If intrinsics have fast-math-flags, propagate them. 
1952 return DAG.getNode(ISD::FMUL, DL, Op->getValueType(0), Op->getOperand(1), 1953 Op->getOperand(2)); 1954 } 1955 case Intrinsic::mips_fmsub_w: 1956 case Intrinsic::mips_fmsub_d: { 1957 // TODO: If intrinsics have fast-math-flags, propagate them. 1958 EVT ResTy = Op->getValueType(0); 1959 return DAG.getNode(ISD::FSUB, SDLoc(Op), ResTy, Op->getOperand(1), 1960 DAG.getNode(ISD::FMUL, SDLoc(Op), ResTy, 1961 Op->getOperand(2), Op->getOperand(3))); 1962 } 1963 case Intrinsic::mips_frint_w: 1964 case Intrinsic::mips_frint_d: 1965 return DAG.getNode(ISD::FRINT, DL, Op->getValueType(0), Op->getOperand(1)); 1966 case Intrinsic::mips_fsqrt_w: 1967 case Intrinsic::mips_fsqrt_d: 1968 return DAG.getNode(ISD::FSQRT, DL, Op->getValueType(0), Op->getOperand(1)); 1969 case Intrinsic::mips_fsub_w: 1970 case Intrinsic::mips_fsub_d: { 1971 // TODO: If intrinsics have fast-math-flags, propagate them. 1972 return DAG.getNode(ISD::FSUB, DL, Op->getValueType(0), Op->getOperand(1), 1973 Op->getOperand(2)); 1974 } 1975 case Intrinsic::mips_ftrunc_u_w: 1976 case Intrinsic::mips_ftrunc_u_d: 1977 return DAG.getNode(ISD::FP_TO_UINT, DL, Op->getValueType(0), 1978 Op->getOperand(1)); 1979 case Intrinsic::mips_ftrunc_s_w: 1980 case Intrinsic::mips_ftrunc_s_d: 1981 return DAG.getNode(ISD::FP_TO_SINT, DL, Op->getValueType(0), 1982 Op->getOperand(1)); 1983 case Intrinsic::mips_ilvev_b: 1984 case Intrinsic::mips_ilvev_h: 1985 case Intrinsic::mips_ilvev_w: 1986 case Intrinsic::mips_ilvev_d: 1987 return DAG.getNode(MipsISD::ILVEV, DL, Op->getValueType(0), 1988 Op->getOperand(1), Op->getOperand(2)); 1989 case Intrinsic::mips_ilvl_b: 1990 case Intrinsic::mips_ilvl_h: 1991 case Intrinsic::mips_ilvl_w: 1992 case Intrinsic::mips_ilvl_d: 1993 return DAG.getNode(MipsISD::ILVL, DL, Op->getValueType(0), 1994 Op->getOperand(1), Op->getOperand(2)); 1995 case Intrinsic::mips_ilvod_b: 1996 case Intrinsic::mips_ilvod_h: 1997 case Intrinsic::mips_ilvod_w: 1998 case Intrinsic::mips_ilvod_d: 1999 return 
DAG.getNode(MipsISD::ILVOD, DL, Op->getValueType(0), 2000 Op->getOperand(1), Op->getOperand(2)); 2001 case Intrinsic::mips_ilvr_b: 2002 case Intrinsic::mips_ilvr_h: 2003 case Intrinsic::mips_ilvr_w: 2004 case Intrinsic::mips_ilvr_d: 2005 return DAG.getNode(MipsISD::ILVR, DL, Op->getValueType(0), 2006 Op->getOperand(1), Op->getOperand(2)); 2007 case Intrinsic::mips_insert_b: 2008 case Intrinsic::mips_insert_h: 2009 case Intrinsic::mips_insert_w: 2010 case Intrinsic::mips_insert_d: 2011 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), Op->getValueType(0), 2012 Op->getOperand(1), Op->getOperand(3), Op->getOperand(2)); 2013 case Intrinsic::mips_insve_b: 2014 case Intrinsic::mips_insve_h: 2015 case Intrinsic::mips_insve_w: 2016 case Intrinsic::mips_insve_d: { 2017 // Report an error for out of range values. 2018 int64_t Max; 2019 switch (Intrinsic) { 2020 case Intrinsic::mips_insve_b: Max = 15; break; 2021 case Intrinsic::mips_insve_h: Max = 7; break; 2022 case Intrinsic::mips_insve_w: Max = 3; break; 2023 case Intrinsic::mips_insve_d: Max = 1; break; 2024 default: llvm_unreachable("Unmatched intrinsic"); 2025 } 2026 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 2027 if (Value < 0 || Value > Max) 2028 report_fatal_error("Immediate out of range"); 2029 return DAG.getNode(MipsISD::INSVE, DL, Op->getValueType(0), 2030 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3), 2031 DAG.getConstant(0, DL, MVT::i32)); 2032 } 2033 case Intrinsic::mips_ldi_b: 2034 case Intrinsic::mips_ldi_h: 2035 case Intrinsic::mips_ldi_w: 2036 case Intrinsic::mips_ldi_d: 2037 return lowerMSASplatImm(Op, 1, DAG, true); 2038 case Intrinsic::mips_lsa: 2039 case Intrinsic::mips_dlsa: { 2040 EVT ResTy = Op->getValueType(0); 2041 return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1), 2042 DAG.getNode(ISD::SHL, SDLoc(Op), ResTy, 2043 Op->getOperand(2), Op->getOperand(3))); 2044 } 2045 case Intrinsic::mips_maddv_b: 2046 case Intrinsic::mips_maddv_h: 2047 case 
Intrinsic::mips_maddv_w: 2048 case Intrinsic::mips_maddv_d: { 2049 EVT ResTy = Op->getValueType(0); 2050 return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1), 2051 DAG.getNode(ISD::MUL, SDLoc(Op), ResTy, 2052 Op->getOperand(2), Op->getOperand(3))); 2053 } 2054 case Intrinsic::mips_max_s_b: 2055 case Intrinsic::mips_max_s_h: 2056 case Intrinsic::mips_max_s_w: 2057 case Intrinsic::mips_max_s_d: 2058 return DAG.getNode(MipsISD::VSMAX, DL, Op->getValueType(0), 2059 Op->getOperand(1), Op->getOperand(2)); 2060 case Intrinsic::mips_max_u_b: 2061 case Intrinsic::mips_max_u_h: 2062 case Intrinsic::mips_max_u_w: 2063 case Intrinsic::mips_max_u_d: 2064 return DAG.getNode(MipsISD::VUMAX, DL, Op->getValueType(0), 2065 Op->getOperand(1), Op->getOperand(2)); 2066 case Intrinsic::mips_maxi_s_b: 2067 case Intrinsic::mips_maxi_s_h: 2068 case Intrinsic::mips_maxi_s_w: 2069 case Intrinsic::mips_maxi_s_d: 2070 return DAG.getNode(MipsISD::VSMAX, DL, Op->getValueType(0), 2071 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG, true)); 2072 case Intrinsic::mips_maxi_u_b: 2073 case Intrinsic::mips_maxi_u_h: 2074 case Intrinsic::mips_maxi_u_w: 2075 case Intrinsic::mips_maxi_u_d: 2076 return DAG.getNode(MipsISD::VUMAX, DL, Op->getValueType(0), 2077 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2078 case Intrinsic::mips_min_s_b: 2079 case Intrinsic::mips_min_s_h: 2080 case Intrinsic::mips_min_s_w: 2081 case Intrinsic::mips_min_s_d: 2082 return DAG.getNode(MipsISD::VSMIN, DL, Op->getValueType(0), 2083 Op->getOperand(1), Op->getOperand(2)); 2084 case Intrinsic::mips_min_u_b: 2085 case Intrinsic::mips_min_u_h: 2086 case Intrinsic::mips_min_u_w: 2087 case Intrinsic::mips_min_u_d: 2088 return DAG.getNode(MipsISD::VUMIN, DL, Op->getValueType(0), 2089 Op->getOperand(1), Op->getOperand(2)); 2090 case Intrinsic::mips_mini_s_b: 2091 case Intrinsic::mips_mini_s_h: 2092 case Intrinsic::mips_mini_s_w: 2093 case Intrinsic::mips_mini_s_d: 2094 return DAG.getNode(MipsISD::VSMIN, DL, 
Op->getValueType(0), 2095 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG, true)); 2096 case Intrinsic::mips_mini_u_b: 2097 case Intrinsic::mips_mini_u_h: 2098 case Intrinsic::mips_mini_u_w: 2099 case Intrinsic::mips_mini_u_d: 2100 return DAG.getNode(MipsISD::VUMIN, DL, Op->getValueType(0), 2101 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2102 case Intrinsic::mips_mod_s_b: 2103 case Intrinsic::mips_mod_s_h: 2104 case Intrinsic::mips_mod_s_w: 2105 case Intrinsic::mips_mod_s_d: 2106 return DAG.getNode(ISD::SREM, DL, Op->getValueType(0), Op->getOperand(1), 2107 Op->getOperand(2)); 2108 case Intrinsic::mips_mod_u_b: 2109 case Intrinsic::mips_mod_u_h: 2110 case Intrinsic::mips_mod_u_w: 2111 case Intrinsic::mips_mod_u_d: 2112 return DAG.getNode(ISD::UREM, DL, Op->getValueType(0), Op->getOperand(1), 2113 Op->getOperand(2)); 2114 case Intrinsic::mips_mulv_b: 2115 case Intrinsic::mips_mulv_h: 2116 case Intrinsic::mips_mulv_w: 2117 case Intrinsic::mips_mulv_d: 2118 return DAG.getNode(ISD::MUL, DL, Op->getValueType(0), Op->getOperand(1), 2119 Op->getOperand(2)); 2120 case Intrinsic::mips_msubv_b: 2121 case Intrinsic::mips_msubv_h: 2122 case Intrinsic::mips_msubv_w: 2123 case Intrinsic::mips_msubv_d: { 2124 EVT ResTy = Op->getValueType(0); 2125 return DAG.getNode(ISD::SUB, SDLoc(Op), ResTy, Op->getOperand(1), 2126 DAG.getNode(ISD::MUL, SDLoc(Op), ResTy, 2127 Op->getOperand(2), Op->getOperand(3))); 2128 } 2129 case Intrinsic::mips_nlzc_b: 2130 case Intrinsic::mips_nlzc_h: 2131 case Intrinsic::mips_nlzc_w: 2132 case Intrinsic::mips_nlzc_d: 2133 return DAG.getNode(ISD::CTLZ, DL, Op->getValueType(0), Op->getOperand(1)); 2134 case Intrinsic::mips_nor_v: { 2135 SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0), 2136 Op->getOperand(1), Op->getOperand(2)); 2137 return DAG.getNOT(DL, Res, Res->getValueType(0)); 2138 } 2139 case Intrinsic::mips_nori_b: { 2140 SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0), 2141 Op->getOperand(1), 2142 lowerMSASplatImm(Op, 2, 
DAG)); 2143 return DAG.getNOT(DL, Res, Res->getValueType(0)); 2144 } 2145 case Intrinsic::mips_or_v: 2146 return DAG.getNode(ISD::OR, DL, Op->getValueType(0), Op->getOperand(1), 2147 Op->getOperand(2)); 2148 case Intrinsic::mips_ori_b: 2149 return DAG.getNode(ISD::OR, DL, Op->getValueType(0), 2150 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2151 case Intrinsic::mips_pckev_b: 2152 case Intrinsic::mips_pckev_h: 2153 case Intrinsic::mips_pckev_w: 2154 case Intrinsic::mips_pckev_d: 2155 return DAG.getNode(MipsISD::PCKEV, DL, Op->getValueType(0), 2156 Op->getOperand(1), Op->getOperand(2)); 2157 case Intrinsic::mips_pckod_b: 2158 case Intrinsic::mips_pckod_h: 2159 case Intrinsic::mips_pckod_w: 2160 case Intrinsic::mips_pckod_d: 2161 return DAG.getNode(MipsISD::PCKOD, DL, Op->getValueType(0), 2162 Op->getOperand(1), Op->getOperand(2)); 2163 case Intrinsic::mips_pcnt_b: 2164 case Intrinsic::mips_pcnt_h: 2165 case Intrinsic::mips_pcnt_w: 2166 case Intrinsic::mips_pcnt_d: 2167 return DAG.getNode(ISD::CTPOP, DL, Op->getValueType(0), Op->getOperand(1)); 2168 case Intrinsic::mips_sat_s_b: 2169 case Intrinsic::mips_sat_s_h: 2170 case Intrinsic::mips_sat_s_w: 2171 case Intrinsic::mips_sat_s_d: 2172 case Intrinsic::mips_sat_u_b: 2173 case Intrinsic::mips_sat_u_h: 2174 case Intrinsic::mips_sat_u_w: 2175 case Intrinsic::mips_sat_u_d: { 2176 // Report an error for out of range values. 
2177 int64_t Max; 2178 switch (Intrinsic) { 2179 case Intrinsic::mips_sat_s_b: 2180 case Intrinsic::mips_sat_u_b: Max = 7; break; 2181 case Intrinsic::mips_sat_s_h: 2182 case Intrinsic::mips_sat_u_h: Max = 15; break; 2183 case Intrinsic::mips_sat_s_w: 2184 case Intrinsic::mips_sat_u_w: Max = 31; break; 2185 case Intrinsic::mips_sat_s_d: 2186 case Intrinsic::mips_sat_u_d: Max = 63; break; 2187 default: llvm_unreachable("Unmatched intrinsic"); 2188 } 2189 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 2190 if (Value < 0 || Value > Max) 2191 report_fatal_error("Immediate out of range"); 2192 return SDValue(); 2193 } 2194 case Intrinsic::mips_shf_b: 2195 case Intrinsic::mips_shf_h: 2196 case Intrinsic::mips_shf_w: { 2197 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 2198 if (Value < 0 || Value > 255) 2199 report_fatal_error("Immediate out of range"); 2200 return DAG.getNode(MipsISD::SHF, DL, Op->getValueType(0), 2201 Op->getOperand(2), Op->getOperand(1)); 2202 } 2203 case Intrinsic::mips_sldi_b: 2204 case Intrinsic::mips_sldi_h: 2205 case Intrinsic::mips_sldi_w: 2206 case Intrinsic::mips_sldi_d: { 2207 // Report an error for out of range values. 
2208 int64_t Max; 2209 switch (Intrinsic) { 2210 case Intrinsic::mips_sldi_b: Max = 15; break; 2211 case Intrinsic::mips_sldi_h: Max = 7; break; 2212 case Intrinsic::mips_sldi_w: Max = 3; break; 2213 case Intrinsic::mips_sldi_d: Max = 1; break; 2214 default: llvm_unreachable("Unmatched intrinsic"); 2215 } 2216 int64_t Value = cast<ConstantSDNode>(Op->getOperand(3))->getSExtValue(); 2217 if (Value < 0 || Value > Max) 2218 report_fatal_error("Immediate out of range"); 2219 return SDValue(); 2220 } 2221 case Intrinsic::mips_sll_b: 2222 case Intrinsic::mips_sll_h: 2223 case Intrinsic::mips_sll_w: 2224 case Intrinsic::mips_sll_d: 2225 return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), Op->getOperand(1), 2226 truncateVecElts(Op, DAG)); 2227 case Intrinsic::mips_slli_b: 2228 case Intrinsic::mips_slli_h: 2229 case Intrinsic::mips_slli_w: 2230 case Intrinsic::mips_slli_d: 2231 return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), 2232 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2233 case Intrinsic::mips_splat_b: 2234 case Intrinsic::mips_splat_h: 2235 case Intrinsic::mips_splat_w: 2236 case Intrinsic::mips_splat_d: 2237 // We can't lower via VECTOR_SHUFFLE because it requires constant shuffle 2238 // masks, nor can we lower via BUILD_VECTOR & EXTRACT_VECTOR_ELT because 2239 // EXTRACT_VECTOR_ELT can't extract i64's on MIPS32. 2240 // Instead we lower to MipsISD::VSHF and match from there. 
2241 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0), 2242 lowerMSASplatZExt(Op, 2, DAG), Op->getOperand(1), 2243 Op->getOperand(1)); 2244 case Intrinsic::mips_splati_b: 2245 case Intrinsic::mips_splati_h: 2246 case Intrinsic::mips_splati_w: 2247 case Intrinsic::mips_splati_d: 2248 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0), 2249 lowerMSASplatImm(Op, 2, DAG), Op->getOperand(1), 2250 Op->getOperand(1)); 2251 case Intrinsic::mips_sra_b: 2252 case Intrinsic::mips_sra_h: 2253 case Intrinsic::mips_sra_w: 2254 case Intrinsic::mips_sra_d: 2255 return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), Op->getOperand(1), 2256 truncateVecElts(Op, DAG)); 2257 case Intrinsic::mips_srai_b: 2258 case Intrinsic::mips_srai_h: 2259 case Intrinsic::mips_srai_w: 2260 case Intrinsic::mips_srai_d: 2261 return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), 2262 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2263 case Intrinsic::mips_srari_b: 2264 case Intrinsic::mips_srari_h: 2265 case Intrinsic::mips_srari_w: 2266 case Intrinsic::mips_srari_d: { 2267 // Report an error for out of range values. 
2268 int64_t Max; 2269 switch (Intrinsic) { 2270 case Intrinsic::mips_srari_b: Max = 7; break; 2271 case Intrinsic::mips_srari_h: Max = 15; break; 2272 case Intrinsic::mips_srari_w: Max = 31; break; 2273 case Intrinsic::mips_srari_d: Max = 63; break; 2274 default: llvm_unreachable("Unmatched intrinsic"); 2275 } 2276 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 2277 if (Value < 0 || Value > Max) 2278 report_fatal_error("Immediate out of range"); 2279 return SDValue(); 2280 } 2281 case Intrinsic::mips_srl_b: 2282 case Intrinsic::mips_srl_h: 2283 case Intrinsic::mips_srl_w: 2284 case Intrinsic::mips_srl_d: 2285 return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), Op->getOperand(1), 2286 truncateVecElts(Op, DAG)); 2287 case Intrinsic::mips_srli_b: 2288 case Intrinsic::mips_srli_h: 2289 case Intrinsic::mips_srli_w: 2290 case Intrinsic::mips_srli_d: 2291 return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), 2292 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2293 case Intrinsic::mips_srlri_b: 2294 case Intrinsic::mips_srlri_h: 2295 case Intrinsic::mips_srlri_w: 2296 case Intrinsic::mips_srlri_d: { 2297 // Report an error for out of range values. 
2298 int64_t Max; 2299 switch (Intrinsic) { 2300 case Intrinsic::mips_srlri_b: Max = 7; break; 2301 case Intrinsic::mips_srlri_h: Max = 15; break; 2302 case Intrinsic::mips_srlri_w: Max = 31; break; 2303 case Intrinsic::mips_srlri_d: Max = 63; break; 2304 default: llvm_unreachable("Unmatched intrinsic"); 2305 } 2306 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 2307 if (Value < 0 || Value > Max) 2308 report_fatal_error("Immediate out of range"); 2309 return SDValue(); 2310 } 2311 case Intrinsic::mips_subv_b: 2312 case Intrinsic::mips_subv_h: 2313 case Intrinsic::mips_subv_w: 2314 case Intrinsic::mips_subv_d: 2315 return DAG.getNode(ISD::SUB, DL, Op->getValueType(0), Op->getOperand(1), 2316 Op->getOperand(2)); 2317 case Intrinsic::mips_subvi_b: 2318 case Intrinsic::mips_subvi_h: 2319 case Intrinsic::mips_subvi_w: 2320 case Intrinsic::mips_subvi_d: 2321 return DAG.getNode(ISD::SUB, DL, Op->getValueType(0), 2322 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2323 case Intrinsic::mips_vshf_b: 2324 case Intrinsic::mips_vshf_h: 2325 case Intrinsic::mips_vshf_w: 2326 case Intrinsic::mips_vshf_d: 2327 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0), 2328 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); 2329 case Intrinsic::mips_xor_v: 2330 return DAG.getNode(ISD::XOR, DL, Op->getValueType(0), Op->getOperand(1), 2331 Op->getOperand(2)); 2332 case Intrinsic::mips_xori_b: 2333 return DAG.getNode(ISD::XOR, DL, Op->getValueType(0), 2334 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2335 case Intrinsic::thread_pointer: { 2336 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2337 return DAG.getNode(MipsISD::ThreadPointer, DL, PtrVT); 2338 } 2339 } 2340 } 2341 2342 static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, 2343 const MipsSubtarget &Subtarget) { 2344 SDLoc DL(Op); 2345 SDValue ChainIn = Op->getOperand(0); 2346 SDValue Address = Op->getOperand(2); 2347 SDValue Offset = Op->getOperand(3); 2348 EVT 
ResTy = Op->getValueType(0); 2349 EVT PtrTy = Address->getValueType(0); 2350 2351 // For N64 addresses have the underlying type MVT::i64. This intrinsic 2352 // however takes an i32 signed constant offset. The actual type of the 2353 // intrinsic is a scaled signed i10. 2354 if (Subtarget.isABI_N64()) 2355 Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); 2356 2357 Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); 2358 return DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(), 2359 /* Alignment = */ 16); 2360 } 2361 2362 SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, 2363 SelectionDAG &DAG) const { 2364 unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue(); 2365 switch (Intr) { 2366 default: 2367 return SDValue(); 2368 case Intrinsic::mips_extp: 2369 return lowerDSPIntr(Op, DAG, MipsISD::EXTP); 2370 case Intrinsic::mips_extpdp: 2371 return lowerDSPIntr(Op, DAG, MipsISD::EXTPDP); 2372 case Intrinsic::mips_extr_w: 2373 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_W); 2374 case Intrinsic::mips_extr_r_w: 2375 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W); 2376 case Intrinsic::mips_extr_rs_w: 2377 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W); 2378 case Intrinsic::mips_extr_s_h: 2379 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H); 2380 case Intrinsic::mips_mthlip: 2381 return lowerDSPIntr(Op, DAG, MipsISD::MTHLIP); 2382 case Intrinsic::mips_mulsaq_s_w_ph: 2383 return lowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH); 2384 case Intrinsic::mips_maq_s_w_phl: 2385 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL); 2386 case Intrinsic::mips_maq_s_w_phr: 2387 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR); 2388 case Intrinsic::mips_maq_sa_w_phl: 2389 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL); 2390 case Intrinsic::mips_maq_sa_w_phr: 2391 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR); 2392 case Intrinsic::mips_dpaq_s_w_ph: 2393 return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH); 2394 case 
Intrinsic::mips_dpsq_s_w_ph: 2395 return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH); 2396 case Intrinsic::mips_dpaq_sa_l_w: 2397 return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W); 2398 case Intrinsic::mips_dpsq_sa_l_w: 2399 return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W); 2400 case Intrinsic::mips_dpaqx_s_w_ph: 2401 return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH); 2402 case Intrinsic::mips_dpaqx_sa_w_ph: 2403 return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH); 2404 case Intrinsic::mips_dpsqx_s_w_ph: 2405 return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH); 2406 case Intrinsic::mips_dpsqx_sa_w_ph: 2407 return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH); 2408 case Intrinsic::mips_ld_b: 2409 case Intrinsic::mips_ld_h: 2410 case Intrinsic::mips_ld_w: 2411 case Intrinsic::mips_ld_d: 2412 return lowerMSALoadIntr(Op, DAG, Intr, Subtarget); 2413 } 2414 } 2415 2416 static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, 2417 const MipsSubtarget &Subtarget) { 2418 SDLoc DL(Op); 2419 SDValue ChainIn = Op->getOperand(0); 2420 SDValue Value = Op->getOperand(2); 2421 SDValue Address = Op->getOperand(3); 2422 SDValue Offset = Op->getOperand(4); 2423 EVT PtrTy = Address->getValueType(0); 2424 2425 // For N64 addresses have the underlying type MVT::i64. This intrinsic 2426 // however takes an i32 signed constant offset. The actual type of the 2427 // intrinsic is a scaled signed i10. 
2428 if (Subtarget.isABI_N64()) 2429 Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); 2430 2431 Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); 2432 2433 return DAG.getStore(ChainIn, DL, Value, Address, MachinePointerInfo(), 2434 /* Alignment = */ 16); 2435 } 2436 2437 SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op, 2438 SelectionDAG &DAG) const { 2439 unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue(); 2440 switch (Intr) { 2441 default: 2442 return SDValue(); 2443 case Intrinsic::mips_st_b: 2444 case Intrinsic::mips_st_h: 2445 case Intrinsic::mips_st_w: 2446 case Intrinsic::mips_st_d: 2447 return lowerMSAStoreIntr(Op, DAG, Intr, Subtarget); 2448 } 2449 } 2450 2451 /// \brief Check if the given BuildVectorSDNode is a splat. 2452 /// This method currently relies on DAG nodes being reused when equivalent, 2453 /// so it's possible for this to return false even when isConstantSplat returns 2454 /// true. 2455 static bool isSplatVector(const BuildVectorSDNode *N) { 2456 unsigned int nOps = N->getNumOperands(); 2457 assert(nOps > 1 && "isSplatVector has 0 or 1 sized build vector"); 2458 2459 SDValue Operand0 = N->getOperand(0); 2460 2461 for (unsigned int i = 1; i < nOps; ++i) { 2462 if (N->getOperand(i) != Operand0) 2463 return false; 2464 } 2465 2466 return true; 2467 } 2468 2469 // Lower ISD::EXTRACT_VECTOR_ELT into MipsISD::VEXTRACT_SEXT_ELT. 2470 // 2471 // The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We 2472 // choose to sign-extend but we could have equally chosen zero-extend. The 2473 // DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT 2474 // result into this node later (possibly changing it to a zero-extend in the 2475 // process). 
2476 SDValue MipsSETargetLowering:: 2477 lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { 2478 SDLoc DL(Op); 2479 EVT ResTy = Op->getValueType(0); 2480 SDValue Op0 = Op->getOperand(0); 2481 EVT VecTy = Op0->getValueType(0); 2482 2483 if (!VecTy.is128BitVector()) 2484 return SDValue(); 2485 2486 if (ResTy.isInteger()) { 2487 SDValue Op1 = Op->getOperand(1); 2488 EVT EltTy = VecTy.getVectorElementType(); 2489 return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, DL, ResTy, Op0, Op1, 2490 DAG.getValueType(EltTy)); 2491 } 2492 2493 return Op; 2494 } 2495 2496 static bool isConstantOrUndef(const SDValue Op) { 2497 if (Op->isUndef()) 2498 return true; 2499 if (isa<ConstantSDNode>(Op)) 2500 return true; 2501 if (isa<ConstantFPSDNode>(Op)) 2502 return true; 2503 return false; 2504 } 2505 2506 static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) { 2507 for (unsigned i = 0; i < Op->getNumOperands(); ++i) 2508 if (isConstantOrUndef(Op->getOperand(i))) 2509 return true; 2510 return false; 2511 } 2512 2513 // Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the 2514 // backend. 2515 // 2516 // Lowers according to the following rules: 2517 // - Constant splats are legal as-is as long as the SplatBitSize is a power of 2518 // 2 less than or equal to 64 and the value fits into a signed 10-bit 2519 // immediate 2520 // - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize 2521 // is a power of 2 less than or equal to 64 and the value does not fit into a 2522 // signed 10-bit immediate 2523 // - Non-constant splats are legal as-is. 2524 // - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT. 2525 // - All others are illegal and must be expanded. 
2526 SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op, 2527 SelectionDAG &DAG) const { 2528 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op); 2529 EVT ResTy = Op->getValueType(0); 2530 SDLoc DL(Op); 2531 APInt SplatValue, SplatUndef; 2532 unsigned SplatBitSize; 2533 bool HasAnyUndefs; 2534 2535 if (!Subtarget.hasMSA() || !ResTy.is128BitVector()) 2536 return SDValue(); 2537 2538 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, 2539 HasAnyUndefs, 8, 2540 !Subtarget.isLittle()) && SplatBitSize <= 64) { 2541 // We can only cope with 8, 16, 32, or 64-bit elements 2542 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 && 2543 SplatBitSize != 64) 2544 return SDValue(); 2545 2546 // If the value isn't an integer type we will have to bitcast 2547 // from an integer type first. Also, if there are any undefs, we must 2548 // lower them to defined values first. 2549 if (ResTy.isInteger() && !HasAnyUndefs) 2550 return Op; 2551 2552 EVT ViaVecTy; 2553 2554 switch (SplatBitSize) { 2555 default: 2556 return SDValue(); 2557 case 8: 2558 ViaVecTy = MVT::v16i8; 2559 break; 2560 case 16: 2561 ViaVecTy = MVT::v8i16; 2562 break; 2563 case 32: 2564 ViaVecTy = MVT::v4i32; 2565 break; 2566 case 64: 2567 // There's no fill.d to fall back on for 64-bit values 2568 return SDValue(); 2569 } 2570 2571 // SelectionDAG::getConstant will promote SplatValue appropriately. 2572 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy); 2573 2574 // Bitcast to the type we originally wanted 2575 if (ViaVecTy != ResTy) 2576 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result); 2577 2578 return Result; 2579 } else if (isSplatVector(Node)) 2580 return Op; 2581 else if (!isConstantOrUndefBUILD_VECTOR(Node)) { 2582 // Use INSERT_VECTOR_ELT operations rather than expand to stores. 
2583 // The resulting code is the same length as the expansion, but it doesn't 2584 // use memory operations 2585 EVT ResTy = Node->getValueType(0); 2586 2587 assert(ResTy.isVector()); 2588 2589 unsigned NumElts = ResTy.getVectorNumElements(); 2590 SDValue Vector = DAG.getUNDEF(ResTy); 2591 for (unsigned i = 0; i < NumElts; ++i) { 2592 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, 2593 Node->getOperand(i), 2594 DAG.getConstant(i, DL, MVT::i32)); 2595 } 2596 return Vector; 2597 } 2598 2599 return SDValue(); 2600 } 2601 2602 // Lower VECTOR_SHUFFLE into SHF (if possible). 2603 // 2604 // SHF splits the vector into blocks of four elements, then shuffles these 2605 // elements according to a <4 x i2> constant (encoded as an integer immediate). 2606 // 2607 // It is therefore possible to lower into SHF when the mask takes the form: 2608 // <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...> 2609 // When undef's appear they are treated as if they were whatever value is 2610 // necessary in order to fit the above forms. 
2611 // 2612 // For example: 2613 // %2 = shufflevector <8 x i16> %0, <8 x i16> undef, 2614 // <8 x i32> <i32 3, i32 2, i32 1, i32 0, 2615 // i32 7, i32 6, i32 5, i32 4> 2616 // is lowered to: 2617 // (SHF_H $w0, $w1, 27) 2618 // where the 27 comes from: 2619 // 3 + (2 << 2) + (1 << 4) + (0 << 6) 2620 static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy, 2621 SmallVector<int, 16> Indices, 2622 SelectionDAG &DAG) { 2623 int SHFIndices[4] = { -1, -1, -1, -1 }; 2624 2625 if (Indices.size() < 4) 2626 return SDValue(); 2627 2628 for (unsigned i = 0; i < 4; ++i) { 2629 for (unsigned j = i; j < Indices.size(); j += 4) { 2630 int Idx = Indices[j]; 2631 2632 // Convert from vector index to 4-element subvector index 2633 // If an index refers to an element outside of the subvector then give up 2634 if (Idx != -1) { 2635 Idx -= 4 * (j / 4); 2636 if (Idx < 0 || Idx >= 4) 2637 return SDValue(); 2638 } 2639 2640 // If the mask has an undef, replace it with the current index. 2641 // Note that it might still be undef if the current index is also undef 2642 if (SHFIndices[i] == -1) 2643 SHFIndices[i] = Idx; 2644 2645 // Check that non-undef values are the same as in the mask. If they 2646 // aren't then give up 2647 if (!(Idx == -1 || Idx == SHFIndices[i])) 2648 return SDValue(); 2649 } 2650 } 2651 2652 // Calculate the immediate. Replace any remaining undefs with zero 2653 APInt Imm(32, 0); 2654 for (int i = 3; i >= 0; --i) { 2655 int Idx = SHFIndices[i]; 2656 2657 if (Idx == -1) 2658 Idx = 0; 2659 2660 Imm <<= 2; 2661 Imm |= Idx & 0x3; 2662 } 2663 2664 SDLoc DL(Op); 2665 return DAG.getNode(MipsISD::SHF, DL, ResTy, 2666 DAG.getConstant(Imm, DL, MVT::i32), Op->getOperand(0)); 2667 } 2668 2669 /// Determine whether a range fits a regular pattern of values. 2670 /// This function accounts for the possibility of jumping over the End iterator. 
2671 template <typename ValType> 2672 static bool 2673 fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin, 2674 unsigned CheckStride, 2675 typename SmallVectorImpl<ValType>::const_iterator End, 2676 ValType ExpectedIndex, unsigned ExpectedIndexStride) { 2677 auto &I = Begin; 2678 2679 while (I != End) { 2680 if (*I != -1 && *I != ExpectedIndex) 2681 return false; 2682 ExpectedIndex += ExpectedIndexStride; 2683 2684 // Incrementing past End is undefined behaviour so we must increment one 2685 // step at a time and check for End at each step. 2686 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I) 2687 ; // Empty loop body. 2688 } 2689 return true; 2690 } 2691 2692 // Determine whether VECTOR_SHUFFLE is a SPLATI. 2693 // 2694 // It is a SPLATI when the mask is: 2695 // <x, x, x, ...> 2696 // where x is any valid index. 2697 // 2698 // When undef's appear in the mask they are treated as if they were whatever 2699 // value is necessary in order to fit the above form. 2700 static bool isVECTOR_SHUFFLE_SPLATI(SDValue Op, EVT ResTy, 2701 SmallVector<int, 16> Indices, 2702 SelectionDAG &DAG) { 2703 assert((Indices.size() % 2) == 0); 2704 2705 int SplatIndex = -1; 2706 for (const auto &V : Indices) { 2707 if (V != -1) { 2708 SplatIndex = V; 2709 break; 2710 } 2711 } 2712 2713 return fitsRegularPattern<int>(Indices.begin(), 1, Indices.end(), SplatIndex, 2714 0); 2715 } 2716 2717 // Lower VECTOR_SHUFFLE into ILVEV (if possible). 2718 // 2719 // ILVEV interleaves the even elements from each vector. 2720 // 2721 // It is possible to lower into ILVEV when the mask consists of two of the 2722 // following forms interleaved: 2723 // <0, 2, 4, ...> 2724 // <n, n+2, n+4, ...> 2725 // where n is the number of elements in the vector. 
2726 // For example: 2727 // <0, 0, 2, 2, 4, 4, ...> 2728 // <0, n, 2, n+2, 4, n+4, ...> 2729 // 2730 // When undef's appear in the mask they are treated as if they were whatever 2731 // value is necessary in order to fit the above forms. 2732 static SDValue lowerVECTOR_SHUFFLE_ILVEV(SDValue Op, EVT ResTy, 2733 SmallVector<int, 16> Indices, 2734 SelectionDAG &DAG) { 2735 assert((Indices.size() % 2) == 0); 2736 2737 SDValue Wt; 2738 SDValue Ws; 2739 const auto &Begin = Indices.begin(); 2740 const auto &End = Indices.end(); 2741 2742 // Check even elements are taken from the even elements of one half or the 2743 // other and pick an operand accordingly. 2744 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2)) 2745 Wt = Op->getOperand(0); 2746 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 2)) 2747 Wt = Op->getOperand(1); 2748 else 2749 return SDValue(); 2750 2751 // Check odd elements are taken from the even elements of one half or the 2752 // other and pick an operand accordingly. 2753 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2)) 2754 Ws = Op->getOperand(0); 2755 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 2)) 2756 Ws = Op->getOperand(1); 2757 else 2758 return SDValue(); 2759 2760 return DAG.getNode(MipsISD::ILVEV, SDLoc(Op), ResTy, Ws, Wt); 2761 } 2762 2763 // Lower VECTOR_SHUFFLE into ILVOD (if possible). 2764 // 2765 // ILVOD interleaves the odd elements from each vector. 2766 // 2767 // It is possible to lower into ILVOD when the mask consists of two of the 2768 // following forms interleaved: 2769 // <1, 3, 5, ...> 2770 // <n+1, n+3, n+5, ...> 2771 // where n is the number of elements in the vector. 2772 // For example: 2773 // <1, 1, 3, 3, 5, 5, ...> 2774 // <1, n+1, 3, n+3, 5, n+5, ...> 2775 // 2776 // When undef's appear in the mask they are treated as if they were whatever 2777 // value is necessary in order to fit the above forms. 
2778 static SDValue lowerVECTOR_SHUFFLE_ILVOD(SDValue Op, EVT ResTy, 2779 SmallVector<int, 16> Indices, 2780 SelectionDAG &DAG) { 2781 assert((Indices.size() % 2) == 0); 2782 2783 SDValue Wt; 2784 SDValue Ws; 2785 const auto &Begin = Indices.begin(); 2786 const auto &End = Indices.end(); 2787 2788 // Check even elements are taken from the odd elements of one half or the 2789 // other and pick an operand accordingly. 2790 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2)) 2791 Wt = Op->getOperand(0); 2792 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + 1, 2)) 2793 Wt = Op->getOperand(1); 2794 else 2795 return SDValue(); 2796 2797 // Check odd elements are taken from the odd elements of one half or the 2798 // other and pick an operand accordingly. 2799 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2)) 2800 Ws = Op->getOperand(0); 2801 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + 1, 2)) 2802 Ws = Op->getOperand(1); 2803 else 2804 return SDValue(); 2805 2806 return DAG.getNode(MipsISD::ILVOD, SDLoc(Op), ResTy, Wt, Ws); 2807 } 2808 2809 // Lower VECTOR_SHUFFLE into ILVR (if possible). 2810 // 2811 // ILVR interleaves consecutive elements from the right (lowest-indexed) half of 2812 // each vector. 2813 // 2814 // It is possible to lower into ILVR when the mask consists of two of the 2815 // following forms interleaved: 2816 // <0, 1, 2, ...> 2817 // <n, n+1, n+2, ...> 2818 // where n is the number of elements in the vector. 2819 // For example: 2820 // <0, 0, 1, 1, 2, 2, ...> 2821 // <0, n, 1, n+1, 2, n+2, ...> 2822 // 2823 // When undef's appear in the mask they are treated as if they were whatever 2824 // value is necessary in order to fit the above forms. 
2825 static SDValue lowerVECTOR_SHUFFLE_ILVR(SDValue Op, EVT ResTy, 2826 SmallVector<int, 16> Indices, 2827 SelectionDAG &DAG) { 2828 assert((Indices.size() % 2) == 0); 2829 2830 SDValue Wt; 2831 SDValue Ws; 2832 const auto &Begin = Indices.begin(); 2833 const auto &End = Indices.end(); 2834 2835 // Check even elements are taken from the right (lowest-indexed) elements of 2836 // one half or the other and pick an operand accordingly. 2837 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1)) 2838 Wt = Op->getOperand(0); 2839 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 1)) 2840 Wt = Op->getOperand(1); 2841 else 2842 return SDValue(); 2843 2844 // Check odd elements are taken from the right (lowest-indexed) elements of 2845 // one half or the other and pick an operand accordingly. 2846 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1)) 2847 Ws = Op->getOperand(0); 2848 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 1)) 2849 Ws = Op->getOperand(1); 2850 else 2851 return SDValue(); 2852 2853 return DAG.getNode(MipsISD::ILVR, SDLoc(Op), ResTy, Ws, Wt); 2854 } 2855 2856 // Lower VECTOR_SHUFFLE into ILVL (if possible). 2857 // 2858 // ILVL interleaves consecutive elements from the left (highest-indexed) half 2859 // of each vector. 2860 // 2861 // It is possible to lower into ILVL when the mask consists of two of the 2862 // following forms interleaved: 2863 // <x, x+1, x+2, ...> 2864 // <n+x, n+x+1, n+x+2, ...> 2865 // where n is the number of elements in the vector and x is half n. 2866 // For example: 2867 // <x, x, x+1, x+1, x+2, x+2, ...> 2868 // <x, n+x, x+1, n+x+1, x+2, n+x+2, ...> 2869 // 2870 // When undef's appear in the mask they are treated as if they were whatever 2871 // value is necessary in order to fit the above forms. 
2872 static SDValue lowerVECTOR_SHUFFLE_ILVL(SDValue Op, EVT ResTy, 2873 SmallVector<int, 16> Indices, 2874 SelectionDAG &DAG) { 2875 assert((Indices.size() % 2) == 0); 2876 2877 unsigned HalfSize = Indices.size() / 2; 2878 SDValue Wt; 2879 SDValue Ws; 2880 const auto &Begin = Indices.begin(); 2881 const auto &End = Indices.end(); 2882 2883 // Check even elements are taken from the left (highest-indexed) elements of 2884 // one half or the other and pick an operand accordingly. 2885 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1)) 2886 Wt = Op->getOperand(0); 2887 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + HalfSize, 1)) 2888 Wt = Op->getOperand(1); 2889 else 2890 return SDValue(); 2891 2892 // Check odd elements are taken from the left (highest-indexed) elements of 2893 // one half or the other and pick an operand accordingly. 2894 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1)) 2895 Ws = Op->getOperand(0); 2896 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + HalfSize, 2897 1)) 2898 Ws = Op->getOperand(1); 2899 else 2900 return SDValue(); 2901 2902 return DAG.getNode(MipsISD::ILVL, SDLoc(Op), ResTy, Ws, Wt); 2903 } 2904 2905 // Lower VECTOR_SHUFFLE into PCKEV (if possible). 2906 // 2907 // PCKEV copies the even elements of each vector into the result vector. 2908 // 2909 // It is possible to lower into PCKEV when the mask consists of two of the 2910 // following forms concatenated: 2911 // <0, 2, 4, ...> 2912 // <n, n+2, n+4, ...> 2913 // where n is the number of elements in the vector. 2914 // For example: 2915 // <0, 2, 4, ..., 0, 2, 4, ...> 2916 // <0, 2, 4, ..., n, n+2, n+4, ...> 2917 // 2918 // When undef's appear in the mask they are treated as if they were whatever 2919 // value is necessary in order to fit the above forms. 
2920 static SDValue lowerVECTOR_SHUFFLE_PCKEV(SDValue Op, EVT ResTy, 2921 SmallVector<int, 16> Indices, 2922 SelectionDAG &DAG) { 2923 assert((Indices.size() % 2) == 0); 2924 2925 SDValue Wt; 2926 SDValue Ws; 2927 const auto &Begin = Indices.begin(); 2928 const auto &Mid = Indices.begin() + Indices.size() / 2; 2929 const auto &End = Indices.end(); 2930 2931 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2)) 2932 Wt = Op->getOperand(0); 2933 else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size(), 2)) 2934 Wt = Op->getOperand(1); 2935 else 2936 return SDValue(); 2937 2938 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2)) 2939 Ws = Op->getOperand(0); 2940 else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size(), 2)) 2941 Ws = Op->getOperand(1); 2942 else 2943 return SDValue(); 2944 2945 return DAG.getNode(MipsISD::PCKEV, SDLoc(Op), ResTy, Ws, Wt); 2946 } 2947 2948 // Lower VECTOR_SHUFFLE into PCKOD (if possible). 2949 // 2950 // PCKOD copies the odd elements of each vector into the result vector. 2951 // 2952 // It is possible to lower into PCKOD when the mask consists of two of the 2953 // following forms concatenated: 2954 // <1, 3, 5, ...> 2955 // <n+1, n+3, n+5, ...> 2956 // where n is the number of elements in the vector. 2957 // For example: 2958 // <1, 3, 5, ..., 1, 3, 5, ...> 2959 // <1, 3, 5, ..., n+1, n+3, n+5, ...> 2960 // 2961 // When undef's appear in the mask they are treated as if they were whatever 2962 // value is necessary in order to fit the above forms. 
2963 static SDValue lowerVECTOR_SHUFFLE_PCKOD(SDValue Op, EVT ResTy, 2964 SmallVector<int, 16> Indices, 2965 SelectionDAG &DAG) { 2966 assert((Indices.size() % 2) == 0); 2967 2968 SDValue Wt; 2969 SDValue Ws; 2970 const auto &Begin = Indices.begin(); 2971 const auto &Mid = Indices.begin() + Indices.size() / 2; 2972 const auto &End = Indices.end(); 2973 2974 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2)) 2975 Wt = Op->getOperand(0); 2976 else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size() + 1, 2)) 2977 Wt = Op->getOperand(1); 2978 else 2979 return SDValue(); 2980 2981 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2)) 2982 Ws = Op->getOperand(0); 2983 else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size() + 1, 2)) 2984 Ws = Op->getOperand(1); 2985 else 2986 return SDValue(); 2987 2988 return DAG.getNode(MipsISD::PCKOD, SDLoc(Op), ResTy, Ws, Wt); 2989 } 2990 2991 // Lower VECTOR_SHUFFLE into VSHF. 2992 // 2993 // This mostly consists of converting the shuffle indices in Indices into a 2994 // BUILD_VECTOR and adding it as an operand to the resulting VSHF. There is 2995 // also code to eliminate unused operands of the VECTOR_SHUFFLE. For example, 2996 // if the type is v8i16 and all the indices are less than 8 then the second 2997 // operand is unused and can be replaced with anything. We choose to replace it 2998 // with the used operand since this reduces the number of instructions overall. 
2999 static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy, 3000 SmallVector<int, 16> Indices, 3001 SelectionDAG &DAG) { 3002 SmallVector<SDValue, 16> Ops; 3003 SDValue Op0; 3004 SDValue Op1; 3005 EVT MaskVecTy = ResTy.changeVectorElementTypeToInteger(); 3006 EVT MaskEltTy = MaskVecTy.getVectorElementType(); 3007 bool Using1stVec = false; 3008 bool Using2ndVec = false; 3009 SDLoc DL(Op); 3010 int ResTyNumElts = ResTy.getVectorNumElements(); 3011 3012 for (int i = 0; i < ResTyNumElts; ++i) { 3013 // Idx == -1 means UNDEF 3014 int Idx = Indices[i]; 3015 3016 if (0 <= Idx && Idx < ResTyNumElts) 3017 Using1stVec = true; 3018 if (ResTyNumElts <= Idx && Idx < ResTyNumElts * 2) 3019 Using2ndVec = true; 3020 } 3021 3022 for (SmallVector<int, 16>::iterator I = Indices.begin(); I != Indices.end(); 3023 ++I) 3024 Ops.push_back(DAG.getTargetConstant(*I, DL, MaskEltTy)); 3025 3026 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops); 3027 3028 if (Using1stVec && Using2ndVec) { 3029 Op0 = Op->getOperand(0); 3030 Op1 = Op->getOperand(1); 3031 } else if (Using1stVec) 3032 Op0 = Op1 = Op->getOperand(0); 3033 else if (Using2ndVec) 3034 Op0 = Op1 = Op->getOperand(1); 3035 else 3036 llvm_unreachable("shuffle vector mask references neither vector operand?"); 3037 3038 // VECTOR_SHUFFLE concatenates the vectors in an vectorwise fashion. 3039 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11> 3040 // VSHF concatenates the vectors in a bitwise fashion: 3041 // <0b00, 0b01> + <0b10, 0b11> -> 3042 // 0b0100 + 0b1110 -> 0b01001110 3043 // <0b10, 0b11, 0b00, 0b01> 3044 // We must therefore swap the operands to get the correct result. 3045 return DAG.getNode(MipsISD::VSHF, DL, ResTy, MaskVec, Op1, Op0); 3046 } 3047 3048 // Lower VECTOR_SHUFFLE into one of a number of instructions depending on the 3049 // indices in the shuffle. 
3050 SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, 3051 SelectionDAG &DAG) const { 3052 ShuffleVectorSDNode *Node = cast<ShuffleVectorSDNode>(Op); 3053 EVT ResTy = Op->getValueType(0); 3054 3055 if (!ResTy.is128BitVector()) 3056 return SDValue(); 3057 3058 int ResTyNumElts = ResTy.getVectorNumElements(); 3059 SmallVector<int, 16> Indices; 3060 3061 for (int i = 0; i < ResTyNumElts; ++i) 3062 Indices.push_back(Node->getMaskElt(i)); 3063 3064 // splati.[bhwd] is preferable to the others but is matched from 3065 // MipsISD::VSHF. 3066 if (isVECTOR_SHUFFLE_SPLATI(Op, ResTy, Indices, DAG)) 3067 return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG); 3068 SDValue Result; 3069 if ((Result = lowerVECTOR_SHUFFLE_ILVEV(Op, ResTy, Indices, DAG))) 3070 return Result; 3071 if ((Result = lowerVECTOR_SHUFFLE_ILVOD(Op, ResTy, Indices, DAG))) 3072 return Result; 3073 if ((Result = lowerVECTOR_SHUFFLE_ILVL(Op, ResTy, Indices, DAG))) 3074 return Result; 3075 if ((Result = lowerVECTOR_SHUFFLE_ILVR(Op, ResTy, Indices, DAG))) 3076 return Result; 3077 if ((Result = lowerVECTOR_SHUFFLE_PCKEV(Op, ResTy, Indices, DAG))) 3078 return Result; 3079 if ((Result = lowerVECTOR_SHUFFLE_PCKOD(Op, ResTy, Indices, DAG))) 3080 return Result; 3081 if ((Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG))) 3082 return Result; 3083 return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG); 3084 } 3085 3086 MachineBasicBlock * 3087 MipsSETargetLowering::emitBPOSGE32(MachineInstr &MI, 3088 MachineBasicBlock *BB) const { 3089 // $bb: 3090 // bposge32_pseudo $vr0 3091 // => 3092 // $bb: 3093 // bposge32 $tbb 3094 // $fbb: 3095 // li $vr2, 0 3096 // b $sink 3097 // $tbb: 3098 // li $vr1, 1 3099 // $sink: 3100 // $vr0 = phi($vr2, $fbb, $vr1, $tbb) 3101 3102 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3103 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3104 const TargetRegisterClass *RC = &Mips::GPR32RegClass; 3105 DebugLoc DL = MI.getDebugLoc(); 3106 const 
BasicBlock *LLVM_BB = BB->getBasicBlock(); 3107 MachineFunction::iterator It = std::next(MachineFunction::iterator(BB)); 3108 MachineFunction *F = BB->getParent(); 3109 MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB); 3110 MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB); 3111 MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB); 3112 F->insert(It, FBB); 3113 F->insert(It, TBB); 3114 F->insert(It, Sink); 3115 3116 // Transfer the remainder of BB and its successor edges to Sink. 3117 Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), 3118 BB->end()); 3119 Sink->transferSuccessorsAndUpdatePHIs(BB); 3120 3121 // Add successors. 3122 BB->addSuccessor(FBB); 3123 BB->addSuccessor(TBB); 3124 FBB->addSuccessor(Sink); 3125 TBB->addSuccessor(Sink); 3126 3127 // Insert the real bposge32 instruction to $BB. 3128 BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB); 3129 // Insert the real bposge32c instruction to $BB. 3130 BuildMI(BB, DL, TII->get(Mips::BPOSGE32C_MMR3)).addMBB(TBB); 3131 3132 // Fill $FBB. 3133 unsigned VR2 = RegInfo.createVirtualRegister(RC); 3134 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2) 3135 .addReg(Mips::ZERO).addImm(0); 3136 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink); 3137 3138 // Fill $TBB. 3139 unsigned VR1 = RegInfo.createVirtualRegister(RC); 3140 BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1) 3141 .addReg(Mips::ZERO).addImm(1); 3142 3143 // Insert phi function to $Sink. 3144 BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI), 3145 MI.getOperand(0).getReg()) 3146 .addReg(VR2) 3147 .addMBB(FBB) 3148 .addReg(VR1) 3149 .addMBB(TBB); 3150 3151 MI.eraseFromParent(); // The pseudo instruction is gone now. 
3152 return Sink; 3153 } 3154 3155 MachineBasicBlock *MipsSETargetLowering::emitMSACBranchPseudo( 3156 MachineInstr &MI, MachineBasicBlock *BB, unsigned BranchOp) const { 3157 // $bb: 3158 // vany_nonzero $rd, $ws 3159 // => 3160 // $bb: 3161 // bnz.b $ws, $tbb 3162 // b $fbb 3163 // $fbb: 3164 // li $rd1, 0 3165 // b $sink 3166 // $tbb: 3167 // li $rd2, 1 3168 // $sink: 3169 // $rd = phi($rd1, $fbb, $rd2, $tbb) 3170 3171 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3172 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3173 const TargetRegisterClass *RC = &Mips::GPR32RegClass; 3174 DebugLoc DL = MI.getDebugLoc(); 3175 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 3176 MachineFunction::iterator It = std::next(MachineFunction::iterator(BB)); 3177 MachineFunction *F = BB->getParent(); 3178 MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB); 3179 MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB); 3180 MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB); 3181 F->insert(It, FBB); 3182 F->insert(It, TBB); 3183 F->insert(It, Sink); 3184 3185 // Transfer the remainder of BB and its successor edges to Sink. 3186 Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), 3187 BB->end()); 3188 Sink->transferSuccessorsAndUpdatePHIs(BB); 3189 3190 // Add successors. 3191 BB->addSuccessor(FBB); 3192 BB->addSuccessor(TBB); 3193 FBB->addSuccessor(Sink); 3194 TBB->addSuccessor(Sink); 3195 3196 // Insert the real bnz.b instruction to $BB. 3197 BuildMI(BB, DL, TII->get(BranchOp)) 3198 .addReg(MI.getOperand(1).getReg()) 3199 .addMBB(TBB); 3200 3201 // Fill $FBB. 3202 unsigned RD1 = RegInfo.createVirtualRegister(RC); 3203 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), RD1) 3204 .addReg(Mips::ZERO).addImm(0); 3205 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink); 3206 3207 // Fill $TBB. 
3208 unsigned RD2 = RegInfo.createVirtualRegister(RC); 3209 BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), RD2) 3210 .addReg(Mips::ZERO).addImm(1); 3211 3212 // Insert phi function to $Sink. 3213 BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI), 3214 MI.getOperand(0).getReg()) 3215 .addReg(RD1) 3216 .addMBB(FBB) 3217 .addReg(RD2) 3218 .addMBB(TBB); 3219 3220 MI.eraseFromParent(); // The pseudo instruction is gone now. 3221 return Sink; 3222 } 3223 3224 // Emit the COPY_FW pseudo instruction. 3225 // 3226 // copy_fw_pseudo $fd, $ws, n 3227 // => 3228 // copy_u_w $rt, $ws, $n 3229 // mtc1 $rt, $fd 3230 // 3231 // When n is zero, the equivalent operation can be performed with (potentially) 3232 // zero instructions due to register overlaps. This optimization is never valid 3233 // for lane 1 because it would require FR=0 mode which isn't supported by MSA. 3234 MachineBasicBlock * 3235 MipsSETargetLowering::emitCOPY_FW(MachineInstr &MI, 3236 MachineBasicBlock *BB) const { 3237 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3238 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3239 DebugLoc DL = MI.getDebugLoc(); 3240 unsigned Fd = MI.getOperand(0).getReg(); 3241 unsigned Ws = MI.getOperand(1).getReg(); 3242 unsigned Lane = MI.getOperand(2).getImm(); 3243 3244 if (Lane == 0) { 3245 unsigned Wt = Ws; 3246 if (!Subtarget.useOddSPReg()) { 3247 // We must copy to an even-numbered MSA register so that the 3248 // single-precision sub-register is also guaranteed to be even-numbered. 3249 Wt = RegInfo.createVirtualRegister(&Mips::MSA128WEvensRegClass); 3250 3251 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Wt).addReg(Ws); 3252 } 3253 3254 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo); 3255 } else { 3256 unsigned Wt = RegInfo.createVirtualRegister( 3257 Subtarget.useOddSPReg() ? 
&Mips::MSA128WRegClass : 3258 &Mips::MSA128WEvensRegClass); 3259 3260 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wt).addReg(Ws).addImm(Lane); 3261 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo); 3262 } 3263 3264 MI.eraseFromParent(); // The pseudo instruction is gone now. 3265 return BB; 3266 } 3267 3268 // Emit the COPY_FD pseudo instruction. 3269 // 3270 // copy_fd_pseudo $fd, $ws, n 3271 // => 3272 // splati.d $wt, $ws, $n 3273 // copy $fd, $wt:sub_64 3274 // 3275 // When n is zero, the equivalent operation can be performed with (potentially) 3276 // zero instructions due to register overlaps. This optimization is always 3277 // valid because FR=1 mode which is the only supported mode in MSA. 3278 MachineBasicBlock * 3279 MipsSETargetLowering::emitCOPY_FD(MachineInstr &MI, 3280 MachineBasicBlock *BB) const { 3281 assert(Subtarget.isFP64bit()); 3282 3283 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3284 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3285 unsigned Fd = MI.getOperand(0).getReg(); 3286 unsigned Ws = MI.getOperand(1).getReg(); 3287 unsigned Lane = MI.getOperand(2).getImm() * 2; 3288 DebugLoc DL = MI.getDebugLoc(); 3289 3290 if (Lane == 0) 3291 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, 0, Mips::sub_64); 3292 else { 3293 unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); 3294 3295 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wt).addReg(Ws).addImm(1); 3296 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_64); 3297 } 3298 3299 MI.eraseFromParent(); // The pseudo instruction is gone now. 3300 return BB; 3301 } 3302 3303 // Emit the INSERT_FW pseudo instruction. 
3304 // 3305 // insert_fw_pseudo $wd, $wd_in, $n, $fs 3306 // => 3307 // subreg_to_reg $wt:sub_lo, $fs 3308 // insve_w $wd[$n], $wd_in, $wt[0] 3309 MachineBasicBlock * 3310 MipsSETargetLowering::emitINSERT_FW(MachineInstr &MI, 3311 MachineBasicBlock *BB) const { 3312 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3313 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3314 DebugLoc DL = MI.getDebugLoc(); 3315 unsigned Wd = MI.getOperand(0).getReg(); 3316 unsigned Wd_in = MI.getOperand(1).getReg(); 3317 unsigned Lane = MI.getOperand(2).getImm(); 3318 unsigned Fs = MI.getOperand(3).getReg(); 3319 unsigned Wt = RegInfo.createVirtualRegister( 3320 Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass : 3321 &Mips::MSA128WEvensRegClass); 3322 3323 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt) 3324 .addImm(0) 3325 .addReg(Fs) 3326 .addImm(Mips::sub_lo); 3327 BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_W), Wd) 3328 .addReg(Wd_in) 3329 .addImm(Lane) 3330 .addReg(Wt) 3331 .addImm(0); 3332 3333 MI.eraseFromParent(); // The pseudo instruction is gone now. 3334 return BB; 3335 } 3336 3337 // Emit the INSERT_FD pseudo instruction. 
3338 // 3339 // insert_fd_pseudo $wd, $fs, n 3340 // => 3341 // subreg_to_reg $wt:sub_64, $fs 3342 // insve_d $wd[$n], $wd_in, $wt[0] 3343 MachineBasicBlock * 3344 MipsSETargetLowering::emitINSERT_FD(MachineInstr &MI, 3345 MachineBasicBlock *BB) const { 3346 assert(Subtarget.isFP64bit()); 3347 3348 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3349 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3350 DebugLoc DL = MI.getDebugLoc(); 3351 unsigned Wd = MI.getOperand(0).getReg(); 3352 unsigned Wd_in = MI.getOperand(1).getReg(); 3353 unsigned Lane = MI.getOperand(2).getImm(); 3354 unsigned Fs = MI.getOperand(3).getReg(); 3355 unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); 3356 3357 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt) 3358 .addImm(0) 3359 .addReg(Fs) 3360 .addImm(Mips::sub_64); 3361 BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_D), Wd) 3362 .addReg(Wd_in) 3363 .addImm(Lane) 3364 .addReg(Wt) 3365 .addImm(0); 3366 3367 MI.eraseFromParent(); // The pseudo instruction is gone now. 3368 return BB; 3369 } 3370 3371 // Emit the INSERT_([BHWD]|F[WD])_VIDX pseudo instruction. 
3372 // 3373 // For integer: 3374 // (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $rs) 3375 // => 3376 // (SLL $lanetmp1, $lane, <log2size) 3377 // (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1) 3378 // (INSERT_[BHWD], $wdtmp2, $wdtmp1, 0, $rs) 3379 // (NEG $lanetmp2, $lanetmp1) 3380 // (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2) 3381 // 3382 // For floating point: 3383 // (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $fs) 3384 // => 3385 // (SUBREG_TO_REG $wt, $fs, <subreg>) 3386 // (SLL $lanetmp1, $lane, <log2size) 3387 // (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1) 3388 // (INSVE_[WD], $wdtmp2, 0, $wdtmp1, 0) 3389 // (NEG $lanetmp2, $lanetmp1) 3390 // (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2) 3391 MachineBasicBlock *MipsSETargetLowering::emitINSERT_DF_VIDX( 3392 MachineInstr &MI, MachineBasicBlock *BB, unsigned EltSizeInBytes, 3393 bool IsFP) const { 3394 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3395 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3396 DebugLoc DL = MI.getDebugLoc(); 3397 unsigned Wd = MI.getOperand(0).getReg(); 3398 unsigned SrcVecReg = MI.getOperand(1).getReg(); 3399 unsigned LaneReg = MI.getOperand(2).getReg(); 3400 unsigned SrcValReg = MI.getOperand(3).getReg(); 3401 3402 const TargetRegisterClass *VecRC = nullptr; 3403 // FIXME: This should be true for N32 too. 3404 const TargetRegisterClass *GPRRC = 3405 Subtarget.isABI_N64() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; 3406 unsigned SubRegIdx = Subtarget.isABI_N64() ? Mips::sub_32 : 0; 3407 unsigned ShiftOp = Subtarget.isABI_N64() ? 
Mips::DSLL : Mips::SLL; 3408 unsigned EltLog2Size; 3409 unsigned InsertOp = 0; 3410 unsigned InsveOp = 0; 3411 switch (EltSizeInBytes) { 3412 default: 3413 llvm_unreachable("Unexpected size"); 3414 case 1: 3415 EltLog2Size = 0; 3416 InsertOp = Mips::INSERT_B; 3417 InsveOp = Mips::INSVE_B; 3418 VecRC = &Mips::MSA128BRegClass; 3419 break; 3420 case 2: 3421 EltLog2Size = 1; 3422 InsertOp = Mips::INSERT_H; 3423 InsveOp = Mips::INSVE_H; 3424 VecRC = &Mips::MSA128HRegClass; 3425 break; 3426 case 4: 3427 EltLog2Size = 2; 3428 InsertOp = Mips::INSERT_W; 3429 InsveOp = Mips::INSVE_W; 3430 VecRC = &Mips::MSA128WRegClass; 3431 break; 3432 case 8: 3433 EltLog2Size = 3; 3434 InsertOp = Mips::INSERT_D; 3435 InsveOp = Mips::INSVE_D; 3436 VecRC = &Mips::MSA128DRegClass; 3437 break; 3438 } 3439 3440 if (IsFP) { 3441 unsigned Wt = RegInfo.createVirtualRegister(VecRC); 3442 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt) 3443 .addImm(0) 3444 .addReg(SrcValReg) 3445 .addImm(EltSizeInBytes == 8 ? Mips::sub_64 : Mips::sub_lo); 3446 SrcValReg = Wt; 3447 } 3448 3449 // Convert the lane index into a byte index 3450 if (EltSizeInBytes != 1) { 3451 unsigned LaneTmp1 = RegInfo.createVirtualRegister(GPRRC); 3452 BuildMI(*BB, MI, DL, TII->get(ShiftOp), LaneTmp1) 3453 .addReg(LaneReg) 3454 .addImm(EltLog2Size); 3455 LaneReg = LaneTmp1; 3456 } 3457 3458 // Rotate bytes around so that the desired lane is element zero 3459 unsigned WdTmp1 = RegInfo.createVirtualRegister(VecRC); 3460 BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), WdTmp1) 3461 .addReg(SrcVecReg) 3462 .addReg(SrcVecReg) 3463 .addReg(LaneReg, 0, SubRegIdx); 3464 3465 unsigned WdTmp2 = RegInfo.createVirtualRegister(VecRC); 3466 if (IsFP) { 3467 // Use insve.df to insert to element zero 3468 BuildMI(*BB, MI, DL, TII->get(InsveOp), WdTmp2) 3469 .addReg(WdTmp1) 3470 .addImm(0) 3471 .addReg(SrcValReg) 3472 .addImm(0); 3473 } else { 3474 // Use insert.df to insert to element zero 3475 BuildMI(*BB, MI, DL, TII->get(InsertOp), WdTmp2) 
.addReg(WdTmp1)
        .addReg(SrcValReg)
        .addImm(0);
  }

  // Rotate elements the rest of the way for a full rotation.
  // sld.df interprets $rt modulo the number of columns so we only need to
  // negate the lane index to do this.
  unsigned LaneTmp2 = RegInfo.createVirtualRegister(GPRRC);
  BuildMI(*BB, MI, DL, TII->get(Subtarget.isABI_N64() ? Mips::DSUB : Mips::SUB),
          LaneTmp2)
      .addReg(Subtarget.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO)
      .addReg(LaneReg);
  BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), Wd)
      .addReg(WdTmp2)
      .addReg(WdTmp2)
      .addReg(LaneTmp2, 0, SubRegIdx);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the FILL_FW pseudo instruction.
//
// fill_fw_pseudo $wd, $fs
// =>
//   implicit_def $wt1
//   insert_subreg $wt2:subreg_lo, $wt1, $fs
//   splati.w $wd, $wt2[0]
//
// Operand 0 of MI is the destination vector register and operand 1 is the
// floating point source. The new instructions are inserted before MI, MI is
// erased, and BB is returned.
MachineBasicBlock *
MipsSETargetLowering::emitFILL_FW(MachineInstr &MI,
                                  MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  unsigned Wd = MI.getOperand(0).getReg();
  unsigned Fs = MI.getOperand(1).getReg();
  // Both temporaries use the "Evens" register class when the subtarget reports
  // that odd single precision registers are not to be used.
  // NOTE(review): presumably because $fs must live in sub_lo of the vector
  // register - confirm against the register class definitions.
  unsigned Wt1 = RegInfo.createVirtualRegister(
      Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
                              : &Mips::MSA128WEvensRegClass);
  unsigned Wt2 = RegInfo.createVirtualRegister(
      Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
                              : &Mips::MSA128WEvensRegClass);

  // Wt1 is an undefined vector; only its sub_lo subregister is given a value
  // (Fs) before element 0 is splatted into Wd.
  BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1);
  BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2)
      .addReg(Wt1)
      .addReg(Fs)
      .addImm(Mips::sub_lo);
  BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wd).addReg(Wt2).addImm(0);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the FILL_FD pseudo instruction.
//
// fill_fd_pseudo $wd, $fs
// =>
//   implicit_def $wt1
//   insert_subreg $wt2:subreg_64, $wt1, $fs
//   splati.d $wd, $wt2[0]
//
// Operand 0 of MI is the destination vector register and operand 1 is the
// floating point source. The new instructions are inserted before MI, MI is
// erased, and BB is returned.
MachineBasicBlock *
MipsSETargetLowering::emitFILL_FD(MachineInstr &MI,
                                  MachineBasicBlock *BB) const {
  // This expansion is only expected to run when the subtarget has 64-bit
  // floating point registers.
  assert(Subtarget.isFP64bit());

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  unsigned Wd = MI.getOperand(0).getReg();
  unsigned Fs = MI.getOperand(1).getReg();
  unsigned Wt1 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
  unsigned Wt2 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);

  // Wt1 is an undefined vector; only its sub_64 subregister is given a value
  // (Fs) before element 0 is splatted into Wd.
  BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1);
  BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2)
      .addReg(Wt1)
      .addReg(Fs)
      .addImm(Mips::sub_64);
  BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wd).addReg(Wt2).addImm(0);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the ST_F16_PSEUDO instruction to store a f16 value from an MSA
// register.
//
// STF16 MSA128F16:$wd, mem_simm10:$addr
// =>
//  copy_u.h $rtemp,$wd[0]
//  sh $rtemp, $addr
//
// Safety: We can't use st.h & co as they would overwrite the memory after
// the destination. It would require half floats be allocated 16 bytes(!) of
// space.
3573 MachineBasicBlock * 3574 MipsSETargetLowering::emitST_F16_PSEUDO(MachineInstr &MI, 3575 MachineBasicBlock *BB) const { 3576 3577 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3578 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3579 DebugLoc DL = MI.getDebugLoc(); 3580 unsigned Ws = MI.getOperand(0).getReg(); 3581 unsigned Rt = MI.getOperand(1).getReg(); 3582 const MachineMemOperand &MMO = **MI.memoperands_begin(); 3583 unsigned Imm = MMO.getOffset(); 3584 3585 // Caution: A load via the GOT can expand to a GPR32 operand, a load via 3586 // spill and reload can expand as a GPR64 operand. Examine the 3587 // operand in detail and default to ABI. 3588 const TargetRegisterClass *RC = 3589 MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg()) 3590 : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass 3591 : &Mips::GPR64RegClass); 3592 const bool UsingMips32 = RC == &Mips::GPR32RegClass; 3593 unsigned Rs = RegInfo.createVirtualRegister(RC); 3594 3595 BuildMI(*BB, MI, DL, TII->get(Mips::COPY_U_H), Rs).addReg(Ws).addImm(0); 3596 BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::SH : Mips::SH64)) 3597 .addReg(Rs) 3598 .addReg(Rt) 3599 .addImm(Imm) 3600 .addMemOperand(BB->getParent()->getMachineMemOperand( 3601 &MMO, MMO.getOffset(), MMO.getSize())); 3602 3603 MI.eraseFromParent(); 3604 return BB; 3605 } 3606 3607 // Emit the LD_F16_PSEDUO instruction to load a f16 value into an MSA register. 3608 // 3609 // LD_F16 MSA128F16:$wd, mem_simm10:$addr 3610 // => 3611 // lh $rtemp, $addr 3612 // fill.h $wd, $rtemp 3613 // 3614 // Safety: We can't use ld.h & co as they over-read from the source. 3615 // Additionally, if the address is not modulo 16, 2 cases can occur: 3616 // a) Segmentation fault as the load instruction reads from a memory page 3617 // memory it's not supposed to. 3618 // b) The load crosses an implementation specific boundary, requiring OS 3619 // intervention. 
3620 // 3621 MachineBasicBlock * 3622 MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI, 3623 MachineBasicBlock *BB) const { 3624 3625 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3626 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3627 DebugLoc DL = MI.getDebugLoc(); 3628 unsigned Wd = MI.getOperand(0).getReg(); 3629 3630 // Caution: A load via the GOT can expand to a GPR32 operand, a load via 3631 // spill and reload can expand as a GPR64 operand. Examine the 3632 // operand in detail and default to ABI. 3633 const TargetRegisterClass *RC = 3634 MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg()) 3635 : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass 3636 : &Mips::GPR64RegClass); 3637 3638 const bool UsingMips32 = RC == &Mips::GPR32RegClass; 3639 unsigned Rt = RegInfo.createVirtualRegister(RC); 3640 3641 MachineInstrBuilder MIB = 3642 BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::LH : Mips::LH64), Rt); 3643 for (unsigned i = 1; i < MI.getNumOperands(); i++) 3644 MIB.add(MI.getOperand(i)); 3645 3646 BuildMI(*BB, MI, DL, TII->get(Mips::FILL_H), Wd).addReg(Rt); 3647 3648 MI.eraseFromParent(); 3649 return BB; 3650 } 3651 3652 // Emit the FPROUND_PSEUDO instruction. 3653 // 3654 // Round an FGR64Opnd, FGR32Opnd to an f16. 3655 // 3656 // Safety: Cycle the operand through the GPRs so the result always ends up 3657 // the correct MSA register. 3658 // 3659 // FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fs 3660 // / FGR64Opnd:$Fs and MSA128F16:$Wd to the same physical register 3661 // (which they can be, as the MSA registers are defined to alias the 3662 // FPU's 64 bit and 32 bit registers) the result can be accessed using 3663 // the correct register class. That requires operands be tie-able across 3664 // register classes which have a sub/super register class relationship. 
3665 // 3666 // For FPG32Opnd: 3667 // 3668 // FPROUND MSA128F16:$wd, FGR32Opnd:$fs 3669 // => 3670 // mfc1 $rtemp, $fs 3671 // fill.w $rtemp, $wtemp 3672 // fexdo.w $wd, $wtemp, $wtemp 3673 // 3674 // For FPG64Opnd on mips32r2+: 3675 // 3676 // FPROUND MSA128F16:$wd, FGR64Opnd:$fs 3677 // => 3678 // mfc1 $rtemp, $fs 3679 // fill.w $rtemp, $wtemp 3680 // mfhc1 $rtemp2, $fs 3681 // insert.w $wtemp[1], $rtemp2 3682 // insert.w $wtemp[3], $rtemp2 3683 // fexdo.w $wtemp2, $wtemp, $wtemp 3684 // fexdo.h $wd, $temp2, $temp2 3685 // 3686 // For FGR64Opnd on mips64r2+: 3687 // 3688 // FPROUND MSA128F16:$wd, FGR64Opnd:$fs 3689 // => 3690 // dmfc1 $rtemp, $fs 3691 // fill.d $rtemp, $wtemp 3692 // fexdo.w $wtemp2, $wtemp, $wtemp 3693 // fexdo.h $wd, $wtemp2, $wtemp2 3694 // 3695 // Safety note: As $wtemp is UNDEF, we may provoke a spurious exception if the 3696 // undef bits are "just right" and the exception enable bits are 3697 // set. By using fill.w to replicate $fs into all elements over 3698 // insert.w for one element, we avoid that potiential case. If 3699 // fexdo.[hw] causes an exception in, the exception is valid and it 3700 // occurs for all elements. 3701 // 3702 MachineBasicBlock * 3703 MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI, 3704 MachineBasicBlock *BB, 3705 bool IsFGR64) const { 3706 3707 // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous 3708 // here. It's technically doable to support MIPS32 here, but the ISA forbids 3709 // it. 
3710 assert(Subtarget.hasMSA() && Subtarget.hasMips32r2()); 3711 3712 bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64; 3713 3714 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3715 DebugLoc DL = MI.getDebugLoc(); 3716 unsigned Wd = MI.getOperand(0).getReg(); 3717 unsigned Fs = MI.getOperand(1).getReg(); 3718 3719 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3720 unsigned Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 3721 const TargetRegisterClass *GPRRC = 3722 IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; 3723 unsigned MFC1Opc = IsFGR64onMips64 ? Mips::DMFC1 : Mips::MFC1; 3724 unsigned FILLOpc = IsFGR64onMips64 ? Mips::FILL_D : Mips::FILL_W; 3725 3726 // Perform the register class copy as mentioned above. 3727 unsigned Rtemp = RegInfo.createVirtualRegister(GPRRC); 3728 BuildMI(*BB, MI, DL, TII->get(MFC1Opc), Rtemp).addReg(Fs); 3729 BuildMI(*BB, MI, DL, TII->get(FILLOpc), Wtemp).addReg(Rtemp); 3730 unsigned WPHI = Wtemp; 3731 3732 if (!Subtarget.hasMips64() && IsFGR64) { 3733 unsigned Rtemp2 = RegInfo.createVirtualRegister(GPRRC); 3734 BuildMI(*BB, MI, DL, TII->get(Mips::MFHC1_D64), Rtemp2).addReg(Fs); 3735 unsigned Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 3736 unsigned Wtemp3 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 3737 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp2) 3738 .addReg(Wtemp) 3739 .addReg(Rtemp2) 3740 .addImm(1); 3741 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp3) 3742 .addReg(Wtemp2) 3743 .addReg(Rtemp2) 3744 .addImm(3); 3745 WPHI = Wtemp3; 3746 } 3747 3748 if (IsFGR64) { 3749 unsigned Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 3750 BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_W), Wtemp2) 3751 .addReg(WPHI) 3752 .addReg(WPHI); 3753 WPHI = Wtemp2; 3754 } 3755 3756 BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_H), Wd).addReg(WPHI).addReg(WPHI); 3757 3758 MI.eraseFromParent(); 3759 return BB; 3760 } 3761 3762 // Emit the 
FPEXTEND_PSEUDO instruction. 3763 // 3764 // Expand an f16 to either a FGR32Opnd or FGR64Opnd. 3765 // 3766 // Safety: Cycle the result through the GPRs so the result always ends up 3767 // the correct floating point register. 3768 // 3769 // FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fd 3770 // / FGR64Opnd:$Fd and MSA128F16:$Ws to the same physical register 3771 // (which they can be, as the MSA registers are defined to alias the 3772 // FPU's 64 bit and 32 bit registers) the result can be accessed using 3773 // the correct register class. That requires operands be tie-able across 3774 // register classes which have a sub/super register class relationship. I 3775 // haven't checked. 3776 // 3777 // For FGR32Opnd: 3778 // 3779 // FPEXTEND FGR32Opnd:$fd, MSA128F16:$ws 3780 // => 3781 // fexupr.w $wtemp, $ws 3782 // copy_s.w $rtemp, $ws[0] 3783 // mtc1 $rtemp, $fd 3784 // 3785 // For FGR64Opnd on Mips64: 3786 // 3787 // FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws 3788 // => 3789 // fexupr.w $wtemp, $ws 3790 // fexupr.d $wtemp2, $wtemp 3791 // copy_s.d $rtemp, $wtemp2s[0] 3792 // dmtc1 $rtemp, $fd 3793 // 3794 // For FGR64Opnd on Mips32: 3795 // 3796 // FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws 3797 // => 3798 // fexupr.w $wtemp, $ws 3799 // fexupr.d $wtemp2, $wtemp 3800 // copy_s.w $rtemp, $wtemp2[0] 3801 // mtc1 $rtemp, $ftemp 3802 // copy_s.w $rtemp2, $wtemp2[1] 3803 // $fd = mthc1 $rtemp2, $ftemp 3804 // 3805 MachineBasicBlock * 3806 MipsSETargetLowering::emitFPEXTEND_PSEUDO(MachineInstr &MI, 3807 MachineBasicBlock *BB, 3808 bool IsFGR64) const { 3809 3810 // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous 3811 // here. It's technically doable to support MIPS32 here, but the ISA forbids 3812 // it. 
3813 assert(Subtarget.hasMSA() && Subtarget.hasMips32r2()); 3814 3815 bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64; 3816 bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64; 3817 3818 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3819 DebugLoc DL = MI.getDebugLoc(); 3820 unsigned Fd = MI.getOperand(0).getReg(); 3821 unsigned Ws = MI.getOperand(1).getReg(); 3822 3823 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3824 const TargetRegisterClass *GPRRC = 3825 IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; 3826 unsigned MTC1Opc = IsFGR64onMips64 ? Mips::DMTC1 : Mips::MTC1; 3827 unsigned COPYOpc = IsFGR64onMips64 ? Mips::COPY_S_D : Mips::COPY_S_W; 3828 3829 unsigned Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 3830 unsigned WPHI = Wtemp; 3831 3832 BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_W), Wtemp).addReg(Ws); 3833 if (IsFGR64) { 3834 WPHI = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); 3835 BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_D), WPHI).addReg(Wtemp); 3836 } 3837 3838 // Perform the safety regclass copy mentioned above. 3839 unsigned Rtemp = RegInfo.createVirtualRegister(GPRRC); 3840 unsigned FPRPHI = IsFGR64onMips32 3841 ? RegInfo.createVirtualRegister(&Mips::FGR64RegClass) 3842 : Fd; 3843 BuildMI(*BB, MI, DL, TII->get(COPYOpc), Rtemp).addReg(WPHI).addImm(0); 3844 BuildMI(*BB, MI, DL, TII->get(MTC1Opc), FPRPHI).addReg(Rtemp); 3845 3846 if (IsFGR64onMips32) { 3847 unsigned Rtemp2 = RegInfo.createVirtualRegister(GPRRC); 3848 BuildMI(*BB, MI, DL, TII->get(Mips::COPY_S_W), Rtemp2) 3849 .addReg(WPHI) 3850 .addImm(1); 3851 BuildMI(*BB, MI, DL, TII->get(Mips::MTHC1_D64), Fd) 3852 .addReg(FPRPHI) 3853 .addReg(Rtemp2); 3854 } 3855 3856 MI.eraseFromParent(); 3857 return BB; 3858 } 3859 3860 // Emit the FEXP2_W_1 pseudo instructions. 
3861 // 3862 // fexp2_w_1_pseudo $wd, $wt 3863 // => 3864 // ldi.w $ws, 1 3865 // fexp2.w $wd, $ws, $wt 3866 MachineBasicBlock * 3867 MipsSETargetLowering::emitFEXP2_W_1(MachineInstr &MI, 3868 MachineBasicBlock *BB) const { 3869 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3870 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3871 const TargetRegisterClass *RC = &Mips::MSA128WRegClass; 3872 unsigned Ws1 = RegInfo.createVirtualRegister(RC); 3873 unsigned Ws2 = RegInfo.createVirtualRegister(RC); 3874 DebugLoc DL = MI.getDebugLoc(); 3875 3876 // Splat 1.0 into a vector 3877 BuildMI(*BB, MI, DL, TII->get(Mips::LDI_W), Ws1).addImm(1); 3878 BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_W), Ws2).addReg(Ws1); 3879 3880 // Emit 1.0 * fexp2(Wt) 3881 BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_W), MI.getOperand(0).getReg()) 3882 .addReg(Ws2) 3883 .addReg(MI.getOperand(1).getReg()); 3884 3885 MI.eraseFromParent(); // The pseudo instruction is gone now. 3886 return BB; 3887 } 3888 3889 // Emit the FEXP2_D_1 pseudo instructions. 
3890 // 3891 // fexp2_d_1_pseudo $wd, $wt 3892 // => 3893 // ldi.d $ws, 1 3894 // fexp2.d $wd, $ws, $wt 3895 MachineBasicBlock * 3896 MipsSETargetLowering::emitFEXP2_D_1(MachineInstr &MI, 3897 MachineBasicBlock *BB) const { 3898 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3899 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3900 const TargetRegisterClass *RC = &Mips::MSA128DRegClass; 3901 unsigned Ws1 = RegInfo.createVirtualRegister(RC); 3902 unsigned Ws2 = RegInfo.createVirtualRegister(RC); 3903 DebugLoc DL = MI.getDebugLoc(); 3904 3905 // Splat 1.0 into a vector 3906 BuildMI(*BB, MI, DL, TII->get(Mips::LDI_D), Ws1).addImm(1); 3907 BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_D), Ws2).addReg(Ws1); 3908 3909 // Emit 1.0 * fexp2(Wt) 3910 BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_D), MI.getOperand(0).getReg()) 3911 .addReg(Ws2) 3912 .addReg(MI.getOperand(1).getReg()); 3913 3914 MI.eraseFromParent(); // The pseudo instruction is gone now. 3915 return BB; 3916 } 3917