//===-- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface --*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Subclass of MipsTargetLowering specialized for mips32/64.
//
//===----------------------------------------------------------------------===//
#include "MipsSEISelLowering.h"
#include "MipsMachineFunction.h"
#include "MipsRegisterInfo.h"
#include "MipsTargetMachine.h"
#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"

using namespace llvm;

#define DEBUG_TYPE "mips-isel"

// Hidden command-line switch "mips-tail-calls"; defaults to false.
static cl::opt<bool>
UseMipsTailCalls("mips-tail-calls", cl::Hidden,
                    cl::desc("MIPS: permit tail calls."), cl::init(false));

// Command-line switch "mno-ldc1-sdc1"; defaults to false. When set, the
// constructor below marks f64 LOAD/STORE as Custom.
static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false),
                                   cl::desc("Expand double precision loads and "
                                            "stores to their single precision "
                                            "counterparts"));

// Registers the register classes, operation actions, condition-code actions
// and target DAG combines for the MipsSE lowering, driven by the features
// reported by Subtarget.
//
// The statement order matters: the MIPS32r6/MIPS64r6 sections near the end
// register actions for (opcode, type) pairs that were already registered
// earlier in this function, so they must stay after the generic section.
MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
                                           const MipsSubtarget &STI)
    : MipsTargetLowering(TM, STI) {
  // Set up the register classes
  addRegisterClass(MVT::i32, &Mips::GPR32RegClass);

  if (Subtarget.isGP64bit())
    addRegisterClass(MVT::i64, &Mips::GPR64RegClass);

  if (Subtarget.hasDSP() || Subtarget.hasMSA()) {
    // Expand all truncating stores and extending loads.
    // This covers every ordered pair of vector value types.
    for (MVT VT0 : MVT::vector_valuetypes()) {
      for (MVT VT1 : MVT::vector_valuetypes()) {
        setTruncStoreAction(VT0, VT1, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT0, VT1, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT0, VT1, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT0, VT1, Expand);
      }
    }
  }

  if (Subtarget.hasDSP()) {
    // The two vector types that are given the DSP register class.
    MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8};

    for (unsigned i = 0; i < array_lengthof(VecTys); ++i) {
      addRegisterClass(VecTys[i], &Mips::DSPRRegClass);

      // Expand all builtin opcodes.
      for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
        setOperationAction(Opc, VecTys[i], Expand);

      // Then re-enable the handful of operations marked Legal for these types.
      setOperationAction(ISD::ADD, VecTys[i], Legal);
      setOperationAction(ISD::SUB, VecTys[i], Legal);
      setOperationAction(ISD::LOAD, VecTys[i], Legal);
      setOperationAction(ISD::STORE, VecTys[i], Legal);
      setOperationAction(ISD::BITCAST, VecTys[i], Legal);
    }

    setTargetDAGCombine(ISD::SHL);
    setTargetDAGCombine(ISD::SRA);
    setTargetDAGCombine(ISD::SRL);
    setTargetDAGCombine(ISD::SETCC);
    setTargetDAGCombine(ISD::VSELECT);
  }

  if (Subtarget.hasDSPR2())
    setOperationAction(ISD::MUL, MVT::v2i16, Legal);

  if (Subtarget.hasMSA()) {
    // Integer and floating-point 128-bit vector types; see addMSAIntType and
    // addMSAFloatType for the per-type actions.
    addMSAIntType(MVT::v16i8, &Mips::MSA128BRegClass);
    addMSAIntType(MVT::v8i16, &Mips::MSA128HRegClass);
    addMSAIntType(MVT::v4i32, &Mips::MSA128WRegClass);
    addMSAIntType(MVT::v2i64, &Mips::MSA128DRegClass);
    addMSAFloatType(MVT::v8f16, &Mips::MSA128HRegClass);
    addMSAFloatType(MVT::v4f32, &Mips::MSA128WRegClass);
    addMSAFloatType(MVT::v2f64, &Mips::MSA128DRegClass);

    // f16 is a storage-only type, always promote it to f32.
    addRegisterClass(MVT::f16, &Mips::MSA128HRegClass);
    setOperationAction(ISD::SETCC, MVT::f16, Promote);
    setOperationAction(ISD::BR_CC, MVT::f16, Promote);
    setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
    setOperationAction(ISD::SELECT, MVT::f16, Promote);
    setOperationAction(ISD::FADD, MVT::f16, Promote);
    setOperationAction(ISD::FSUB, MVT::f16, Promote);
    setOperationAction(ISD::FMUL, MVT::f16, Promote);
    setOperationAction(ISD::FDIV, MVT::f16, Promote);
    setOperationAction(ISD::FREM, MVT::f16, Promote);
    setOperationAction(ISD::FMA, MVT::f16, Promote);
    setOperationAction(ISD::FNEG, MVT::f16, Promote);
    setOperationAction(ISD::FABS, MVT::f16, Promote);
    setOperationAction(ISD::FCEIL, MVT::f16, Promote);
    setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
    setOperationAction(ISD::FCOS, MVT::f16, Promote);
    setOperationAction(ISD::FP_EXTEND, MVT::f16, Promote);
    setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
    setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
    setOperationAction(ISD::FPOW, MVT::f16, Promote);
    setOperationAction(ISD::FPOWI, MVT::f16, Promote);
    setOperationAction(ISD::FRINT, MVT::f16, Promote);
    setOperationAction(ISD::FSIN, MVT::f16, Promote);
    setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
    setOperationAction(ISD::FSQRT, MVT::f16, Promote);
    setOperationAction(ISD::FEXP, MVT::f16, Promote);
    setOperationAction(ISD::FEXP2, MVT::f16, Promote);
    setOperationAction(ISD::FLOG, MVT::f16, Promote);
    setOperationAction(ISD::FLOG2, MVT::f16, Promote);
    setOperationAction(ISD::FLOG10, MVT::f16, Promote);
    setOperationAction(ISD::FROUND, MVT::f16, Promote);
    setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
    setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
    setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
    setOperationAction(ISD::FMINNAN, MVT::f16, Promote);
    setOperationAction(ISD::FMAXNAN, MVT::f16, Promote);

    setTargetDAGCombine(ISD::AND);
    setTargetDAGCombine(ISD::OR);
    setTargetDAGCombine(ISD::SRA);
    setTargetDAGCombine(ISD::VSELECT);
    setTargetDAGCombine(ISD::XOR);
  }

  if (!Subtarget.useSoftFloat()) {
    addRegisterClass(MVT::f32, &Mips::FGR32RegClass);

    // When dealing with single precision only, use libcalls
    // (i.e. no f64 register class is registered in single-float mode).
    if (!Subtarget.isSingleFloat()) {
      if (Subtarget.isFP64bit())
        addRegisterClass(MVT::f64, &Mips::FGR64RegClass);
      else
        addRegisterClass(MVT::f64, &Mips::AFGR64RegClass);
    }
  }

  // 32-bit multiplies that produce a high half are custom lowered (see
  // LowerOperation, which routes them to lowerMulDiv).
  setOperationAction(ISD::SMUL_LOHI,          MVT::i32, Custom);
  setOperationAction(ISD::UMUL_LOHI,          MVT::i32, Custom);
  setOperationAction(ISD::MULHS,              MVT::i32, Custom);
  setOperationAction(ISD::MULHU,              MVT::i32, Custom);

  if (Subtarget.hasCnMips())
    setOperationAction(ISD::MUL,              MVT::i64, Legal);
  else if (Subtarget.isGP64bit())
    setOperationAction(ISD::MUL,              MVT::i64, Custom);

  if (Subtarget.isGP64bit()) {
    setOperationAction(ISD::SMUL_LOHI,        MVT::i64, Custom);
    setOperationAction(ISD::UMUL_LOHI,        MVT::i64, Custom);
    setOperationAction(ISD::MULHS,            MVT::i64, Custom);
    setOperationAction(ISD::MULHU,            MVT::i64, Custom);
    setOperationAction(ISD::SDIVREM,          MVT::i64, Custom);
    setOperationAction(ISD::UDIVREM,          MVT::i64, Custom);
  }

  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
  setOperationAction(ISD::INTRINSIC_W_CHAIN,  MVT::i64, Custom);

  setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
  setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_FENCE,       MVT::Other, Custom);
  setOperationAction(ISD::LOAD,               MVT::i32, Custom);
  setOperationAction(ISD::STORE,              MVT::i32, Custom);

  setTargetDAGCombine(ISD::ADDE);
  setTargetDAGCombine(ISD::SUBE);
  setTargetDAGCombine(ISD::MUL);

  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);

  // -mno-ldc1-sdc1: route f64 loads/stores through custom lowering.
  if (NoDPLoadStore) {
    setOperationAction(ISD::LOAD, MVT::f64, Custom);
    setOperationAction(ISD::STORE, MVT::f64, Custom);
  }

  if (Subtarget.hasMips32r6()) {
    // MIPS32r6 replaces the accumulator-based multiplies with a three register
    // instruction
    // NOTE: these re-register pairs that were marked Custom above.
    setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
    setOperationAction(ISD::MUL, MVT::i32, Legal);
    setOperationAction(ISD::MULHS, MVT::i32, Legal);
    setOperationAction(ISD::MULHU, MVT::i32, Legal);

    // MIPS32r6 replaces the accumulator-based division/remainder with separate
    // three register division and remainder instructions.
    setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
    setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
    setOperationAction(ISD::SDIV, MVT::i32, Legal);
    setOperationAction(ISD::UDIV, MVT::i32, Legal);
    setOperationAction(ISD::SREM, MVT::i32, Legal);
    setOperationAction(ISD::UREM, MVT::i32, Legal);

    // MIPS32r6 replaces conditional moves with an equivalent that removes the
    // need for three GPR read ports.
    setOperationAction(ISD::SETCC, MVT::i32, Legal);
    setOperationAction(ISD::SELECT, MVT::i32, Legal);
    setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);

    setOperationAction(ISD::SETCC, MVT::f32, Legal);
    setOperationAction(ISD::SELECT, MVT::f32, Legal);
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);

    assert(Subtarget.isFP64bit() && "FR=1 is required for MIPS32r6");
    setOperationAction(ISD::SETCC, MVT::f64, Legal);
    setOperationAction(ISD::SELECT, MVT::f64, Legal);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);

    setOperationAction(ISD::BRCOND, MVT::Other, Legal);

    // Floating point > and >= are supported via < and <=
    setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
    setCondCodeAction(ISD::SETOGT, MVT::f32, Expand);
    setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
    setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);

    setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
    setCondCodeAction(ISD::SETOGT, MVT::f64, Expand);
    setCondCodeAction(ISD::SETUGE, MVT::f64, Expand);
    setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  }

  if (Subtarget.hasMips64r6()) {
    // MIPS64r6 replaces the accumulator-based multiplies with a three register
    // instruction
    setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
    setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
    setOperationAction(ISD::MUL, MVT::i64, Legal);
    setOperationAction(ISD::MULHS, MVT::i64, Legal);
    setOperationAction(ISD::MULHU, MVT::i64, Legal);

    // MIPS64r6 replaces the accumulator-based division/remainder with separate
    // three register division and remainder instructions.
    setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
    setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
    setOperationAction(ISD::SDIV, MVT::i64, Legal);
    setOperationAction(ISD::UDIV, MVT::i64, Legal);
    setOperationAction(ISD::SREM, MVT::i64, Legal);
    setOperationAction(ISD::UREM, MVT::i64, Legal);

    // MIPS64r6 replaces conditional moves with an equivalent that removes the
    // need for three GPR read ports.
    setOperationAction(ISD::SETCC, MVT::i64, Legal);
    setOperationAction(ISD::SELECT, MVT::i64, Legal);
    setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
  }

  // Must run after every addRegisterClass call above.
  computeRegisterProperties(Subtarget.getRegisterInfo());
}

// Factory: heap-allocates a MipsSETargetLowering. The object is created with
// `new`; the caller receives the raw pointer.
const MipsTargetLowering *
llvm::createMipsSETargetLowering(const MipsTargetMachine &TM,
                                 const MipsSubtarget &STI) {
  return new MipsSETargetLowering(TM, STI);
}

// Returns the representative register class for VT. MVT::Untyped is mapped to
// the 64-bit accumulator class (the DSP variant when the subtarget has DSP);
// every other type defers to TargetLowering.
const TargetRegisterClass *
MipsSETargetLowering::getRepRegClassFor(MVT VT) const {
  if (VT == MVT::Untyped)
    return Subtarget.hasDSP() ? &Mips::ACC64DSPRegClass : &Mips::ACC64RegClass;

  return TargetLowering::getRepRegClassFor(VT);
}

// Enable MSA support for the given integer type and Register class.
//
// Every builtin opcode is first set to Expand for Ty; the operations listed
// afterwards are then switched to Legal or Custom.
void MipsSETargetLowering::
addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) {
  addRegisterClass(Ty, RC);

  // Expand all builtin opcodes.
  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
    setOperationAction(Opc, Ty, Expand);

  setOperationAction(ISD::BITCAST, Ty, Legal);
  setOperationAction(ISD::LOAD, Ty, Legal);
  setOperationAction(ISD::STORE, Ty, Legal);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal);
  setOperationAction(ISD::BUILD_VECTOR, Ty, Custom);

  setOperationAction(ISD::ADD, Ty, Legal);
  setOperationAction(ISD::AND, Ty, Legal);
  setOperationAction(ISD::CTLZ, Ty, Legal);
  setOperationAction(ISD::CTPOP, Ty, Legal);
  setOperationAction(ISD::MUL, Ty, Legal);
  setOperationAction(ISD::OR, Ty, Legal);
  setOperationAction(ISD::SDIV, Ty, Legal);
  setOperationAction(ISD::SREM, Ty, Legal);
  setOperationAction(ISD::SHL, Ty, Legal);
  setOperationAction(ISD::SRA, Ty, Legal);
  setOperationAction(ISD::SRL, Ty, Legal);
  setOperationAction(ISD::SUB, Ty, Legal);
  setOperationAction(ISD::UDIV, Ty, Legal);
  setOperationAction(ISD::UREM, Ty, Legal);
  setOperationAction(ISD::VECTOR_SHUFFLE, Ty, Custom);
  setOperationAction(ISD::VSELECT, Ty, Legal);
  setOperationAction(ISD::XOR, Ty, Legal);

  // int<->fp conversions are only made Legal for the 32- and 64-bit element
  // vector types.
  if (Ty == MVT::v4i32 || Ty == MVT::v2i64) {
    setOperationAction(ISD::FP_TO_SINT, Ty, Legal);
    setOperationAction(ISD::FP_TO_UINT, Ty, Legal);
    setOperationAction(ISD::SINT_TO_FP, Ty, Legal);
    setOperationAction(ISD::UINT_TO_FP, Ty, Legal);
  }

  // SETCC is Legal, but these five condition codes are marked Expand.
  setOperationAction(ISD::SETCC, Ty, Legal);
  setCondCodeAction(ISD::SETNE, Ty, Expand);
  setCondCodeAction(ISD::SETGE, Ty, Expand);
  setCondCodeAction(ISD::SETGT, Ty, Expand);
  setCondCodeAction(ISD::SETUGE, Ty, Expand);
  setCondCodeAction(ISD::SETUGT, Ty, Expand);
}

// Enable MSA support for the given floating-point type and Register class.
334 void MipsSETargetLowering:: 335 addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { 336 addRegisterClass(Ty, RC); 337 338 // Expand all builtin opcodes. 339 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) 340 setOperationAction(Opc, Ty, Expand); 341 342 setOperationAction(ISD::LOAD, Ty, Legal); 343 setOperationAction(ISD::STORE, Ty, Legal); 344 setOperationAction(ISD::BITCAST, Ty, Legal); 345 setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Legal); 346 setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal); 347 setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); 348 349 if (Ty != MVT::v8f16) { 350 setOperationAction(ISD::FABS, Ty, Legal); 351 setOperationAction(ISD::FADD, Ty, Legal); 352 setOperationAction(ISD::FDIV, Ty, Legal); 353 setOperationAction(ISD::FEXP2, Ty, Legal); 354 setOperationAction(ISD::FLOG2, Ty, Legal); 355 setOperationAction(ISD::FMA, Ty, Legal); 356 setOperationAction(ISD::FMUL, Ty, Legal); 357 setOperationAction(ISD::FRINT, Ty, Legal); 358 setOperationAction(ISD::FSQRT, Ty, Legal); 359 setOperationAction(ISD::FSUB, Ty, Legal); 360 setOperationAction(ISD::VSELECT, Ty, Legal); 361 362 setOperationAction(ISD::SETCC, Ty, Legal); 363 setCondCodeAction(ISD::SETOGE, Ty, Expand); 364 setCondCodeAction(ISD::SETOGT, Ty, Expand); 365 setCondCodeAction(ISD::SETUGE, Ty, Expand); 366 setCondCodeAction(ISD::SETUGT, Ty, Expand); 367 setCondCodeAction(ISD::SETGE, Ty, Expand); 368 setCondCodeAction(ISD::SETGT, Ty, Expand); 369 } 370 } 371 372 bool 373 MipsSETargetLowering::allowsMisalignedMemoryAccesses(EVT VT, 374 unsigned, 375 unsigned, 376 bool *Fast) const { 377 MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy; 378 379 if (Subtarget.systemSupportsUnalignedAccess()) { 380 // MIPS32r6/MIPS64r6 is required to support unaligned access. 
It's 381 // implementation defined whether this is handled by hardware, software, or 382 // a hybrid of the two but it's expected that most implementations will 383 // handle the majority of cases in hardware. 384 if (Fast) 385 *Fast = true; 386 return true; 387 } 388 389 switch (SVT) { 390 case MVT::i64: 391 case MVT::i32: 392 if (Fast) 393 *Fast = true; 394 return true; 395 default: 396 return false; 397 } 398 } 399 400 SDValue MipsSETargetLowering::LowerOperation(SDValue Op, 401 SelectionDAG &DAG) const { 402 switch(Op.getOpcode()) { 403 case ISD::LOAD: return lowerLOAD(Op, DAG); 404 case ISD::STORE: return lowerSTORE(Op, DAG); 405 case ISD::SMUL_LOHI: return lowerMulDiv(Op, MipsISD::Mult, true, true, DAG); 406 case ISD::UMUL_LOHI: return lowerMulDiv(Op, MipsISD::Multu, true, true, DAG); 407 case ISD::MULHS: return lowerMulDiv(Op, MipsISD::Mult, false, true, DAG); 408 case ISD::MULHU: return lowerMulDiv(Op, MipsISD::Multu, false, true, DAG); 409 case ISD::MUL: return lowerMulDiv(Op, MipsISD::Mult, true, false, DAG); 410 case ISD::SDIVREM: return lowerMulDiv(Op, MipsISD::DivRem, true, true, DAG); 411 case ISD::UDIVREM: return lowerMulDiv(Op, MipsISD::DivRemU, true, true, 412 DAG); 413 case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG); 414 case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG); 415 case ISD::INTRINSIC_VOID: return lowerINTRINSIC_VOID(Op, DAG); 416 case ISD::EXTRACT_VECTOR_ELT: return lowerEXTRACT_VECTOR_ELT(Op, DAG); 417 case ISD::BUILD_VECTOR: return lowerBUILD_VECTOR(Op, DAG); 418 case ISD::VECTOR_SHUFFLE: return lowerVECTOR_SHUFFLE(Op, DAG); 419 } 420 421 return MipsTargetLowering::LowerOperation(Op, DAG); 422 } 423 424 // selectMADD - 425 // Transforms a subgraph in CurDAG if the following pattern is found: 426 // (addc multLo, Lo0), (adde multHi, Hi0), 427 // where, 428 // multHi/Lo: product of multiplication 429 // Lo0: initial value of Lo register 430 // Hi0: initial value of Hi register 431 // Return true 
if pattern matching was successful. 432 static bool selectMADD(SDNode *ADDENode, SelectionDAG *CurDAG) { 433 // ADDENode's second operand must be a flag output of an ADDC node in order 434 // for the matching to be successful. 435 SDNode *ADDCNode = ADDENode->getOperand(2).getNode(); 436 437 if (ADDCNode->getOpcode() != ISD::ADDC) 438 return false; 439 440 SDValue MultHi = ADDENode->getOperand(0); 441 SDValue MultLo = ADDCNode->getOperand(0); 442 SDNode *MultNode = MultHi.getNode(); 443 unsigned MultOpc = MultHi.getOpcode(); 444 445 // MultHi and MultLo must be generated by the same node, 446 if (MultLo.getNode() != MultNode) 447 return false; 448 449 // and it must be a multiplication. 450 if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI) 451 return false; 452 453 // MultLo amd MultHi must be the first and second output of MultNode 454 // respectively. 455 if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0) 456 return false; 457 458 // Transform this to a MADD only if ADDENode and ADDCNode are the only users 459 // of the values of MultNode, in which case MultNode will be removed in later 460 // phases. 461 // If there exist users other than ADDENode or ADDCNode, this function returns 462 // here, which will result in MultNode being mapped to a single MULT 463 // instruction node rather than a pair of MULT and MADD instructions being 464 // produced. 465 if (!MultHi.hasOneUse() || !MultLo.hasOneUse()) 466 return false; 467 468 SDLoc DL(ADDENode); 469 470 // Initialize accumulator. 471 SDValue ACCIn = CurDAG->getNode(MipsISD::MTLOHI, DL, MVT::Untyped, 472 ADDCNode->getOperand(1), 473 ADDENode->getOperand(1)); 474 475 // create MipsMAdd(u) node 476 MultOpc = MultOpc == ISD::UMUL_LOHI ? 
MipsISD::MAddu : MipsISD::MAdd; 477 478 SDValue MAdd = CurDAG->getNode(MultOpc, DL, MVT::Untyped, 479 MultNode->getOperand(0),// Factor 0 480 MultNode->getOperand(1),// Factor 1 481 ACCIn); 482 483 // replace uses of adde and addc here 484 if (!SDValue(ADDCNode, 0).use_empty()) { 485 SDValue LoOut = CurDAG->getNode(MipsISD::MFLO, DL, MVT::i32, MAdd); 486 CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDCNode, 0), LoOut); 487 } 488 if (!SDValue(ADDENode, 0).use_empty()) { 489 SDValue HiOut = CurDAG->getNode(MipsISD::MFHI, DL, MVT::i32, MAdd); 490 CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDENode, 0), HiOut); 491 } 492 493 return true; 494 } 495 496 // selectMSUB - 497 // Transforms a subgraph in CurDAG if the following pattern is found: 498 // (addc Lo0, multLo), (sube Hi0, multHi), 499 // where, 500 // multHi/Lo: product of multiplication 501 // Lo0: initial value of Lo register 502 // Hi0: initial value of Hi register 503 // Return true if pattern matching was successful. 504 static bool selectMSUB(SDNode *SUBENode, SelectionDAG *CurDAG) { 505 // SUBENode's second operand must be a flag output of an SUBC node in order 506 // for the matching to be successful. 507 SDNode *SUBCNode = SUBENode->getOperand(2).getNode(); 508 509 if (SUBCNode->getOpcode() != ISD::SUBC) 510 return false; 511 512 SDValue MultHi = SUBENode->getOperand(1); 513 SDValue MultLo = SUBCNode->getOperand(1); 514 SDNode *MultNode = MultHi.getNode(); 515 unsigned MultOpc = MultHi.getOpcode(); 516 517 // MultHi and MultLo must be generated by the same node, 518 if (MultLo.getNode() != MultNode) 519 return false; 520 521 // and it must be a multiplication. 522 if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI) 523 return false; 524 525 // MultLo amd MultHi must be the first and second output of MultNode 526 // respectively. 
527 if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0) 528 return false; 529 530 // Transform this to a MSUB only if SUBENode and SUBCNode are the only users 531 // of the values of MultNode, in which case MultNode will be removed in later 532 // phases. 533 // If there exist users other than SUBENode or SUBCNode, this function returns 534 // here, which will result in MultNode being mapped to a single MULT 535 // instruction node rather than a pair of MULT and MSUB instructions being 536 // produced. 537 if (!MultHi.hasOneUse() || !MultLo.hasOneUse()) 538 return false; 539 540 SDLoc DL(SUBENode); 541 542 // Initialize accumulator. 543 SDValue ACCIn = CurDAG->getNode(MipsISD::MTLOHI, DL, MVT::Untyped, 544 SUBCNode->getOperand(0), 545 SUBENode->getOperand(0)); 546 547 // create MipsSub(u) node 548 MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MSubu : MipsISD::MSub; 549 550 SDValue MSub = CurDAG->getNode(MultOpc, DL, MVT::Glue, 551 MultNode->getOperand(0),// Factor 0 552 MultNode->getOperand(1),// Factor 1 553 ACCIn); 554 555 // replace uses of sube and subc here 556 if (!SDValue(SUBCNode, 0).use_empty()) { 557 SDValue LoOut = CurDAG->getNode(MipsISD::MFLO, DL, MVT::i32, MSub); 558 CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBCNode, 0), LoOut); 559 } 560 if (!SDValue(SUBENode, 0).use_empty()) { 561 SDValue HiOut = CurDAG->getNode(MipsISD::MFHI, DL, MVT::i32, MSub); 562 CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBENode, 0), HiOut); 563 } 564 565 return true; 566 } 567 568 static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG, 569 TargetLowering::DAGCombinerInfo &DCI, 570 const MipsSubtarget &Subtarget) { 571 if (DCI.isBeforeLegalize()) 572 return SDValue(); 573 574 if (Subtarget.hasMips32() && !Subtarget.hasMips32r6() && 575 N->getValueType(0) == MVT::i32 && selectMADD(N, &DAG)) 576 return SDValue(N, 0); 577 578 return SDValue(); 579 } 580 581 // Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT 582 // 583 // Performs the following transformations: 
584 // - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its 585 // sign/zero-extension is completely overwritten by the new one performed by 586 // the ISD::AND. 587 // - Removes redundant zero extensions performed by an ISD::AND. 588 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, 589 TargetLowering::DAGCombinerInfo &DCI, 590 const MipsSubtarget &Subtarget) { 591 if (!Subtarget.hasMSA()) 592 return SDValue(); 593 594 SDValue Op0 = N->getOperand(0); 595 SDValue Op1 = N->getOperand(1); 596 unsigned Op0Opcode = Op0->getOpcode(); 597 598 // (and (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d) 599 // where $d + 1 == 2^n and n == 32 600 // or $d + 1 == 2^n and n <= 32 and ZExt 601 // -> (MipsVExtractZExt $a, $b, $c) 602 if (Op0Opcode == MipsISD::VEXTRACT_SEXT_ELT || 603 Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT) { 604 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Op1); 605 606 if (!Mask) 607 return SDValue(); 608 609 int32_t Log2IfPositive = (Mask->getAPIntValue() + 1).exactLogBase2(); 610 611 if (Log2IfPositive <= 0) 612 return SDValue(); // Mask+1 is not a power of 2 613 614 SDValue Op0Op2 = Op0->getOperand(2); 615 EVT ExtendTy = cast<VTSDNode>(Op0Op2)->getVT(); 616 unsigned ExtendTySize = ExtendTy.getSizeInBits(); 617 unsigned Log2 = Log2IfPositive; 618 619 if ((Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT && Log2 >= ExtendTySize) || 620 Log2 == ExtendTySize) { 621 SDValue Ops[] = { Op0->getOperand(0), Op0->getOperand(1), Op0Op2 }; 622 return DAG.getNode(MipsISD::VEXTRACT_ZEXT_ELT, SDLoc(Op0), 623 Op0->getVTList(), 624 makeArrayRef(Ops, Op0->getNumOperands())); 625 } 626 } 627 628 return SDValue(); 629 } 630 631 // Determine if the specified node is a constant vector splat. 632 // 633 // Returns true and sets Imm if: 634 // * N is a ISD::BUILD_VECTOR representing a constant splat 635 // 636 // This function is quite similar to MipsSEDAGToDAGISel::selectVSplat. 
The 637 // differences are that it assumes the MSA has already been checked and the 638 // arbitrary requirement for a maximum of 32-bit integers isn't applied (and 639 // must not be in order for binsri.d to be selectable). 640 static bool isVSplat(SDValue N, APInt &Imm, bool IsLittleEndian) { 641 BuildVectorSDNode *Node = dyn_cast<BuildVectorSDNode>(N.getNode()); 642 643 if (!Node) 644 return false; 645 646 APInt SplatValue, SplatUndef; 647 unsigned SplatBitSize; 648 bool HasAnyUndefs; 649 650 if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, 651 8, !IsLittleEndian)) 652 return false; 653 654 Imm = SplatValue; 655 656 return true; 657 } 658 659 // Test whether the given node is an all-ones build_vector. 660 static bool isVectorAllOnes(SDValue N) { 661 // Look through bitcasts. Endianness doesn't matter because we are looking 662 // for an all-ones value. 663 if (N->getOpcode() == ISD::BITCAST) 664 N = N->getOperand(0); 665 666 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N); 667 668 if (!BVN) 669 return false; 670 671 APInt SplatValue, SplatUndef; 672 unsigned SplatBitSize; 673 bool HasAnyUndefs; 674 675 // Endianness doesn't matter in this context because we are looking for 676 // an all-ones value. 677 if (BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs)) 678 return SplatValue.isAllOnesValue(); 679 680 return false; 681 } 682 683 // Test whether N is the bitwise inverse of OfNode. 684 static bool isBitwiseInverse(SDValue N, SDValue OfNode) { 685 if (N->getOpcode() != ISD::XOR) 686 return false; 687 688 if (isVectorAllOnes(N->getOperand(0))) 689 return N->getOperand(1) == OfNode; 690 691 if (isVectorAllOnes(N->getOperand(1))) 692 return N->getOperand(0) == OfNode; 693 694 return false; 695 } 696 697 // Perform combines where ISD::OR is the root node. 
698 // 699 // Performs the following transformations: 700 // - (or (and $a, $mask), (and $b, $inv_mask)) => (vselect $mask, $a, $b) 701 // where $inv_mask is the bitwise inverse of $mask and the 'or' has a 128-bit 702 // vector type. 703 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, 704 TargetLowering::DAGCombinerInfo &DCI, 705 const MipsSubtarget &Subtarget) { 706 if (!Subtarget.hasMSA()) 707 return SDValue(); 708 709 EVT Ty = N->getValueType(0); 710 711 if (!Ty.is128BitVector()) 712 return SDValue(); 713 714 SDValue Op0 = N->getOperand(0); 715 SDValue Op1 = N->getOperand(1); 716 717 if (Op0->getOpcode() == ISD::AND && Op1->getOpcode() == ISD::AND) { 718 SDValue Op0Op0 = Op0->getOperand(0); 719 SDValue Op0Op1 = Op0->getOperand(1); 720 SDValue Op1Op0 = Op1->getOperand(0); 721 SDValue Op1Op1 = Op1->getOperand(1); 722 bool IsLittleEndian = !Subtarget.isLittle(); 723 724 SDValue IfSet, IfClr, Cond; 725 bool IsConstantMask = false; 726 APInt Mask, InvMask; 727 728 // If Op0Op0 is an appropriate mask, try to find it's inverse in either 729 // Op1Op0, or Op1Op1. Keep track of the Cond, IfSet, and IfClr nodes, while 730 // looking. 731 // IfClr will be set if we find a valid match. 732 if (isVSplat(Op0Op0, Mask, IsLittleEndian)) { 733 Cond = Op0Op0; 734 IfSet = Op0Op1; 735 736 if (isVSplat(Op1Op0, InvMask, IsLittleEndian) && 737 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) 738 IfClr = Op1Op1; 739 else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) && 740 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) 741 IfClr = Op1Op0; 742 743 IsConstantMask = true; 744 } 745 746 // If IfClr is not yet set, and Op0Op1 is an appropriate mask, try the same 747 // thing again using this mask. 748 // IfClr will be set if we find a valid match. 
749 if (!IfClr.getNode() && isVSplat(Op0Op1, Mask, IsLittleEndian)) { 750 Cond = Op0Op1; 751 IfSet = Op0Op0; 752 753 if (isVSplat(Op1Op0, InvMask, IsLittleEndian) && 754 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) 755 IfClr = Op1Op1; 756 else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) && 757 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) 758 IfClr = Op1Op0; 759 760 IsConstantMask = true; 761 } 762 763 // If IfClr is not yet set, try looking for a non-constant match. 764 // IfClr will be set if we find a valid match amongst the eight 765 // possibilities. 766 if (!IfClr.getNode()) { 767 if (isBitwiseInverse(Op0Op0, Op1Op0)) { 768 Cond = Op1Op0; 769 IfSet = Op1Op1; 770 IfClr = Op0Op1; 771 } else if (isBitwiseInverse(Op0Op1, Op1Op0)) { 772 Cond = Op1Op0; 773 IfSet = Op1Op1; 774 IfClr = Op0Op0; 775 } else if (isBitwiseInverse(Op0Op0, Op1Op1)) { 776 Cond = Op1Op1; 777 IfSet = Op1Op0; 778 IfClr = Op0Op1; 779 } else if (isBitwiseInverse(Op0Op1, Op1Op1)) { 780 Cond = Op1Op1; 781 IfSet = Op1Op0; 782 IfClr = Op0Op0; 783 } else if (isBitwiseInverse(Op1Op0, Op0Op0)) { 784 Cond = Op0Op0; 785 IfSet = Op0Op1; 786 IfClr = Op1Op1; 787 } else if (isBitwiseInverse(Op1Op1, Op0Op0)) { 788 Cond = Op0Op0; 789 IfSet = Op0Op1; 790 IfClr = Op1Op0; 791 } else if (isBitwiseInverse(Op1Op0, Op0Op1)) { 792 Cond = Op0Op1; 793 IfSet = Op0Op0; 794 IfClr = Op1Op1; 795 } else if (isBitwiseInverse(Op1Op1, Op0Op1)) { 796 Cond = Op0Op1; 797 IfSet = Op0Op0; 798 IfClr = Op1Op0; 799 } 800 } 801 802 // At this point, IfClr will be set if we have a valid match. 803 if (!IfClr.getNode()) 804 return SDValue(); 805 806 assert(Cond.getNode() && IfSet.getNode()); 807 808 // Fold degenerate cases. 809 if (IsConstantMask) { 810 if (Mask.isAllOnesValue()) 811 return IfSet; 812 else if (Mask == 0) 813 return IfClr; 814 } 815 816 // Transform the DAG into an equivalent VSELECT. 
817 return DAG.getNode(ISD::VSELECT, SDLoc(N), Ty, Cond, IfSet, IfClr); 818 } 819 820 return SDValue(); 821 } 822 823 static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG, 824 TargetLowering::DAGCombinerInfo &DCI, 825 const MipsSubtarget &Subtarget) { 826 if (DCI.isBeforeLegalize()) 827 return SDValue(); 828 829 if (Subtarget.hasMips32() && N->getValueType(0) == MVT::i32 && 830 selectMSUB(N, &DAG)) 831 return SDValue(N, 0); 832 833 return SDValue(); 834 } 835 836 static SDValue genConstMult(SDValue X, uint64_t C, const SDLoc &DL, EVT VT, 837 EVT ShiftTy, SelectionDAG &DAG) { 838 // Clear the upper (64 - VT.sizeInBits) bits. 839 C &= ((uint64_t)-1) >> (64 - VT.getSizeInBits()); 840 841 // Return 0. 842 if (C == 0) 843 return DAG.getConstant(0, DL, VT); 844 845 // Return x. 846 if (C == 1) 847 return X; 848 849 // If c is power of 2, return (shl x, log2(c)). 850 if (isPowerOf2_64(C)) 851 return DAG.getNode(ISD::SHL, DL, VT, X, 852 DAG.getConstant(Log2_64(C), DL, ShiftTy)); 853 854 unsigned Log2Ceil = Log2_64_Ceil(C); 855 uint64_t Floor = 1LL << Log2_64(C); 856 uint64_t Ceil = Log2Ceil == 64 ? 0LL : 1LL << Log2Ceil; 857 858 // If |c - floor_c| <= |c - ceil_c|, 859 // where floor_c = pow(2, floor(log2(c))) and ceil_c = pow(2, ceil(log2(c))), 860 // return (add constMult(x, floor_c), constMult(x, c - floor_c)). 861 if (C - Floor <= Ceil - C) { 862 SDValue Op0 = genConstMult(X, Floor, DL, VT, ShiftTy, DAG); 863 SDValue Op1 = genConstMult(X, C - Floor, DL, VT, ShiftTy, DAG); 864 return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1); 865 } 866 867 // If |c - floor_c| > |c - ceil_c|, 868 // return (sub constMult(x, ceil_c), constMult(x, ceil_c - c)). 
869 SDValue Op0 = genConstMult(X, Ceil, DL, VT, ShiftTy, DAG); 870 SDValue Op1 = genConstMult(X, Ceil - C, DL, VT, ShiftTy, DAG); 871 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1); 872 } 873 874 static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, 875 const TargetLowering::DAGCombinerInfo &DCI, 876 const MipsSETargetLowering *TL) { 877 EVT VT = N->getValueType(0); 878 879 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) 880 if (!VT.isVector()) 881 return genConstMult(N->getOperand(0), C->getZExtValue(), SDLoc(N), VT, 882 TL->getScalarShiftAmountTy(DAG.getDataLayout(), VT), 883 DAG); 884 885 return SDValue(N, 0); 886 } 887 888 static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty, 889 SelectionDAG &DAG, 890 const MipsSubtarget &Subtarget) { 891 // See if this is a vector splat immediate node. 892 APInt SplatValue, SplatUndef; 893 unsigned SplatBitSize; 894 bool HasAnyUndefs; 895 unsigned EltSize = Ty.getScalarSizeInBits(); 896 BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); 897 898 if (!Subtarget.hasDSP()) 899 return SDValue(); 900 901 if (!BV || 902 !BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, 903 EltSize, !Subtarget.isLittle()) || 904 (SplatBitSize != EltSize) || 905 (SplatValue.getZExtValue() >= EltSize)) 906 return SDValue(); 907 908 SDLoc DL(N); 909 return DAG.getNode(Opc, DL, Ty, N->getOperand(0), 910 DAG.getConstant(SplatValue.getZExtValue(), DL, MVT::i32)); 911 } 912 913 static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG, 914 TargetLowering::DAGCombinerInfo &DCI, 915 const MipsSubtarget &Subtarget) { 916 EVT Ty = N->getValueType(0); 917 918 if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8)) 919 return SDValue(); 920 921 return performDSPShiftCombine(MipsISD::SHLL_DSP, N, Ty, DAG, Subtarget); 922 } 923 924 // Fold sign-extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT for MSA and fold 925 // constant splats into MipsISD::SHRA_DSP for DSPr2. 
926 // 927 // Performs the following transformations: 928 // - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to sign extension if its 929 // sign/zero-extension is completely overwritten by the new one performed by 930 // the ISD::SRA and ISD::SHL nodes. 931 // - Removes redundant sign extensions performed by an ISD::SRA and ISD::SHL 932 // sequence. 933 // 934 // See performDSPShiftCombine for more information about the transformation 935 // used for DSPr2. 936 static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, 937 TargetLowering::DAGCombinerInfo &DCI, 938 const MipsSubtarget &Subtarget) { 939 EVT Ty = N->getValueType(0); 940 941 if (Subtarget.hasMSA()) { 942 SDValue Op0 = N->getOperand(0); 943 SDValue Op1 = N->getOperand(1); 944 945 // (sra (shl (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d), imm:$d) 946 // where $d + sizeof($c) == 32 947 // or $d + sizeof($c) <= 32 and SExt 948 // -> (MipsVExtractSExt $a, $b, $c) 949 if (Op0->getOpcode() == ISD::SHL && Op1 == Op0->getOperand(1)) { 950 SDValue Op0Op0 = Op0->getOperand(0); 951 ConstantSDNode *ShAmount = dyn_cast<ConstantSDNode>(Op1); 952 953 if (!ShAmount) 954 return SDValue(); 955 956 if (Op0Op0->getOpcode() != MipsISD::VEXTRACT_SEXT_ELT && 957 Op0Op0->getOpcode() != MipsISD::VEXTRACT_ZEXT_ELT) 958 return SDValue(); 959 960 EVT ExtendTy = cast<VTSDNode>(Op0Op0->getOperand(2))->getVT(); 961 unsigned TotalBits = ShAmount->getZExtValue() + ExtendTy.getSizeInBits(); 962 963 if (TotalBits == 32 || 964 (Op0Op0->getOpcode() == MipsISD::VEXTRACT_SEXT_ELT && 965 TotalBits <= 32)) { 966 SDValue Ops[] = { Op0Op0->getOperand(0), Op0Op0->getOperand(1), 967 Op0Op0->getOperand(2) }; 968 return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, SDLoc(Op0Op0), 969 Op0Op0->getVTList(), 970 makeArrayRef(Ops, Op0Op0->getNumOperands())); 971 } 972 } 973 } 974 975 if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget.hasDSPR2())) 976 return SDValue(); 977 978 return performDSPShiftCombine(MipsISD::SHRA_DSP, N, Ty, DAG, Subtarget); 979 } 980 
981 982 static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, 983 TargetLowering::DAGCombinerInfo &DCI, 984 const MipsSubtarget &Subtarget) { 985 EVT Ty = N->getValueType(0); 986 987 if (((Ty != MVT::v2i16) || !Subtarget.hasDSPR2()) && (Ty != MVT::v4i8)) 988 return SDValue(); 989 990 return performDSPShiftCombine(MipsISD::SHRL_DSP, N, Ty, DAG, Subtarget); 991 } 992 993 static bool isLegalDSPCondCode(EVT Ty, ISD::CondCode CC) { 994 bool IsV216 = (Ty == MVT::v2i16); 995 996 switch (CC) { 997 case ISD::SETEQ: 998 case ISD::SETNE: return true; 999 case ISD::SETLT: 1000 case ISD::SETLE: 1001 case ISD::SETGT: 1002 case ISD::SETGE: return IsV216; 1003 case ISD::SETULT: 1004 case ISD::SETULE: 1005 case ISD::SETUGT: 1006 case ISD::SETUGE: return !IsV216; 1007 default: return false; 1008 } 1009 } 1010 1011 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) { 1012 EVT Ty = N->getValueType(0); 1013 1014 if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8)) 1015 return SDValue(); 1016 1017 if (!isLegalDSPCondCode(Ty, cast<CondCodeSDNode>(N->getOperand(2))->get())) 1018 return SDValue(); 1019 1020 return DAG.getNode(MipsISD::SETCC_DSP, SDLoc(N), Ty, N->getOperand(0), 1021 N->getOperand(1), N->getOperand(2)); 1022 } 1023 1024 static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) { 1025 EVT Ty = N->getValueType(0); 1026 1027 if (Ty.is128BitVector() && Ty.isInteger()) { 1028 // Try the following combines: 1029 // (vselect (setcc $a, $b, SETLT), $b, $a)) -> (vsmax $a, $b) 1030 // (vselect (setcc $a, $b, SETLE), $b, $a)) -> (vsmax $a, $b) 1031 // (vselect (setcc $a, $b, SETLT), $a, $b)) -> (vsmin $a, $b) 1032 // (vselect (setcc $a, $b, SETLE), $a, $b)) -> (vsmin $a, $b) 1033 // (vselect (setcc $a, $b, SETULT), $b, $a)) -> (vumax $a, $b) 1034 // (vselect (setcc $a, $b, SETULE), $b, $a)) -> (vumax $a, $b) 1035 // (vselect (setcc $a, $b, SETULT), $a, $b)) -> (vumin $a, $b) 1036 // (vselect (setcc $a, $b, SETULE), $a, $b)) -> (vumin $a, $b) 1037 // 
SETGT/SETGE/SETUGT/SETUGE variants of these will show up initially but 1038 // will be expanded to equivalent SETLT/SETLE/SETULT/SETULE versions by the 1039 // legalizer. 1040 SDValue Op0 = N->getOperand(0); 1041 1042 if (Op0->getOpcode() != ISD::SETCC) 1043 return SDValue(); 1044 1045 ISD::CondCode CondCode = cast<CondCodeSDNode>(Op0->getOperand(2))->get(); 1046 bool Signed; 1047 1048 if (CondCode == ISD::SETLT || CondCode == ISD::SETLE) 1049 Signed = true; 1050 else if (CondCode == ISD::SETULT || CondCode == ISD::SETULE) 1051 Signed = false; 1052 else 1053 return SDValue(); 1054 1055 SDValue Op1 = N->getOperand(1); 1056 SDValue Op2 = N->getOperand(2); 1057 SDValue Op0Op0 = Op0->getOperand(0); 1058 SDValue Op0Op1 = Op0->getOperand(1); 1059 1060 if (Op1 == Op0Op0 && Op2 == Op0Op1) 1061 return DAG.getNode(Signed ? MipsISD::VSMIN : MipsISD::VUMIN, SDLoc(N), 1062 Ty, Op1, Op2); 1063 else if (Op1 == Op0Op1 && Op2 == Op0Op0) 1064 return DAG.getNode(Signed ? MipsISD::VSMAX : MipsISD::VUMAX, SDLoc(N), 1065 Ty, Op1, Op2); 1066 } else if ((Ty == MVT::v2i16) || (Ty == MVT::v4i8)) { 1067 SDValue SetCC = N->getOperand(0); 1068 1069 if (SetCC.getOpcode() != MipsISD::SETCC_DSP) 1070 return SDValue(); 1071 1072 return DAG.getNode(MipsISD::SELECT_CC_DSP, SDLoc(N), Ty, 1073 SetCC.getOperand(0), SetCC.getOperand(1), 1074 N->getOperand(1), N->getOperand(2), SetCC.getOperand(2)); 1075 } 1076 1077 return SDValue(); 1078 } 1079 1080 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, 1081 const MipsSubtarget &Subtarget) { 1082 EVT Ty = N->getValueType(0); 1083 1084 if (Subtarget.hasMSA() && Ty.is128BitVector() && Ty.isInteger()) { 1085 // Try the following combines: 1086 // (xor (or $a, $b), (build_vector allones)) 1087 // (xor (or $a, $b), (bitcast (build_vector allones))) 1088 SDValue Op0 = N->getOperand(0); 1089 SDValue Op1 = N->getOperand(1); 1090 SDValue NotOp; 1091 1092 if (ISD::isBuildVectorAllOnes(Op0.getNode())) 1093 NotOp = Op1; 1094 else if 
(ISD::isBuildVectorAllOnes(Op1.getNode())) 1095 NotOp = Op0; 1096 else 1097 return SDValue(); 1098 1099 if (NotOp->getOpcode() == ISD::OR) 1100 return DAG.getNode(MipsISD::VNOR, SDLoc(N), Ty, NotOp->getOperand(0), 1101 NotOp->getOperand(1)); 1102 } 1103 1104 return SDValue(); 1105 } 1106 1107 SDValue 1108 MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { 1109 SelectionDAG &DAG = DCI.DAG; 1110 SDValue Val; 1111 1112 switch (N->getOpcode()) { 1113 case ISD::ADDE: 1114 return performADDECombine(N, DAG, DCI, Subtarget); 1115 case ISD::AND: 1116 Val = performANDCombine(N, DAG, DCI, Subtarget); 1117 break; 1118 case ISD::OR: 1119 Val = performORCombine(N, DAG, DCI, Subtarget); 1120 break; 1121 case ISD::SUBE: 1122 return performSUBECombine(N, DAG, DCI, Subtarget); 1123 case ISD::MUL: 1124 return performMULCombine(N, DAG, DCI, this); 1125 case ISD::SHL: 1126 return performSHLCombine(N, DAG, DCI, Subtarget); 1127 case ISD::SRA: 1128 return performSRACombine(N, DAG, DCI, Subtarget); 1129 case ISD::SRL: 1130 return performSRLCombine(N, DAG, DCI, Subtarget); 1131 case ISD::VSELECT: 1132 return performVSELECTCombine(N, DAG); 1133 case ISD::XOR: 1134 Val = performXORCombine(N, DAG, Subtarget); 1135 break; 1136 case ISD::SETCC: 1137 Val = performSETCCCombine(N, DAG); 1138 break; 1139 } 1140 1141 if (Val.getNode()) { 1142 DEBUG(dbgs() << "\nMipsSE DAG Combine:\n"; 1143 N->printrWithDepth(dbgs(), &DAG); 1144 dbgs() << "\n=> \n"; 1145 Val.getNode()->printrWithDepth(dbgs(), &DAG); 1146 dbgs() << "\n"); 1147 return Val; 1148 } 1149 1150 return MipsTargetLowering::PerformDAGCombine(N, DCI); 1151 } 1152 1153 MachineBasicBlock * 1154 MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, 1155 MachineBasicBlock *BB) const { 1156 switch (MI.getOpcode()) { 1157 default: 1158 return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB); 1159 case Mips::BPOSGE32_PSEUDO: 1160 return emitBPOSGE32(MI, BB); 1161 case Mips::SNZ_B_PSEUDO: 1162 return 
emitMSACBranchPseudo(MI, BB, Mips::BNZ_B); 1163 case Mips::SNZ_H_PSEUDO: 1164 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_H); 1165 case Mips::SNZ_W_PSEUDO: 1166 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_W); 1167 case Mips::SNZ_D_PSEUDO: 1168 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_D); 1169 case Mips::SNZ_V_PSEUDO: 1170 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_V); 1171 case Mips::SZ_B_PSEUDO: 1172 return emitMSACBranchPseudo(MI, BB, Mips::BZ_B); 1173 case Mips::SZ_H_PSEUDO: 1174 return emitMSACBranchPseudo(MI, BB, Mips::BZ_H); 1175 case Mips::SZ_W_PSEUDO: 1176 return emitMSACBranchPseudo(MI, BB, Mips::BZ_W); 1177 case Mips::SZ_D_PSEUDO: 1178 return emitMSACBranchPseudo(MI, BB, Mips::BZ_D); 1179 case Mips::SZ_V_PSEUDO: 1180 return emitMSACBranchPseudo(MI, BB, Mips::BZ_V); 1181 case Mips::COPY_FW_PSEUDO: 1182 return emitCOPY_FW(MI, BB); 1183 case Mips::COPY_FD_PSEUDO: 1184 return emitCOPY_FD(MI, BB); 1185 case Mips::INSERT_FW_PSEUDO: 1186 return emitINSERT_FW(MI, BB); 1187 case Mips::INSERT_FD_PSEUDO: 1188 return emitINSERT_FD(MI, BB); 1189 case Mips::INSERT_B_VIDX_PSEUDO: 1190 case Mips::INSERT_B_VIDX64_PSEUDO: 1191 return emitINSERT_DF_VIDX(MI, BB, 1, false); 1192 case Mips::INSERT_H_VIDX_PSEUDO: 1193 case Mips::INSERT_H_VIDX64_PSEUDO: 1194 return emitINSERT_DF_VIDX(MI, BB, 2, false); 1195 case Mips::INSERT_W_VIDX_PSEUDO: 1196 case Mips::INSERT_W_VIDX64_PSEUDO: 1197 return emitINSERT_DF_VIDX(MI, BB, 4, false); 1198 case Mips::INSERT_D_VIDX_PSEUDO: 1199 case Mips::INSERT_D_VIDX64_PSEUDO: 1200 return emitINSERT_DF_VIDX(MI, BB, 8, false); 1201 case Mips::INSERT_FW_VIDX_PSEUDO: 1202 case Mips::INSERT_FW_VIDX64_PSEUDO: 1203 return emitINSERT_DF_VIDX(MI, BB, 4, true); 1204 case Mips::INSERT_FD_VIDX_PSEUDO: 1205 case Mips::INSERT_FD_VIDX64_PSEUDO: 1206 return emitINSERT_DF_VIDX(MI, BB, 8, true); 1207 case Mips::FILL_FW_PSEUDO: 1208 return emitFILL_FW(MI, BB); 1209 case Mips::FILL_FD_PSEUDO: 1210 return emitFILL_FD(MI, BB); 1211 case Mips::FEXP2_W_1_PSEUDO: 
1212 return emitFEXP2_W_1(MI, BB); 1213 case Mips::FEXP2_D_1_PSEUDO: 1214 return emitFEXP2_D_1(MI, BB); 1215 case Mips::ST_F16: 1216 return emitST_F16_PSEUDO(MI, BB); 1217 case Mips::LD_F16: 1218 return emitLD_F16_PSEUDO(MI, BB); 1219 case Mips::MSA_FP_EXTEND_W_PSEUDO: 1220 return emitFPEXTEND_PSEUDO(MI, BB, false); 1221 case Mips::MSA_FP_ROUND_W_PSEUDO: 1222 return emitFPROUND_PSEUDO(MI, BB, false); 1223 case Mips::MSA_FP_EXTEND_D_PSEUDO: 1224 return emitFPEXTEND_PSEUDO(MI, BB, true); 1225 case Mips::MSA_FP_ROUND_D_PSEUDO: 1226 return emitFPROUND_PSEUDO(MI, BB, true); 1227 } 1228 } 1229 1230 bool MipsSETargetLowering::isEligibleForTailCallOptimization( 1231 const CCState &CCInfo, unsigned NextStackOffset, 1232 const MipsFunctionInfo &FI) const { 1233 if (!UseMipsTailCalls) 1234 return false; 1235 1236 // Exception has to be cleared with eret. 1237 if (FI.isISR()) 1238 return false; 1239 1240 // Return false if either the callee or caller has a byval argument. 1241 if (CCInfo.getInRegsParamsCount() > 0 || FI.hasByvalArg()) 1242 return false; 1243 1244 // Return true if the callee's argument area is no larger than the 1245 // caller's. 
1246 return NextStackOffset <= FI.getIncomingArgSize(); 1247 } 1248 1249 void MipsSETargetLowering:: 1250 getOpndList(SmallVectorImpl<SDValue> &Ops, 1251 std::deque< std::pair<unsigned, SDValue> > &RegsToPass, 1252 bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, 1253 bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee, 1254 SDValue Chain) const { 1255 Ops.push_back(Callee); 1256 MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal, 1257 InternalLinkage, IsCallReloc, CLI, Callee, 1258 Chain); 1259 } 1260 1261 SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const { 1262 LoadSDNode &Nd = *cast<LoadSDNode>(Op); 1263 1264 if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore) 1265 return MipsTargetLowering::lowerLOAD(Op, DAG); 1266 1267 // Replace a double precision load with two i32 loads and a buildpair64. 1268 SDLoc DL(Op); 1269 SDValue Ptr = Nd.getBasePtr(), Chain = Nd.getChain(); 1270 EVT PtrVT = Ptr.getValueType(); 1271 1272 // i32 load from lower address. 1273 SDValue Lo = DAG.getLoad(MVT::i32, DL, Chain, Ptr, MachinePointerInfo(), 1274 Nd.getAlignment(), Nd.getMemOperand()->getFlags()); 1275 1276 // i32 load from higher address. 
1277 Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT)); 1278 SDValue Hi = DAG.getLoad( 1279 MVT::i32, DL, Lo.getValue(1), Ptr, MachinePointerInfo(), 1280 std::min(Nd.getAlignment(), 4U), Nd.getMemOperand()->getFlags()); 1281 1282 if (!Subtarget.isLittle()) 1283 std::swap(Lo, Hi); 1284 1285 SDValue BP = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi); 1286 SDValue Ops[2] = {BP, Hi.getValue(1)}; 1287 return DAG.getMergeValues(Ops, DL); 1288 } 1289 1290 SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const { 1291 StoreSDNode &Nd = *cast<StoreSDNode>(Op); 1292 1293 if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore) 1294 return MipsTargetLowering::lowerSTORE(Op, DAG); 1295 1296 // Replace a double precision store with two extractelement64s and i32 stores. 1297 SDLoc DL(Op); 1298 SDValue Val = Nd.getValue(), Ptr = Nd.getBasePtr(), Chain = Nd.getChain(); 1299 EVT PtrVT = Ptr.getValueType(); 1300 SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, 1301 Val, DAG.getConstant(0, DL, MVT::i32)); 1302 SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, 1303 Val, DAG.getConstant(1, DL, MVT::i32)); 1304 1305 if (!Subtarget.isLittle()) 1306 std::swap(Lo, Hi); 1307 1308 // i32 store to lower address. 1309 Chain = 1310 DAG.getStore(Chain, DL, Lo, Ptr, MachinePointerInfo(), Nd.getAlignment(), 1311 Nd.getMemOperand()->getFlags(), Nd.getAAInfo()); 1312 1313 // i32 store to higher address. 1314 Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT)); 1315 return DAG.getStore(Chain, DL, Hi, Ptr, MachinePointerInfo(), 1316 std::min(Nd.getAlignment(), 4U), 1317 Nd.getMemOperand()->getFlags(), Nd.getAAInfo()); 1318 } 1319 1320 SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc, 1321 bool HasLo, bool HasHi, 1322 SelectionDAG &DAG) const { 1323 // MIPS32r6/MIPS64r6 removed accumulator based multiplies. 
1324 assert(!Subtarget.hasMips32r6()); 1325 1326 EVT Ty = Op.getOperand(0).getValueType(); 1327 SDLoc DL(Op); 1328 SDValue Mult = DAG.getNode(NewOpc, DL, MVT::Untyped, 1329 Op.getOperand(0), Op.getOperand(1)); 1330 SDValue Lo, Hi; 1331 1332 if (HasLo) 1333 Lo = DAG.getNode(MipsISD::MFLO, DL, Ty, Mult); 1334 if (HasHi) 1335 Hi = DAG.getNode(MipsISD::MFHI, DL, Ty, Mult); 1336 1337 if (!HasLo || !HasHi) 1338 return HasLo ? Lo : Hi; 1339 1340 SDValue Vals[] = { Lo, Hi }; 1341 return DAG.getMergeValues(Vals, DL); 1342 } 1343 1344 static SDValue initAccumulator(SDValue In, const SDLoc &DL, SelectionDAG &DAG) { 1345 SDValue InLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In, 1346 DAG.getConstant(0, DL, MVT::i32)); 1347 SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In, 1348 DAG.getConstant(1, DL, MVT::i32)); 1349 return DAG.getNode(MipsISD::MTLOHI, DL, MVT::Untyped, InLo, InHi); 1350 } 1351 1352 static SDValue extractLOHI(SDValue Op, const SDLoc &DL, SelectionDAG &DAG) { 1353 SDValue Lo = DAG.getNode(MipsISD::MFLO, DL, MVT::i32, Op); 1354 SDValue Hi = DAG.getNode(MipsISD::MFHI, DL, MVT::i32, Op); 1355 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi); 1356 } 1357 1358 // This function expands mips intrinsic nodes which have 64-bit input operands 1359 // or output values. 1360 // 1361 // out64 = intrinsic-node in64 1362 // => 1363 // lo = copy (extract-element (in64, 0)) 1364 // hi = copy (extract-element (in64, 1)) 1365 // mips-specific-node 1366 // v0 = copy lo 1367 // v1 = copy hi 1368 // out64 = merge-values (v0, v1) 1369 // 1370 static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { 1371 SDLoc DL(Op); 1372 bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other; 1373 SmallVector<SDValue, 3> Ops; 1374 unsigned OpNo = 0; 1375 1376 // See if Op has a chain input. 1377 if (HasChainIn) 1378 Ops.push_back(Op->getOperand(OpNo++)); 1379 1380 // The next operand is the intrinsic opcode. 
1381 assert(Op->getOperand(OpNo).getOpcode() == ISD::TargetConstant); 1382 1383 // See if the next operand has type i64. 1384 SDValue Opnd = Op->getOperand(++OpNo), In64; 1385 1386 if (Opnd.getValueType() == MVT::i64) 1387 In64 = initAccumulator(Opnd, DL, DAG); 1388 else 1389 Ops.push_back(Opnd); 1390 1391 // Push the remaining operands. 1392 for (++OpNo ; OpNo < Op->getNumOperands(); ++OpNo) 1393 Ops.push_back(Op->getOperand(OpNo)); 1394 1395 // Add In64 to the end of the list. 1396 if (In64.getNode()) 1397 Ops.push_back(In64); 1398 1399 // Scan output. 1400 SmallVector<EVT, 2> ResTys; 1401 1402 for (SDNode::value_iterator I = Op->value_begin(), E = Op->value_end(); 1403 I != E; ++I) 1404 ResTys.push_back((*I == MVT::i64) ? MVT::Untyped : *I); 1405 1406 // Create node. 1407 SDValue Val = DAG.getNode(Opc, DL, ResTys, Ops); 1408 SDValue Out = (ResTys[0] == MVT::Untyped) ? extractLOHI(Val, DL, DAG) : Val; 1409 1410 if (!HasChainIn) 1411 return Out; 1412 1413 assert(Val->getValueType(1) == MVT::Other); 1414 SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) }; 1415 return DAG.getMergeValues(Vals, DL); 1416 } 1417 1418 // Lower an MSA copy intrinsic into the specified SelectionDAG node 1419 static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { 1420 SDLoc DL(Op); 1421 SDValue Vec = Op->getOperand(1); 1422 SDValue Idx = Op->getOperand(2); 1423 EVT ResTy = Op->getValueType(0); 1424 EVT EltTy = Vec->getValueType(0).getVectorElementType(); 1425 1426 SDValue Result = DAG.getNode(Opc, DL, ResTy, Vec, Idx, 1427 DAG.getValueType(EltTy)); 1428 1429 return Result; 1430 } 1431 1432 static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) { 1433 EVT ResVecTy = Op->getValueType(0); 1434 EVT ViaVecTy = ResVecTy; 1435 SDLoc DL(Op); 1436 1437 // When ResVecTy == MVT::v2i64, LaneA is the upper 32 bits of the lane and 1438 // LaneB is the lower 32-bits. Otherwise LaneA and LaneB are alternating 1439 // lanes. 
1440 SDValue LaneA; 1441 SDValue LaneB = Op->getOperand(2); 1442 1443 if (ResVecTy == MVT::v2i64) { 1444 LaneA = DAG.getConstant(0, DL, MVT::i32); 1445 ViaVecTy = MVT::v4i32; 1446 } else 1447 LaneA = LaneB; 1448 1449 SDValue Ops[16] = { LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, 1450 LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB }; 1451 1452 SDValue Result = DAG.getBuildVector( 1453 ViaVecTy, DL, makeArrayRef(Ops, ViaVecTy.getVectorNumElements())); 1454 1455 if (ViaVecTy != ResVecTy) 1456 Result = DAG.getNode(ISD::BITCAST, DL, ResVecTy, Result); 1457 1458 return Result; 1459 } 1460 1461 static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, 1462 bool IsSigned = false) { 1463 return DAG.getConstant( 1464 APInt(Op->getValueType(0).getScalarType().getSizeInBits(), 1465 Op->getConstantOperandVal(ImmOp), IsSigned), 1466 SDLoc(Op), Op->getValueType(0)); 1467 } 1468 1469 static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue, 1470 bool BigEndian, SelectionDAG &DAG) { 1471 EVT ViaVecTy = VecTy; 1472 SDValue SplatValueA = SplatValue; 1473 SDValue SplatValueB = SplatValue; 1474 SDLoc DL(SplatValue); 1475 1476 if (VecTy == MVT::v2i64) { 1477 // v2i64 BUILD_VECTOR must be performed via v4i32 so split into i32's. 1478 ViaVecTy = MVT::v4i32; 1479 1480 SplatValueA = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValue); 1481 SplatValueB = DAG.getNode(ISD::SRL, DL, MVT::i64, SplatValue, 1482 DAG.getConstant(32, DL, MVT::i32)); 1483 SplatValueB = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValueB); 1484 } 1485 1486 // We currently hold the parts in little endian order. Swap them if 1487 // necessary. 
1488 if (BigEndian) 1489 std::swap(SplatValueA, SplatValueB); 1490 1491 SDValue Ops[16] = { SplatValueA, SplatValueB, SplatValueA, SplatValueB, 1492 SplatValueA, SplatValueB, SplatValueA, SplatValueB, 1493 SplatValueA, SplatValueB, SplatValueA, SplatValueB, 1494 SplatValueA, SplatValueB, SplatValueA, SplatValueB }; 1495 1496 SDValue Result = DAG.getBuildVector( 1497 ViaVecTy, DL, makeArrayRef(Ops, ViaVecTy.getVectorNumElements())); 1498 1499 if (VecTy != ViaVecTy) 1500 Result = DAG.getNode(ISD::BITCAST, DL, VecTy, Result); 1501 1502 return Result; 1503 } 1504 1505 static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG, 1506 unsigned Opc, SDValue Imm, 1507 bool BigEndian) { 1508 EVT VecTy = Op->getValueType(0); 1509 SDValue Exp2Imm; 1510 SDLoc DL(Op); 1511 1512 // The DAG Combiner can't constant fold bitcasted vectors yet so we must do it 1513 // here for now. 1514 if (VecTy == MVT::v2i64) { 1515 if (ConstantSDNode *CImm = dyn_cast<ConstantSDNode>(Imm)) { 1516 APInt BitImm = APInt(64, 1) << CImm->getAPIntValue(); 1517 1518 SDValue BitImmHiOp = DAG.getConstant(BitImm.lshr(32).trunc(32), DL, 1519 MVT::i32); 1520 SDValue BitImmLoOp = DAG.getConstant(BitImm.trunc(32), DL, MVT::i32); 1521 1522 if (BigEndian) 1523 std::swap(BitImmLoOp, BitImmHiOp); 1524 1525 Exp2Imm = DAG.getNode( 1526 ISD::BITCAST, DL, MVT::v2i64, 1527 DAG.getBuildVector(MVT::v4i32, DL, 1528 {BitImmLoOp, BitImmHiOp, BitImmLoOp, BitImmHiOp})); 1529 } 1530 } 1531 1532 if (!Exp2Imm.getNode()) { 1533 // We couldnt constant fold, do a vector shift instead 1534 1535 // Extend i32 to i64 if necessary. Sign or zero extend doesn't matter since 1536 // only values 0-63 are valid. 
1537 if (VecTy == MVT::v2i64) 1538 Imm = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Imm); 1539 1540 Exp2Imm = getBuildVectorSplat(VecTy, Imm, BigEndian, DAG); 1541 1542 Exp2Imm = DAG.getNode(ISD::SHL, DL, VecTy, DAG.getConstant(1, DL, VecTy), 1543 Exp2Imm); 1544 } 1545 1546 return DAG.getNode(Opc, DL, VecTy, Op->getOperand(1), Exp2Imm); 1547 } 1548 1549 static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) { 1550 EVT ResTy = Op->getValueType(0); 1551 SDLoc DL(Op); 1552 SDValue One = DAG.getConstant(1, DL, ResTy); 1553 SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, Op->getOperand(2)); 1554 1555 return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), 1556 DAG.getNOT(DL, Bit, ResTy)); 1557 } 1558 1559 static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) { 1560 SDLoc DL(Op); 1561 EVT ResTy = Op->getValueType(0); 1562 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) 1563 << cast<ConstantSDNode>(Op->getOperand(2))->getAPIntValue(); 1564 SDValue BitMask = DAG.getConstant(~BitImm, DL, ResTy); 1565 1566 return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), BitMask); 1567 } 1568 1569 SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, 1570 SelectionDAG &DAG) const { 1571 SDLoc DL(Op); 1572 unsigned Intrinsic = cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue(); 1573 switch (Intrinsic) { 1574 default: 1575 return SDValue(); 1576 case Intrinsic::mips_shilo: 1577 return lowerDSPIntr(Op, DAG, MipsISD::SHILO); 1578 case Intrinsic::mips_dpau_h_qbl: 1579 return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL); 1580 case Intrinsic::mips_dpau_h_qbr: 1581 return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR); 1582 case Intrinsic::mips_dpsu_h_qbl: 1583 return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL); 1584 case Intrinsic::mips_dpsu_h_qbr: 1585 return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR); 1586 case Intrinsic::mips_dpa_w_ph: 1587 return lowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH); 1588 case Intrinsic::mips_dps_w_ph: 1589 return 
lowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH); 1590 case Intrinsic::mips_dpax_w_ph: 1591 return lowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH); 1592 case Intrinsic::mips_dpsx_w_ph: 1593 return lowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH); 1594 case Intrinsic::mips_mulsa_w_ph: 1595 return lowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH); 1596 case Intrinsic::mips_mult: 1597 return lowerDSPIntr(Op, DAG, MipsISD::Mult); 1598 case Intrinsic::mips_multu: 1599 return lowerDSPIntr(Op, DAG, MipsISD::Multu); 1600 case Intrinsic::mips_madd: 1601 return lowerDSPIntr(Op, DAG, MipsISD::MAdd); 1602 case Intrinsic::mips_maddu: 1603 return lowerDSPIntr(Op, DAG, MipsISD::MAddu); 1604 case Intrinsic::mips_msub: 1605 return lowerDSPIntr(Op, DAG, MipsISD::MSub); 1606 case Intrinsic::mips_msubu: 1607 return lowerDSPIntr(Op, DAG, MipsISD::MSubu); 1608 case Intrinsic::mips_addv_b: 1609 case Intrinsic::mips_addv_h: 1610 case Intrinsic::mips_addv_w: 1611 case Intrinsic::mips_addv_d: 1612 return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1), 1613 Op->getOperand(2)); 1614 case Intrinsic::mips_addvi_b: 1615 case Intrinsic::mips_addvi_h: 1616 case Intrinsic::mips_addvi_w: 1617 case Intrinsic::mips_addvi_d: 1618 return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1), 1619 lowerMSASplatImm(Op, 2, DAG)); 1620 case Intrinsic::mips_and_v: 1621 return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1), 1622 Op->getOperand(2)); 1623 case Intrinsic::mips_andi_b: 1624 return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1), 1625 lowerMSASplatImm(Op, 2, DAG)); 1626 case Intrinsic::mips_bclr_b: 1627 case Intrinsic::mips_bclr_h: 1628 case Intrinsic::mips_bclr_w: 1629 case Intrinsic::mips_bclr_d: 1630 return lowerMSABitClear(Op, DAG); 1631 case Intrinsic::mips_bclri_b: 1632 case Intrinsic::mips_bclri_h: 1633 case Intrinsic::mips_bclri_w: 1634 case Intrinsic::mips_bclri_d: 1635 return lowerMSABitClearImm(Op, DAG); 1636 case Intrinsic::mips_binsli_b: 1637 case 
Intrinsic::mips_binsli_h: 1638 case Intrinsic::mips_binsli_w: 1639 case Intrinsic::mips_binsli_d: { 1640 // binsli_x(IfClear, IfSet, nbits) -> (vselect LBitsMask, IfSet, IfClear) 1641 EVT VecTy = Op->getValueType(0); 1642 EVT EltTy = VecTy.getVectorElementType(); 1643 if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits()) 1644 report_fatal_error("Immediate out of range"); 1645 APInt Mask = APInt::getHighBitsSet(EltTy.getSizeInBits(), 1646 Op->getConstantOperandVal(3)); 1647 return DAG.getNode(ISD::VSELECT, DL, VecTy, 1648 DAG.getConstant(Mask, DL, VecTy, true), 1649 Op->getOperand(2), Op->getOperand(1)); 1650 } 1651 case Intrinsic::mips_binsri_b: 1652 case Intrinsic::mips_binsri_h: 1653 case Intrinsic::mips_binsri_w: 1654 case Intrinsic::mips_binsri_d: { 1655 // binsri_x(IfClear, IfSet, nbits) -> (vselect RBitsMask, IfSet, IfClear) 1656 EVT VecTy = Op->getValueType(0); 1657 EVT EltTy = VecTy.getVectorElementType(); 1658 if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits()) 1659 report_fatal_error("Immediate out of range"); 1660 APInt Mask = APInt::getLowBitsSet(EltTy.getSizeInBits(), 1661 Op->getConstantOperandVal(3)); 1662 return DAG.getNode(ISD::VSELECT, DL, VecTy, 1663 DAG.getConstant(Mask, DL, VecTy, true), 1664 Op->getOperand(2), Op->getOperand(1)); 1665 } 1666 case Intrinsic::mips_bmnz_v: 1667 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3), 1668 Op->getOperand(2), Op->getOperand(1)); 1669 case Intrinsic::mips_bmnzi_b: 1670 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1671 lowerMSASplatImm(Op, 3, DAG), Op->getOperand(2), 1672 Op->getOperand(1)); 1673 case Intrinsic::mips_bmz_v: 1674 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3), 1675 Op->getOperand(1), Op->getOperand(2)); 1676 case Intrinsic::mips_bmzi_b: 1677 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1678 lowerMSASplatImm(Op, 3, DAG), Op->getOperand(1), 1679 Op->getOperand(2)); 1680 case Intrinsic::mips_bneg_b: 
1681 case Intrinsic::mips_bneg_h: 1682 case Intrinsic::mips_bneg_w: 1683 case Intrinsic::mips_bneg_d: { 1684 EVT VecTy = Op->getValueType(0); 1685 SDValue One = DAG.getConstant(1, DL, VecTy); 1686 1687 return DAG.getNode(ISD::XOR, DL, VecTy, Op->getOperand(1), 1688 DAG.getNode(ISD::SHL, DL, VecTy, One, 1689 Op->getOperand(2))); 1690 } 1691 case Intrinsic::mips_bnegi_b: 1692 case Intrinsic::mips_bnegi_h: 1693 case Intrinsic::mips_bnegi_w: 1694 case Intrinsic::mips_bnegi_d: 1695 return lowerMSABinaryBitImmIntr(Op, DAG, ISD::XOR, Op->getOperand(2), 1696 !Subtarget.isLittle()); 1697 case Intrinsic::mips_bnz_b: 1698 case Intrinsic::mips_bnz_h: 1699 case Intrinsic::mips_bnz_w: 1700 case Intrinsic::mips_bnz_d: 1701 return DAG.getNode(MipsISD::VALL_NONZERO, DL, Op->getValueType(0), 1702 Op->getOperand(1)); 1703 case Intrinsic::mips_bnz_v: 1704 return DAG.getNode(MipsISD::VANY_NONZERO, DL, Op->getValueType(0), 1705 Op->getOperand(1)); 1706 case Intrinsic::mips_bsel_v: 1707 // bsel_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear) 1708 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1709 Op->getOperand(1), Op->getOperand(3), 1710 Op->getOperand(2)); 1711 case Intrinsic::mips_bseli_b: 1712 // bseli_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear) 1713 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1714 Op->getOperand(1), lowerMSASplatImm(Op, 3, DAG), 1715 Op->getOperand(2)); 1716 case Intrinsic::mips_bset_b: 1717 case Intrinsic::mips_bset_h: 1718 case Intrinsic::mips_bset_w: 1719 case Intrinsic::mips_bset_d: { 1720 EVT VecTy = Op->getValueType(0); 1721 SDValue One = DAG.getConstant(1, DL, VecTy); 1722 1723 return DAG.getNode(ISD::OR, DL, VecTy, Op->getOperand(1), 1724 DAG.getNode(ISD::SHL, DL, VecTy, One, 1725 Op->getOperand(2))); 1726 } 1727 case Intrinsic::mips_bseti_b: 1728 case Intrinsic::mips_bseti_h: 1729 case Intrinsic::mips_bseti_w: 1730 case Intrinsic::mips_bseti_d: 1731 return lowerMSABinaryBitImmIntr(Op, DAG, ISD::OR, 
Op->getOperand(2), 1732 !Subtarget.isLittle()); 1733 case Intrinsic::mips_bz_b: 1734 case Intrinsic::mips_bz_h: 1735 case Intrinsic::mips_bz_w: 1736 case Intrinsic::mips_bz_d: 1737 return DAG.getNode(MipsISD::VALL_ZERO, DL, Op->getValueType(0), 1738 Op->getOperand(1)); 1739 case Intrinsic::mips_bz_v: 1740 return DAG.getNode(MipsISD::VANY_ZERO, DL, Op->getValueType(0), 1741 Op->getOperand(1)); 1742 case Intrinsic::mips_ceq_b: 1743 case Intrinsic::mips_ceq_h: 1744 case Intrinsic::mips_ceq_w: 1745 case Intrinsic::mips_ceq_d: 1746 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1747 Op->getOperand(2), ISD::SETEQ); 1748 case Intrinsic::mips_ceqi_b: 1749 case Intrinsic::mips_ceqi_h: 1750 case Intrinsic::mips_ceqi_w: 1751 case Intrinsic::mips_ceqi_d: 1752 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1753 lowerMSASplatImm(Op, 2, DAG, true), ISD::SETEQ); 1754 case Intrinsic::mips_cle_s_b: 1755 case Intrinsic::mips_cle_s_h: 1756 case Intrinsic::mips_cle_s_w: 1757 case Intrinsic::mips_cle_s_d: 1758 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1759 Op->getOperand(2), ISD::SETLE); 1760 case Intrinsic::mips_clei_s_b: 1761 case Intrinsic::mips_clei_s_h: 1762 case Intrinsic::mips_clei_s_w: 1763 case Intrinsic::mips_clei_s_d: 1764 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1765 lowerMSASplatImm(Op, 2, DAG, true), ISD::SETLE); 1766 case Intrinsic::mips_cle_u_b: 1767 case Intrinsic::mips_cle_u_h: 1768 case Intrinsic::mips_cle_u_w: 1769 case Intrinsic::mips_cle_u_d: 1770 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1771 Op->getOperand(2), ISD::SETULE); 1772 case Intrinsic::mips_clei_u_b: 1773 case Intrinsic::mips_clei_u_h: 1774 case Intrinsic::mips_clei_u_w: 1775 case Intrinsic::mips_clei_u_d: 1776 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1777 lowerMSASplatImm(Op, 2, DAG), ISD::SETULE); 1778 case Intrinsic::mips_clt_s_b: 1779 case Intrinsic::mips_clt_s_h: 1780 case 
Intrinsic::mips_clt_s_w: 1781 case Intrinsic::mips_clt_s_d: 1782 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1783 Op->getOperand(2), ISD::SETLT); 1784 case Intrinsic::mips_clti_s_b: 1785 case Intrinsic::mips_clti_s_h: 1786 case Intrinsic::mips_clti_s_w: 1787 case Intrinsic::mips_clti_s_d: 1788 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1789 lowerMSASplatImm(Op, 2, DAG, true), ISD::SETLT); 1790 case Intrinsic::mips_clt_u_b: 1791 case Intrinsic::mips_clt_u_h: 1792 case Intrinsic::mips_clt_u_w: 1793 case Intrinsic::mips_clt_u_d: 1794 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1795 Op->getOperand(2), ISD::SETULT); 1796 case Intrinsic::mips_clti_u_b: 1797 case Intrinsic::mips_clti_u_h: 1798 case Intrinsic::mips_clti_u_w: 1799 case Intrinsic::mips_clti_u_d: 1800 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1801 lowerMSASplatImm(Op, 2, DAG), ISD::SETULT); 1802 case Intrinsic::mips_copy_s_b: 1803 case Intrinsic::mips_copy_s_h: 1804 case Intrinsic::mips_copy_s_w: 1805 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT); 1806 case Intrinsic::mips_copy_s_d: 1807 if (Subtarget.hasMips64()) 1808 // Lower directly into VEXTRACT_SEXT_ELT since i64 is legal on Mips64. 1809 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT); 1810 else { 1811 // Lower into the generic EXTRACT_VECTOR_ELT node and let the type 1812 // legalizer and EXTRACT_VECTOR_ELT lowering sort it out. 1813 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), 1814 Op->getValueType(0), Op->getOperand(1), 1815 Op->getOperand(2)); 1816 } 1817 case Intrinsic::mips_copy_u_b: 1818 case Intrinsic::mips_copy_u_h: 1819 case Intrinsic::mips_copy_u_w: 1820 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT); 1821 case Intrinsic::mips_copy_u_d: 1822 if (Subtarget.hasMips64()) 1823 // Lower directly into VEXTRACT_ZEXT_ELT since i64 is legal on Mips64. 
1824 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT); 1825 else { 1826 // Lower into the generic EXTRACT_VECTOR_ELT node and let the type 1827 // legalizer and EXTRACT_VECTOR_ELT lowering sort it out. 1828 // Note: When i64 is illegal, this results in copy_s.w instructions 1829 // instead of copy_u.w instructions. This makes no difference to the 1830 // behaviour since i64 is only illegal when the register file is 32-bit. 1831 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), 1832 Op->getValueType(0), Op->getOperand(1), 1833 Op->getOperand(2)); 1834 } 1835 case Intrinsic::mips_div_s_b: 1836 case Intrinsic::mips_div_s_h: 1837 case Intrinsic::mips_div_s_w: 1838 case Intrinsic::mips_div_s_d: 1839 return DAG.getNode(ISD::SDIV, DL, Op->getValueType(0), Op->getOperand(1), 1840 Op->getOperand(2)); 1841 case Intrinsic::mips_div_u_b: 1842 case Intrinsic::mips_div_u_h: 1843 case Intrinsic::mips_div_u_w: 1844 case Intrinsic::mips_div_u_d: 1845 return DAG.getNode(ISD::UDIV, DL, Op->getValueType(0), Op->getOperand(1), 1846 Op->getOperand(2)); 1847 case Intrinsic::mips_fadd_w: 1848 case Intrinsic::mips_fadd_d: { 1849 // TODO: If intrinsics have fast-math-flags, propagate them. 
1850 return DAG.getNode(ISD::FADD, DL, Op->getValueType(0), Op->getOperand(1), 1851 Op->getOperand(2)); 1852 } 1853 // Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away 1854 case Intrinsic::mips_fceq_w: 1855 case Intrinsic::mips_fceq_d: 1856 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1857 Op->getOperand(2), ISD::SETOEQ); 1858 case Intrinsic::mips_fcle_w: 1859 case Intrinsic::mips_fcle_d: 1860 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1861 Op->getOperand(2), ISD::SETOLE); 1862 case Intrinsic::mips_fclt_w: 1863 case Intrinsic::mips_fclt_d: 1864 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1865 Op->getOperand(2), ISD::SETOLT); 1866 case Intrinsic::mips_fcne_w: 1867 case Intrinsic::mips_fcne_d: 1868 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1869 Op->getOperand(2), ISD::SETONE); 1870 case Intrinsic::mips_fcor_w: 1871 case Intrinsic::mips_fcor_d: 1872 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1873 Op->getOperand(2), ISD::SETO); 1874 case Intrinsic::mips_fcueq_w: 1875 case Intrinsic::mips_fcueq_d: 1876 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1877 Op->getOperand(2), ISD::SETUEQ); 1878 case Intrinsic::mips_fcule_w: 1879 case Intrinsic::mips_fcule_d: 1880 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1881 Op->getOperand(2), ISD::SETULE); 1882 case Intrinsic::mips_fcult_w: 1883 case Intrinsic::mips_fcult_d: 1884 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1885 Op->getOperand(2), ISD::SETULT); 1886 case Intrinsic::mips_fcun_w: 1887 case Intrinsic::mips_fcun_d: 1888 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1889 Op->getOperand(2), ISD::SETUO); 1890 case Intrinsic::mips_fcune_w: 1891 case Intrinsic::mips_fcune_d: 1892 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1893 Op->getOperand(2), ISD::SETUNE); 1894 case Intrinsic::mips_fdiv_w: 1895 case 
Intrinsic::mips_fdiv_d: { 1896 // TODO: If intrinsics have fast-math-flags, propagate them. 1897 return DAG.getNode(ISD::FDIV, DL, Op->getValueType(0), Op->getOperand(1), 1898 Op->getOperand(2)); 1899 } 1900 case Intrinsic::mips_ffint_u_w: 1901 case Intrinsic::mips_ffint_u_d: 1902 return DAG.getNode(ISD::UINT_TO_FP, DL, Op->getValueType(0), 1903 Op->getOperand(1)); 1904 case Intrinsic::mips_ffint_s_w: 1905 case Intrinsic::mips_ffint_s_d: 1906 return DAG.getNode(ISD::SINT_TO_FP, DL, Op->getValueType(0), 1907 Op->getOperand(1)); 1908 case Intrinsic::mips_fill_b: 1909 case Intrinsic::mips_fill_h: 1910 case Intrinsic::mips_fill_w: 1911 case Intrinsic::mips_fill_d: { 1912 EVT ResTy = Op->getValueType(0); 1913 SmallVector<SDValue, 16> Ops(ResTy.getVectorNumElements(), 1914 Op->getOperand(1)); 1915 1916 // If ResTy is v2i64 then the type legalizer will break this node down into 1917 // an equivalent v4i32. 1918 return DAG.getBuildVector(ResTy, DL, Ops); 1919 } 1920 case Intrinsic::mips_fexp2_w: 1921 case Intrinsic::mips_fexp2_d: { 1922 // TODO: If intrinsics have fast-math-flags, propagate them. 1923 EVT ResTy = Op->getValueType(0); 1924 return DAG.getNode( 1925 ISD::FMUL, SDLoc(Op), ResTy, Op->getOperand(1), 1926 DAG.getNode(ISD::FEXP2, SDLoc(Op), ResTy, Op->getOperand(2))); 1927 } 1928 case Intrinsic::mips_flog2_w: 1929 case Intrinsic::mips_flog2_d: 1930 return DAG.getNode(ISD::FLOG2, DL, Op->getValueType(0), Op->getOperand(1)); 1931 case Intrinsic::mips_fmadd_w: 1932 case Intrinsic::mips_fmadd_d: 1933 return DAG.getNode(ISD::FMA, SDLoc(Op), Op->getValueType(0), 1934 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); 1935 case Intrinsic::mips_fmul_w: 1936 case Intrinsic::mips_fmul_d: { 1937 // TODO: If intrinsics have fast-math-flags, propagate them. 
1938 return DAG.getNode(ISD::FMUL, DL, Op->getValueType(0), Op->getOperand(1), 1939 Op->getOperand(2)); 1940 } 1941 case Intrinsic::mips_fmsub_w: 1942 case Intrinsic::mips_fmsub_d: { 1943 // TODO: If intrinsics have fast-math-flags, propagate them. 1944 EVT ResTy = Op->getValueType(0); 1945 return DAG.getNode(ISD::FSUB, SDLoc(Op), ResTy, Op->getOperand(1), 1946 DAG.getNode(ISD::FMUL, SDLoc(Op), ResTy, 1947 Op->getOperand(2), Op->getOperand(3))); 1948 } 1949 case Intrinsic::mips_frint_w: 1950 case Intrinsic::mips_frint_d: 1951 return DAG.getNode(ISD::FRINT, DL, Op->getValueType(0), Op->getOperand(1)); 1952 case Intrinsic::mips_fsqrt_w: 1953 case Intrinsic::mips_fsqrt_d: 1954 return DAG.getNode(ISD::FSQRT, DL, Op->getValueType(0), Op->getOperand(1)); 1955 case Intrinsic::mips_fsub_w: 1956 case Intrinsic::mips_fsub_d: { 1957 // TODO: If intrinsics have fast-math-flags, propagate them. 1958 return DAG.getNode(ISD::FSUB, DL, Op->getValueType(0), Op->getOperand(1), 1959 Op->getOperand(2)); 1960 } 1961 case Intrinsic::mips_ftrunc_u_w: 1962 case Intrinsic::mips_ftrunc_u_d: 1963 return DAG.getNode(ISD::FP_TO_UINT, DL, Op->getValueType(0), 1964 Op->getOperand(1)); 1965 case Intrinsic::mips_ftrunc_s_w: 1966 case Intrinsic::mips_ftrunc_s_d: 1967 return DAG.getNode(ISD::FP_TO_SINT, DL, Op->getValueType(0), 1968 Op->getOperand(1)); 1969 case Intrinsic::mips_ilvev_b: 1970 case Intrinsic::mips_ilvev_h: 1971 case Intrinsic::mips_ilvev_w: 1972 case Intrinsic::mips_ilvev_d: 1973 return DAG.getNode(MipsISD::ILVEV, DL, Op->getValueType(0), 1974 Op->getOperand(1), Op->getOperand(2)); 1975 case Intrinsic::mips_ilvl_b: 1976 case Intrinsic::mips_ilvl_h: 1977 case Intrinsic::mips_ilvl_w: 1978 case Intrinsic::mips_ilvl_d: 1979 return DAG.getNode(MipsISD::ILVL, DL, Op->getValueType(0), 1980 Op->getOperand(1), Op->getOperand(2)); 1981 case Intrinsic::mips_ilvod_b: 1982 case Intrinsic::mips_ilvod_h: 1983 case Intrinsic::mips_ilvod_w: 1984 case Intrinsic::mips_ilvod_d: 1985 return 
DAG.getNode(MipsISD::ILVOD, DL, Op->getValueType(0), 1986 Op->getOperand(1), Op->getOperand(2)); 1987 case Intrinsic::mips_ilvr_b: 1988 case Intrinsic::mips_ilvr_h: 1989 case Intrinsic::mips_ilvr_w: 1990 case Intrinsic::mips_ilvr_d: 1991 return DAG.getNode(MipsISD::ILVR, DL, Op->getValueType(0), 1992 Op->getOperand(1), Op->getOperand(2)); 1993 case Intrinsic::mips_insert_b: 1994 case Intrinsic::mips_insert_h: 1995 case Intrinsic::mips_insert_w: 1996 case Intrinsic::mips_insert_d: 1997 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), Op->getValueType(0), 1998 Op->getOperand(1), Op->getOperand(3), Op->getOperand(2)); 1999 case Intrinsic::mips_insve_b: 2000 case Intrinsic::mips_insve_h: 2001 case Intrinsic::mips_insve_w: 2002 case Intrinsic::mips_insve_d: { 2003 // Report an error for out of range values. 2004 int64_t Max; 2005 switch (Intrinsic) { 2006 case Intrinsic::mips_insve_b: Max = 15; break; 2007 case Intrinsic::mips_insve_h: Max = 7; break; 2008 case Intrinsic::mips_insve_w: Max = 3; break; 2009 case Intrinsic::mips_insve_d: Max = 1; break; 2010 default: llvm_unreachable("Unmatched intrinsic"); 2011 } 2012 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 2013 if (Value < 0 || Value > Max) 2014 report_fatal_error("Immediate out of range"); 2015 return DAG.getNode(MipsISD::INSVE, DL, Op->getValueType(0), 2016 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3), 2017 DAG.getConstant(0, DL, MVT::i32)); 2018 } 2019 case Intrinsic::mips_ldi_b: 2020 case Intrinsic::mips_ldi_h: 2021 case Intrinsic::mips_ldi_w: 2022 case Intrinsic::mips_ldi_d: 2023 return lowerMSASplatImm(Op, 1, DAG, true); 2024 case Intrinsic::mips_lsa: 2025 case Intrinsic::mips_dlsa: { 2026 EVT ResTy = Op->getValueType(0); 2027 return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1), 2028 DAG.getNode(ISD::SHL, SDLoc(Op), ResTy, 2029 Op->getOperand(2), Op->getOperand(3))); 2030 } 2031 case Intrinsic::mips_maddv_b: 2032 case Intrinsic::mips_maddv_h: 2033 case 
Intrinsic::mips_maddv_w: 2034 case Intrinsic::mips_maddv_d: { 2035 EVT ResTy = Op->getValueType(0); 2036 return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1), 2037 DAG.getNode(ISD::MUL, SDLoc(Op), ResTy, 2038 Op->getOperand(2), Op->getOperand(3))); 2039 } 2040 case Intrinsic::mips_max_s_b: 2041 case Intrinsic::mips_max_s_h: 2042 case Intrinsic::mips_max_s_w: 2043 case Intrinsic::mips_max_s_d: 2044 return DAG.getNode(MipsISD::VSMAX, DL, Op->getValueType(0), 2045 Op->getOperand(1), Op->getOperand(2)); 2046 case Intrinsic::mips_max_u_b: 2047 case Intrinsic::mips_max_u_h: 2048 case Intrinsic::mips_max_u_w: 2049 case Intrinsic::mips_max_u_d: 2050 return DAG.getNode(MipsISD::VUMAX, DL, Op->getValueType(0), 2051 Op->getOperand(1), Op->getOperand(2)); 2052 case Intrinsic::mips_maxi_s_b: 2053 case Intrinsic::mips_maxi_s_h: 2054 case Intrinsic::mips_maxi_s_w: 2055 case Intrinsic::mips_maxi_s_d: 2056 return DAG.getNode(MipsISD::VSMAX, DL, Op->getValueType(0), 2057 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG, true)); 2058 case Intrinsic::mips_maxi_u_b: 2059 case Intrinsic::mips_maxi_u_h: 2060 case Intrinsic::mips_maxi_u_w: 2061 case Intrinsic::mips_maxi_u_d: 2062 return DAG.getNode(MipsISD::VUMAX, DL, Op->getValueType(0), 2063 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2064 case Intrinsic::mips_min_s_b: 2065 case Intrinsic::mips_min_s_h: 2066 case Intrinsic::mips_min_s_w: 2067 case Intrinsic::mips_min_s_d: 2068 return DAG.getNode(MipsISD::VSMIN, DL, Op->getValueType(0), 2069 Op->getOperand(1), Op->getOperand(2)); 2070 case Intrinsic::mips_min_u_b: 2071 case Intrinsic::mips_min_u_h: 2072 case Intrinsic::mips_min_u_w: 2073 case Intrinsic::mips_min_u_d: 2074 return DAG.getNode(MipsISD::VUMIN, DL, Op->getValueType(0), 2075 Op->getOperand(1), Op->getOperand(2)); 2076 case Intrinsic::mips_mini_s_b: 2077 case Intrinsic::mips_mini_s_h: 2078 case Intrinsic::mips_mini_s_w: 2079 case Intrinsic::mips_mini_s_d: 2080 return DAG.getNode(MipsISD::VSMIN, DL, 
Op->getValueType(0), 2081 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG, true)); 2082 case Intrinsic::mips_mini_u_b: 2083 case Intrinsic::mips_mini_u_h: 2084 case Intrinsic::mips_mini_u_w: 2085 case Intrinsic::mips_mini_u_d: 2086 return DAG.getNode(MipsISD::VUMIN, DL, Op->getValueType(0), 2087 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2088 case Intrinsic::mips_mod_s_b: 2089 case Intrinsic::mips_mod_s_h: 2090 case Intrinsic::mips_mod_s_w: 2091 case Intrinsic::mips_mod_s_d: 2092 return DAG.getNode(ISD::SREM, DL, Op->getValueType(0), Op->getOperand(1), 2093 Op->getOperand(2)); 2094 case Intrinsic::mips_mod_u_b: 2095 case Intrinsic::mips_mod_u_h: 2096 case Intrinsic::mips_mod_u_w: 2097 case Intrinsic::mips_mod_u_d: 2098 return DAG.getNode(ISD::UREM, DL, Op->getValueType(0), Op->getOperand(1), 2099 Op->getOperand(2)); 2100 case Intrinsic::mips_mulv_b: 2101 case Intrinsic::mips_mulv_h: 2102 case Intrinsic::mips_mulv_w: 2103 case Intrinsic::mips_mulv_d: 2104 return DAG.getNode(ISD::MUL, DL, Op->getValueType(0), Op->getOperand(1), 2105 Op->getOperand(2)); 2106 case Intrinsic::mips_msubv_b: 2107 case Intrinsic::mips_msubv_h: 2108 case Intrinsic::mips_msubv_w: 2109 case Intrinsic::mips_msubv_d: { 2110 EVT ResTy = Op->getValueType(0); 2111 return DAG.getNode(ISD::SUB, SDLoc(Op), ResTy, Op->getOperand(1), 2112 DAG.getNode(ISD::MUL, SDLoc(Op), ResTy, 2113 Op->getOperand(2), Op->getOperand(3))); 2114 } 2115 case Intrinsic::mips_nlzc_b: 2116 case Intrinsic::mips_nlzc_h: 2117 case Intrinsic::mips_nlzc_w: 2118 case Intrinsic::mips_nlzc_d: 2119 return DAG.getNode(ISD::CTLZ, DL, Op->getValueType(0), Op->getOperand(1)); 2120 case Intrinsic::mips_nor_v: { 2121 SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0), 2122 Op->getOperand(1), Op->getOperand(2)); 2123 return DAG.getNOT(DL, Res, Res->getValueType(0)); 2124 } 2125 case Intrinsic::mips_nori_b: { 2126 SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0), 2127 Op->getOperand(1), 2128 lowerMSASplatImm(Op, 2, 
DAG)); 2129 return DAG.getNOT(DL, Res, Res->getValueType(0)); 2130 } 2131 case Intrinsic::mips_or_v: 2132 return DAG.getNode(ISD::OR, DL, Op->getValueType(0), Op->getOperand(1), 2133 Op->getOperand(2)); 2134 case Intrinsic::mips_ori_b: 2135 return DAG.getNode(ISD::OR, DL, Op->getValueType(0), 2136 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2137 case Intrinsic::mips_pckev_b: 2138 case Intrinsic::mips_pckev_h: 2139 case Intrinsic::mips_pckev_w: 2140 case Intrinsic::mips_pckev_d: 2141 return DAG.getNode(MipsISD::PCKEV, DL, Op->getValueType(0), 2142 Op->getOperand(1), Op->getOperand(2)); 2143 case Intrinsic::mips_pckod_b: 2144 case Intrinsic::mips_pckod_h: 2145 case Intrinsic::mips_pckod_w: 2146 case Intrinsic::mips_pckod_d: 2147 return DAG.getNode(MipsISD::PCKOD, DL, Op->getValueType(0), 2148 Op->getOperand(1), Op->getOperand(2)); 2149 case Intrinsic::mips_pcnt_b: 2150 case Intrinsic::mips_pcnt_h: 2151 case Intrinsic::mips_pcnt_w: 2152 case Intrinsic::mips_pcnt_d: 2153 return DAG.getNode(ISD::CTPOP, DL, Op->getValueType(0), Op->getOperand(1)); 2154 case Intrinsic::mips_sat_s_b: 2155 case Intrinsic::mips_sat_s_h: 2156 case Intrinsic::mips_sat_s_w: 2157 case Intrinsic::mips_sat_s_d: 2158 case Intrinsic::mips_sat_u_b: 2159 case Intrinsic::mips_sat_u_h: 2160 case Intrinsic::mips_sat_u_w: 2161 case Intrinsic::mips_sat_u_d: { 2162 // Report an error for out of range values. 
2163 int64_t Max; 2164 switch (Intrinsic) { 2165 case Intrinsic::mips_sat_s_b: 2166 case Intrinsic::mips_sat_u_b: Max = 7; break; 2167 case Intrinsic::mips_sat_s_h: 2168 case Intrinsic::mips_sat_u_h: Max = 15; break; 2169 case Intrinsic::mips_sat_s_w: 2170 case Intrinsic::mips_sat_u_w: Max = 31; break; 2171 case Intrinsic::mips_sat_s_d: 2172 case Intrinsic::mips_sat_u_d: Max = 63; break; 2173 default: llvm_unreachable("Unmatched intrinsic"); 2174 } 2175 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 2176 if (Value < 0 || Value > Max) 2177 report_fatal_error("Immediate out of range"); 2178 return SDValue(); 2179 } 2180 case Intrinsic::mips_shf_b: 2181 case Intrinsic::mips_shf_h: 2182 case Intrinsic::mips_shf_w: { 2183 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 2184 if (Value < 0 || Value > 255) 2185 report_fatal_error("Immediate out of range"); 2186 return DAG.getNode(MipsISD::SHF, DL, Op->getValueType(0), 2187 Op->getOperand(2), Op->getOperand(1)); 2188 } 2189 case Intrinsic::mips_sldi_b: 2190 case Intrinsic::mips_sldi_h: 2191 case Intrinsic::mips_sldi_w: 2192 case Intrinsic::mips_sldi_d: { 2193 // Report an error for out of range values. 
2194 int64_t Max; 2195 switch (Intrinsic) { 2196 case Intrinsic::mips_sldi_b: Max = 15; break; 2197 case Intrinsic::mips_sldi_h: Max = 7; break; 2198 case Intrinsic::mips_sldi_w: Max = 3; break; 2199 case Intrinsic::mips_sldi_d: Max = 1; break; 2200 default: llvm_unreachable("Unmatched intrinsic"); 2201 } 2202 int64_t Value = cast<ConstantSDNode>(Op->getOperand(3))->getSExtValue(); 2203 if (Value < 0 || Value > Max) 2204 report_fatal_error("Immediate out of range"); 2205 return SDValue(); 2206 } 2207 case Intrinsic::mips_sll_b: 2208 case Intrinsic::mips_sll_h: 2209 case Intrinsic::mips_sll_w: 2210 case Intrinsic::mips_sll_d: 2211 return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), Op->getOperand(1), 2212 Op->getOperand(2)); 2213 case Intrinsic::mips_slli_b: 2214 case Intrinsic::mips_slli_h: 2215 case Intrinsic::mips_slli_w: 2216 case Intrinsic::mips_slli_d: 2217 return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), 2218 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2219 case Intrinsic::mips_splat_b: 2220 case Intrinsic::mips_splat_h: 2221 case Intrinsic::mips_splat_w: 2222 case Intrinsic::mips_splat_d: 2223 // We can't lower via VECTOR_SHUFFLE because it requires constant shuffle 2224 // masks, nor can we lower via BUILD_VECTOR & EXTRACT_VECTOR_ELT because 2225 // EXTRACT_VECTOR_ELT can't extract i64's on MIPS32. 2226 // Instead we lower to MipsISD::VSHF and match from there. 
2227 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0), 2228 lowerMSASplatZExt(Op, 2, DAG), Op->getOperand(1), 2229 Op->getOperand(1)); 2230 case Intrinsic::mips_splati_b: 2231 case Intrinsic::mips_splati_h: 2232 case Intrinsic::mips_splati_w: 2233 case Intrinsic::mips_splati_d: 2234 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0), 2235 lowerMSASplatImm(Op, 2, DAG), Op->getOperand(1), 2236 Op->getOperand(1)); 2237 case Intrinsic::mips_sra_b: 2238 case Intrinsic::mips_sra_h: 2239 case Intrinsic::mips_sra_w: 2240 case Intrinsic::mips_sra_d: 2241 return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), Op->getOperand(1), 2242 Op->getOperand(2)); 2243 case Intrinsic::mips_srai_b: 2244 case Intrinsic::mips_srai_h: 2245 case Intrinsic::mips_srai_w: 2246 case Intrinsic::mips_srai_d: 2247 return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), 2248 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2249 case Intrinsic::mips_srari_b: 2250 case Intrinsic::mips_srari_h: 2251 case Intrinsic::mips_srari_w: 2252 case Intrinsic::mips_srari_d: { 2253 // Report an error for out of range values. 
2254 int64_t Max; 2255 switch (Intrinsic) { 2256 case Intrinsic::mips_srari_b: Max = 7; break; 2257 case Intrinsic::mips_srari_h: Max = 15; break; 2258 case Intrinsic::mips_srari_w: Max = 31; break; 2259 case Intrinsic::mips_srari_d: Max = 63; break; 2260 default: llvm_unreachable("Unmatched intrinsic"); 2261 } 2262 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 2263 if (Value < 0 || Value > Max) 2264 report_fatal_error("Immediate out of range"); 2265 return SDValue(); 2266 } 2267 case Intrinsic::mips_srl_b: 2268 case Intrinsic::mips_srl_h: 2269 case Intrinsic::mips_srl_w: 2270 case Intrinsic::mips_srl_d: 2271 return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), Op->getOperand(1), 2272 Op->getOperand(2)); 2273 case Intrinsic::mips_srli_b: 2274 case Intrinsic::mips_srli_h: 2275 case Intrinsic::mips_srli_w: 2276 case Intrinsic::mips_srli_d: 2277 return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), 2278 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2279 case Intrinsic::mips_srlri_b: 2280 case Intrinsic::mips_srlri_h: 2281 case Intrinsic::mips_srlri_w: 2282 case Intrinsic::mips_srlri_d: { 2283 // Report an error for out of range values. 
2284 int64_t Max; 2285 switch (Intrinsic) { 2286 case Intrinsic::mips_srlri_b: Max = 7; break; 2287 case Intrinsic::mips_srlri_h: Max = 15; break; 2288 case Intrinsic::mips_srlri_w: Max = 31; break; 2289 case Intrinsic::mips_srlri_d: Max = 63; break; 2290 default: llvm_unreachable("Unmatched intrinsic"); 2291 } 2292 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 2293 if (Value < 0 || Value > Max) 2294 report_fatal_error("Immediate out of range"); 2295 return SDValue(); 2296 } 2297 case Intrinsic::mips_subv_b: 2298 case Intrinsic::mips_subv_h: 2299 case Intrinsic::mips_subv_w: 2300 case Intrinsic::mips_subv_d: 2301 return DAG.getNode(ISD::SUB, DL, Op->getValueType(0), Op->getOperand(1), 2302 Op->getOperand(2)); 2303 case Intrinsic::mips_subvi_b: 2304 case Intrinsic::mips_subvi_h: 2305 case Intrinsic::mips_subvi_w: 2306 case Intrinsic::mips_subvi_d: 2307 return DAG.getNode(ISD::SUB, DL, Op->getValueType(0), 2308 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2309 case Intrinsic::mips_vshf_b: 2310 case Intrinsic::mips_vshf_h: 2311 case Intrinsic::mips_vshf_w: 2312 case Intrinsic::mips_vshf_d: 2313 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0), 2314 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); 2315 case Intrinsic::mips_xor_v: 2316 return DAG.getNode(ISD::XOR, DL, Op->getValueType(0), Op->getOperand(1), 2317 Op->getOperand(2)); 2318 case Intrinsic::mips_xori_b: 2319 return DAG.getNode(ISD::XOR, DL, Op->getValueType(0), 2320 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2321 case Intrinsic::thread_pointer: { 2322 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2323 return DAG.getNode(MipsISD::ThreadPointer, DL, PtrVT); 2324 } 2325 } 2326 } 2327 2328 static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, 2329 const MipsSubtarget &Subtarget) { 2330 SDLoc DL(Op); 2331 SDValue ChainIn = Op->getOperand(0); 2332 SDValue Address = Op->getOperand(2); 2333 SDValue Offset = Op->getOperand(3); 2334 EVT 
ResTy = Op->getValueType(0); 2335 EVT PtrTy = Address->getValueType(0); 2336 2337 // For N64 addresses have the underlying type MVT::i64. This intrinsic 2338 // however takes an i32 signed constant offset. The actual type of the 2339 // intrinsic is a scaled signed i10. 2340 if (Subtarget.isABI_N64()) 2341 Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); 2342 2343 Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); 2344 return DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(), 2345 /* Alignment = */ 16); 2346 } 2347 2348 SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, 2349 SelectionDAG &DAG) const { 2350 unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue(); 2351 switch (Intr) { 2352 default: 2353 return SDValue(); 2354 case Intrinsic::mips_extp: 2355 return lowerDSPIntr(Op, DAG, MipsISD::EXTP); 2356 case Intrinsic::mips_extpdp: 2357 return lowerDSPIntr(Op, DAG, MipsISD::EXTPDP); 2358 case Intrinsic::mips_extr_w: 2359 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_W); 2360 case Intrinsic::mips_extr_r_w: 2361 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W); 2362 case Intrinsic::mips_extr_rs_w: 2363 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W); 2364 case Intrinsic::mips_extr_s_h: 2365 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H); 2366 case Intrinsic::mips_mthlip: 2367 return lowerDSPIntr(Op, DAG, MipsISD::MTHLIP); 2368 case Intrinsic::mips_mulsaq_s_w_ph: 2369 return lowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH); 2370 case Intrinsic::mips_maq_s_w_phl: 2371 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL); 2372 case Intrinsic::mips_maq_s_w_phr: 2373 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR); 2374 case Intrinsic::mips_maq_sa_w_phl: 2375 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL); 2376 case Intrinsic::mips_maq_sa_w_phr: 2377 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR); 2378 case Intrinsic::mips_dpaq_s_w_ph: 2379 return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH); 2380 case 
Intrinsic::mips_dpsq_s_w_ph: 2381 return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH); 2382 case Intrinsic::mips_dpaq_sa_l_w: 2383 return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W); 2384 case Intrinsic::mips_dpsq_sa_l_w: 2385 return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W); 2386 case Intrinsic::mips_dpaqx_s_w_ph: 2387 return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH); 2388 case Intrinsic::mips_dpaqx_sa_w_ph: 2389 return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH); 2390 case Intrinsic::mips_dpsqx_s_w_ph: 2391 return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH); 2392 case Intrinsic::mips_dpsqx_sa_w_ph: 2393 return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH); 2394 case Intrinsic::mips_ld_b: 2395 case Intrinsic::mips_ld_h: 2396 case Intrinsic::mips_ld_w: 2397 case Intrinsic::mips_ld_d: 2398 return lowerMSALoadIntr(Op, DAG, Intr, Subtarget); 2399 } 2400 } 2401 2402 static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, 2403 const MipsSubtarget &Subtarget) { 2404 SDLoc DL(Op); 2405 SDValue ChainIn = Op->getOperand(0); 2406 SDValue Value = Op->getOperand(2); 2407 SDValue Address = Op->getOperand(3); 2408 SDValue Offset = Op->getOperand(4); 2409 EVT PtrTy = Address->getValueType(0); 2410 2411 // For N64 addresses have the underlying type MVT::i64. This intrinsic 2412 // however takes an i32 signed constant offset. The actual type of the 2413 // intrinsic is a scaled signed i10. 
2414 if (Subtarget.isABI_N64()) 2415 Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); 2416 2417 Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); 2418 2419 return DAG.getStore(ChainIn, DL, Value, Address, MachinePointerInfo(), 2420 /* Alignment = */ 16); 2421 } 2422 2423 SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op, 2424 SelectionDAG &DAG) const { 2425 unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue(); 2426 switch (Intr) { 2427 default: 2428 return SDValue(); 2429 case Intrinsic::mips_st_b: 2430 case Intrinsic::mips_st_h: 2431 case Intrinsic::mips_st_w: 2432 case Intrinsic::mips_st_d: 2433 return lowerMSAStoreIntr(Op, DAG, Intr, Subtarget); 2434 } 2435 } 2436 2437 /// \brief Check if the given BuildVectorSDNode is a splat. 2438 /// This method currently relies on DAG nodes being reused when equivalent, 2439 /// so it's possible for this to return false even when isConstantSplat returns 2440 /// true. 2441 static bool isSplatVector(const BuildVectorSDNode *N) { 2442 unsigned int nOps = N->getNumOperands(); 2443 assert(nOps > 1 && "isSplatVector has 0 or 1 sized build vector"); 2444 2445 SDValue Operand0 = N->getOperand(0); 2446 2447 for (unsigned int i = 1; i < nOps; ++i) { 2448 if (N->getOperand(i) != Operand0) 2449 return false; 2450 } 2451 2452 return true; 2453 } 2454 2455 // Lower ISD::EXTRACT_VECTOR_ELT into MipsISD::VEXTRACT_SEXT_ELT. 2456 // 2457 // The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We 2458 // choose to sign-extend but we could have equally chosen zero-extend. The 2459 // DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT 2460 // result into this node later (possibly changing it to a zero-extend in the 2461 // process). 
2462 SDValue MipsSETargetLowering:: 2463 lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { 2464 SDLoc DL(Op); 2465 EVT ResTy = Op->getValueType(0); 2466 SDValue Op0 = Op->getOperand(0); 2467 EVT VecTy = Op0->getValueType(0); 2468 2469 if (!VecTy.is128BitVector()) 2470 return SDValue(); 2471 2472 if (ResTy.isInteger()) { 2473 SDValue Op1 = Op->getOperand(1); 2474 EVT EltTy = VecTy.getVectorElementType(); 2475 return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, DL, ResTy, Op0, Op1, 2476 DAG.getValueType(EltTy)); 2477 } 2478 2479 return Op; 2480 } 2481 2482 static bool isConstantOrUndef(const SDValue Op) { 2483 if (Op->isUndef()) 2484 return true; 2485 if (isa<ConstantSDNode>(Op)) 2486 return true; 2487 if (isa<ConstantFPSDNode>(Op)) 2488 return true; 2489 return false; 2490 } 2491 2492 static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) { 2493 for (unsigned i = 0; i < Op->getNumOperands(); ++i) 2494 if (isConstantOrUndef(Op->getOperand(i))) 2495 return true; 2496 return false; 2497 } 2498 2499 // Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the 2500 // backend. 2501 // 2502 // Lowers according to the following rules: 2503 // - Constant splats are legal as-is as long as the SplatBitSize is a power of 2504 // 2 less than or equal to 64 and the value fits into a signed 10-bit 2505 // immediate 2506 // - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize 2507 // is a power of 2 less than or equal to 64 and the value does not fit into a 2508 // signed 10-bit immediate 2509 // - Non-constant splats are legal as-is. 2510 // - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT. 2511 // - All others are illegal and must be expanded. 
2512 SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op, 2513 SelectionDAG &DAG) const { 2514 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op); 2515 EVT ResTy = Op->getValueType(0); 2516 SDLoc DL(Op); 2517 APInt SplatValue, SplatUndef; 2518 unsigned SplatBitSize; 2519 bool HasAnyUndefs; 2520 2521 if (!Subtarget.hasMSA() || !ResTy.is128BitVector()) 2522 return SDValue(); 2523 2524 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, 2525 HasAnyUndefs, 8, 2526 !Subtarget.isLittle()) && SplatBitSize <= 64) { 2527 // We can only cope with 8, 16, 32, or 64-bit elements 2528 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 && 2529 SplatBitSize != 64) 2530 return SDValue(); 2531 2532 // If the value fits into a simm10 then we can use ldi.[bhwd] 2533 // However, if it isn't an integer type we will have to bitcast from an 2534 // integer type first. Also, if there are any undefs, we must lower them 2535 // to defined values first. 2536 if (ResTy.isInteger() && !HasAnyUndefs && SplatValue.isSignedIntN(10)) 2537 return Op; 2538 2539 EVT ViaVecTy; 2540 2541 switch (SplatBitSize) { 2542 default: 2543 return SDValue(); 2544 case 8: 2545 ViaVecTy = MVT::v16i8; 2546 break; 2547 case 16: 2548 ViaVecTy = MVT::v8i16; 2549 break; 2550 case 32: 2551 ViaVecTy = MVT::v4i32; 2552 break; 2553 case 64: 2554 // There's no fill.d to fall back on for 64-bit values 2555 return SDValue(); 2556 } 2557 2558 // SelectionDAG::getConstant will promote SplatValue appropriately. 2559 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy); 2560 2561 // Bitcast to the type we originally wanted 2562 if (ViaVecTy != ResTy) 2563 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result); 2564 2565 return Result; 2566 } else if (isSplatVector(Node)) 2567 return Op; 2568 else if (!isConstantOrUndefBUILD_VECTOR(Node)) { 2569 // Use INSERT_VECTOR_ELT operations rather than expand to stores. 
2570 // The resulting code is the same length as the expansion, but it doesn't 2571 // use memory operations 2572 EVT ResTy = Node->getValueType(0); 2573 2574 assert(ResTy.isVector()); 2575 2576 unsigned NumElts = ResTy.getVectorNumElements(); 2577 SDValue Vector = DAG.getUNDEF(ResTy); 2578 for (unsigned i = 0; i < NumElts; ++i) { 2579 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, 2580 Node->getOperand(i), 2581 DAG.getConstant(i, DL, MVT::i32)); 2582 } 2583 return Vector; 2584 } 2585 2586 return SDValue(); 2587 } 2588 2589 // Lower VECTOR_SHUFFLE into SHF (if possible). 2590 // 2591 // SHF splits the vector into blocks of four elements, then shuffles these 2592 // elements according to a <4 x i2> constant (encoded as an integer immediate). 2593 // 2594 // It is therefore possible to lower into SHF when the mask takes the form: 2595 // <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...> 2596 // When undef's appear they are treated as if they were whatever value is 2597 // necessary in order to fit the above forms. 
2598 // 2599 // For example: 2600 // %2 = shufflevector <8 x i16> %0, <8 x i16> undef, 2601 // <8 x i32> <i32 3, i32 2, i32 1, i32 0, 2602 // i32 7, i32 6, i32 5, i32 4> 2603 // is lowered to: 2604 // (SHF_H $w0, $w1, 27) 2605 // where the 27 comes from: 2606 // 3 + (2 << 2) + (1 << 4) + (0 << 6) 2607 static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy, 2608 SmallVector<int, 16> Indices, 2609 SelectionDAG &DAG) { 2610 int SHFIndices[4] = { -1, -1, -1, -1 }; 2611 2612 if (Indices.size() < 4) 2613 return SDValue(); 2614 2615 for (unsigned i = 0; i < 4; ++i) { 2616 for (unsigned j = i; j < Indices.size(); j += 4) { 2617 int Idx = Indices[j]; 2618 2619 // Convert from vector index to 4-element subvector index 2620 // If an index refers to an element outside of the subvector then give up 2621 if (Idx != -1) { 2622 Idx -= 4 * (j / 4); 2623 if (Idx < 0 || Idx >= 4) 2624 return SDValue(); 2625 } 2626 2627 // If the mask has an undef, replace it with the current index. 2628 // Note that it might still be undef if the current index is also undef 2629 if (SHFIndices[i] == -1) 2630 SHFIndices[i] = Idx; 2631 2632 // Check that non-undef values are the same as in the mask. If they 2633 // aren't then give up 2634 if (!(Idx == -1 || Idx == SHFIndices[i])) 2635 return SDValue(); 2636 } 2637 } 2638 2639 // Calculate the immediate. Replace any remaining undefs with zero 2640 APInt Imm(32, 0); 2641 for (int i = 3; i >= 0; --i) { 2642 int Idx = SHFIndices[i]; 2643 2644 if (Idx == -1) 2645 Idx = 0; 2646 2647 Imm <<= 2; 2648 Imm |= Idx & 0x3; 2649 } 2650 2651 SDLoc DL(Op); 2652 return DAG.getNode(MipsISD::SHF, DL, ResTy, 2653 DAG.getConstant(Imm, DL, MVT::i32), Op->getOperand(0)); 2654 } 2655 2656 /// Determine whether a range fits a regular pattern of values. 2657 /// This function accounts for the possibility of jumping over the End iterator. 
2658 template <typename ValType> 2659 static bool 2660 fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin, 2661 unsigned CheckStride, 2662 typename SmallVectorImpl<ValType>::const_iterator End, 2663 ValType ExpectedIndex, unsigned ExpectedIndexStride) { 2664 auto &I = Begin; 2665 2666 while (I != End) { 2667 if (*I != -1 && *I != ExpectedIndex) 2668 return false; 2669 ExpectedIndex += ExpectedIndexStride; 2670 2671 // Incrementing past End is undefined behaviour so we must increment one 2672 // step at a time and check for End at each step. 2673 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I) 2674 ; // Empty loop body. 2675 } 2676 return true; 2677 } 2678 2679 // Determine whether VECTOR_SHUFFLE is a SPLATI. 2680 // 2681 // It is a SPLATI when the mask is: 2682 // <x, x, x, ...> 2683 // where x is any valid index. 2684 // 2685 // When undef's appear in the mask they are treated as if they were whatever 2686 // value is necessary in order to fit the above form. 2687 static bool isVECTOR_SHUFFLE_SPLATI(SDValue Op, EVT ResTy, 2688 SmallVector<int, 16> Indices, 2689 SelectionDAG &DAG) { 2690 assert((Indices.size() % 2) == 0); 2691 2692 int SplatIndex = -1; 2693 for (const auto &V : Indices) { 2694 if (V != -1) { 2695 SplatIndex = V; 2696 break; 2697 } 2698 } 2699 2700 return fitsRegularPattern<int>(Indices.begin(), 1, Indices.end(), SplatIndex, 2701 0); 2702 } 2703 2704 // Lower VECTOR_SHUFFLE into ILVEV (if possible). 2705 // 2706 // ILVEV interleaves the even elements from each vector. 2707 // 2708 // It is possible to lower into ILVEV when the mask consists of two of the 2709 // following forms interleaved: 2710 // <0, 2, 4, ...> 2711 // <n, n+2, n+4, ...> 2712 // where n is the number of elements in the vector. 
2713 // For example: 2714 // <0, 0, 2, 2, 4, 4, ...> 2715 // <0, n, 2, n+2, 4, n+4, ...> 2716 // 2717 // When undef's appear in the mask they are treated as if they were whatever 2718 // value is necessary in order to fit the above forms. 2719 static SDValue lowerVECTOR_SHUFFLE_ILVEV(SDValue Op, EVT ResTy, 2720 SmallVector<int, 16> Indices, 2721 SelectionDAG &DAG) { 2722 assert((Indices.size() % 2) == 0); 2723 2724 SDValue Wt; 2725 SDValue Ws; 2726 const auto &Begin = Indices.begin(); 2727 const auto &End = Indices.end(); 2728 2729 // Check even elements are taken from the even elements of one half or the 2730 // other and pick an operand accordingly. 2731 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2)) 2732 Wt = Op->getOperand(0); 2733 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 2)) 2734 Wt = Op->getOperand(1); 2735 else 2736 return SDValue(); 2737 2738 // Check odd elements are taken from the even elements of one half or the 2739 // other and pick an operand accordingly. 2740 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2)) 2741 Ws = Op->getOperand(0); 2742 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 2)) 2743 Ws = Op->getOperand(1); 2744 else 2745 return SDValue(); 2746 2747 return DAG.getNode(MipsISD::ILVEV, SDLoc(Op), ResTy, Ws, Wt); 2748 } 2749 2750 // Lower VECTOR_SHUFFLE into ILVOD (if possible). 2751 // 2752 // ILVOD interleaves the odd elements from each vector. 2753 // 2754 // It is possible to lower into ILVOD when the mask consists of two of the 2755 // following forms interleaved: 2756 // <1, 3, 5, ...> 2757 // <n+1, n+3, n+5, ...> 2758 // where n is the number of elements in the vector. 2759 // For example: 2760 // <1, 1, 3, 3, 5, 5, ...> 2761 // <1, n+1, 3, n+3, 5, n+5, ...> 2762 // 2763 // When undef's appear in the mask they are treated as if they were whatever 2764 // value is necessary in order to fit the above forms. 
2765 static SDValue lowerVECTOR_SHUFFLE_ILVOD(SDValue Op, EVT ResTy, 2766 SmallVector<int, 16> Indices, 2767 SelectionDAG &DAG) { 2768 assert((Indices.size() % 2) == 0); 2769 2770 SDValue Wt; 2771 SDValue Ws; 2772 const auto &Begin = Indices.begin(); 2773 const auto &End = Indices.end(); 2774 2775 // Check even elements are taken from the odd elements of one half or the 2776 // other and pick an operand accordingly. 2777 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2)) 2778 Wt = Op->getOperand(0); 2779 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + 1, 2)) 2780 Wt = Op->getOperand(1); 2781 else 2782 return SDValue(); 2783 2784 // Check odd elements are taken from the odd elements of one half or the 2785 // other and pick an operand accordingly. 2786 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2)) 2787 Ws = Op->getOperand(0); 2788 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + 1, 2)) 2789 Ws = Op->getOperand(1); 2790 else 2791 return SDValue(); 2792 2793 return DAG.getNode(MipsISD::ILVOD, SDLoc(Op), ResTy, Wt, Ws); 2794 } 2795 2796 // Lower VECTOR_SHUFFLE into ILVR (if possible). 2797 // 2798 // ILVR interleaves consecutive elements from the right (lowest-indexed) half of 2799 // each vector. 2800 // 2801 // It is possible to lower into ILVR when the mask consists of two of the 2802 // following forms interleaved: 2803 // <0, 1, 2, ...> 2804 // <n, n+1, n+2, ...> 2805 // where n is the number of elements in the vector. 2806 // For example: 2807 // <0, 0, 1, 1, 2, 2, ...> 2808 // <0, n, 1, n+1, 2, n+2, ...> 2809 // 2810 // When undef's appear in the mask they are treated as if they were whatever 2811 // value is necessary in order to fit the above forms. 
2812 static SDValue lowerVECTOR_SHUFFLE_ILVR(SDValue Op, EVT ResTy, 2813 SmallVector<int, 16> Indices, 2814 SelectionDAG &DAG) { 2815 assert((Indices.size() % 2) == 0); 2816 2817 SDValue Wt; 2818 SDValue Ws; 2819 const auto &Begin = Indices.begin(); 2820 const auto &End = Indices.end(); 2821 2822 // Check even elements are taken from the right (lowest-indexed) elements of 2823 // one half or the other and pick an operand accordingly. 2824 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1)) 2825 Wt = Op->getOperand(0); 2826 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 1)) 2827 Wt = Op->getOperand(1); 2828 else 2829 return SDValue(); 2830 2831 // Check odd elements are taken from the right (lowest-indexed) elements of 2832 // one half or the other and pick an operand accordingly. 2833 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1)) 2834 Ws = Op->getOperand(0); 2835 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 1)) 2836 Ws = Op->getOperand(1); 2837 else 2838 return SDValue(); 2839 2840 return DAG.getNode(MipsISD::ILVR, SDLoc(Op), ResTy, Ws, Wt); 2841 } 2842 2843 // Lower VECTOR_SHUFFLE into ILVL (if possible). 2844 // 2845 // ILVL interleaves consecutive elements from the left (highest-indexed) half 2846 // of each vector. 2847 // 2848 // It is possible to lower into ILVL when the mask consists of two of the 2849 // following forms interleaved: 2850 // <x, x+1, x+2, ...> 2851 // <n+x, n+x+1, n+x+2, ...> 2852 // where n is the number of elements in the vector and x is half n. 2853 // For example: 2854 // <x, x, x+1, x+1, x+2, x+2, ...> 2855 // <x, n+x, x+1, n+x+1, x+2, n+x+2, ...> 2856 // 2857 // When undef's appear in the mask they are treated as if they were whatever 2858 // value is necessary in order to fit the above forms. 
2859 static SDValue lowerVECTOR_SHUFFLE_ILVL(SDValue Op, EVT ResTy, 2860 SmallVector<int, 16> Indices, 2861 SelectionDAG &DAG) { 2862 assert((Indices.size() % 2) == 0); 2863 2864 unsigned HalfSize = Indices.size() / 2; 2865 SDValue Wt; 2866 SDValue Ws; 2867 const auto &Begin = Indices.begin(); 2868 const auto &End = Indices.end(); 2869 2870 // Check even elements are taken from the left (highest-indexed) elements of 2871 // one half or the other and pick an operand accordingly. 2872 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1)) 2873 Wt = Op->getOperand(0); 2874 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + HalfSize, 1)) 2875 Wt = Op->getOperand(1); 2876 else 2877 return SDValue(); 2878 2879 // Check odd elements are taken from the left (highest-indexed) elements of 2880 // one half or the other and pick an operand accordingly. 2881 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1)) 2882 Ws = Op->getOperand(0); 2883 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + HalfSize, 2884 1)) 2885 Ws = Op->getOperand(1); 2886 else 2887 return SDValue(); 2888 2889 return DAG.getNode(MipsISD::ILVL, SDLoc(Op), ResTy, Ws, Wt); 2890 } 2891 2892 // Lower VECTOR_SHUFFLE into PCKEV (if possible). 2893 // 2894 // PCKEV copies the even elements of each vector into the result vector. 2895 // 2896 // It is possible to lower into PCKEV when the mask consists of two of the 2897 // following forms concatenated: 2898 // <0, 2, 4, ...> 2899 // <n, n+2, n+4, ...> 2900 // where n is the number of elements in the vector. 2901 // For example: 2902 // <0, 2, 4, ..., 0, 2, 4, ...> 2903 // <0, 2, 4, ..., n, n+2, n+4, ...> 2904 // 2905 // When undef's appear in the mask they are treated as if they were whatever 2906 // value is necessary in order to fit the above forms. 
2907 static SDValue lowerVECTOR_SHUFFLE_PCKEV(SDValue Op, EVT ResTy, 2908 SmallVector<int, 16> Indices, 2909 SelectionDAG &DAG) { 2910 assert((Indices.size() % 2) == 0); 2911 2912 SDValue Wt; 2913 SDValue Ws; 2914 const auto &Begin = Indices.begin(); 2915 const auto &Mid = Indices.begin() + Indices.size() / 2; 2916 const auto &End = Indices.end(); 2917 2918 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2)) 2919 Wt = Op->getOperand(0); 2920 else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size(), 2)) 2921 Wt = Op->getOperand(1); 2922 else 2923 return SDValue(); 2924 2925 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2)) 2926 Ws = Op->getOperand(0); 2927 else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size(), 2)) 2928 Ws = Op->getOperand(1); 2929 else 2930 return SDValue(); 2931 2932 return DAG.getNode(MipsISD::PCKEV, SDLoc(Op), ResTy, Ws, Wt); 2933 } 2934 2935 // Lower VECTOR_SHUFFLE into PCKOD (if possible). 2936 // 2937 // PCKOD copies the odd elements of each vector into the result vector. 2938 // 2939 // It is possible to lower into PCKOD when the mask consists of two of the 2940 // following forms concatenated: 2941 // <1, 3, 5, ...> 2942 // <n+1, n+3, n+5, ...> 2943 // where n is the number of elements in the vector. 2944 // For example: 2945 // <1, 3, 5, ..., 1, 3, 5, ...> 2946 // <1, 3, 5, ..., n+1, n+3, n+5, ...> 2947 // 2948 // When undef's appear in the mask they are treated as if they were whatever 2949 // value is necessary in order to fit the above forms. 
2950 static SDValue lowerVECTOR_SHUFFLE_PCKOD(SDValue Op, EVT ResTy, 2951 SmallVector<int, 16> Indices, 2952 SelectionDAG &DAG) { 2953 assert((Indices.size() % 2) == 0); 2954 2955 SDValue Wt; 2956 SDValue Ws; 2957 const auto &Begin = Indices.begin(); 2958 const auto &Mid = Indices.begin() + Indices.size() / 2; 2959 const auto &End = Indices.end(); 2960 2961 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2)) 2962 Wt = Op->getOperand(0); 2963 else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size() + 1, 2)) 2964 Wt = Op->getOperand(1); 2965 else 2966 return SDValue(); 2967 2968 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2)) 2969 Ws = Op->getOperand(0); 2970 else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size() + 1, 2)) 2971 Ws = Op->getOperand(1); 2972 else 2973 return SDValue(); 2974 2975 return DAG.getNode(MipsISD::PCKOD, SDLoc(Op), ResTy, Ws, Wt); 2976 } 2977 2978 // Lower VECTOR_SHUFFLE into VSHF. 2979 // 2980 // This mostly consists of converting the shuffle indices in Indices into a 2981 // BUILD_VECTOR and adding it as an operand to the resulting VSHF. There is 2982 // also code to eliminate unused operands of the VECTOR_SHUFFLE. For example, 2983 // if the type is v8i16 and all the indices are less than 8 then the second 2984 // operand is unused and can be replaced with anything. We choose to replace it 2985 // with the used operand since this reduces the number of instructions overall. 
2986 static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy, 2987 SmallVector<int, 16> Indices, 2988 SelectionDAG &DAG) { 2989 SmallVector<SDValue, 16> Ops; 2990 SDValue Op0; 2991 SDValue Op1; 2992 EVT MaskVecTy = ResTy.changeVectorElementTypeToInteger(); 2993 EVT MaskEltTy = MaskVecTy.getVectorElementType(); 2994 bool Using1stVec = false; 2995 bool Using2ndVec = false; 2996 SDLoc DL(Op); 2997 int ResTyNumElts = ResTy.getVectorNumElements(); 2998 2999 for (int i = 0; i < ResTyNumElts; ++i) { 3000 // Idx == -1 means UNDEF 3001 int Idx = Indices[i]; 3002 3003 if (0 <= Idx && Idx < ResTyNumElts) 3004 Using1stVec = true; 3005 if (ResTyNumElts <= Idx && Idx < ResTyNumElts * 2) 3006 Using2ndVec = true; 3007 } 3008 3009 for (SmallVector<int, 16>::iterator I = Indices.begin(); I != Indices.end(); 3010 ++I) 3011 Ops.push_back(DAG.getTargetConstant(*I, DL, MaskEltTy)); 3012 3013 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops); 3014 3015 if (Using1stVec && Using2ndVec) { 3016 Op0 = Op->getOperand(0); 3017 Op1 = Op->getOperand(1); 3018 } else if (Using1stVec) 3019 Op0 = Op1 = Op->getOperand(0); 3020 else if (Using2ndVec) 3021 Op0 = Op1 = Op->getOperand(1); 3022 else 3023 llvm_unreachable("shuffle vector mask references neither vector operand?"); 3024 3025 // VECTOR_SHUFFLE concatenates the vectors in an vectorwise fashion. 3026 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11> 3027 // VSHF concatenates the vectors in a bitwise fashion: 3028 // <0b00, 0b01> + <0b10, 0b11> -> 3029 // 0b0100 + 0b1110 -> 0b01001110 3030 // <0b10, 0b11, 0b00, 0b01> 3031 // We must therefore swap the operands to get the correct result. 3032 return DAG.getNode(MipsISD::VSHF, DL, ResTy, MaskVec, Op1, Op0); 3033 } 3034 3035 // Lower VECTOR_SHUFFLE into one of a number of instructions depending on the 3036 // indices in the shuffle. 
3037 SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, 3038 SelectionDAG &DAG) const { 3039 ShuffleVectorSDNode *Node = cast<ShuffleVectorSDNode>(Op); 3040 EVT ResTy = Op->getValueType(0); 3041 3042 if (!ResTy.is128BitVector()) 3043 return SDValue(); 3044 3045 int ResTyNumElts = ResTy.getVectorNumElements(); 3046 SmallVector<int, 16> Indices; 3047 3048 for (int i = 0; i < ResTyNumElts; ++i) 3049 Indices.push_back(Node->getMaskElt(i)); 3050 3051 // splati.[bhwd] is preferable to the others but is matched from 3052 // MipsISD::VSHF. 3053 if (isVECTOR_SHUFFLE_SPLATI(Op, ResTy, Indices, DAG)) 3054 return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG); 3055 SDValue Result; 3056 if ((Result = lowerVECTOR_SHUFFLE_ILVEV(Op, ResTy, Indices, DAG))) 3057 return Result; 3058 if ((Result = lowerVECTOR_SHUFFLE_ILVOD(Op, ResTy, Indices, DAG))) 3059 return Result; 3060 if ((Result = lowerVECTOR_SHUFFLE_ILVL(Op, ResTy, Indices, DAG))) 3061 return Result; 3062 if ((Result = lowerVECTOR_SHUFFLE_ILVR(Op, ResTy, Indices, DAG))) 3063 return Result; 3064 if ((Result = lowerVECTOR_SHUFFLE_PCKEV(Op, ResTy, Indices, DAG))) 3065 return Result; 3066 if ((Result = lowerVECTOR_SHUFFLE_PCKOD(Op, ResTy, Indices, DAG))) 3067 return Result; 3068 if ((Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG))) 3069 return Result; 3070 return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG); 3071 } 3072 3073 MachineBasicBlock * 3074 MipsSETargetLowering::emitBPOSGE32(MachineInstr &MI, 3075 MachineBasicBlock *BB) const { 3076 // $bb: 3077 // bposge32_pseudo $vr0 3078 // => 3079 // $bb: 3080 // bposge32 $tbb 3081 // $fbb: 3082 // li $vr2, 0 3083 // b $sink 3084 // $tbb: 3085 // li $vr1, 1 3086 // $sink: 3087 // $vr0 = phi($vr2, $fbb, $vr1, $tbb) 3088 3089 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3090 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3091 const TargetRegisterClass *RC = &Mips::GPR32RegClass; 3092 DebugLoc DL = MI.getDebugLoc(); 3093 const 
BasicBlock *LLVM_BB = BB->getBasicBlock(); 3094 MachineFunction::iterator It = std::next(MachineFunction::iterator(BB)); 3095 MachineFunction *F = BB->getParent(); 3096 MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB); 3097 MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB); 3098 MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB); 3099 F->insert(It, FBB); 3100 F->insert(It, TBB); 3101 F->insert(It, Sink); 3102 3103 // Transfer the remainder of BB and its successor edges to Sink. 3104 Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), 3105 BB->end()); 3106 Sink->transferSuccessorsAndUpdatePHIs(BB); 3107 3108 // Add successors. 3109 BB->addSuccessor(FBB); 3110 BB->addSuccessor(TBB); 3111 FBB->addSuccessor(Sink); 3112 TBB->addSuccessor(Sink); 3113 3114 // Insert the real bposge32 instruction to $BB. 3115 BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB); 3116 // Insert the real bposge32c instruction to $BB. 3117 BuildMI(BB, DL, TII->get(Mips::BPOSGE32C_MMR3)).addMBB(TBB); 3118 3119 // Fill $FBB. 3120 unsigned VR2 = RegInfo.createVirtualRegister(RC); 3121 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2) 3122 .addReg(Mips::ZERO).addImm(0); 3123 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink); 3124 3125 // Fill $TBB. 3126 unsigned VR1 = RegInfo.createVirtualRegister(RC); 3127 BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1) 3128 .addReg(Mips::ZERO).addImm(1); 3129 3130 // Insert phi function to $Sink. 3131 BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI), 3132 MI.getOperand(0).getReg()) 3133 .addReg(VR2) 3134 .addMBB(FBB) 3135 .addReg(VR1) 3136 .addMBB(TBB); 3137 3138 MI.eraseFromParent(); // The pseudo instruction is gone now. 
3139 return Sink; 3140 } 3141 3142 MachineBasicBlock *MipsSETargetLowering::emitMSACBranchPseudo( 3143 MachineInstr &MI, MachineBasicBlock *BB, unsigned BranchOp) const { 3144 // $bb: 3145 // vany_nonzero $rd, $ws 3146 // => 3147 // $bb: 3148 // bnz.b $ws, $tbb 3149 // b $fbb 3150 // $fbb: 3151 // li $rd1, 0 3152 // b $sink 3153 // $tbb: 3154 // li $rd2, 1 3155 // $sink: 3156 // $rd = phi($rd1, $fbb, $rd2, $tbb) 3157 3158 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3159 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3160 const TargetRegisterClass *RC = &Mips::GPR32RegClass; 3161 DebugLoc DL = MI.getDebugLoc(); 3162 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 3163 MachineFunction::iterator It = std::next(MachineFunction::iterator(BB)); 3164 MachineFunction *F = BB->getParent(); 3165 MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB); 3166 MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB); 3167 MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB); 3168 F->insert(It, FBB); 3169 F->insert(It, TBB); 3170 F->insert(It, Sink); 3171 3172 // Transfer the remainder of BB and its successor edges to Sink. 3173 Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), 3174 BB->end()); 3175 Sink->transferSuccessorsAndUpdatePHIs(BB); 3176 3177 // Add successors. 3178 BB->addSuccessor(FBB); 3179 BB->addSuccessor(TBB); 3180 FBB->addSuccessor(Sink); 3181 TBB->addSuccessor(Sink); 3182 3183 // Insert the real bnz.b instruction to $BB. 3184 BuildMI(BB, DL, TII->get(BranchOp)) 3185 .addReg(MI.getOperand(1).getReg()) 3186 .addMBB(TBB); 3187 3188 // Fill $FBB. 3189 unsigned RD1 = RegInfo.createVirtualRegister(RC); 3190 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), RD1) 3191 .addReg(Mips::ZERO).addImm(0); 3192 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink); 3193 3194 // Fill $TBB. 
3195 unsigned RD2 = RegInfo.createVirtualRegister(RC); 3196 BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), RD2) 3197 .addReg(Mips::ZERO).addImm(1); 3198 3199 // Insert phi function to $Sink. 3200 BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI), 3201 MI.getOperand(0).getReg()) 3202 .addReg(RD1) 3203 .addMBB(FBB) 3204 .addReg(RD2) 3205 .addMBB(TBB); 3206 3207 MI.eraseFromParent(); // The pseudo instruction is gone now. 3208 return Sink; 3209 } 3210 3211 // Emit the COPY_FW pseudo instruction. 3212 // 3213 // copy_fw_pseudo $fd, $ws, n 3214 // => 3215 // copy_u_w $rt, $ws, $n 3216 // mtc1 $rt, $fd 3217 // 3218 // When n is zero, the equivalent operation can be performed with (potentially) 3219 // zero instructions due to register overlaps. This optimization is never valid 3220 // for lane 1 because it would require FR=0 mode which isn't supported by MSA. 3221 MachineBasicBlock * 3222 MipsSETargetLowering::emitCOPY_FW(MachineInstr &MI, 3223 MachineBasicBlock *BB) const { 3224 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3225 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3226 DebugLoc DL = MI.getDebugLoc(); 3227 unsigned Fd = MI.getOperand(0).getReg(); 3228 unsigned Ws = MI.getOperand(1).getReg(); 3229 unsigned Lane = MI.getOperand(2).getImm(); 3230 3231 if (Lane == 0) { 3232 unsigned Wt = Ws; 3233 if (!Subtarget.useOddSPReg()) { 3234 // We must copy to an even-numbered MSA register so that the 3235 // single-precision sub-register is also guaranteed to be even-numbered. 3236 Wt = RegInfo.createVirtualRegister(&Mips::MSA128WEvensRegClass); 3237 3238 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Wt).addReg(Ws); 3239 } 3240 3241 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo); 3242 } else { 3243 unsigned Wt = RegInfo.createVirtualRegister( 3244 Subtarget.useOddSPReg() ? 
&Mips::MSA128WRegClass : 3245 &Mips::MSA128WEvensRegClass); 3246 3247 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wt).addReg(Ws).addImm(Lane); 3248 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo); 3249 } 3250 3251 MI.eraseFromParent(); // The pseudo instruction is gone now. 3252 return BB; 3253 } 3254 3255 // Emit the COPY_FD pseudo instruction. 3256 // 3257 // copy_fd_pseudo $fd, $ws, n 3258 // => 3259 // splati.d $wt, $ws, $n 3260 // copy $fd, $wt:sub_64 3261 // 3262 // When n is zero, the equivalent operation can be performed with (potentially) 3263 // zero instructions due to register overlaps. This optimization is always 3264 // valid because FR=1 mode which is the only supported mode in MSA. 3265 MachineBasicBlock * 3266 MipsSETargetLowering::emitCOPY_FD(MachineInstr &MI, 3267 MachineBasicBlock *BB) const { 3268 assert(Subtarget.isFP64bit()); 3269 3270 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3271 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3272 unsigned Fd = MI.getOperand(0).getReg(); 3273 unsigned Ws = MI.getOperand(1).getReg(); 3274 unsigned Lane = MI.getOperand(2).getImm() * 2; 3275 DebugLoc DL = MI.getDebugLoc(); 3276 3277 if (Lane == 0) 3278 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, 0, Mips::sub_64); 3279 else { 3280 unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); 3281 3282 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wt).addReg(Ws).addImm(1); 3283 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_64); 3284 } 3285 3286 MI.eraseFromParent(); // The pseudo instruction is gone now. 3287 return BB; 3288 } 3289 3290 // Emit the INSERT_FW pseudo instruction. 
3291 // 3292 // insert_fw_pseudo $wd, $wd_in, $n, $fs 3293 // => 3294 // subreg_to_reg $wt:sub_lo, $fs 3295 // insve_w $wd[$n], $wd_in, $wt[0] 3296 MachineBasicBlock * 3297 MipsSETargetLowering::emitINSERT_FW(MachineInstr &MI, 3298 MachineBasicBlock *BB) const { 3299 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3300 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3301 DebugLoc DL = MI.getDebugLoc(); 3302 unsigned Wd = MI.getOperand(0).getReg(); 3303 unsigned Wd_in = MI.getOperand(1).getReg(); 3304 unsigned Lane = MI.getOperand(2).getImm(); 3305 unsigned Fs = MI.getOperand(3).getReg(); 3306 unsigned Wt = RegInfo.createVirtualRegister( 3307 Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass : 3308 &Mips::MSA128WEvensRegClass); 3309 3310 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt) 3311 .addImm(0) 3312 .addReg(Fs) 3313 .addImm(Mips::sub_lo); 3314 BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_W), Wd) 3315 .addReg(Wd_in) 3316 .addImm(Lane) 3317 .addReg(Wt) 3318 .addImm(0); 3319 3320 MI.eraseFromParent(); // The pseudo instruction is gone now. 3321 return BB; 3322 } 3323 3324 // Emit the INSERT_FD pseudo instruction. 
3325 // 3326 // insert_fd_pseudo $wd, $fs, n 3327 // => 3328 // subreg_to_reg $wt:sub_64, $fs 3329 // insve_d $wd[$n], $wd_in, $wt[0] 3330 MachineBasicBlock * 3331 MipsSETargetLowering::emitINSERT_FD(MachineInstr &MI, 3332 MachineBasicBlock *BB) const { 3333 assert(Subtarget.isFP64bit()); 3334 3335 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3336 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3337 DebugLoc DL = MI.getDebugLoc(); 3338 unsigned Wd = MI.getOperand(0).getReg(); 3339 unsigned Wd_in = MI.getOperand(1).getReg(); 3340 unsigned Lane = MI.getOperand(2).getImm(); 3341 unsigned Fs = MI.getOperand(3).getReg(); 3342 unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); 3343 3344 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt) 3345 .addImm(0) 3346 .addReg(Fs) 3347 .addImm(Mips::sub_64); 3348 BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_D), Wd) 3349 .addReg(Wd_in) 3350 .addImm(Lane) 3351 .addReg(Wt) 3352 .addImm(0); 3353 3354 MI.eraseFromParent(); // The pseudo instruction is gone now. 3355 return BB; 3356 } 3357 3358 // Emit the INSERT_([BHWD]|F[WD])_VIDX pseudo instruction. 
3359 // 3360 // For integer: 3361 // (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $rs) 3362 // => 3363 // (SLL $lanetmp1, $lane, <log2size) 3364 // (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1) 3365 // (INSERT_[BHWD], $wdtmp2, $wdtmp1, 0, $rs) 3366 // (NEG $lanetmp2, $lanetmp1) 3367 // (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2) 3368 // 3369 // For floating point: 3370 // (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $fs) 3371 // => 3372 // (SUBREG_TO_REG $wt, $fs, <subreg>) 3373 // (SLL $lanetmp1, $lane, <log2size) 3374 // (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1) 3375 // (INSVE_[WD], $wdtmp2, 0, $wdtmp1, 0) 3376 // (NEG $lanetmp2, $lanetmp1) 3377 // (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2) 3378 MachineBasicBlock *MipsSETargetLowering::emitINSERT_DF_VIDX( 3379 MachineInstr &MI, MachineBasicBlock *BB, unsigned EltSizeInBytes, 3380 bool IsFP) const { 3381 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3382 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3383 DebugLoc DL = MI.getDebugLoc(); 3384 unsigned Wd = MI.getOperand(0).getReg(); 3385 unsigned SrcVecReg = MI.getOperand(1).getReg(); 3386 unsigned LaneReg = MI.getOperand(2).getReg(); 3387 unsigned SrcValReg = MI.getOperand(3).getReg(); 3388 3389 const TargetRegisterClass *VecRC = nullptr; 3390 // FIXME: This should be true for N32 too. 3391 const TargetRegisterClass *GPRRC = 3392 Subtarget.isABI_N64() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; 3393 unsigned SubRegIdx = Subtarget.isABI_N64() ? Mips::sub_32 : 0; 3394 unsigned ShiftOp = Subtarget.isABI_N64() ? 
Mips::DSLL : Mips::SLL; 3395 unsigned EltLog2Size; 3396 unsigned InsertOp = 0; 3397 unsigned InsveOp = 0; 3398 switch (EltSizeInBytes) { 3399 default: 3400 llvm_unreachable("Unexpected size"); 3401 case 1: 3402 EltLog2Size = 0; 3403 InsertOp = Mips::INSERT_B; 3404 InsveOp = Mips::INSVE_B; 3405 VecRC = &Mips::MSA128BRegClass; 3406 break; 3407 case 2: 3408 EltLog2Size = 1; 3409 InsertOp = Mips::INSERT_H; 3410 InsveOp = Mips::INSVE_H; 3411 VecRC = &Mips::MSA128HRegClass; 3412 break; 3413 case 4: 3414 EltLog2Size = 2; 3415 InsertOp = Mips::INSERT_W; 3416 InsveOp = Mips::INSVE_W; 3417 VecRC = &Mips::MSA128WRegClass; 3418 break; 3419 case 8: 3420 EltLog2Size = 3; 3421 InsertOp = Mips::INSERT_D; 3422 InsveOp = Mips::INSVE_D; 3423 VecRC = &Mips::MSA128DRegClass; 3424 break; 3425 } 3426 3427 if (IsFP) { 3428 unsigned Wt = RegInfo.createVirtualRegister(VecRC); 3429 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt) 3430 .addImm(0) 3431 .addReg(SrcValReg) 3432 .addImm(EltSizeInBytes == 8 ? Mips::sub_64 : Mips::sub_lo); 3433 SrcValReg = Wt; 3434 } 3435 3436 // Convert the lane index into a byte index 3437 if (EltSizeInBytes != 1) { 3438 unsigned LaneTmp1 = RegInfo.createVirtualRegister(GPRRC); 3439 BuildMI(*BB, MI, DL, TII->get(ShiftOp), LaneTmp1) 3440 .addReg(LaneReg) 3441 .addImm(EltLog2Size); 3442 LaneReg = LaneTmp1; 3443 } 3444 3445 // Rotate bytes around so that the desired lane is element zero 3446 unsigned WdTmp1 = RegInfo.createVirtualRegister(VecRC); 3447 BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), WdTmp1) 3448 .addReg(SrcVecReg) 3449 .addReg(SrcVecReg) 3450 .addReg(LaneReg, 0, SubRegIdx); 3451 3452 unsigned WdTmp2 = RegInfo.createVirtualRegister(VecRC); 3453 if (IsFP) { 3454 // Use insve.df to insert to element zero 3455 BuildMI(*BB, MI, DL, TII->get(InsveOp), WdTmp2) 3456 .addReg(WdTmp1) 3457 .addImm(0) 3458 .addReg(SrcValReg) 3459 .addImm(0); 3460 } else { 3461 // Use insert.df to insert to element zero 3462 BuildMI(*BB, MI, DL, TII->get(InsertOp), WdTmp2) 
3463 .addReg(WdTmp1) 3464 .addReg(SrcValReg) 3465 .addImm(0); 3466 } 3467 3468 // Rotate elements the rest of the way for a full rotation. 3469 // sld.df inteprets $rt modulo the number of columns so we only need to negate 3470 // the lane index to do this. 3471 unsigned LaneTmp2 = RegInfo.createVirtualRegister(GPRRC); 3472 BuildMI(*BB, MI, DL, TII->get(Subtarget.isABI_N64() ? Mips::DSUB : Mips::SUB), 3473 LaneTmp2) 3474 .addReg(Subtarget.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO) 3475 .addReg(LaneReg); 3476 BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), Wd) 3477 .addReg(WdTmp2) 3478 .addReg(WdTmp2) 3479 .addReg(LaneTmp2, 0, SubRegIdx); 3480 3481 MI.eraseFromParent(); // The pseudo instruction is gone now. 3482 return BB; 3483 } 3484 3485 // Emit the FILL_FW pseudo instruction. 3486 // 3487 // fill_fw_pseudo $wd, $fs 3488 // => 3489 // implicit_def $wt1 3490 // insert_subreg $wt2:subreg_lo, $wt1, $fs 3491 // splati.w $wd, $wt2[0] 3492 MachineBasicBlock * 3493 MipsSETargetLowering::emitFILL_FW(MachineInstr &MI, 3494 MachineBasicBlock *BB) const { 3495 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3496 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3497 DebugLoc DL = MI.getDebugLoc(); 3498 unsigned Wd = MI.getOperand(0).getReg(); 3499 unsigned Fs = MI.getOperand(1).getReg(); 3500 unsigned Wt1 = RegInfo.createVirtualRegister( 3501 Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass 3502 : &Mips::MSA128WEvensRegClass); 3503 unsigned Wt2 = RegInfo.createVirtualRegister( 3504 Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass 3505 : &Mips::MSA128WEvensRegClass); 3506 3507 BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1); 3508 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2) 3509 .addReg(Wt1) 3510 .addReg(Fs) 3511 .addImm(Mips::sub_lo); 3512 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wd).addReg(Wt2).addImm(0); 3513 3514 MI.eraseFromParent(); // The pseudo instruction is gone now. 
3515 return BB; 3516 } 3517 3518 // Emit the FILL_FD pseudo instruction. 3519 // 3520 // fill_fd_pseudo $wd, $fs 3521 // => 3522 // implicit_def $wt1 3523 // insert_subreg $wt2:subreg_64, $wt1, $fs 3524 // splati.d $wd, $wt2[0] 3525 MachineBasicBlock * 3526 MipsSETargetLowering::emitFILL_FD(MachineInstr &MI, 3527 MachineBasicBlock *BB) const { 3528 assert(Subtarget.isFP64bit()); 3529 3530 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3531 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3532 DebugLoc DL = MI.getDebugLoc(); 3533 unsigned Wd = MI.getOperand(0).getReg(); 3534 unsigned Fs = MI.getOperand(1).getReg(); 3535 unsigned Wt1 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); 3536 unsigned Wt2 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); 3537 3538 BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1); 3539 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2) 3540 .addReg(Wt1) 3541 .addReg(Fs) 3542 .addImm(Mips::sub_64); 3543 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wd).addReg(Wt2).addImm(0); 3544 3545 MI.eraseFromParent(); // The pseudo instruction is gone now. 3546 return BB; 3547 } 3548 3549 // Emit the ST_F16_PSEDUO instruction to store a f16 value from an MSA 3550 // register. 3551 // 3552 // STF16 MSA128F16:$wd, mem_simm10:$addr 3553 // => 3554 // copy_u.h $rtemp,$wd[0] 3555 // sh $rtemp, $addr 3556 // 3557 // Safety: We can't use st.h & co as they would over write the memory after 3558 // the destination. It would require half floats be allocated 16 bytes(!) of 3559 // space. 
3560 MachineBasicBlock * 3561 MipsSETargetLowering::emitST_F16_PSEUDO(MachineInstr &MI, 3562 MachineBasicBlock *BB) const { 3563 3564 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3565 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3566 DebugLoc DL = MI.getDebugLoc(); 3567 unsigned Ws = MI.getOperand(0).getReg(); 3568 unsigned Rt = MI.getOperand(1).getReg(); 3569 const MachineMemOperand &MMO = **MI.memoperands_begin(); 3570 unsigned Imm = MMO.getOffset(); 3571 3572 // Caution: A load via the GOT can expand to a GPR32 operand, a load via 3573 // spill and reload can expand as a GPR64 operand. Examine the 3574 // operand in detail and default to ABI. 3575 const TargetRegisterClass *RC = 3576 MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg()) 3577 : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass 3578 : &Mips::GPR64RegClass); 3579 const bool UsingMips32 = RC == &Mips::GPR32RegClass; 3580 unsigned Rs = RegInfo.createVirtualRegister(RC); 3581 3582 BuildMI(*BB, MI, DL, TII->get(Mips::COPY_U_H), Rs).addReg(Ws).addImm(0); 3583 BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::SH : Mips::SH64)) 3584 .addReg(Rs) 3585 .addReg(Rt) 3586 .addImm(Imm) 3587 .addMemOperand(BB->getParent()->getMachineMemOperand( 3588 &MMO, MMO.getOffset(), MMO.getSize())); 3589 3590 MI.eraseFromParent(); 3591 return BB; 3592 } 3593 3594 // Emit the LD_F16_PSEDUO instruction to load a f16 value into an MSA register. 3595 // 3596 // LD_F16 MSA128F16:$wd, mem_simm10:$addr 3597 // => 3598 // lh $rtemp, $addr 3599 // fill.h $wd, $rtemp 3600 // 3601 // Safety: We can't use ld.h & co as they over-read from the source. 3602 // Additionally, if the address is not modulo 16, 2 cases can occur: 3603 // a) Segmentation fault as the load instruction reads from a memory page 3604 // memory it's not supposed to. 3605 // b) The load crosses an implementation specific boundary, requiring OS 3606 // intervention. 
3607 // 3608 MachineBasicBlock * 3609 MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI, 3610 MachineBasicBlock *BB) const { 3611 3612 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3613 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3614 DebugLoc DL = MI.getDebugLoc(); 3615 unsigned Wd = MI.getOperand(0).getReg(); 3616 3617 // Caution: A load via the GOT can expand to a GPR32 operand, a load via 3618 // spill and reload can expand as a GPR64 operand. Examine the 3619 // operand in detail and default to ABI. 3620 const TargetRegisterClass *RC = 3621 MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg()) 3622 : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass 3623 : &Mips::GPR64RegClass); 3624 3625 const bool UsingMips32 = RC == &Mips::GPR32RegClass; 3626 unsigned Rt = RegInfo.createVirtualRegister(RC); 3627 3628 MachineInstrBuilder MIB = 3629 BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::LH : Mips::LH64), Rt); 3630 for (unsigned i = 1; i < MI.getNumOperands(); i++) 3631 MIB.addOperand(MI.getOperand(i)); 3632 3633 BuildMI(*BB, MI, DL, TII->get(Mips::FILL_H), Wd).addReg(Rt); 3634 3635 MI.eraseFromParent(); 3636 return BB; 3637 } 3638 3639 // Emit the FPROUND_PSEUDO instruction. 3640 // 3641 // Round an FGR64Opnd, FGR32Opnd to an f16. 3642 // 3643 // Safety: Cycle the operand through the GPRs so the result always ends up 3644 // the correct MSA register. 3645 // 3646 // FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fs 3647 // / FGR64Opnd:$Fs and MSA128F16:$Wd to the same physical register 3648 // (which they can be, as the MSA registers are defined to alias the 3649 // FPU's 64 bit and 32 bit registers) the result can be accessed using 3650 // the correct register class. That requires operands be tie-able across 3651 // register classes which have a sub/super register class relationship. 
3652 // 3653 // For FPG32Opnd: 3654 // 3655 // FPROUND MSA128F16:$wd, FGR32Opnd:$fs 3656 // => 3657 // mfc1 $rtemp, $fs 3658 // fill.w $rtemp, $wtemp 3659 // fexdo.w $wd, $wtemp, $wtemp 3660 // 3661 // For FPG64Opnd on mips32r2+: 3662 // 3663 // FPROUND MSA128F16:$wd, FGR64Opnd:$fs 3664 // => 3665 // mfc1 $rtemp, $fs 3666 // fill.w $rtemp, $wtemp 3667 // mfhc1 $rtemp2, $fs 3668 // insert.w $wtemp[1], $rtemp2 3669 // insert.w $wtemp[3], $rtemp2 3670 // fexdo.w $wtemp2, $wtemp, $wtemp 3671 // fexdo.h $wd, $temp2, $temp2 3672 // 3673 // For FGR64Opnd on mips64r2+: 3674 // 3675 // FPROUND MSA128F16:$wd, FGR64Opnd:$fs 3676 // => 3677 // dmfc1 $rtemp, $fs 3678 // fill.d $rtemp, $wtemp 3679 // fexdo.w $wtemp2, $wtemp, $wtemp 3680 // fexdo.h $wd, $wtemp2, $wtemp2 3681 // 3682 // Safety note: As $wtemp is UNDEF, we may provoke a spurious exception if the 3683 // undef bits are "just right" and the exception enable bits are 3684 // set. By using fill.w to replicate $fs into all elements over 3685 // insert.w for one element, we avoid that potiential case. If 3686 // fexdo.[hw] causes an exception in, the exception is valid and it 3687 // occurs for all elements. 3688 // 3689 MachineBasicBlock * 3690 MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI, 3691 MachineBasicBlock *BB, 3692 bool IsFGR64) const { 3693 3694 // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous 3695 // here. It's technically doable to support MIPS32 here, but the ISA forbids 3696 // it. 
3697 assert(Subtarget.hasMSA() && Subtarget.hasMips32r2()); 3698 3699 bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64; 3700 3701 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3702 DebugLoc DL = MI.getDebugLoc(); 3703 unsigned Wd = MI.getOperand(0).getReg(); 3704 unsigned Fs = MI.getOperand(1).getReg(); 3705 3706 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3707 unsigned Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 3708 const TargetRegisterClass *GPRRC = 3709 IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; 3710 unsigned MFC1Opc = IsFGR64onMips64 ? Mips::DMFC1 : Mips::MFC1; 3711 unsigned FILLOpc = IsFGR64onMips64 ? Mips::FILL_D : Mips::FILL_W; 3712 3713 // Perform the register class copy as mentioned above. 3714 unsigned Rtemp = RegInfo.createVirtualRegister(GPRRC); 3715 BuildMI(*BB, MI, DL, TII->get(MFC1Opc), Rtemp).addReg(Fs); 3716 BuildMI(*BB, MI, DL, TII->get(FILLOpc), Wtemp).addReg(Rtemp); 3717 unsigned WPHI = Wtemp; 3718 3719 if (!Subtarget.hasMips64() && IsFGR64) { 3720 unsigned Rtemp2 = RegInfo.createVirtualRegister(GPRRC); 3721 BuildMI(*BB, MI, DL, TII->get(Mips::MFHC1_D64), Rtemp2).addReg(Fs); 3722 unsigned Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 3723 unsigned Wtemp3 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 3724 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp2) 3725 .addReg(Wtemp) 3726 .addReg(Rtemp2) 3727 .addImm(1); 3728 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp3) 3729 .addReg(Wtemp2) 3730 .addReg(Rtemp2) 3731 .addImm(3); 3732 WPHI = Wtemp3; 3733 } 3734 3735 if (IsFGR64) { 3736 unsigned Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 3737 BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_W), Wtemp2) 3738 .addReg(WPHI) 3739 .addReg(WPHI); 3740 WPHI = Wtemp2; 3741 } 3742 3743 BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_H), Wd).addReg(WPHI).addReg(WPHI); 3744 3745 MI.eraseFromParent(); 3746 return BB; 3747 } 3748 3749 // Emit the 
FPEXTEND_PSEUDO instruction. 3750 // 3751 // Expand an f16 to either a FGR32Opnd or FGR64Opnd. 3752 // 3753 // Safety: Cycle the result through the GPRs so the result always ends up 3754 // the correct floating point register. 3755 // 3756 // FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fd 3757 // / FGR64Opnd:$Fd and MSA128F16:$Ws to the same physical register 3758 // (which they can be, as the MSA registers are defined to alias the 3759 // FPU's 64 bit and 32 bit registers) the result can be accessed using 3760 // the correct register class. That requires operands be tie-able across 3761 // register classes which have a sub/super register class relationship. I 3762 // haven't checked. 3763 // 3764 // For FGR32Opnd: 3765 // 3766 // FPEXTEND FGR32Opnd:$fd, MSA128F16:$ws 3767 // => 3768 // fexupr.w $wtemp, $ws 3769 // copy_s.w $rtemp, $ws[0] 3770 // mtc1 $rtemp, $fd 3771 // 3772 // For FGR64Opnd on Mips64: 3773 // 3774 // FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws 3775 // => 3776 // fexupr.w $wtemp, $ws 3777 // fexupr.d $wtemp2, $wtemp 3778 // copy_s.d $rtemp, $wtemp2s[0] 3779 // dmtc1 $rtemp, $fd 3780 // 3781 // For FGR64Opnd on Mips32: 3782 // 3783 // FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws 3784 // => 3785 // fexupr.w $wtemp, $ws 3786 // fexupr.d $wtemp2, $wtemp 3787 // copy_s.w $rtemp, $wtemp2[0] 3788 // mtc1 $rtemp, $ftemp 3789 // copy_s.w $rtemp2, $wtemp2[1] 3790 // $fd = mthc1 $rtemp2, $ftemp 3791 // 3792 MachineBasicBlock * 3793 MipsSETargetLowering::emitFPEXTEND_PSEUDO(MachineInstr &MI, 3794 MachineBasicBlock *BB, 3795 bool IsFGR64) const { 3796 3797 // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous 3798 // here. It's technically doable to support MIPS32 here, but the ISA forbids 3799 // it. 
3800 assert(Subtarget.hasMSA() && Subtarget.hasMips32r2()); 3801 3802 bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64; 3803 bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64; 3804 3805 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3806 DebugLoc DL = MI.getDebugLoc(); 3807 unsigned Fd = MI.getOperand(0).getReg(); 3808 unsigned Ws = MI.getOperand(1).getReg(); 3809 3810 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3811 const TargetRegisterClass *GPRRC = 3812 IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; 3813 unsigned MTC1Opc = IsFGR64onMips64 ? Mips::DMTC1 : Mips::MTC1; 3814 unsigned COPYOpc = IsFGR64onMips64 ? Mips::COPY_S_D : Mips::COPY_S_W; 3815 3816 unsigned Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 3817 unsigned WPHI = Wtemp; 3818 3819 BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_W), Wtemp).addReg(Ws); 3820 if (IsFGR64) { 3821 WPHI = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); 3822 BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_D), WPHI).addReg(Wtemp); 3823 } 3824 3825 // Perform the safety regclass copy mentioned above. 3826 unsigned Rtemp = RegInfo.createVirtualRegister(GPRRC); 3827 unsigned FPRPHI = IsFGR64onMips32 3828 ? RegInfo.createVirtualRegister(&Mips::FGR64RegClass) 3829 : Fd; 3830 BuildMI(*BB, MI, DL, TII->get(COPYOpc), Rtemp).addReg(WPHI).addImm(0); 3831 BuildMI(*BB, MI, DL, TII->get(MTC1Opc), FPRPHI).addReg(Rtemp); 3832 3833 if (IsFGR64onMips32) { 3834 unsigned Rtemp2 = RegInfo.createVirtualRegister(GPRRC); 3835 BuildMI(*BB, MI, DL, TII->get(Mips::COPY_S_W), Rtemp2) 3836 .addReg(WPHI) 3837 .addImm(1); 3838 BuildMI(*BB, MI, DL, TII->get(Mips::MTHC1_D64), Fd) 3839 .addReg(FPRPHI) 3840 .addReg(Rtemp2); 3841 } 3842 3843 MI.eraseFromParent(); 3844 return BB; 3845 } 3846 3847 // Emit the FEXP2_W_1 pseudo instructions. 
3848 // 3849 // fexp2_w_1_pseudo $wd, $wt 3850 // => 3851 // ldi.w $ws, 1 3852 // fexp2.w $wd, $ws, $wt 3853 MachineBasicBlock * 3854 MipsSETargetLowering::emitFEXP2_W_1(MachineInstr &MI, 3855 MachineBasicBlock *BB) const { 3856 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3857 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3858 const TargetRegisterClass *RC = &Mips::MSA128WRegClass; 3859 unsigned Ws1 = RegInfo.createVirtualRegister(RC); 3860 unsigned Ws2 = RegInfo.createVirtualRegister(RC); 3861 DebugLoc DL = MI.getDebugLoc(); 3862 3863 // Splat 1.0 into a vector 3864 BuildMI(*BB, MI, DL, TII->get(Mips::LDI_W), Ws1).addImm(1); 3865 BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_W), Ws2).addReg(Ws1); 3866 3867 // Emit 1.0 * fexp2(Wt) 3868 BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_W), MI.getOperand(0).getReg()) 3869 .addReg(Ws2) 3870 .addReg(MI.getOperand(1).getReg()); 3871 3872 MI.eraseFromParent(); // The pseudo instruction is gone now. 3873 return BB; 3874 } 3875 3876 // Emit the FEXP2_D_1 pseudo instructions. 
3877 // 3878 // fexp2_d_1_pseudo $wd, $wt 3879 // => 3880 // ldi.d $ws, 1 3881 // fexp2.d $wd, $ws, $wt 3882 MachineBasicBlock * 3883 MipsSETargetLowering::emitFEXP2_D_1(MachineInstr &MI, 3884 MachineBasicBlock *BB) const { 3885 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3886 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3887 const TargetRegisterClass *RC = &Mips::MSA128DRegClass; 3888 unsigned Ws1 = RegInfo.createVirtualRegister(RC); 3889 unsigned Ws2 = RegInfo.createVirtualRegister(RC); 3890 DebugLoc DL = MI.getDebugLoc(); 3891 3892 // Splat 1.0 into a vector 3893 BuildMI(*BB, MI, DL, TII->get(Mips::LDI_D), Ws1).addImm(1); 3894 BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_D), Ws2).addReg(Ws1); 3895 3896 // Emit 1.0 * fexp2(Wt) 3897 BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_D), MI.getOperand(0).getReg()) 3898 .addReg(Ws2) 3899 .addReg(MI.getOperand(1).getReg()); 3900 3901 MI.eraseFromParent(); // The pseudo instruction is gone now. 3902 return BB; 3903 } 3904