1 //===-- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface --*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Subclass of MipsTargetLowering specialized for mips32/64. 11 // 12 //===----------------------------------------------------------------------===// 13 #include "MipsSEISelLowering.h" 14 #include "MipsMachineFunction.h" 15 #include "MipsRegisterInfo.h" 16 #include "MipsTargetMachine.h" 17 #include "llvm/ADT/APInt.h" 18 #include "llvm/CodeGen/MachineInstrBuilder.h" 19 #include "llvm/CodeGen/MachineRegisterInfo.h" 20 #include "llvm/IR/Intrinsics.h" 21 #include "llvm/Support/CommandLine.h" 22 #include "llvm/Support/Debug.h" 23 #include "llvm/Support/ErrorHandling.h" 24 #include "llvm/Support/raw_ostream.h" 25 #include "llvm/Target/TargetInstrInfo.h" 26 27 using namespace llvm; 28 29 #define DEBUG_TYPE "mips-isel" 30 31 static cl::opt<bool> 32 UseMipsTailCalls("mips-tail-calls", cl::Hidden, 33 cl::desc("MIPS: permit tail calls."), cl::init(false)); 34 35 static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false), 36 cl::desc("Expand double precision loads and " 37 "stores to their single precision " 38 "counterparts")); 39 40 MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM, 41 const MipsSubtarget &STI) 42 : MipsTargetLowering(TM, STI) { 43 // Set up the register classes 44 addRegisterClass(MVT::i32, &Mips::GPR32RegClass); 45 46 if (Subtarget.isGP64bit()) 47 addRegisterClass(MVT::i64, &Mips::GPR64RegClass); 48 49 if (Subtarget.hasDSP() || Subtarget.hasMSA()) { 50 // Expand all truncating stores and extending loads. 51 for (MVT VT0 : MVT::vector_valuetypes()) { 52 for (MVT VT1 : MVT::vector_valuetypes()) { 53 setTruncStoreAction(VT0, VT1, Expand); 54 setLoadExtAction(ISD::SEXTLOAD, VT0, VT1, Expand); 55 setLoadExtAction(ISD::ZEXTLOAD, VT0, VT1, Expand); 56 setLoadExtAction(ISD::EXTLOAD, VT0, VT1, Expand); 57 } 58 } 59 } 60 61 if (Subtarget.hasDSP()) { 62 MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8}; 63 64 for (unsigned i = 0; i < array_lengthof(VecTys); ++i) { 65 addRegisterClass(VecTys[i], &Mips::DSPRRegClass); 66 67 // Expand all builtin opcodes. 68 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) 69 setOperationAction(Opc, VecTys[i], Expand); 70 71 setOperationAction(ISD::ADD, VecTys[i], Legal); 72 setOperationAction(ISD::SUB, VecTys[i], Legal); 73 setOperationAction(ISD::LOAD, VecTys[i], Legal); 74 setOperationAction(ISD::STORE, VecTys[i], Legal); 75 setOperationAction(ISD::BITCAST, VecTys[i], Legal); 76 } 77 78 setTargetDAGCombine(ISD::SHL); 79 setTargetDAGCombine(ISD::SRA); 80 setTargetDAGCombine(ISD::SRL); 81 setTargetDAGCombine(ISD::SETCC); 82 setTargetDAGCombine(ISD::VSELECT); 83 } 84 85 if (Subtarget.hasDSPR2()) 86 setOperationAction(ISD::MUL, MVT::v2i16, Legal); 87 88 if (Subtarget.hasMSA()) { 89 addMSAIntType(MVT::v16i8, &Mips::MSA128BRegClass); 90 addMSAIntType(MVT::v8i16, &Mips::MSA128HRegClass); 91 addMSAIntType(MVT::v4i32, &Mips::MSA128WRegClass); 92 addMSAIntType(MVT::v2i64, &Mips::MSA128DRegClass); 93 addMSAFloatType(MVT::v8f16, &Mips::MSA128HRegClass); 94 addMSAFloatType(MVT::v4f32, &Mips::MSA128WRegClass); 95 addMSAFloatType(MVT::v2f64, &Mips::MSA128DRegClass); 96 97 // f16 is a storage-only type, always promote it to f32. 98 addRegisterClass(MVT::f16, &Mips::MSA128HRegClass); 99 setOperationAction(ISD::SETCC, MVT::f16, Promote); 100 setOperationAction(ISD::BR_CC, MVT::f16, Promote); 101 setOperationAction(ISD::SELECT_CC, MVT::f16, Promote); 102 setOperationAction(ISD::SELECT, MVT::f16, Promote); 103 setOperationAction(ISD::FADD, MVT::f16, Promote); 104 setOperationAction(ISD::FSUB, MVT::f16, Promote); 105 setOperationAction(ISD::FMUL, MVT::f16, Promote); 106 setOperationAction(ISD::FDIV, MVT::f16, Promote); 107 setOperationAction(ISD::FREM, MVT::f16, Promote); 108 setOperationAction(ISD::FMA, MVT::f16, Promote); 109 setOperationAction(ISD::FNEG, MVT::f16, Promote); 110 setOperationAction(ISD::FABS, MVT::f16, Promote); 111 setOperationAction(ISD::FCEIL, MVT::f16, Promote); 112 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote); 113 setOperationAction(ISD::FCOS, MVT::f16, Promote); 114 setOperationAction(ISD::FP_EXTEND, MVT::f16, Promote); 115 setOperationAction(ISD::FFLOOR, MVT::f16, Promote); 116 setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote); 117 setOperationAction(ISD::FPOW, MVT::f16, Promote); 118 setOperationAction(ISD::FPOWI, MVT::f16, Promote); 119 setOperationAction(ISD::FRINT, MVT::f16, Promote); 120 setOperationAction(ISD::FSIN, MVT::f16, Promote); 121 setOperationAction(ISD::FSINCOS, MVT::f16, Promote); 122 setOperationAction(ISD::FSQRT, MVT::f16, Promote); 123 setOperationAction(ISD::FEXP, MVT::f16, Promote); 124 setOperationAction(ISD::FEXP2, MVT::f16, Promote); 125 setOperationAction(ISD::FLOG, MVT::f16, Promote); 126 setOperationAction(ISD::FLOG2, MVT::f16, Promote); 127 setOperationAction(ISD::FLOG10, MVT::f16, Promote); 128 setOperationAction(ISD::FROUND, MVT::f16, Promote); 129 setOperationAction(ISD::FTRUNC, MVT::f16, Promote); 130 setOperationAction(ISD::FMINNUM, MVT::f16, Promote); 131 setOperationAction(ISD::FMAXNUM, MVT::f16, Promote); 132 setOperationAction(ISD::FMINNAN, MVT::f16, Promote); 133 setOperationAction(ISD::FMAXNAN, MVT::f16, Promote); 134 135 setTargetDAGCombine(ISD::AND); 136 setTargetDAGCombine(ISD::OR); 137 setTargetDAGCombine(ISD::SRA); 138 setTargetDAGCombine(ISD::VSELECT); 139 setTargetDAGCombine(ISD::XOR); 140 } 141 142 if (!Subtarget.useSoftFloat()) { 143 addRegisterClass(MVT::f32, &Mips::FGR32RegClass); 144 145 // When dealing with single precision only, use libcalls 146 if (!Subtarget.isSingleFloat()) { 147 if (Subtarget.isFP64bit()) 148 addRegisterClass(MVT::f64, &Mips::FGR64RegClass); 149 else 150 addRegisterClass(MVT::f64, &Mips::AFGR64RegClass); 151 } 152 } 153 154 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom); 155 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom); 156 setOperationAction(ISD::MULHS, MVT::i32, Custom); 157 setOperationAction(ISD::MULHU, MVT::i32, Custom); 158 159 if (Subtarget.hasCnMips()) 160 setOperationAction(ISD::MUL, MVT::i64, Legal); 161 else if (Subtarget.isGP64bit()) 162 setOperationAction(ISD::MUL, MVT::i64, Custom); 163 164 if (Subtarget.isGP64bit()) { 165 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Custom); 166 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom); 167 setOperationAction(ISD::MULHS, MVT::i64, Custom); 168 setOperationAction(ISD::MULHU, MVT::i64, Custom); 169 setOperationAction(ISD::SDIVREM, MVT::i64, Custom); 170 setOperationAction(ISD::UDIVREM, MVT::i64, Custom); 171 } 172 173 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); 174 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); 175 176 setOperationAction(ISD::SDIVREM, MVT::i32, Custom); 177 setOperationAction(ISD::UDIVREM, MVT::i32, Custom); 178 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); 179 setOperationAction(ISD::LOAD, MVT::i32, Custom); 180 setOperationAction(ISD::STORE, MVT::i32, Custom); 181 182 setTargetDAGCombine(ISD::MUL); 183 184 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 185 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); 186 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); 187 188 if (NoDPLoadStore) { 189 setOperationAction(ISD::LOAD, MVT::f64, Custom); 190 setOperationAction(ISD::STORE, MVT::f64, Custom); 191 } 192 193 if (Subtarget.hasMips32r6()) { 194 // MIPS32r6 replaces the accumulator-based multiplies with a three register 195 // instruction 196 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); 197 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); 198 setOperationAction(ISD::MUL, MVT::i32, Legal); 199 setOperationAction(ISD::MULHS, MVT::i32, Legal); 200 setOperationAction(ISD::MULHU, MVT::i32, Legal); 201 202 // MIPS32r6 replaces the accumulator-based division/remainder with separate 203 // three register division and remainder instructions. 204 setOperationAction(ISD::SDIVREM, MVT::i32, Expand); 205 setOperationAction(ISD::UDIVREM, MVT::i32, Expand); 206 setOperationAction(ISD::SDIV, MVT::i32, Legal); 207 setOperationAction(ISD::UDIV, MVT::i32, Legal); 208 setOperationAction(ISD::SREM, MVT::i32, Legal); 209 setOperationAction(ISD::UREM, MVT::i32, Legal); 210 211 // MIPS32r6 replaces conditional moves with an equivalent that removes the 212 // need for three GPR read ports. 213 setOperationAction(ISD::SETCC, MVT::i32, Legal); 214 setOperationAction(ISD::SELECT, MVT::i32, Legal); 215 setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); 216 217 setOperationAction(ISD::SETCC, MVT::f32, Legal); 218 setOperationAction(ISD::SELECT, MVT::f32, Legal); 219 setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); 220 221 assert(Subtarget.isFP64bit() && "FR=1 is required for MIPS32r6"); 222 setOperationAction(ISD::SETCC, MVT::f64, Legal); 223 setOperationAction(ISD::SELECT, MVT::f64, Legal); 224 setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); 225 226 setOperationAction(ISD::BRCOND, MVT::Other, Legal); 227 228 // Floating point > and >= are supported via < and <= 229 setCondCodeAction(ISD::SETOGE, MVT::f32, Expand); 230 setCondCodeAction(ISD::SETOGT, MVT::f32, Expand); 231 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand); 232 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand); 233 234 setCondCodeAction(ISD::SETOGE, MVT::f64, Expand); 235 setCondCodeAction(ISD::SETOGT, MVT::f64, Expand); 236 setCondCodeAction(ISD::SETUGE, MVT::f64, Expand); 237 setCondCodeAction(ISD::SETUGT, MVT::f64, Expand); 238 } 239 240 if (Subtarget.hasMips64r6()) { 241 // MIPS64r6 replaces the accumulator-based multiplies with a three register 242 // instruction 243 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); 244 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); 245 setOperationAction(ISD::MUL, MVT::i64, Legal); 246 setOperationAction(ISD::MULHS, MVT::i64, Legal); 247 setOperationAction(ISD::MULHU, MVT::i64, Legal); 248 249 // MIPS32r6 replaces the accumulator-based division/remainder with separate 250 // three register division and remainder instructions. 251 setOperationAction(ISD::SDIVREM, MVT::i64, Expand); 252 setOperationAction(ISD::UDIVREM, MVT::i64, Expand); 253 setOperationAction(ISD::SDIV, MVT::i64, Legal); 254 setOperationAction(ISD::UDIV, MVT::i64, Legal); 255 setOperationAction(ISD::SREM, MVT::i64, Legal); 256 setOperationAction(ISD::UREM, MVT::i64, Legal); 257 258 // MIPS64r6 replaces conditional moves with an equivalent that removes the 259 // need for three GPR read ports. 260 setOperationAction(ISD::SETCC, MVT::i64, Legal); 261 setOperationAction(ISD::SELECT, MVT::i64, Legal); 262 setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); 263 } 264 265 computeRegisterProperties(Subtarget.getRegisterInfo()); 266 } 267 268 const MipsTargetLowering * 269 llvm::createMipsSETargetLowering(const MipsTargetMachine &TM, 270 const MipsSubtarget &STI) { 271 return new MipsSETargetLowering(TM, STI); 272 } 273 274 const TargetRegisterClass * 275 MipsSETargetLowering::getRepRegClassFor(MVT VT) const { 276 if (VT == MVT::Untyped) 277 return Subtarget.hasDSP() ? &Mips::ACC64DSPRegClass : &Mips::ACC64RegClass; 278 279 return TargetLowering::getRepRegClassFor(VT); 280 } 281 282 // Enable MSA support for the given integer type and Register class. 283 void MipsSETargetLowering:: 284 addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { 285 addRegisterClass(Ty, RC); 286 287 // Expand all builtin opcodes. 288 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) 289 setOperationAction(Opc, Ty, Expand); 290 291 setOperationAction(ISD::BITCAST, Ty, Legal); 292 setOperationAction(ISD::LOAD, Ty, Legal); 293 setOperationAction(ISD::STORE, Ty, Legal); 294 setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Custom); 295 setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal); 296 setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); 297 298 setOperationAction(ISD::ADD, Ty, Legal); 299 setOperationAction(ISD::AND, Ty, Legal); 300 setOperationAction(ISD::CTLZ, Ty, Legal); 301 setOperationAction(ISD::CTPOP, Ty, Legal); 302 setOperationAction(ISD::MUL, Ty, Legal); 303 setOperationAction(ISD::OR, Ty, Legal); 304 setOperationAction(ISD::SDIV, Ty, Legal); 305 setOperationAction(ISD::SREM, Ty, Legal); 306 setOperationAction(ISD::SHL, Ty, Legal); 307 setOperationAction(ISD::SRA, Ty, Legal); 308 setOperationAction(ISD::SRL, Ty, Legal); 309 setOperationAction(ISD::SUB, Ty, Legal); 310 setOperationAction(ISD::UDIV, Ty, Legal); 311 setOperationAction(ISD::UREM, Ty, Legal); 312 setOperationAction(ISD::VECTOR_SHUFFLE, Ty, Custom); 313 setOperationAction(ISD::VSELECT, Ty, Legal); 314 setOperationAction(ISD::XOR, Ty, Legal); 315 316 if (Ty == MVT::v4i32 || Ty == MVT::v2i64) { 317 setOperationAction(ISD::FP_TO_SINT, Ty, Legal); 318 setOperationAction(ISD::FP_TO_UINT, Ty, Legal); 319 setOperationAction(ISD::SINT_TO_FP, Ty, Legal); 320 setOperationAction(ISD::UINT_TO_FP, Ty, Legal); 321 } 322 323 setOperationAction(ISD::SETCC, Ty, Legal); 324 setCondCodeAction(ISD::SETNE, Ty, Expand); 325 setCondCodeAction(ISD::SETGE, Ty, Expand); 326 setCondCodeAction(ISD::SETGT, Ty, Expand); 327 setCondCodeAction(ISD::SETUGE, Ty, Expand); 328 setCondCodeAction(ISD::SETUGT, Ty, Expand); 329 } 330 331 // Enable MSA support for the given floating-point type and Register class. 332 void MipsSETargetLowering:: 333 addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { 334 addRegisterClass(Ty, RC); 335 336 // Expand all builtin opcodes. 337 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) 338 setOperationAction(Opc, Ty, Expand); 339 340 setOperationAction(ISD::LOAD, Ty, Legal); 341 setOperationAction(ISD::STORE, Ty, Legal); 342 setOperationAction(ISD::BITCAST, Ty, Legal); 343 setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Legal); 344 setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal); 345 setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); 346 347 if (Ty != MVT::v8f16) { 348 setOperationAction(ISD::FABS, Ty, Legal); 349 setOperationAction(ISD::FADD, Ty, Legal); 350 setOperationAction(ISD::FDIV, Ty, Legal); 351 setOperationAction(ISD::FEXP2, Ty, Legal); 352 setOperationAction(ISD::FLOG2, Ty, Legal); 353 setOperationAction(ISD::FMA, Ty, Legal); 354 setOperationAction(ISD::FMUL, Ty, Legal); 355 setOperationAction(ISD::FRINT, Ty, Legal); 356 setOperationAction(ISD::FSQRT, Ty, Legal); 357 setOperationAction(ISD::FSUB, Ty, Legal); 358 setOperationAction(ISD::VSELECT, Ty, Legal); 359 360 setOperationAction(ISD::SETCC, Ty, Legal); 361 setCondCodeAction(ISD::SETOGE, Ty, Expand); 362 setCondCodeAction(ISD::SETOGT, Ty, Expand); 363 setCondCodeAction(ISD::SETUGE, Ty, Expand); 364 setCondCodeAction(ISD::SETUGT, Ty, Expand); 365 setCondCodeAction(ISD::SETGE, Ty, Expand); 366 setCondCodeAction(ISD::SETGT, Ty, Expand); 367 } 368 } 369 370 bool 371 MipsSETargetLowering::allowsMisalignedMemoryAccesses(EVT VT, 372 unsigned, 373 unsigned, 374 bool *Fast) const { 375 MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy; 376 377 if (Subtarget.systemSupportsUnalignedAccess()) { 378 // MIPS32r6/MIPS64r6 is required to support unaligned access. It's 379 // implementation defined whether this is handled by hardware, software, or 380 // a hybrid of the two but it's expected that most implementations will 381 // handle the majority of cases in hardware. 382 if (Fast) 383 *Fast = true; 384 return true; 385 } 386 387 switch (SVT) { 388 case MVT::i64: 389 case MVT::i32: 390 if (Fast) 391 *Fast = true; 392 return true; 393 default: 394 return false; 395 } 396 } 397 398 SDValue MipsSETargetLowering::LowerOperation(SDValue Op, 399 SelectionDAG &DAG) const { 400 switch(Op.getOpcode()) { 401 case ISD::LOAD: return lowerLOAD(Op, DAG); 402 case ISD::STORE: return lowerSTORE(Op, DAG); 403 case ISD::SMUL_LOHI: return lowerMulDiv(Op, MipsISD::Mult, true, true, DAG); 404 case ISD::UMUL_LOHI: return lowerMulDiv(Op, MipsISD::Multu, true, true, DAG); 405 case ISD::MULHS: return lowerMulDiv(Op, MipsISD::Mult, false, true, DAG); 406 case ISD::MULHU: return lowerMulDiv(Op, MipsISD::Multu, false, true, DAG); 407 case ISD::MUL: return lowerMulDiv(Op, MipsISD::Mult, true, false, DAG); 408 case ISD::SDIVREM: return lowerMulDiv(Op, MipsISD::DivRem, true, true, DAG); 409 case ISD::UDIVREM: return lowerMulDiv(Op, MipsISD::DivRemU, true, true, 410 DAG); 411 case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG); 412 case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG); 413 case ISD::INTRINSIC_VOID: return lowerINTRINSIC_VOID(Op, DAG); 414 case ISD::EXTRACT_VECTOR_ELT: return lowerEXTRACT_VECTOR_ELT(Op, DAG); 415 case ISD::BUILD_VECTOR: return lowerBUILD_VECTOR(Op, DAG); 416 case ISD::VECTOR_SHUFFLE: return lowerVECTOR_SHUFFLE(Op, DAG); 417 } 418 419 return MipsTargetLowering::LowerOperation(Op, DAG); 420 } 421 422 // Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT 423 // 424 // Performs the following transformations: 425 // - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its 426 // sign/zero-extension is completely overwritten by the new one performed by 427 // the ISD::AND. 428 // - Removes redundant zero extensions performed by an ISD::AND. 429 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, 430 TargetLowering::DAGCombinerInfo &DCI, 431 const MipsSubtarget &Subtarget) { 432 if (!Subtarget.hasMSA()) 433 return SDValue(); 434 435 SDValue Op0 = N->getOperand(0); 436 SDValue Op1 = N->getOperand(1); 437 unsigned Op0Opcode = Op0->getOpcode(); 438 439 // (and (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d) 440 // where $d + 1 == 2^n and n == 32 441 // or $d + 1 == 2^n and n <= 32 and ZExt 442 // -> (MipsVExtractZExt $a, $b, $c) 443 if (Op0Opcode == MipsISD::VEXTRACT_SEXT_ELT || 444 Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT) { 445 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Op1); 446 447 if (!Mask) 448 return SDValue(); 449 450 int32_t Log2IfPositive = (Mask->getAPIntValue() + 1).exactLogBase2(); 451 452 if (Log2IfPositive <= 0) 453 return SDValue(); // Mask+1 is not a power of 2 454 455 SDValue Op0Op2 = Op0->getOperand(2); 456 EVT ExtendTy = cast<VTSDNode>(Op0Op2)->getVT(); 457 unsigned ExtendTySize = ExtendTy.getSizeInBits(); 458 unsigned Log2 = Log2IfPositive; 459 460 if ((Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT && Log2 >= ExtendTySize) || 461 Log2 == ExtendTySize) { 462 SDValue Ops[] = { Op0->getOperand(0), Op0->getOperand(1), Op0Op2 }; 463 return DAG.getNode(MipsISD::VEXTRACT_ZEXT_ELT, SDLoc(Op0), 464 Op0->getVTList(), 465 makeArrayRef(Ops, Op0->getNumOperands())); 466 } 467 } 468 469 return SDValue(); 470 } 471 472 // Determine if the specified node is a constant vector splat. 473 // 474 // Returns true and sets Imm if: 475 // * N is a ISD::BUILD_VECTOR representing a constant splat 476 // 477 // This function is quite similar to MipsSEDAGToDAGISel::selectVSplat. The 478 // differences are that it assumes the MSA has already been checked and the 479 // arbitrary requirement for a maximum of 32-bit integers isn't applied (and 480 // must not be in order for binsri.d to be selectable). 481 static bool isVSplat(SDValue N, APInt &Imm, bool IsLittleEndian) { 482 BuildVectorSDNode *Node = dyn_cast<BuildVectorSDNode>(N.getNode()); 483 484 if (!Node) 485 return false; 486 487 APInt SplatValue, SplatUndef; 488 unsigned SplatBitSize; 489 bool HasAnyUndefs; 490 491 if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, 492 8, !IsLittleEndian)) 493 return false; 494 495 Imm = SplatValue; 496 497 return true; 498 } 499 500 // Test whether the given node is an all-ones build_vector. 501 static bool isVectorAllOnes(SDValue N) { 502 // Look through bitcasts. Endianness doesn't matter because we are looking 503 // for an all-ones value. 504 if (N->getOpcode() == ISD::BITCAST) 505 N = N->getOperand(0); 506 507 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N); 508 509 if (!BVN) 510 return false; 511 512 APInt SplatValue, SplatUndef; 513 unsigned SplatBitSize; 514 bool HasAnyUndefs; 515 516 // Endianness doesn't matter in this context because we are looking for 517 // an all-ones value. 518 if (BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs)) 519 return SplatValue.isAllOnesValue(); 520 521 return false; 522 } 523 524 // Test whether N is the bitwise inverse of OfNode. 525 static bool isBitwiseInverse(SDValue N, SDValue OfNode) { 526 if (N->getOpcode() != ISD::XOR) 527 return false; 528 529 if (isVectorAllOnes(N->getOperand(0))) 530 return N->getOperand(1) == OfNode; 531 532 if (isVectorAllOnes(N->getOperand(1))) 533 return N->getOperand(0) == OfNode; 534 535 return false; 536 } 537 538 // Perform combines where ISD::OR is the root node. 539 // 540 // Performs the following transformations: 541 // - (or (and $a, $mask), (and $b, $inv_mask)) => (vselect $mask, $a, $b) 542 // where $inv_mask is the bitwise inverse of $mask and the 'or' has a 128-bit 543 // vector type. 544 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, 545 TargetLowering::DAGCombinerInfo &DCI, 546 const MipsSubtarget &Subtarget) { 547 if (!Subtarget.hasMSA()) 548 return SDValue(); 549 550 EVT Ty = N->getValueType(0); 551 552 if (!Ty.is128BitVector()) 553 return SDValue(); 554 555 SDValue Op0 = N->getOperand(0); 556 SDValue Op1 = N->getOperand(1); 557 558 if (Op0->getOpcode() == ISD::AND && Op1->getOpcode() == ISD::AND) { 559 SDValue Op0Op0 = Op0->getOperand(0); 560 SDValue Op0Op1 = Op0->getOperand(1); 561 SDValue Op1Op0 = Op1->getOperand(0); 562 SDValue Op1Op1 = Op1->getOperand(1); 563 bool IsLittleEndian = !Subtarget.isLittle(); 564 565 SDValue IfSet, IfClr, Cond; 566 bool IsConstantMask = false; 567 APInt Mask, InvMask; 568 569 // If Op0Op0 is an appropriate mask, try to find it's inverse in either 570 // Op1Op0, or Op1Op1. Keep track of the Cond, IfSet, and IfClr nodes, while 571 // looking. 572 // IfClr will be set if we find a valid match. 573 if (isVSplat(Op0Op0, Mask, IsLittleEndian)) { 574 Cond = Op0Op0; 575 IfSet = Op0Op1; 576 577 if (isVSplat(Op1Op0, InvMask, IsLittleEndian) && 578 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) 579 IfClr = Op1Op1; 580 else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) && 581 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) 582 IfClr = Op1Op0; 583 584 IsConstantMask = true; 585 } 586 587 // If IfClr is not yet set, and Op0Op1 is an appropriate mask, try the same 588 // thing again using this mask. 589 // IfClr will be set if we find a valid match. 590 if (!IfClr.getNode() && isVSplat(Op0Op1, Mask, IsLittleEndian)) { 591 Cond = Op0Op1; 592 IfSet = Op0Op0; 593 594 if (isVSplat(Op1Op0, InvMask, IsLittleEndian) && 595 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) 596 IfClr = Op1Op1; 597 else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) && 598 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) 599 IfClr = Op1Op0; 600 601 IsConstantMask = true; 602 } 603 604 // If IfClr is not yet set, try looking for a non-constant match. 605 // IfClr will be set if we find a valid match amongst the eight 606 // possibilities. 607 if (!IfClr.getNode()) { 608 if (isBitwiseInverse(Op0Op0, Op1Op0)) { 609 Cond = Op1Op0; 610 IfSet = Op1Op1; 611 IfClr = Op0Op1; 612 } else if (isBitwiseInverse(Op0Op1, Op1Op0)) { 613 Cond = Op1Op0; 614 IfSet = Op1Op1; 615 IfClr = Op0Op0; 616 } else if (isBitwiseInverse(Op0Op0, Op1Op1)) { 617 Cond = Op1Op1; 618 IfSet = Op1Op0; 619 IfClr = Op0Op1; 620 } else if (isBitwiseInverse(Op0Op1, Op1Op1)) { 621 Cond = Op1Op1; 622 IfSet = Op1Op0; 623 IfClr = Op0Op0; 624 } else if (isBitwiseInverse(Op1Op0, Op0Op0)) { 625 Cond = Op0Op0; 626 IfSet = Op0Op1; 627 IfClr = Op1Op1; 628 } else if (isBitwiseInverse(Op1Op1, Op0Op0)) { 629 Cond = Op0Op0; 630 IfSet = Op0Op1; 631 IfClr = Op1Op0; 632 } else if (isBitwiseInverse(Op1Op0, Op0Op1)) { 633 Cond = Op0Op1; 634 IfSet = Op0Op0; 635 IfClr = Op1Op1; 636 } else if (isBitwiseInverse(Op1Op1, Op0Op1)) { 637 Cond = Op0Op1; 638 IfSet = Op0Op0; 639 IfClr = Op1Op0; 640 } 641 } 642 643 // At this point, IfClr will be set if we have a valid match. 644 if (!IfClr.getNode()) 645 return SDValue(); 646 647 assert(Cond.getNode() && IfSet.getNode()); 648 649 // Fold degenerate cases. 650 if (IsConstantMask) { 651 if (Mask.isAllOnesValue()) 652 return IfSet; 653 else if (Mask == 0) 654 return IfClr; 655 } 656 657 // Transform the DAG into an equivalent VSELECT. 658 return DAG.getNode(ISD::VSELECT, SDLoc(N), Ty, Cond, IfSet, IfClr); 659 } 660 661 return SDValue(); 662 } 663 664 static SDValue genConstMult(SDValue X, uint64_t C, const SDLoc &DL, EVT VT, 665 EVT ShiftTy, SelectionDAG &DAG) { 666 // Clear the upper (64 - VT.sizeInBits) bits. 667 C &= ((uint64_t)-1) >> (64 - VT.getSizeInBits()); 668 669 // Return 0. 670 if (C == 0) 671 return DAG.getConstant(0, DL, VT); 672 673 // Return x. 674 if (C == 1) 675 return X; 676 677 // If c is power of 2, return (shl x, log2(c)). 678 if (isPowerOf2_64(C)) 679 return DAG.getNode(ISD::SHL, DL, VT, X, 680 DAG.getConstant(Log2_64(C), DL, ShiftTy)); 681 682 unsigned Log2Ceil = Log2_64_Ceil(C); 683 uint64_t Floor = 1LL << Log2_64(C); 684 uint64_t Ceil = Log2Ceil == 64 ? 0LL : 1LL << Log2Ceil; 685 686 // If |c - floor_c| <= |c - ceil_c|, 687 // where floor_c = pow(2, floor(log2(c))) and ceil_c = pow(2, ceil(log2(c))), 688 // return (add constMult(x, floor_c), constMult(x, c - floor_c)). 689 if (C - Floor <= Ceil - C) { 690 SDValue Op0 = genConstMult(X, Floor, DL, VT, ShiftTy, DAG); 691 SDValue Op1 = genConstMult(X, C - Floor, DL, VT, ShiftTy, DAG); 692 return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1); 693 } 694 695 // If |c - floor_c| > |c - ceil_c|, 696 // return (sub constMult(x, ceil_c), constMult(x, ceil_c - c)). 697 SDValue Op0 = genConstMult(X, Ceil, DL, VT, ShiftTy, DAG); 698 SDValue Op1 = genConstMult(X, Ceil - C, DL, VT, ShiftTy, DAG); 699 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1); 700 } 701 702 static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, 703 const TargetLowering::DAGCombinerInfo &DCI, 704 const MipsSETargetLowering *TL) { 705 EVT VT = N->getValueType(0); 706 707 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) 708 if (!VT.isVector()) 709 return genConstMult(N->getOperand(0), C->getZExtValue(), SDLoc(N), VT, 710 TL->getScalarShiftAmountTy(DAG.getDataLayout(), VT), 711 DAG); 712 713 return SDValue(N, 0); 714 } 715 716 static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty, 717 SelectionDAG &DAG, 718 const MipsSubtarget &Subtarget) { 719 // See if this is a vector splat immediate node. 720 APInt SplatValue, SplatUndef; 721 unsigned SplatBitSize; 722 bool HasAnyUndefs; 723 unsigned EltSize = Ty.getScalarSizeInBits(); 724 BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); 725 726 if (!Subtarget.hasDSP()) 727 return SDValue(); 728 729 if (!BV || 730 !BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, 731 EltSize, !Subtarget.isLittle()) || 732 (SplatBitSize != EltSize) || 733 (SplatValue.getZExtValue() >= EltSize)) 734 return SDValue(); 735 736 SDLoc DL(N); 737 return DAG.getNode(Opc, DL, Ty, N->getOperand(0), 738 DAG.getConstant(SplatValue.getZExtValue(), DL, MVT::i32)); 739 } 740 741 static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG, 742 TargetLowering::DAGCombinerInfo &DCI, 743 const MipsSubtarget &Subtarget) { 744 EVT Ty = N->getValueType(0); 745 746 if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8)) 747 return SDValue(); 748 749 return performDSPShiftCombine(MipsISD::SHLL_DSP, N, Ty, DAG, Subtarget); 750 } 751 752 // Fold sign-extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT for MSA and fold 753 // constant splats into MipsISD::SHRA_DSP for DSPr2. 754 // 755 // Performs the following transformations: 756 // - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to sign extension if its 757 // sign/zero-extension is completely overwritten by the new one performed by 758 // the ISD::SRA and ISD::SHL nodes. 759 // - Removes redundant sign extensions performed by an ISD::SRA and ISD::SHL 760 // sequence. 761 // 762 // See performDSPShiftCombine for more information about the transformation 763 // used for DSPr2. 764 static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, 765 TargetLowering::DAGCombinerInfo &DCI, 766 const MipsSubtarget &Subtarget) { 767 EVT Ty = N->getValueType(0); 768 769 if (Subtarget.hasMSA()) { 770 SDValue Op0 = N->getOperand(0); 771 SDValue Op1 = N->getOperand(1); 772 773 // (sra (shl (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d), imm:$d) 774 // where $d + sizeof($c) == 32 775 // or $d + sizeof($c) <= 32 and SExt 776 // -> (MipsVExtractSExt $a, $b, $c) 777 if (Op0->getOpcode() == ISD::SHL && Op1 == Op0->getOperand(1)) { 778 SDValue Op0Op0 = Op0->getOperand(0); 779 ConstantSDNode *ShAmount = dyn_cast<ConstantSDNode>(Op1); 780 781 if (!ShAmount) 782 return SDValue(); 783 784 if (Op0Op0->getOpcode() != MipsISD::VEXTRACT_SEXT_ELT && 785 Op0Op0->getOpcode() != MipsISD::VEXTRACT_ZEXT_ELT) 786 return SDValue(); 787 788 EVT ExtendTy = cast<VTSDNode>(Op0Op0->getOperand(2))->getVT(); 789 unsigned TotalBits = ShAmount->getZExtValue() + ExtendTy.getSizeInBits(); 790 791 if (TotalBits == 32 || 792 (Op0Op0->getOpcode() == MipsISD::VEXTRACT_SEXT_ELT && 793 TotalBits <= 32)) { 794 SDValue Ops[] = { Op0Op0->getOperand(0), Op0Op0->getOperand(1), 795 Op0Op0->getOperand(2) }; 796 return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, SDLoc(Op0Op0), 797 Op0Op0->getVTList(), 798 makeArrayRef(Ops, Op0Op0->getNumOperands())); 799 } 800 } 801 } 802 803 if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget.hasDSPR2())) 804 return SDValue(); 805 806 return performDSPShiftCombine(MipsISD::SHRA_DSP, N, Ty, DAG, Subtarget); 807 } 808 809 810 static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, 811 TargetLowering::DAGCombinerInfo &DCI, 812 const MipsSubtarget &Subtarget) { 813 EVT Ty = N->getValueType(0); 814 815 if (((Ty != MVT::v2i16) || !Subtarget.hasDSPR2()) && (Ty != MVT::v4i8)) 816 return SDValue(); 817 818 return performDSPShiftCombine(MipsISD::SHRL_DSP, N, Ty, DAG, Subtarget); 819 } 820 821 static bool isLegalDSPCondCode(EVT Ty, ISD::CondCode CC) { 822 bool IsV216 = (Ty == MVT::v2i16); 823 824 switch (CC) { 825 case ISD::SETEQ: 826 case ISD::SETNE: return true; 827 case ISD::SETLT: 828 case ISD::SETLE: 829 case ISD::SETGT: 830 case ISD::SETGE: return IsV216; 831 case ISD::SETULT: 832 case ISD::SETULE: 833 case ISD::SETUGT: 834 case ISD::SETUGE: return !IsV216; 835 default: return false; 836 } 837 } 838 839 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) { 840 EVT Ty = N->getValueType(0); 841 842 if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8)) 843 return SDValue(); 844 845 if (!isLegalDSPCondCode(Ty, cast<CondCodeSDNode>(N->getOperand(2))->get())) 846 return SDValue(); 847 848 return DAG.getNode(MipsISD::SETCC_DSP, SDLoc(N), Ty, N->getOperand(0), 849 N->getOperand(1), N->getOperand(2)); 850 } 851 852 static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) { 853 EVT Ty = N->getValueType(0); 854 855 if (Ty.is128BitVector() && Ty.isInteger()) { 856 // Try the following combines: 857 // (vselect (setcc $a, $b, SETLT), $b, $a)) -> (vsmax $a, $b) 858 // (vselect (setcc $a, $b, SETLE), $b, $a)) -> (vsmax $a, $b) 859 // (vselect (setcc $a, $b, SETLT), $a, $b)) -> (vsmin $a, $b) 860 // (vselect (setcc $a, $b, SETLE), $a, $b)) -> (vsmin $a, $b) 861 // (vselect (setcc $a, $b, SETULT), $b, $a)) -> (vumax $a, $b) 862 // (vselect (setcc $a, $b, SETULE), $b, $a)) -> (vumax $a, $b) 863 // (vselect (setcc $a, $b, SETULT), $a, $b)) -> (vumin $a, $b) 864 // (vselect (setcc $a, $b, SETULE), $a, $b)) -> (vumin $a, $b) 865 // SETGT/SETGE/SETUGT/SETUGE variants of these will show up initially but 866 // will be expanded to equivalent SETLT/SETLE/SETULT/SETULE versions by the 867 // legalizer. 868 SDValue Op0 = N->getOperand(0); 869 870 if (Op0->getOpcode() != ISD::SETCC) 871 return SDValue(); 872 873 ISD::CondCode CondCode = cast<CondCodeSDNode>(Op0->getOperand(2))->get(); 874 bool Signed; 875 876 if (CondCode == ISD::SETLT || CondCode == ISD::SETLE) 877 Signed = true; 878 else if (CondCode == ISD::SETULT || CondCode == ISD::SETULE) 879 Signed = false; 880 else 881 return SDValue(); 882 883 SDValue Op1 = N->getOperand(1); 884 SDValue Op2 = N->getOperand(2); 885 SDValue Op0Op0 = Op0->getOperand(0); 886 SDValue Op0Op1 = Op0->getOperand(1); 887 888 if (Op1 == Op0Op0 && Op2 == Op0Op1) 889 return DAG.getNode(Signed ? MipsISD::VSMIN : MipsISD::VUMIN, SDLoc(N), 890 Ty, Op1, Op2); 891 else if (Op1 == Op0Op1 && Op2 == Op0Op0) 892 return DAG.getNode(Signed ? MipsISD::VSMAX : MipsISD::VUMAX, SDLoc(N), 893 Ty, Op1, Op2); 894 } else if ((Ty == MVT::v2i16) || (Ty == MVT::v4i8)) { 895 SDValue SetCC = N->getOperand(0); 896 897 if (SetCC.getOpcode() != MipsISD::SETCC_DSP) 898 return SDValue(); 899 900 return DAG.getNode(MipsISD::SELECT_CC_DSP, SDLoc(N), Ty, 901 SetCC.getOperand(0), SetCC.getOperand(1), 902 N->getOperand(1), N->getOperand(2), SetCC.getOperand(2)); 903 } 904 905 return SDValue(); 906 } 907 908 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, 909 const MipsSubtarget &Subtarget) { 910 EVT Ty = N->getValueType(0); 911 912 if (Subtarget.hasMSA() && Ty.is128BitVector() && Ty.isInteger()) { 913 // Try the following combines: 914 // (xor (or $a, $b), (build_vector allones)) 915 // (xor (or $a, $b), (bitcast (build_vector allones))) 916 SDValue Op0 = N->getOperand(0); 917 SDValue Op1 = N->getOperand(1); 918 SDValue NotOp; 919 920 if (ISD::isBuildVectorAllOnes(Op0.getNode())) 921 NotOp = Op1; 922 else if (ISD::isBuildVectorAllOnes(Op1.getNode())) 923 NotOp = Op0; 924 else 925 return SDValue(); 926 927 if (NotOp->getOpcode() == ISD::OR) 928 return DAG.getNode(MipsISD::VNOR, SDLoc(N), Ty, NotOp->getOperand(0), 929 NotOp->getOperand(1)); 930 } 931 932 return SDValue(); 933 } 934 935 SDValue 936 MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { 937 SelectionDAG &DAG = DCI.DAG; 938 SDValue Val; 939 940 switch (N->getOpcode()) { 941 case ISD::AND: 942 Val = performANDCombine(N, DAG, DCI, Subtarget); 943 break; 944 case ISD::OR: 945 Val = performORCombine(N, DAG, DCI, Subtarget); 946 break; 947 case ISD::MUL: 948 return performMULCombine(N, DAG, DCI, this); 949 case ISD::SHL: 950 Val = performSHLCombine(N, DAG, DCI, Subtarget); 951 break; 952 case ISD::SRA: 953 return performSRACombine(N, DAG, DCI, Subtarget); 954 case ISD::SRL: 955 return performSRLCombine(N, DAG, DCI, Subtarget); 956 case ISD::VSELECT: 957 return performVSELECTCombine(N, DAG); 958 case ISD::XOR: 959 Val = performXORCombine(N, DAG, Subtarget); 960 break; 961 case ISD::SETCC: 962 Val = performSETCCCombine(N, DAG); 963 break; 964 } 965 966 if (Val.getNode()) { 967 DEBUG(dbgs() << "\nMipsSE DAG Combine:\n"; 968 N->printrWithDepth(dbgs(), &DAG); 969 dbgs() << "\n=> \n"; 970 Val.getNode()->printrWithDepth(dbgs(), &DAG); 971 dbgs() << "\n"); 972 return Val; 973 } 974 975 return MipsTargetLowering::PerformDAGCombine(N, DCI); 976 } 977 978 MachineBasicBlock * 979 MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, 980 MachineBasicBlock *BB) const { 981 switch (MI.getOpcode()) { 982 default: 983 return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB); 984 case Mips::BPOSGE32_PSEUDO: 985 return emitBPOSGE32(MI, BB); 986 case Mips::SNZ_B_PSEUDO: 987 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_B); 988 case Mips::SNZ_H_PSEUDO: 989 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_H); 990 case Mips::SNZ_W_PSEUDO: 991 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_W); 992 case Mips::SNZ_D_PSEUDO: 993 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_D); 994 case Mips::SNZ_V_PSEUDO: 995 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_V); 996 case Mips::SZ_B_PSEUDO: 997 return emitMSACBranchPseudo(MI, BB, Mips::BZ_B); 998 case Mips::SZ_H_PSEUDO: 999 return emitMSACBranchPseudo(MI, BB, Mips::BZ_H); 1000 case Mips::SZ_W_PSEUDO: 1001 return emitMSACBranchPseudo(MI, BB, Mips::BZ_W); 1002 case Mips::SZ_D_PSEUDO: 1003 return emitMSACBranchPseudo(MI, BB, Mips::BZ_D); 1004 case Mips::SZ_V_PSEUDO: 1005 return emitMSACBranchPseudo(MI, BB, Mips::BZ_V); 1006 case Mips::COPY_FW_PSEUDO: 1007 return emitCOPY_FW(MI, BB); 1008 case Mips::COPY_FD_PSEUDO: 1009 return emitCOPY_FD(MI, BB); 1010 case Mips::INSERT_FW_PSEUDO: 1011 return emitINSERT_FW(MI, BB); 1012 case Mips::INSERT_FD_PSEUDO: 1013 return emitINSERT_FD(MI, BB); 1014 case Mips::INSERT_B_VIDX_PSEUDO: 1015 case Mips::INSERT_B_VIDX64_PSEUDO: 1016 return emitINSERT_DF_VIDX(MI, BB, 1, false); 1017 case Mips::INSERT_H_VIDX_PSEUDO: 1018 case Mips::INSERT_H_VIDX64_PSEUDO: 1019 return emitINSERT_DF_VIDX(MI, BB, 2, false); 1020 case Mips::INSERT_W_VIDX_PSEUDO: 1021 case Mips::INSERT_W_VIDX64_PSEUDO: 1022 return emitINSERT_DF_VIDX(MI, BB, 4, false); 1023 case Mips::INSERT_D_VIDX_PSEUDO: 1024 case Mips::INSERT_D_VIDX64_PSEUDO: 1025 return emitINSERT_DF_VIDX(MI, BB, 8, false); 1026 case Mips::INSERT_FW_VIDX_PSEUDO: 1027 case Mips::INSERT_FW_VIDX64_PSEUDO: 1028 return emitINSERT_DF_VIDX(MI, BB, 4, true); 1029 case Mips::INSERT_FD_VIDX_PSEUDO: 1030 case Mips::INSERT_FD_VIDX64_PSEUDO: 1031 return emitINSERT_DF_VIDX(MI, BB, 8, true); 1032 case Mips::FILL_FW_PSEUDO: 1033 return emitFILL_FW(MI, BB); 1034 case Mips::FILL_FD_PSEUDO: 1035 return emitFILL_FD(MI, BB); 1036 case Mips::FEXP2_W_1_PSEUDO: 1037 return emitFEXP2_W_1(MI, BB); 1038 case Mips::FEXP2_D_1_PSEUDO: 1039 return emitFEXP2_D_1(MI, BB); 1040 case Mips::ST_F16: 1041 return emitST_F16_PSEUDO(MI, BB); 1042 case Mips::LD_F16: 1043 return emitLD_F16_PSEUDO(MI, BB); 1044 case Mips::MSA_FP_EXTEND_W_PSEUDO: 1045 return emitFPEXTEND_PSEUDO(MI, BB, false); 1046 case Mips::MSA_FP_ROUND_W_PSEUDO: 1047 return emitFPROUND_PSEUDO(MI, BB, false); 1048 case Mips::MSA_FP_EXTEND_D_PSEUDO: 1049 return emitFPEXTEND_PSEUDO(MI, BB, true); 1050 case Mips::MSA_FP_ROUND_D_PSEUDO: 1051 return emitFPROUND_PSEUDO(MI, BB, true); 1052 } 1053 } 1054 1055 bool MipsSETargetLowering::isEligibleForTailCallOptimization( 1056 const CCState &CCInfo, unsigned NextStackOffset, 1057 const MipsFunctionInfo &FI) const { 1058 if (!UseMipsTailCalls) 1059 return false; 1060 1061 // Exception has to be cleared with eret. 1062 if (FI.isISR()) 1063 return false; 1064 1065 // Return false if either the callee or caller has a byval argument. 1066 if (CCInfo.getInRegsParamsCount() > 0 || FI.hasByvalArg()) 1067 return false; 1068 1069 // Return true if the callee's argument area is no larger than the 1070 // caller's. 1071 return NextStackOffset <= FI.getIncomingArgSize(); 1072 } 1073 1074 void MipsSETargetLowering:: 1075 getOpndList(SmallVectorImpl<SDValue> &Ops, 1076 std::deque< std::pair<unsigned, SDValue> > &RegsToPass, 1077 bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, 1078 bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee, 1079 SDValue Chain) const { 1080 Ops.push_back(Callee); 1081 MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal, 1082 InternalLinkage, IsCallReloc, CLI, Callee, 1083 Chain); 1084 } 1085 1086 SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const { 1087 LoadSDNode &Nd = *cast<LoadSDNode>(Op); 1088 1089 if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore) 1090 return MipsTargetLowering::lowerLOAD(Op, DAG); 1091 1092 // Replace a double precision load with two i32 loads and a buildpair64. 1093 SDLoc DL(Op); 1094 SDValue Ptr = Nd.getBasePtr(), Chain = Nd.getChain(); 1095 EVT PtrVT = Ptr.getValueType(); 1096 1097 // i32 load from lower address. 1098 SDValue Lo = DAG.getLoad(MVT::i32, DL, Chain, Ptr, MachinePointerInfo(), 1099 Nd.getAlignment(), Nd.getMemOperand()->getFlags()); 1100 1101 // i32 load from higher address. 1102 Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT)); 1103 SDValue Hi = DAG.getLoad( 1104 MVT::i32, DL, Lo.getValue(1), Ptr, MachinePointerInfo(), 1105 std::min(Nd.getAlignment(), 4U), Nd.getMemOperand()->getFlags()); 1106 1107 if (!Subtarget.isLittle()) 1108 std::swap(Lo, Hi); 1109 1110 SDValue BP = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi); 1111 SDValue Ops[2] = {BP, Hi.getValue(1)}; 1112 return DAG.getMergeValues(Ops, DL); 1113 } 1114 1115 SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const { 1116 StoreSDNode &Nd = *cast<StoreSDNode>(Op); 1117 1118 if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore) 1119 return MipsTargetLowering::lowerSTORE(Op, DAG); 1120 1121 // Replace a double precision store with two extractelement64s and i32 stores. 1122 SDLoc DL(Op); 1123 SDValue Val = Nd.getValue(), Ptr = Nd.getBasePtr(), Chain = Nd.getChain(); 1124 EVT PtrVT = Ptr.getValueType(); 1125 SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, 1126 Val, DAG.getConstant(0, DL, MVT::i32)); 1127 SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, 1128 Val, DAG.getConstant(1, DL, MVT::i32)); 1129 1130 if (!Subtarget.isLittle()) 1131 std::swap(Lo, Hi); 1132 1133 // i32 store to lower address. 1134 Chain = 1135 DAG.getStore(Chain, DL, Lo, Ptr, MachinePointerInfo(), Nd.getAlignment(), 1136 Nd.getMemOperand()->getFlags(), Nd.getAAInfo()); 1137 1138 // i32 store to higher address. 1139 Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT)); 1140 return DAG.getStore(Chain, DL, Hi, Ptr, MachinePointerInfo(), 1141 std::min(Nd.getAlignment(), 4U), 1142 Nd.getMemOperand()->getFlags(), Nd.getAAInfo()); 1143 } 1144 1145 SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc, 1146 bool HasLo, bool HasHi, 1147 SelectionDAG &DAG) const { 1148 // MIPS32r6/MIPS64r6 removed accumulator based multiplies. 1149 assert(!Subtarget.hasMips32r6()); 1150 1151 EVT Ty = Op.getOperand(0).getValueType(); 1152 SDLoc DL(Op); 1153 SDValue Mult = DAG.getNode(NewOpc, DL, MVT::Untyped, 1154 Op.getOperand(0), Op.getOperand(1)); 1155 SDValue Lo, Hi; 1156 1157 if (HasLo) 1158 Lo = DAG.getNode(MipsISD::MFLO, DL, Ty, Mult); 1159 if (HasHi) 1160 Hi = DAG.getNode(MipsISD::MFHI, DL, Ty, Mult); 1161 1162 if (!HasLo || !HasHi) 1163 return HasLo ? Lo : Hi; 1164 1165 SDValue Vals[] = { Lo, Hi }; 1166 return DAG.getMergeValues(Vals, DL); 1167 } 1168 1169 static SDValue initAccumulator(SDValue In, const SDLoc &DL, SelectionDAG &DAG) { 1170 SDValue InLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In, 1171 DAG.getConstant(0, DL, MVT::i32)); 1172 SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In, 1173 DAG.getConstant(1, DL, MVT::i32)); 1174 return DAG.getNode(MipsISD::MTLOHI, DL, MVT::Untyped, InLo, InHi); 1175 } 1176 1177 static SDValue extractLOHI(SDValue Op, const SDLoc &DL, SelectionDAG &DAG) { 1178 SDValue Lo = DAG.getNode(MipsISD::MFLO, DL, MVT::i32, Op); 1179 SDValue Hi = DAG.getNode(MipsISD::MFHI, DL, MVT::i32, Op); 1180 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi); 1181 } 1182 1183 // This function expands mips intrinsic nodes which have 64-bit input operands 1184 // or output values. 1185 // 1186 // out64 = intrinsic-node in64 1187 // => 1188 // lo = copy (extract-element (in64, 0)) 1189 // hi = copy (extract-element (in64, 1)) 1190 // mips-specific-node 1191 // v0 = copy lo 1192 // v1 = copy hi 1193 // out64 = merge-values (v0, v1) 1194 // 1195 static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { 1196 SDLoc DL(Op); 1197 bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other; 1198 SmallVector<SDValue, 3> Ops; 1199 unsigned OpNo = 0; 1200 1201 // See if Op has a chain input. 1202 if (HasChainIn) 1203 Ops.push_back(Op->getOperand(OpNo++)); 1204 1205 // The next operand is the intrinsic opcode. 1206 assert(Op->getOperand(OpNo).getOpcode() == ISD::TargetConstant); 1207 1208 // See if the next operand has type i64. 1209 SDValue Opnd = Op->getOperand(++OpNo), In64; 1210 1211 if (Opnd.getValueType() == MVT::i64) 1212 In64 = initAccumulator(Opnd, DL, DAG); 1213 else 1214 Ops.push_back(Opnd); 1215 1216 // Push the remaining operands. 1217 for (++OpNo ; OpNo < Op->getNumOperands(); ++OpNo) 1218 Ops.push_back(Op->getOperand(OpNo)); 1219 1220 // Add In64 to the end of the list. 1221 if (In64.getNode()) 1222 Ops.push_back(In64); 1223 1224 // Scan output. 1225 SmallVector<EVT, 2> ResTys; 1226 1227 for (SDNode::value_iterator I = Op->value_begin(), E = Op->value_end(); 1228 I != E; ++I) 1229 ResTys.push_back((*I == MVT::i64) ? MVT::Untyped : *I); 1230 1231 // Create node. 1232 SDValue Val = DAG.getNode(Opc, DL, ResTys, Ops); 1233 SDValue Out = (ResTys[0] == MVT::Untyped) ? extractLOHI(Val, DL, DAG) : Val; 1234 1235 if (!HasChainIn) 1236 return Out; 1237 1238 assert(Val->getValueType(1) == MVT::Other); 1239 SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) }; 1240 return DAG.getMergeValues(Vals, DL); 1241 } 1242 1243 // Lower an MSA copy intrinsic into the specified SelectionDAG node 1244 static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { 1245 SDLoc DL(Op); 1246 SDValue Vec = Op->getOperand(1); 1247 SDValue Idx = Op->getOperand(2); 1248 EVT ResTy = Op->getValueType(0); 1249 EVT EltTy = Vec->getValueType(0).getVectorElementType(); 1250 1251 SDValue Result = DAG.getNode(Opc, DL, ResTy, Vec, Idx, 1252 DAG.getValueType(EltTy)); 1253 1254 return Result; 1255 } 1256 1257 static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) { 1258 EVT ResVecTy = Op->getValueType(0); 1259 EVT ViaVecTy = ResVecTy; 1260 bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian(); 1261 SDLoc DL(Op); 1262 1263 // When ResVecTy == MVT::v2i64, LaneA is the upper 32 bits of the lane and 1264 // LaneB is the lower 32-bits. Otherwise LaneA and LaneB are alternating 1265 // lanes. 1266 SDValue LaneA = Op->getOperand(OpNr); 1267 SDValue LaneB; 1268 1269 if (ResVecTy == MVT::v2i64) { 1270 LaneB = DAG.getConstant(0, DL, MVT::i32); 1271 ViaVecTy = MVT::v4i32; 1272 if(BigEndian) 1273 std::swap(LaneA, LaneB); 1274 } else 1275 LaneB = LaneA; 1276 1277 SDValue Ops[16] = { LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, 1278 LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB }; 1279 1280 SDValue Result = DAG.getBuildVector( 1281 ViaVecTy, DL, makeArrayRef(Ops, ViaVecTy.getVectorNumElements())); 1282 1283 if (ViaVecTy != ResVecTy) { 1284 SDValue One = DAG.getConstant(1, DL, ViaVecTy); 1285 Result = DAG.getNode(ISD::BITCAST, DL, ResVecTy, 1286 DAG.getNode(ISD::AND, DL, ViaVecTy, Result, One)); 1287 } 1288 1289 return Result; 1290 } 1291 1292 static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, 1293 bool IsSigned = false) { 1294 return DAG.getConstant( 1295 APInt(Op->getValueType(0).getScalarType().getSizeInBits(), 1296 Op->getConstantOperandVal(ImmOp), IsSigned), 1297 SDLoc(Op), Op->getValueType(0)); 1298 } 1299 1300 static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue, 1301 bool BigEndian, SelectionDAG &DAG) { 1302 EVT ViaVecTy = VecTy; 1303 SDValue SplatValueA = SplatValue; 1304 SDValue SplatValueB = SplatValue; 1305 SDLoc DL(SplatValue); 1306 1307 if (VecTy == MVT::v2i64) { 1308 // v2i64 BUILD_VECTOR must be performed via v4i32 so split into i32's. 1309 ViaVecTy = MVT::v4i32; 1310 1311 SplatValueA = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValue); 1312 SplatValueB = DAG.getNode(ISD::SRL, DL, MVT::i64, SplatValue, 1313 DAG.getConstant(32, DL, MVT::i32)); 1314 SplatValueB = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValueB); 1315 } 1316 1317 // We currently hold the parts in little endian order. Swap them if 1318 // necessary. 1319 if (BigEndian) 1320 std::swap(SplatValueA, SplatValueB); 1321 1322 SDValue Ops[16] = { SplatValueA, SplatValueB, SplatValueA, SplatValueB, 1323 SplatValueA, SplatValueB, SplatValueA, SplatValueB, 1324 SplatValueA, SplatValueB, SplatValueA, SplatValueB, 1325 SplatValueA, SplatValueB, SplatValueA, SplatValueB }; 1326 1327 SDValue Result = DAG.getBuildVector( 1328 ViaVecTy, DL, makeArrayRef(Ops, ViaVecTy.getVectorNumElements())); 1329 1330 if (VecTy != ViaVecTy) 1331 Result = DAG.getNode(ISD::BITCAST, DL, VecTy, Result); 1332 1333 return Result; 1334 } 1335 1336 static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG, 1337 unsigned Opc, SDValue Imm, 1338 bool BigEndian) { 1339 EVT VecTy = Op->getValueType(0); 1340 SDValue Exp2Imm; 1341 SDLoc DL(Op); 1342 1343 // The DAG Combiner can't constant fold bitcasted vectors yet so we must do it 1344 // here for now. 1345 if (VecTy == MVT::v2i64) { 1346 if (ConstantSDNode *CImm = dyn_cast<ConstantSDNode>(Imm)) { 1347 APInt BitImm = APInt(64, 1) << CImm->getAPIntValue(); 1348 1349 SDValue BitImmHiOp = DAG.getConstant(BitImm.lshr(32).trunc(32), DL, 1350 MVT::i32); 1351 SDValue BitImmLoOp = DAG.getConstant(BitImm.trunc(32), DL, MVT::i32); 1352 1353 if (BigEndian) 1354 std::swap(BitImmLoOp, BitImmHiOp); 1355 1356 Exp2Imm = DAG.getNode( 1357 ISD::BITCAST, DL, MVT::v2i64, 1358 DAG.getBuildVector(MVT::v4i32, DL, 1359 {BitImmLoOp, BitImmHiOp, BitImmLoOp, BitImmHiOp})); 1360 } 1361 } 1362 1363 if (!Exp2Imm.getNode()) { 1364 // We couldnt constant fold, do a vector shift instead 1365 1366 // Extend i32 to i64 if necessary. Sign or zero extend doesn't matter since 1367 // only values 0-63 are valid. 1368 if (VecTy == MVT::v2i64) 1369 Imm = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Imm); 1370 1371 Exp2Imm = getBuildVectorSplat(VecTy, Imm, BigEndian, DAG); 1372 1373 Exp2Imm = DAG.getNode(ISD::SHL, DL, VecTy, DAG.getConstant(1, DL, VecTy), 1374 Exp2Imm); 1375 } 1376 1377 return DAG.getNode(Opc, DL, VecTy, Op->getOperand(1), Exp2Imm); 1378 } 1379 1380 static SDValue truncateVecElts(SDValue Op, SelectionDAG &DAG) { 1381 SDLoc DL(Op); 1382 EVT ResTy = Op->getValueType(0); 1383 SDValue Vec = Op->getOperand(2); 1384 bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian(); 1385 MVT ResEltTy = ResTy == MVT::v2i64 ? MVT::i64 : MVT::i32; 1386 SDValue ConstValue = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, 1387 DL, ResEltTy); 1388 SDValue SplatVec = getBuildVectorSplat(ResTy, ConstValue, BigEndian, DAG); 1389 1390 return DAG.getNode(ISD::AND, DL, ResTy, Vec, SplatVec); 1391 } 1392 1393 static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) { 1394 EVT ResTy = Op->getValueType(0); 1395 SDLoc DL(Op); 1396 SDValue One = DAG.getConstant(1, DL, ResTy); 1397 SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Op, DAG)); 1398 1399 return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), 1400 DAG.getNOT(DL, Bit, ResTy)); 1401 } 1402 1403 static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) { 1404 SDLoc DL(Op); 1405 EVT ResTy = Op->getValueType(0); 1406 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) 1407 << cast<ConstantSDNode>(Op->getOperand(2))->getAPIntValue(); 1408 SDValue BitMask = DAG.getConstant(~BitImm, DL, ResTy); 1409 1410 return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), BitMask); 1411 } 1412 1413 SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, 1414 SelectionDAG &DAG) const { 1415 SDLoc DL(Op); 1416 unsigned Intrinsic = cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue(); 1417 switch (Intrinsic) { 1418 default: 1419 return SDValue(); 1420 case Intrinsic::mips_shilo: 1421 return lowerDSPIntr(Op, DAG, MipsISD::SHILO); 1422 case Intrinsic::mips_dpau_h_qbl: 1423 return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL); 1424 case Intrinsic::mips_dpau_h_qbr: 1425 return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR); 1426 case Intrinsic::mips_dpsu_h_qbl: 1427 return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL); 1428 case Intrinsic::mips_dpsu_h_qbr: 1429 return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR); 1430 case Intrinsic::mips_dpa_w_ph: 1431 return lowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH); 1432 case Intrinsic::mips_dps_w_ph: 1433 return lowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH); 1434 case Intrinsic::mips_dpax_w_ph: 1435 return lowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH); 1436 case Intrinsic::mips_dpsx_w_ph: 1437 return lowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH); 1438 case Intrinsic::mips_mulsa_w_ph: 1439 return lowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH); 1440 case Intrinsic::mips_mult: 1441 return lowerDSPIntr(Op, DAG, MipsISD::Mult); 1442 case Intrinsic::mips_multu: 1443 return lowerDSPIntr(Op, DAG, MipsISD::Multu); 1444 case Intrinsic::mips_madd: 1445 return lowerDSPIntr(Op, DAG, MipsISD::MAdd); 1446 case Intrinsic::mips_maddu: 1447 return lowerDSPIntr(Op, DAG, MipsISD::MAddu); 1448 case Intrinsic::mips_msub: 1449 return lowerDSPIntr(Op, DAG, MipsISD::MSub); 1450 case Intrinsic::mips_msubu: 1451 return lowerDSPIntr(Op, DAG, MipsISD::MSubu); 1452 case Intrinsic::mips_addv_b: 1453 case Intrinsic::mips_addv_h: 1454 case Intrinsic::mips_addv_w: 1455 case Intrinsic::mips_addv_d: 1456 return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1), 1457 Op->getOperand(2)); 1458 case Intrinsic::mips_addvi_b: 1459 case Intrinsic::mips_addvi_h: 1460 case Intrinsic::mips_addvi_w: 1461 case Intrinsic::mips_addvi_d: 1462 return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1), 1463 lowerMSASplatImm(Op, 2, DAG)); 1464 case Intrinsic::mips_and_v: 1465 return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1), 1466 Op->getOperand(2)); 1467 case Intrinsic::mips_andi_b: 1468 return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1), 1469 lowerMSASplatImm(Op, 2, DAG)); 1470 case Intrinsic::mips_bclr_b: 1471 case Intrinsic::mips_bclr_h: 1472 case Intrinsic::mips_bclr_w: 1473 case Intrinsic::mips_bclr_d: 1474 return lowerMSABitClear(Op, DAG); 1475 case Intrinsic::mips_bclri_b: 1476 case Intrinsic::mips_bclri_h: 1477 case Intrinsic::mips_bclri_w: 1478 case Intrinsic::mips_bclri_d: 1479 return lowerMSABitClearImm(Op, DAG); 1480 case Intrinsic::mips_binsli_b: 1481 case Intrinsic::mips_binsli_h: 1482 case Intrinsic::mips_binsli_w: 1483 case Intrinsic::mips_binsli_d: { 1484 // binsli_x(IfClear, IfSet, nbits) -> (vselect LBitsMask, IfSet, IfClear) 1485 EVT VecTy = Op->getValueType(0); 1486 EVT EltTy = VecTy.getVectorElementType(); 1487 if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits()) 1488 report_fatal_error("Immediate out of range"); 1489 APInt Mask = APInt::getHighBitsSet(EltTy.getSizeInBits(), 1490 Op->getConstantOperandVal(3) + 1); 1491 return DAG.getNode(ISD::VSELECT, DL, VecTy, 1492 DAG.getConstant(Mask, DL, VecTy, true), 1493 Op->getOperand(2), Op->getOperand(1)); 1494 } 1495 case Intrinsic::mips_binsri_b: 1496 case Intrinsic::mips_binsri_h: 1497 case Intrinsic::mips_binsri_w: 1498 case Intrinsic::mips_binsri_d: { 1499 // binsri_x(IfClear, IfSet, nbits) -> (vselect RBitsMask, IfSet, IfClear) 1500 EVT VecTy = Op->getValueType(0); 1501 EVT EltTy = VecTy.getVectorElementType(); 1502 if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits()) 1503 report_fatal_error("Immediate out of range"); 1504 APInt Mask = APInt::getLowBitsSet(EltTy.getSizeInBits(), 1505 Op->getConstantOperandVal(3) + 1); 1506 return DAG.getNode(ISD::VSELECT, DL, VecTy, 1507 DAG.getConstant(Mask, DL, VecTy, true), 1508 Op->getOperand(2), Op->getOperand(1)); 1509 } 1510 case Intrinsic::mips_bmnz_v: 1511 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3), 1512 Op->getOperand(2), Op->getOperand(1)); 1513 case Intrinsic::mips_bmnzi_b: 1514 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1515 lowerMSASplatImm(Op, 3, DAG), Op->getOperand(2), 1516 Op->getOperand(1)); 1517 case Intrinsic::mips_bmz_v: 1518 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3), 1519 Op->getOperand(1), Op->getOperand(2)); 1520 case Intrinsic::mips_bmzi_b: 1521 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1522 lowerMSASplatImm(Op, 3, DAG), Op->getOperand(1), 1523 Op->getOperand(2)); 1524 case Intrinsic::mips_bneg_b: 1525 case Intrinsic::mips_bneg_h: 1526 case Intrinsic::mips_bneg_w: 1527 case Intrinsic::mips_bneg_d: { 1528 EVT VecTy = Op->getValueType(0); 1529 SDValue One = DAG.getConstant(1, DL, VecTy); 1530 1531 return DAG.getNode(ISD::XOR, DL, VecTy, Op->getOperand(1), 1532 DAG.getNode(ISD::SHL, DL, VecTy, One, 1533 truncateVecElts(Op, DAG))); 1534 } 1535 case Intrinsic::mips_bnegi_b: 1536 case Intrinsic::mips_bnegi_h: 1537 case Intrinsic::mips_bnegi_w: 1538 case Intrinsic::mips_bnegi_d: 1539 return lowerMSABinaryBitImmIntr(Op, DAG, ISD::XOR, Op->getOperand(2), 1540 !Subtarget.isLittle()); 1541 case Intrinsic::mips_bnz_b: 1542 case Intrinsic::mips_bnz_h: 1543 case Intrinsic::mips_bnz_w: 1544 case Intrinsic::mips_bnz_d: 1545 return DAG.getNode(MipsISD::VALL_NONZERO, DL, Op->getValueType(0), 1546 Op->getOperand(1)); 1547 case Intrinsic::mips_bnz_v: 1548 return DAG.getNode(MipsISD::VANY_NONZERO, DL, Op->getValueType(0), 1549 Op->getOperand(1)); 1550 case Intrinsic::mips_bsel_v: 1551 // bsel_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear) 1552 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1553 Op->getOperand(1), Op->getOperand(3), 1554 Op->getOperand(2)); 1555 case Intrinsic::mips_bseli_b: 1556 // bseli_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear) 1557 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1558 Op->getOperand(1), lowerMSASplatImm(Op, 3, DAG), 1559 Op->getOperand(2)); 1560 case Intrinsic::mips_bset_b: 1561 case Intrinsic::mips_bset_h: 1562 case Intrinsic::mips_bset_w: 1563 case Intrinsic::mips_bset_d: { 1564 EVT VecTy = Op->getValueType(0); 1565 SDValue One = DAG.getConstant(1, DL, VecTy); 1566 1567 return DAG.getNode(ISD::OR, DL, VecTy, Op->getOperand(1), 1568 DAG.getNode(ISD::SHL, DL, VecTy, One, 1569 truncateVecElts(Op, DAG))); 1570 } 1571 case Intrinsic::mips_bseti_b: 1572 case Intrinsic::mips_bseti_h: 1573 case Intrinsic::mips_bseti_w: 1574 case Intrinsic::mips_bseti_d: 1575 return lowerMSABinaryBitImmIntr(Op, DAG, ISD::OR, Op->getOperand(2), 1576 !Subtarget.isLittle()); 1577 case Intrinsic::mips_bz_b: 1578 case Intrinsic::mips_bz_h: 1579 case Intrinsic::mips_bz_w: 1580 case Intrinsic::mips_bz_d: 1581 return DAG.getNode(MipsISD::VALL_ZERO, DL, Op->getValueType(0), 1582 Op->getOperand(1)); 1583 case Intrinsic::mips_bz_v: 1584 return DAG.getNode(MipsISD::VANY_ZERO, DL, Op->getValueType(0), 1585 Op->getOperand(1)); 1586 case Intrinsic::mips_ceq_b: 1587 case Intrinsic::mips_ceq_h: 1588 case Intrinsic::mips_ceq_w: 1589 case Intrinsic::mips_ceq_d: 1590 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1591 Op->getOperand(2), ISD::SETEQ); 1592 case Intrinsic::mips_ceqi_b: 1593 case Intrinsic::mips_ceqi_h: 1594 case Intrinsic::mips_ceqi_w: 1595 case Intrinsic::mips_ceqi_d: 1596 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1597 lowerMSASplatImm(Op, 2, DAG, true), ISD::SETEQ); 1598 case Intrinsic::mips_cle_s_b: 1599 case Intrinsic::mips_cle_s_h: 1600 case Intrinsic::mips_cle_s_w: 1601 case Intrinsic::mips_cle_s_d: 1602 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1603 Op->getOperand(2), ISD::SETLE); 1604 case Intrinsic::mips_clei_s_b: 1605 case Intrinsic::mips_clei_s_h: 1606 case Intrinsic::mips_clei_s_w: 1607 case Intrinsic::mips_clei_s_d: 1608 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1609 lowerMSASplatImm(Op, 2, DAG, true), ISD::SETLE); 1610 case Intrinsic::mips_cle_u_b: 1611 case Intrinsic::mips_cle_u_h: 1612 case Intrinsic::mips_cle_u_w: 1613 case Intrinsic::mips_cle_u_d: 1614 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1615 Op->getOperand(2), ISD::SETULE); 1616 case Intrinsic::mips_clei_u_b: 1617 case Intrinsic::mips_clei_u_h: 1618 case Intrinsic::mips_clei_u_w: 1619 case Intrinsic::mips_clei_u_d: 1620 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1621 lowerMSASplatImm(Op, 2, DAG), ISD::SETULE); 1622 case Intrinsic::mips_clt_s_b: 1623 case Intrinsic::mips_clt_s_h: 1624 case Intrinsic::mips_clt_s_w: 1625 case Intrinsic::mips_clt_s_d: 1626 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1627 Op->getOperand(2), ISD::SETLT); 1628 case Intrinsic::mips_clti_s_b: 1629 case Intrinsic::mips_clti_s_h: 1630 case Intrinsic::mips_clti_s_w: 1631 case Intrinsic::mips_clti_s_d: 1632 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1633 lowerMSASplatImm(Op, 2, DAG, true), ISD::SETLT); 1634 case Intrinsic::mips_clt_u_b: 1635 case Intrinsic::mips_clt_u_h: 1636 case Intrinsic::mips_clt_u_w: 1637 case Intrinsic::mips_clt_u_d: 1638 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1639 Op->getOperand(2), ISD::SETULT); 1640 case Intrinsic::mips_clti_u_b: 1641 case Intrinsic::mips_clti_u_h: 1642 case Intrinsic::mips_clti_u_w: 1643 case Intrinsic::mips_clti_u_d: 1644 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1645 lowerMSASplatImm(Op, 2, DAG), ISD::SETULT); 1646 case Intrinsic::mips_copy_s_b: 1647 case Intrinsic::mips_copy_s_h: 1648 case Intrinsic::mips_copy_s_w: 1649 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT); 1650 case Intrinsic::mips_copy_s_d: 1651 if (Subtarget.hasMips64()) 1652 // Lower directly into VEXTRACT_SEXT_ELT since i64 is legal on Mips64. 1653 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT); 1654 else { 1655 // Lower into the generic EXTRACT_VECTOR_ELT node and let the type 1656 // legalizer and EXTRACT_VECTOR_ELT lowering sort it out. 1657 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), 1658 Op->getValueType(0), Op->getOperand(1), 1659 Op->getOperand(2)); 1660 } 1661 case Intrinsic::mips_copy_u_b: 1662 case Intrinsic::mips_copy_u_h: 1663 case Intrinsic::mips_copy_u_w: 1664 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT); 1665 case Intrinsic::mips_copy_u_d: 1666 if (Subtarget.hasMips64()) 1667 // Lower directly into VEXTRACT_ZEXT_ELT since i64 is legal on Mips64. 1668 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT); 1669 else { 1670 // Lower into the generic EXTRACT_VECTOR_ELT node and let the type 1671 // legalizer and EXTRACT_VECTOR_ELT lowering sort it out. 1672 // Note: When i64 is illegal, this results in copy_s.w instructions 1673 // instead of copy_u.w instructions. This makes no difference to the 1674 // behaviour since i64 is only illegal when the register file is 32-bit. 1675 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), 1676 Op->getValueType(0), Op->getOperand(1), 1677 Op->getOperand(2)); 1678 } 1679 case Intrinsic::mips_div_s_b: 1680 case Intrinsic::mips_div_s_h: 1681 case Intrinsic::mips_div_s_w: 1682 case Intrinsic::mips_div_s_d: 1683 return DAG.getNode(ISD::SDIV, DL, Op->getValueType(0), Op->getOperand(1), 1684 Op->getOperand(2)); 1685 case Intrinsic::mips_div_u_b: 1686 case Intrinsic::mips_div_u_h: 1687 case Intrinsic::mips_div_u_w: 1688 case Intrinsic::mips_div_u_d: 1689 return DAG.getNode(ISD::UDIV, DL, Op->getValueType(0), Op->getOperand(1), 1690 Op->getOperand(2)); 1691 case Intrinsic::mips_fadd_w: 1692 case Intrinsic::mips_fadd_d: { 1693 // TODO: If intrinsics have fast-math-flags, propagate them. 1694 return DAG.getNode(ISD::FADD, DL, Op->getValueType(0), Op->getOperand(1), 1695 Op->getOperand(2)); 1696 } 1697 // Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away 1698 case Intrinsic::mips_fceq_w: 1699 case Intrinsic::mips_fceq_d: 1700 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1701 Op->getOperand(2), ISD::SETOEQ); 1702 case Intrinsic::mips_fcle_w: 1703 case Intrinsic::mips_fcle_d: 1704 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1705 Op->getOperand(2), ISD::SETOLE); 1706 case Intrinsic::mips_fclt_w: 1707 case Intrinsic::mips_fclt_d: 1708 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1709 Op->getOperand(2), ISD::SETOLT); 1710 case Intrinsic::mips_fcne_w: 1711 case Intrinsic::mips_fcne_d: 1712 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1713 Op->getOperand(2), ISD::SETONE); 1714 case Intrinsic::mips_fcor_w: 1715 case Intrinsic::mips_fcor_d: 1716 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1717 Op->getOperand(2), ISD::SETO); 1718 case Intrinsic::mips_fcueq_w: 1719 case Intrinsic::mips_fcueq_d: 1720 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1721 Op->getOperand(2), ISD::SETUEQ); 1722 case Intrinsic::mips_fcule_w: 1723 case Intrinsic::mips_fcule_d: 1724 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1725 Op->getOperand(2), ISD::SETULE); 1726 case Intrinsic::mips_fcult_w: 1727 case Intrinsic::mips_fcult_d: 1728 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1729 Op->getOperand(2), ISD::SETULT); 1730 case Intrinsic::mips_fcun_w: 1731 case Intrinsic::mips_fcun_d: 1732 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1733 Op->getOperand(2), ISD::SETUO); 1734 case Intrinsic::mips_fcune_w: 1735 case Intrinsic::mips_fcune_d: 1736 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1737 Op->getOperand(2), ISD::SETUNE); 1738 case Intrinsic::mips_fdiv_w: 1739 case Intrinsic::mips_fdiv_d: { 1740 // TODO: If intrinsics have fast-math-flags, propagate them. 1741 return DAG.getNode(ISD::FDIV, DL, Op->getValueType(0), Op->getOperand(1), 1742 Op->getOperand(2)); 1743 } 1744 case Intrinsic::mips_ffint_u_w: 1745 case Intrinsic::mips_ffint_u_d: 1746 return DAG.getNode(ISD::UINT_TO_FP, DL, Op->getValueType(0), 1747 Op->getOperand(1)); 1748 case Intrinsic::mips_ffint_s_w: 1749 case Intrinsic::mips_ffint_s_d: 1750 return DAG.getNode(ISD::SINT_TO_FP, DL, Op->getValueType(0), 1751 Op->getOperand(1)); 1752 case Intrinsic::mips_fill_b: 1753 case Intrinsic::mips_fill_h: 1754 case Intrinsic::mips_fill_w: 1755 case Intrinsic::mips_fill_d: { 1756 EVT ResTy = Op->getValueType(0); 1757 SmallVector<SDValue, 16> Ops(ResTy.getVectorNumElements(), 1758 Op->getOperand(1)); 1759 1760 // If ResTy is v2i64 then the type legalizer will break this node down into 1761 // an equivalent v4i32. 1762 return DAG.getBuildVector(ResTy, DL, Ops); 1763 } 1764 case Intrinsic::mips_fexp2_w: 1765 case Intrinsic::mips_fexp2_d: { 1766 // TODO: If intrinsics have fast-math-flags, propagate them. 1767 EVT ResTy = Op->getValueType(0); 1768 return DAG.getNode( 1769 ISD::FMUL, SDLoc(Op), ResTy, Op->getOperand(1), 1770 DAG.getNode(ISD::FEXP2, SDLoc(Op), ResTy, Op->getOperand(2))); 1771 } 1772 case Intrinsic::mips_flog2_w: 1773 case Intrinsic::mips_flog2_d: 1774 return DAG.getNode(ISD::FLOG2, DL, Op->getValueType(0), Op->getOperand(1)); 1775 case Intrinsic::mips_fmadd_w: 1776 case Intrinsic::mips_fmadd_d: 1777 return DAG.getNode(ISD::FMA, SDLoc(Op), Op->getValueType(0), 1778 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); 1779 case Intrinsic::mips_fmul_w: 1780 case Intrinsic::mips_fmul_d: { 1781 // TODO: If intrinsics have fast-math-flags, propagate them. 1782 return DAG.getNode(ISD::FMUL, DL, Op->getValueType(0), Op->getOperand(1), 1783 Op->getOperand(2)); 1784 } 1785 case Intrinsic::mips_fmsub_w: 1786 case Intrinsic::mips_fmsub_d: { 1787 // TODO: If intrinsics have fast-math-flags, propagate them. 1788 EVT ResTy = Op->getValueType(0); 1789 return DAG.getNode(ISD::FSUB, SDLoc(Op), ResTy, Op->getOperand(1), 1790 DAG.getNode(ISD::FMUL, SDLoc(Op), ResTy, 1791 Op->getOperand(2), Op->getOperand(3))); 1792 } 1793 case Intrinsic::mips_frint_w: 1794 case Intrinsic::mips_frint_d: 1795 return DAG.getNode(ISD::FRINT, DL, Op->getValueType(0), Op->getOperand(1)); 1796 case Intrinsic::mips_fsqrt_w: 1797 case Intrinsic::mips_fsqrt_d: 1798 return DAG.getNode(ISD::FSQRT, DL, Op->getValueType(0), Op->getOperand(1)); 1799 case Intrinsic::mips_fsub_w: 1800 case Intrinsic::mips_fsub_d: { 1801 // TODO: If intrinsics have fast-math-flags, propagate them. 1802 return DAG.getNode(ISD::FSUB, DL, Op->getValueType(0), Op->getOperand(1), 1803 Op->getOperand(2)); 1804 } 1805 case Intrinsic::mips_ftrunc_u_w: 1806 case Intrinsic::mips_ftrunc_u_d: 1807 return DAG.getNode(ISD::FP_TO_UINT, DL, Op->getValueType(0), 1808 Op->getOperand(1)); 1809 case Intrinsic::mips_ftrunc_s_w: 1810 case Intrinsic::mips_ftrunc_s_d: 1811 return DAG.getNode(ISD::FP_TO_SINT, DL, Op->getValueType(0), 1812 Op->getOperand(1)); 1813 case Intrinsic::mips_ilvev_b: 1814 case Intrinsic::mips_ilvev_h: 1815 case Intrinsic::mips_ilvev_w: 1816 case Intrinsic::mips_ilvev_d: 1817 return DAG.getNode(MipsISD::ILVEV, DL, Op->getValueType(0), 1818 Op->getOperand(1), Op->getOperand(2)); 1819 case Intrinsic::mips_ilvl_b: 1820 case Intrinsic::mips_ilvl_h: 1821 case Intrinsic::mips_ilvl_w: 1822 case Intrinsic::mips_ilvl_d: 1823 return DAG.getNode(MipsISD::ILVL, DL, Op->getValueType(0), 1824 Op->getOperand(1), Op->getOperand(2)); 1825 case Intrinsic::mips_ilvod_b: 1826 case Intrinsic::mips_ilvod_h: 1827 case Intrinsic::mips_ilvod_w: 1828 case Intrinsic::mips_ilvod_d: 1829 return DAG.getNode(MipsISD::ILVOD, DL, Op->getValueType(0), 1830 Op->getOperand(1), Op->getOperand(2)); 1831 case Intrinsic::mips_ilvr_b: 1832 case Intrinsic::mips_ilvr_h: 1833 case Intrinsic::mips_ilvr_w: 1834 case Intrinsic::mips_ilvr_d: 1835 return DAG.getNode(MipsISD::ILVR, DL, Op->getValueType(0), 1836 Op->getOperand(1), Op->getOperand(2)); 1837 case Intrinsic::mips_insert_b: 1838 case Intrinsic::mips_insert_h: 1839 case Intrinsic::mips_insert_w: 1840 case Intrinsic::mips_insert_d: 1841 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), Op->getValueType(0), 1842 Op->getOperand(1), Op->getOperand(3), Op->getOperand(2)); 1843 case Intrinsic::mips_insve_b: 1844 case Intrinsic::mips_insve_h: 1845 case Intrinsic::mips_insve_w: 1846 case Intrinsic::mips_insve_d: { 1847 // Report an error for out of range values. 1848 int64_t Max; 1849 switch (Intrinsic) { 1850 case Intrinsic::mips_insve_b: Max = 15; break; 1851 case Intrinsic::mips_insve_h: Max = 7; break; 1852 case Intrinsic::mips_insve_w: Max = 3; break; 1853 case Intrinsic::mips_insve_d: Max = 1; break; 1854 default: llvm_unreachable("Unmatched intrinsic"); 1855 } 1856 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 1857 if (Value < 0 || Value > Max) 1858 report_fatal_error("Immediate out of range"); 1859 return DAG.getNode(MipsISD::INSVE, DL, Op->getValueType(0), 1860 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3), 1861 DAG.getConstant(0, DL, MVT::i32)); 1862 } 1863 case Intrinsic::mips_ldi_b: 1864 case Intrinsic::mips_ldi_h: 1865 case Intrinsic::mips_ldi_w: 1866 case Intrinsic::mips_ldi_d: 1867 return lowerMSASplatImm(Op, 1, DAG, true); 1868 case Intrinsic::mips_lsa: 1869 case Intrinsic::mips_dlsa: { 1870 EVT ResTy = Op->getValueType(0); 1871 return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1), 1872 DAG.getNode(ISD::SHL, SDLoc(Op), ResTy, 1873 Op->getOperand(2), Op->getOperand(3))); 1874 } 1875 case Intrinsic::mips_maddv_b: 1876 case Intrinsic::mips_maddv_h: 1877 case Intrinsic::mips_maddv_w: 1878 case Intrinsic::mips_maddv_d: { 1879 EVT ResTy = Op->getValueType(0); 1880 return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1), 1881 DAG.getNode(ISD::MUL, SDLoc(Op), ResTy, 1882 Op->getOperand(2), Op->getOperand(3))); 1883 } 1884 case Intrinsic::mips_max_s_b: 1885 case Intrinsic::mips_max_s_h: 1886 case Intrinsic::mips_max_s_w: 1887 case Intrinsic::mips_max_s_d: 1888 return DAG.getNode(MipsISD::VSMAX, DL, Op->getValueType(0), 1889 Op->getOperand(1), Op->getOperand(2)); 1890 case Intrinsic::mips_max_u_b: 1891 case Intrinsic::mips_max_u_h: 1892 case Intrinsic::mips_max_u_w: 1893 case Intrinsic::mips_max_u_d: 1894 return DAG.getNode(MipsISD::VUMAX, DL, Op->getValueType(0), 1895 Op->getOperand(1), Op->getOperand(2)); 1896 case Intrinsic::mips_maxi_s_b: 1897 case Intrinsic::mips_maxi_s_h: 1898 case Intrinsic::mips_maxi_s_w: 1899 case Intrinsic::mips_maxi_s_d: 1900 return DAG.getNode(MipsISD::VSMAX, DL, Op->getValueType(0), 1901 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG, true)); 1902 case Intrinsic::mips_maxi_u_b: 1903 case Intrinsic::mips_maxi_u_h: 1904 case Intrinsic::mips_maxi_u_w: 1905 case Intrinsic::mips_maxi_u_d: 1906 return DAG.getNode(MipsISD::VUMAX, DL, Op->getValueType(0), 1907 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 1908 case Intrinsic::mips_min_s_b: 1909 case Intrinsic::mips_min_s_h: 1910 case Intrinsic::mips_min_s_w: 1911 case Intrinsic::mips_min_s_d: 1912 return DAG.getNode(MipsISD::VSMIN, DL, Op->getValueType(0), 1913 Op->getOperand(1), Op->getOperand(2)); 1914 case Intrinsic::mips_min_u_b: 1915 case Intrinsic::mips_min_u_h: 1916 case Intrinsic::mips_min_u_w: 1917 case Intrinsic::mips_min_u_d: 1918 return DAG.getNode(MipsISD::VUMIN, DL, Op->getValueType(0), 1919 Op->getOperand(1), Op->getOperand(2)); 1920 case Intrinsic::mips_mini_s_b: 1921 case Intrinsic::mips_mini_s_h: 1922 case Intrinsic::mips_mini_s_w: 1923 case Intrinsic::mips_mini_s_d: 1924 return DAG.getNode(MipsISD::VSMIN, DL, Op->getValueType(0), 1925 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG, true)); 1926 case Intrinsic::mips_mini_u_b: 1927 case Intrinsic::mips_mini_u_h: 1928 case Intrinsic::mips_mini_u_w: 1929 case Intrinsic::mips_mini_u_d: 1930 return DAG.getNode(MipsISD::VUMIN, DL, Op->getValueType(0), 1931 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 1932 case Intrinsic::mips_mod_s_b: 1933 case Intrinsic::mips_mod_s_h: 1934 case Intrinsic::mips_mod_s_w: 1935 case Intrinsic::mips_mod_s_d: 1936 return DAG.getNode(ISD::SREM, DL, Op->getValueType(0), Op->getOperand(1), 1937 Op->getOperand(2)); 1938 case Intrinsic::mips_mod_u_b: 1939 case Intrinsic::mips_mod_u_h: 1940 case Intrinsic::mips_mod_u_w: 1941 case Intrinsic::mips_mod_u_d: 1942 return DAG.getNode(ISD::UREM, DL, Op->getValueType(0), Op->getOperand(1), 1943 Op->getOperand(2)); 1944 case Intrinsic::mips_mulv_b: 1945 case Intrinsic::mips_mulv_h: 1946 case Intrinsic::mips_mulv_w: 1947 case Intrinsic::mips_mulv_d: 1948 return DAG.getNode(ISD::MUL, DL, Op->getValueType(0), Op->getOperand(1), 1949 Op->getOperand(2)); 1950 case Intrinsic::mips_msubv_b: 1951 case Intrinsic::mips_msubv_h: 1952 case Intrinsic::mips_msubv_w: 1953 case Intrinsic::mips_msubv_d: { 1954 EVT ResTy = Op->getValueType(0); 1955 return DAG.getNode(ISD::SUB, SDLoc(Op), ResTy, Op->getOperand(1), 1956 DAG.getNode(ISD::MUL, SDLoc(Op), ResTy, 1957 Op->getOperand(2), Op->getOperand(3))); 1958 } 1959 case Intrinsic::mips_nlzc_b: 1960 case Intrinsic::mips_nlzc_h: 1961 case Intrinsic::mips_nlzc_w: 1962 case Intrinsic::mips_nlzc_d: 1963 return DAG.getNode(ISD::CTLZ, DL, Op->getValueType(0), Op->getOperand(1)); 1964 case Intrinsic::mips_nor_v: { 1965 SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0), 1966 Op->getOperand(1), Op->getOperand(2)); 1967 return DAG.getNOT(DL, Res, Res->getValueType(0)); 1968 } 1969 case Intrinsic::mips_nori_b: { 1970 SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0), 1971 Op->getOperand(1), 1972 lowerMSASplatImm(Op, 2, DAG)); 1973 return DAG.getNOT(DL, Res, Res->getValueType(0)); 1974 } 1975 case Intrinsic::mips_or_v: 1976 return DAG.getNode(ISD::OR, DL, Op->getValueType(0), Op->getOperand(1), 1977 Op->getOperand(2)); 1978 case Intrinsic::mips_ori_b: 1979 return DAG.getNode(ISD::OR, DL, Op->getValueType(0), 1980 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 1981 case Intrinsic::mips_pckev_b: 1982 case Intrinsic::mips_pckev_h: 1983 case Intrinsic::mips_pckev_w: 1984 case Intrinsic::mips_pckev_d: 1985 return DAG.getNode(MipsISD::PCKEV, DL, Op->getValueType(0), 1986 Op->getOperand(1), Op->getOperand(2)); 1987 case Intrinsic::mips_pckod_b: 1988 case Intrinsic::mips_pckod_h: 1989 case Intrinsic::mips_pckod_w: 1990 case Intrinsic::mips_pckod_d: 1991 return DAG.getNode(MipsISD::PCKOD, DL, Op->getValueType(0), 1992 Op->getOperand(1), Op->getOperand(2)); 1993 case Intrinsic::mips_pcnt_b: 1994 case Intrinsic::mips_pcnt_h: 1995 case Intrinsic::mips_pcnt_w: 1996 case Intrinsic::mips_pcnt_d: 1997 return DAG.getNode(ISD::CTPOP, DL, Op->getValueType(0), Op->getOperand(1)); 1998 case Intrinsic::mips_sat_s_b: 1999 case Intrinsic::mips_sat_s_h: 2000 case Intrinsic::mips_sat_s_w: 2001 case Intrinsic::mips_sat_s_d: 2002 case Intrinsic::mips_sat_u_b: 2003 case Intrinsic::mips_sat_u_h: 2004 case Intrinsic::mips_sat_u_w: 2005 case Intrinsic::mips_sat_u_d: { 2006 // Report an error for out of range values. 2007 int64_t Max; 2008 switch (Intrinsic) { 2009 case Intrinsic::mips_sat_s_b: 2010 case Intrinsic::mips_sat_u_b: Max = 7; break; 2011 case Intrinsic::mips_sat_s_h: 2012 case Intrinsic::mips_sat_u_h: Max = 15; break; 2013 case Intrinsic::mips_sat_s_w: 2014 case Intrinsic::mips_sat_u_w: Max = 31; break; 2015 case Intrinsic::mips_sat_s_d: 2016 case Intrinsic::mips_sat_u_d: Max = 63; break; 2017 default: llvm_unreachable("Unmatched intrinsic"); 2018 } 2019 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 2020 if (Value < 0 || Value > Max) 2021 report_fatal_error("Immediate out of range"); 2022 return SDValue(); 2023 } 2024 case Intrinsic::mips_shf_b: 2025 case Intrinsic::mips_shf_h: 2026 case Intrinsic::mips_shf_w: { 2027 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 2028 if (Value < 0 || Value > 255) 2029 report_fatal_error("Immediate out of range"); 2030 return DAG.getNode(MipsISD::SHF, DL, Op->getValueType(0), 2031 Op->getOperand(2), Op->getOperand(1)); 2032 } 2033 case Intrinsic::mips_sldi_b: 2034 case Intrinsic::mips_sldi_h: 2035 case Intrinsic::mips_sldi_w: 2036 case Intrinsic::mips_sldi_d: { 2037 // Report an error for out of range values. 2038 int64_t Max; 2039 switch (Intrinsic) { 2040 case Intrinsic::mips_sldi_b: Max = 15; break; 2041 case Intrinsic::mips_sldi_h: Max = 7; break; 2042 case Intrinsic::mips_sldi_w: Max = 3; break; 2043 case Intrinsic::mips_sldi_d: Max = 1; break; 2044 default: llvm_unreachable("Unmatched intrinsic"); 2045 } 2046 int64_t Value = cast<ConstantSDNode>(Op->getOperand(3))->getSExtValue(); 2047 if (Value < 0 || Value > Max) 2048 report_fatal_error("Immediate out of range"); 2049 return SDValue(); 2050 } 2051 case Intrinsic::mips_sll_b: 2052 case Intrinsic::mips_sll_h: 2053 case Intrinsic::mips_sll_w: 2054 case Intrinsic::mips_sll_d: 2055 return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), Op->getOperand(1), 2056 truncateVecElts(Op, DAG)); 2057 case Intrinsic::mips_slli_b: 2058 case Intrinsic::mips_slli_h: 2059 case Intrinsic::mips_slli_w: 2060 case Intrinsic::mips_slli_d: 2061 return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), 2062 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2063 case Intrinsic::mips_splat_b: 2064 case Intrinsic::mips_splat_h: 2065 case Intrinsic::mips_splat_w: 2066 case Intrinsic::mips_splat_d: 2067 // We can't lower via VECTOR_SHUFFLE because it requires constant shuffle 2068 // masks, nor can we lower via BUILD_VECTOR & EXTRACT_VECTOR_ELT because 2069 // EXTRACT_VECTOR_ELT can't extract i64's on MIPS32. 2070 // Instead we lower to MipsISD::VSHF and match from there. 2071 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0), 2072 lowerMSASplatZExt(Op, 2, DAG), Op->getOperand(1), 2073 Op->getOperand(1)); 2074 case Intrinsic::mips_splati_b: 2075 case Intrinsic::mips_splati_h: 2076 case Intrinsic::mips_splati_w: 2077 case Intrinsic::mips_splati_d: 2078 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0), 2079 lowerMSASplatImm(Op, 2, DAG), Op->getOperand(1), 2080 Op->getOperand(1)); 2081 case Intrinsic::mips_sra_b: 2082 case Intrinsic::mips_sra_h: 2083 case Intrinsic::mips_sra_w: 2084 case Intrinsic::mips_sra_d: 2085 return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), Op->getOperand(1), 2086 truncateVecElts(Op, DAG)); 2087 case Intrinsic::mips_srai_b: 2088 case Intrinsic::mips_srai_h: 2089 case Intrinsic::mips_srai_w: 2090 case Intrinsic::mips_srai_d: 2091 return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), 2092 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2093 case Intrinsic::mips_srari_b: 2094 case Intrinsic::mips_srari_h: 2095 case Intrinsic::mips_srari_w: 2096 case Intrinsic::mips_srari_d: { 2097 // Report an error for out of range values. 2098 int64_t Max; 2099 switch (Intrinsic) { 2100 case Intrinsic::mips_srari_b: Max = 7; break; 2101 case Intrinsic::mips_srari_h: Max = 15; break; 2102 case Intrinsic::mips_srari_w: Max = 31; break; 2103 case Intrinsic::mips_srari_d: Max = 63; break; 2104 default: llvm_unreachable("Unmatched intrinsic"); 2105 } 2106 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 2107 if (Value < 0 || Value > Max) 2108 report_fatal_error("Immediate out of range"); 2109 return SDValue(); 2110 } 2111 case Intrinsic::mips_srl_b: 2112 case Intrinsic::mips_srl_h: 2113 case Intrinsic::mips_srl_w: 2114 case Intrinsic::mips_srl_d: 2115 return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), Op->getOperand(1), 2116 truncateVecElts(Op, DAG)); 2117 case Intrinsic::mips_srli_b: 2118 case Intrinsic::mips_srli_h: 2119 case Intrinsic::mips_srli_w: 2120 case Intrinsic::mips_srli_d: 2121 return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), 2122 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2123 case Intrinsic::mips_srlri_b: 2124 case Intrinsic::mips_srlri_h: 2125 case Intrinsic::mips_srlri_w: 2126 case Intrinsic::mips_srlri_d: { 2127 // Report an error for out of range values. 2128 int64_t Max; 2129 switch (Intrinsic) { 2130 case Intrinsic::mips_srlri_b: Max = 7; break; 2131 case Intrinsic::mips_srlri_h: Max = 15; break; 2132 case Intrinsic::mips_srlri_w: Max = 31; break; 2133 case Intrinsic::mips_srlri_d: Max = 63; break; 2134 default: llvm_unreachable("Unmatched intrinsic"); 2135 } 2136 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 2137 if (Value < 0 || Value > Max) 2138 report_fatal_error("Immediate out of range"); 2139 return SDValue(); 2140 } 2141 case Intrinsic::mips_subv_b: 2142 case Intrinsic::mips_subv_h: 2143 case Intrinsic::mips_subv_w: 2144 case Intrinsic::mips_subv_d: 2145 return DAG.getNode(ISD::SUB, DL, Op->getValueType(0), Op->getOperand(1), 2146 Op->getOperand(2)); 2147 case Intrinsic::mips_subvi_b: 2148 case Intrinsic::mips_subvi_h: 2149 case Intrinsic::mips_subvi_w: 2150 case Intrinsic::mips_subvi_d: 2151 return DAG.getNode(ISD::SUB, DL, Op->getValueType(0), 2152 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2153 case Intrinsic::mips_vshf_b: 2154 case Intrinsic::mips_vshf_h: 2155 case Intrinsic::mips_vshf_w: 2156 case Intrinsic::mips_vshf_d: 2157 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0), 2158 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); 2159 case Intrinsic::mips_xor_v: 2160 return DAG.getNode(ISD::XOR, DL, Op->getValueType(0), Op->getOperand(1), 2161 Op->getOperand(2)); 2162 case Intrinsic::mips_xori_b: 2163 return DAG.getNode(ISD::XOR, DL, Op->getValueType(0), 2164 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2165 case Intrinsic::thread_pointer: { 2166 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2167 return DAG.getNode(MipsISD::ThreadPointer, DL, PtrVT); 2168 } 2169 } 2170 } 2171 2172 static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, 2173 const MipsSubtarget &Subtarget) { 2174 SDLoc DL(Op); 2175 SDValue ChainIn = Op->getOperand(0); 2176 SDValue Address = Op->getOperand(2); 2177 SDValue Offset = Op->getOperand(3); 2178 EVT ResTy = Op->getValueType(0); 2179 EVT PtrTy = Address->getValueType(0); 2180 2181 // For N64 addresses have the underlying type MVT::i64. This intrinsic 2182 // however takes an i32 signed constant offset. The actual type of the 2183 // intrinsic is a scaled signed i10. 2184 if (Subtarget.isABI_N64()) 2185 Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); 2186 2187 Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); 2188 return DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(), 2189 /* Alignment = */ 16); 2190 } 2191 2192 SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, 2193 SelectionDAG &DAG) const { 2194 unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue(); 2195 switch (Intr) { 2196 default: 2197 return SDValue(); 2198 case Intrinsic::mips_extp: 2199 return lowerDSPIntr(Op, DAG, MipsISD::EXTP); 2200 case Intrinsic::mips_extpdp: 2201 return lowerDSPIntr(Op, DAG, MipsISD::EXTPDP); 2202 case Intrinsic::mips_extr_w: 2203 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_W); 2204 case Intrinsic::mips_extr_r_w: 2205 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W); 2206 case Intrinsic::mips_extr_rs_w: 2207 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W); 2208 case Intrinsic::mips_extr_s_h: 2209 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H); 2210 case Intrinsic::mips_mthlip: 2211 return lowerDSPIntr(Op, DAG, MipsISD::MTHLIP); 2212 case Intrinsic::mips_mulsaq_s_w_ph: 2213 return lowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH); 2214 case Intrinsic::mips_maq_s_w_phl: 2215 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL); 2216 case Intrinsic::mips_maq_s_w_phr: 2217 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR); 2218 case Intrinsic::mips_maq_sa_w_phl: 2219 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL); 2220 case Intrinsic::mips_maq_sa_w_phr: 2221 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR); 2222 case Intrinsic::mips_dpaq_s_w_ph: 2223 return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH); 2224 case Intrinsic::mips_dpsq_s_w_ph: 2225 return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH); 2226 case Intrinsic::mips_dpaq_sa_l_w: 2227 return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W); 2228 case Intrinsic::mips_dpsq_sa_l_w: 2229 return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W); 2230 case Intrinsic::mips_dpaqx_s_w_ph: 2231 return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH); 2232 case Intrinsic::mips_dpaqx_sa_w_ph: 2233 return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH); 2234 case Intrinsic::mips_dpsqx_s_w_ph: 2235 return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH); 2236 case Intrinsic::mips_dpsqx_sa_w_ph: 2237 return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH); 2238 case Intrinsic::mips_ld_b: 2239 case Intrinsic::mips_ld_h: 2240 case Intrinsic::mips_ld_w: 2241 case Intrinsic::mips_ld_d: 2242 return lowerMSALoadIntr(Op, DAG, Intr, Subtarget); 2243 } 2244 } 2245 2246 static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, 2247 const MipsSubtarget &Subtarget) { 2248 SDLoc DL(Op); 2249 SDValue ChainIn = Op->getOperand(0); 2250 SDValue Value = Op->getOperand(2); 2251 SDValue Address = Op->getOperand(3); 2252 SDValue Offset = Op->getOperand(4); 2253 EVT PtrTy = Address->getValueType(0); 2254 2255 // For N64 addresses have the underlying type MVT::i64. This intrinsic 2256 // however takes an i32 signed constant offset. The actual type of the 2257 // intrinsic is a scaled signed i10. 2258 if (Subtarget.isABI_N64()) 2259 Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); 2260 2261 Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); 2262 2263 return DAG.getStore(ChainIn, DL, Value, Address, MachinePointerInfo(), 2264 /* Alignment = */ 16); 2265 } 2266 2267 SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op, 2268 SelectionDAG &DAG) const { 2269 unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue(); 2270 switch (Intr) { 2271 default: 2272 return SDValue(); 2273 case Intrinsic::mips_st_b: 2274 case Intrinsic::mips_st_h: 2275 case Intrinsic::mips_st_w: 2276 case Intrinsic::mips_st_d: 2277 return lowerMSAStoreIntr(Op, DAG, Intr, Subtarget); 2278 } 2279 } 2280 2281 /// \brief Check if the given BuildVectorSDNode is a splat. 2282 /// This method currently relies on DAG nodes being reused when equivalent, 2283 /// so it's possible for this to return false even when isConstantSplat returns 2284 /// true. 2285 static bool isSplatVector(const BuildVectorSDNode *N) { 2286 unsigned int nOps = N->getNumOperands(); 2287 assert(nOps > 1 && "isSplatVector has 0 or 1 sized build vector"); 2288 2289 SDValue Operand0 = N->getOperand(0); 2290 2291 for (unsigned int i = 1; i < nOps; ++i) { 2292 if (N->getOperand(i) != Operand0) 2293 return false; 2294 } 2295 2296 return true; 2297 } 2298 2299 // Lower ISD::EXTRACT_VECTOR_ELT into MipsISD::VEXTRACT_SEXT_ELT. 2300 // 2301 // The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We 2302 // choose to sign-extend but we could have equally chosen zero-extend. The 2303 // DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT 2304 // result into this node later (possibly changing it to a zero-extend in the 2305 // process). 2306 SDValue MipsSETargetLowering:: 2307 lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { 2308 SDLoc DL(Op); 2309 EVT ResTy = Op->getValueType(0); 2310 SDValue Op0 = Op->getOperand(0); 2311 EVT VecTy = Op0->getValueType(0); 2312 2313 if (!VecTy.is128BitVector()) 2314 return SDValue(); 2315 2316 if (ResTy.isInteger()) { 2317 SDValue Op1 = Op->getOperand(1); 2318 EVT EltTy = VecTy.getVectorElementType(); 2319 return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, DL, ResTy, Op0, Op1, 2320 DAG.getValueType(EltTy)); 2321 } 2322 2323 return Op; 2324 } 2325 2326 static bool isConstantOrUndef(const SDValue Op) { 2327 if (Op->isUndef()) 2328 return true; 2329 if (isa<ConstantSDNode>(Op)) 2330 return true; 2331 if (isa<ConstantFPSDNode>(Op)) 2332 return true; 2333 return false; 2334 } 2335 2336 static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) { 2337 for (unsigned i = 0; i < Op->getNumOperands(); ++i) 2338 if (isConstantOrUndef(Op->getOperand(i))) 2339 return true; 2340 return false; 2341 } 2342 2343 // Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the 2344 // backend. 2345 // 2346 // Lowers according to the following rules: 2347 // - Constant splats are legal as-is as long as the SplatBitSize is a power of 2348 // 2 less than or equal to 64 and the value fits into a signed 10-bit 2349 // immediate 2350 // - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize 2351 // is a power of 2 less than or equal to 64 and the value does not fit into a 2352 // signed 10-bit immediate 2353 // - Non-constant splats are legal as-is. 2354 // - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT. 2355 // - All others are illegal and must be expanded. 2356 SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op, 2357 SelectionDAG &DAG) const { 2358 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op); 2359 EVT ResTy = Op->getValueType(0); 2360 SDLoc DL(Op); 2361 APInt SplatValue, SplatUndef; 2362 unsigned SplatBitSize; 2363 bool HasAnyUndefs; 2364 2365 if (!Subtarget.hasMSA() || !ResTy.is128BitVector()) 2366 return SDValue(); 2367 2368 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, 2369 HasAnyUndefs, 8, 2370 !Subtarget.isLittle()) && SplatBitSize <= 64) { 2371 // We can only cope with 8, 16, 32, or 64-bit elements 2372 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 && 2373 SplatBitSize != 64) 2374 return SDValue(); 2375 2376 // If the value isn't an integer type we will have to bitcast 2377 // from an integer type first. Also, if there are any undefs, we must 2378 // lower them to defined values first. 2379 if (ResTy.isInteger() && !HasAnyUndefs) 2380 return Op; 2381 2382 EVT ViaVecTy; 2383 2384 switch (SplatBitSize) { 2385 default: 2386 return SDValue(); 2387 case 8: 2388 ViaVecTy = MVT::v16i8; 2389 break; 2390 case 16: 2391 ViaVecTy = MVT::v8i16; 2392 break; 2393 case 32: 2394 ViaVecTy = MVT::v4i32; 2395 break; 2396 case 64: 2397 // There's no fill.d to fall back on for 64-bit values 2398 return SDValue(); 2399 } 2400 2401 // SelectionDAG::getConstant will promote SplatValue appropriately. 2402 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy); 2403 2404 // Bitcast to the type we originally wanted 2405 if (ViaVecTy != ResTy) 2406 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result); 2407 2408 return Result; 2409 } else if (isSplatVector(Node)) 2410 return Op; 2411 else if (!isConstantOrUndefBUILD_VECTOR(Node)) { 2412 // Use INSERT_VECTOR_ELT operations rather than expand to stores. 2413 // The resulting code is the same length as the expansion, but it doesn't 2414 // use memory operations 2415 EVT ResTy = Node->getValueType(0); 2416 2417 assert(ResTy.isVector()); 2418 2419 unsigned NumElts = ResTy.getVectorNumElements(); 2420 SDValue Vector = DAG.getUNDEF(ResTy); 2421 for (unsigned i = 0; i < NumElts; ++i) { 2422 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, 2423 Node->getOperand(i), 2424 DAG.getConstant(i, DL, MVT::i32)); 2425 } 2426 return Vector; 2427 } 2428 2429 return SDValue(); 2430 } 2431 2432 // Lower VECTOR_SHUFFLE into SHF (if possible). 2433 // 2434 // SHF splits the vector into blocks of four elements, then shuffles these 2435 // elements according to a <4 x i2> constant (encoded as an integer immediate). 2436 // 2437 // It is therefore possible to lower into SHF when the mask takes the form: 2438 // <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...> 2439 // When undef's appear they are treated as if they were whatever value is 2440 // necessary in order to fit the above forms. 2441 // 2442 // For example: 2443 // %2 = shufflevector <8 x i16> %0, <8 x i16> undef, 2444 // <8 x i32> <i32 3, i32 2, i32 1, i32 0, 2445 // i32 7, i32 6, i32 5, i32 4> 2446 // is lowered to: 2447 // (SHF_H $w0, $w1, 27) 2448 // where the 27 comes from: 2449 // 3 + (2 << 2) + (1 << 4) + (0 << 6) 2450 static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy, 2451 SmallVector<int, 16> Indices, 2452 SelectionDAG &DAG) { 2453 int SHFIndices[4] = { -1, -1, -1, -1 }; 2454 2455 if (Indices.size() < 4) 2456 return SDValue(); 2457 2458 for (unsigned i = 0; i < 4; ++i) { 2459 for (unsigned j = i; j < Indices.size(); j += 4) { 2460 int Idx = Indices[j]; 2461 2462 // Convert from vector index to 4-element subvector index 2463 // If an index refers to an element outside of the subvector then give up 2464 if (Idx != -1) { 2465 Idx -= 4 * (j / 4); 2466 if (Idx < 0 || Idx >= 4) 2467 return SDValue(); 2468 } 2469 2470 // If the mask has an undef, replace it with the current index. 2471 // Note that it might still be undef if the current index is also undef 2472 if (SHFIndices[i] == -1) 2473 SHFIndices[i] = Idx; 2474 2475 // Check that non-undef values are the same as in the mask. If they 2476 // aren't then give up 2477 if (!(Idx == -1 || Idx == SHFIndices[i])) 2478 return SDValue(); 2479 } 2480 } 2481 2482 // Calculate the immediate. Replace any remaining undefs with zero 2483 APInt Imm(32, 0); 2484 for (int i = 3; i >= 0; --i) { 2485 int Idx = SHFIndices[i]; 2486 2487 if (Idx == -1) 2488 Idx = 0; 2489 2490 Imm <<= 2; 2491 Imm |= Idx & 0x3; 2492 } 2493 2494 SDLoc DL(Op); 2495 return DAG.getNode(MipsISD::SHF, DL, ResTy, 2496 DAG.getConstant(Imm, DL, MVT::i32), Op->getOperand(0)); 2497 } 2498 2499 /// Determine whether a range fits a regular pattern of values. 2500 /// This function accounts for the possibility of jumping over the End iterator. 2501 template <typename ValType> 2502 static bool 2503 fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin, 2504 unsigned CheckStride, 2505 typename SmallVectorImpl<ValType>::const_iterator End, 2506 ValType ExpectedIndex, unsigned ExpectedIndexStride) { 2507 auto &I = Begin; 2508 2509 while (I != End) { 2510 if (*I != -1 && *I != ExpectedIndex) 2511 return false; 2512 ExpectedIndex += ExpectedIndexStride; 2513 2514 // Incrementing past End is undefined behaviour so we must increment one 2515 // step at a time and check for End at each step. 2516 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I) 2517 ; // Empty loop body. 2518 } 2519 return true; 2520 } 2521 2522 // Determine whether VECTOR_SHUFFLE is a SPLATI. 2523 // 2524 // It is a SPLATI when the mask is: 2525 // <x, x, x, ...> 2526 // where x is any valid index. 2527 // 2528 // When undef's appear in the mask they are treated as if they were whatever 2529 // value is necessary in order to fit the above form. 2530 static bool isVECTOR_SHUFFLE_SPLATI(SDValue Op, EVT ResTy, 2531 SmallVector<int, 16> Indices, 2532 SelectionDAG &DAG) { 2533 assert((Indices.size() % 2) == 0); 2534 2535 int SplatIndex = -1; 2536 for (const auto &V : Indices) { 2537 if (V != -1) { 2538 SplatIndex = V; 2539 break; 2540 } 2541 } 2542 2543 return fitsRegularPattern<int>(Indices.begin(), 1, Indices.end(), SplatIndex, 2544 0); 2545 } 2546 2547 // Lower VECTOR_SHUFFLE into ILVEV (if possible). 2548 // 2549 // ILVEV interleaves the even elements from each vector. 2550 // 2551 // It is possible to lower into ILVEV when the mask consists of two of the 2552 // following forms interleaved: 2553 // <0, 2, 4, ...> 2554 // <n, n+2, n+4, ...> 2555 // where n is the number of elements in the vector. 2556 // For example: 2557 // <0, 0, 2, 2, 4, 4, ...> 2558 // <0, n, 2, n+2, 4, n+4, ...> 2559 // 2560 // When undef's appear in the mask they are treated as if they were whatever 2561 // value is necessary in order to fit the above forms. 2562 static SDValue lowerVECTOR_SHUFFLE_ILVEV(SDValue Op, EVT ResTy, 2563 SmallVector<int, 16> Indices, 2564 SelectionDAG &DAG) { 2565 assert((Indices.size() % 2) == 0); 2566 2567 SDValue Wt; 2568 SDValue Ws; 2569 const auto &Begin = Indices.begin(); 2570 const auto &End = Indices.end(); 2571 2572 // Check even elements are taken from the even elements of one half or the 2573 // other and pick an operand accordingly. 2574 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2)) 2575 Wt = Op->getOperand(0); 2576 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 2)) 2577 Wt = Op->getOperand(1); 2578 else 2579 return SDValue(); 2580 2581 // Check odd elements are taken from the even elements of one half or the 2582 // other and pick an operand accordingly. 2583 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2)) 2584 Ws = Op->getOperand(0); 2585 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 2)) 2586 Ws = Op->getOperand(1); 2587 else 2588 return SDValue(); 2589 2590 return DAG.getNode(MipsISD::ILVEV, SDLoc(Op), ResTy, Ws, Wt); 2591 } 2592 2593 // Lower VECTOR_SHUFFLE into ILVOD (if possible). 2594 // 2595 // ILVOD interleaves the odd elements from each vector. 2596 // 2597 // It is possible to lower into ILVOD when the mask consists of two of the 2598 // following forms interleaved: 2599 // <1, 3, 5, ...> 2600 // <n+1, n+3, n+5, ...> 2601 // where n is the number of elements in the vector. 2602 // For example: 2603 // <1, 1, 3, 3, 5, 5, ...> 2604 // <1, n+1, 3, n+3, 5, n+5, ...> 2605 // 2606 // When undef's appear in the mask they are treated as if they were whatever 2607 // value is necessary in order to fit the above forms. 2608 static SDValue lowerVECTOR_SHUFFLE_ILVOD(SDValue Op, EVT ResTy, 2609 SmallVector<int, 16> Indices, 2610 SelectionDAG &DAG) { 2611 assert((Indices.size() % 2) == 0); 2612 2613 SDValue Wt; 2614 SDValue Ws; 2615 const auto &Begin = Indices.begin(); 2616 const auto &End = Indices.end(); 2617 2618 // Check even elements are taken from the odd elements of one half or the 2619 // other and pick an operand accordingly. 2620 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2)) 2621 Wt = Op->getOperand(0); 2622 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + 1, 2)) 2623 Wt = Op->getOperand(1); 2624 else 2625 return SDValue(); 2626 2627 // Check odd elements are taken from the odd elements of one half or the 2628 // other and pick an operand accordingly. 2629 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2)) 2630 Ws = Op->getOperand(0); 2631 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + 1, 2)) 2632 Ws = Op->getOperand(1); 2633 else 2634 return SDValue(); 2635 2636 return DAG.getNode(MipsISD::ILVOD, SDLoc(Op), ResTy, Wt, Ws); 2637 } 2638 2639 // Lower VECTOR_SHUFFLE into ILVR (if possible). 2640 // 2641 // ILVR interleaves consecutive elements from the right (lowest-indexed) half of 2642 // each vector. 2643 // 2644 // It is possible to lower into ILVR when the mask consists of two of the 2645 // following forms interleaved: 2646 // <0, 1, 2, ...> 2647 // <n, n+1, n+2, ...> 2648 // where n is the number of elements in the vector. 2649 // For example: 2650 // <0, 0, 1, 1, 2, 2, ...> 2651 // <0, n, 1, n+1, 2, n+2, ...> 2652 // 2653 // When undef's appear in the mask they are treated as if they were whatever 2654 // value is necessary in order to fit the above forms. 2655 static SDValue lowerVECTOR_SHUFFLE_ILVR(SDValue Op, EVT ResTy, 2656 SmallVector<int, 16> Indices, 2657 SelectionDAG &DAG) { 2658 assert((Indices.size() % 2) == 0); 2659 2660 SDValue Wt; 2661 SDValue Ws; 2662 const auto &Begin = Indices.begin(); 2663 const auto &End = Indices.end(); 2664 2665 // Check even elements are taken from the right (lowest-indexed) elements of 2666 // one half or the other and pick an operand accordingly. 2667 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1)) 2668 Wt = Op->getOperand(0); 2669 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 1)) 2670 Wt = Op->getOperand(1); 2671 else 2672 return SDValue(); 2673 2674 // Check odd elements are taken from the right (lowest-indexed) elements of 2675 // one half or the other and pick an operand accordingly. 2676 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1)) 2677 Ws = Op->getOperand(0); 2678 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 1)) 2679 Ws = Op->getOperand(1); 2680 else 2681 return SDValue(); 2682 2683 return DAG.getNode(MipsISD::ILVR, SDLoc(Op), ResTy, Ws, Wt); 2684 } 2685 2686 // Lower VECTOR_SHUFFLE into ILVL (if possible). 2687 // 2688 // ILVL interleaves consecutive elements from the left (highest-indexed) half 2689 // of each vector. 2690 // 2691 // It is possible to lower into ILVL when the mask consists of two of the 2692 // following forms interleaved: 2693 // <x, x+1, x+2, ...> 2694 // <n+x, n+x+1, n+x+2, ...> 2695 // where n is the number of elements in the vector and x is half n. 2696 // For example: 2697 // <x, x, x+1, x+1, x+2, x+2, ...> 2698 // <x, n+x, x+1, n+x+1, x+2, n+x+2, ...> 2699 // 2700 // When undef's appear in the mask they are treated as if they were whatever 2701 // value is necessary in order to fit the above forms. 2702 static SDValue lowerVECTOR_SHUFFLE_ILVL(SDValue Op, EVT ResTy, 2703 SmallVector<int, 16> Indices, 2704 SelectionDAG &DAG) { 2705 assert((Indices.size() % 2) == 0); 2706 2707 unsigned HalfSize = Indices.size() / 2; 2708 SDValue Wt; 2709 SDValue Ws; 2710 const auto &Begin = Indices.begin(); 2711 const auto &End = Indices.end(); 2712 2713 // Check even elements are taken from the left (highest-indexed) elements of 2714 // one half or the other and pick an operand accordingly. 2715 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1)) 2716 Wt = Op->getOperand(0); 2717 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + HalfSize, 1)) 2718 Wt = Op->getOperand(1); 2719 else 2720 return SDValue(); 2721 2722 // Check odd elements are taken from the left (highest-indexed) elements of 2723 // one half or the other and pick an operand accordingly. 2724 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1)) 2725 Ws = Op->getOperand(0); 2726 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + HalfSize, 2727 1)) 2728 Ws = Op->getOperand(1); 2729 else 2730 return SDValue(); 2731 2732 return DAG.getNode(MipsISD::ILVL, SDLoc(Op), ResTy, Ws, Wt); 2733 } 2734 2735 // Lower VECTOR_SHUFFLE into PCKEV (if possible). 2736 // 2737 // PCKEV copies the even elements of each vector into the result vector. 2738 // 2739 // It is possible to lower into PCKEV when the mask consists of two of the 2740 // following forms concatenated: 2741 // <0, 2, 4, ...> 2742 // <n, n+2, n+4, ...> 2743 // where n is the number of elements in the vector. 2744 // For example: 2745 // <0, 2, 4, ..., 0, 2, 4, ...> 2746 // <0, 2, 4, ..., n, n+2, n+4, ...> 2747 // 2748 // When undef's appear in the mask they are treated as if they were whatever 2749 // value is necessary in order to fit the above forms. 2750 static SDValue lowerVECTOR_SHUFFLE_PCKEV(SDValue Op, EVT ResTy, 2751 SmallVector<int, 16> Indices, 2752 SelectionDAG &DAG) { 2753 assert((Indices.size() % 2) == 0); 2754 2755 SDValue Wt; 2756 SDValue Ws; 2757 const auto &Begin = Indices.begin(); 2758 const auto &Mid = Indices.begin() + Indices.size() / 2; 2759 const auto &End = Indices.end(); 2760 2761 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2)) 2762 Wt = Op->getOperand(0); 2763 else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size(), 2)) 2764 Wt = Op->getOperand(1); 2765 else 2766 return SDValue(); 2767 2768 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2)) 2769 Ws = Op->getOperand(0); 2770 else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size(), 2)) 2771 Ws = Op->getOperand(1); 2772 else 2773 return SDValue(); 2774 2775 return DAG.getNode(MipsISD::PCKEV, SDLoc(Op), ResTy, Ws, Wt); 2776 } 2777 2778 // Lower VECTOR_SHUFFLE into PCKOD (if possible). 2779 // 2780 // PCKOD copies the odd elements of each vector into the result vector. 2781 // 2782 // It is possible to lower into PCKOD when the mask consists of two of the 2783 // following forms concatenated: 2784 // <1, 3, 5, ...> 2785 // <n+1, n+3, n+5, ...> 2786 // where n is the number of elements in the vector. 2787 // For example: 2788 // <1, 3, 5, ..., 1, 3, 5, ...> 2789 // <1, 3, 5, ..., n+1, n+3, n+5, ...> 2790 // 2791 // When undef's appear in the mask they are treated as if they were whatever 2792 // value is necessary in order to fit the above forms. 2793 static SDValue lowerVECTOR_SHUFFLE_PCKOD(SDValue Op, EVT ResTy, 2794 SmallVector<int, 16> Indices, 2795 SelectionDAG &DAG) { 2796 assert((Indices.size() % 2) == 0); 2797 2798 SDValue Wt; 2799 SDValue Ws; 2800 const auto &Begin = Indices.begin(); 2801 const auto &Mid = Indices.begin() + Indices.size() / 2; 2802 const auto &End = Indices.end(); 2803 2804 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2)) 2805 Wt = Op->getOperand(0); 2806 else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size() + 1, 2)) 2807 Wt = Op->getOperand(1); 2808 else 2809 return SDValue(); 2810 2811 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2)) 2812 Ws = Op->getOperand(0); 2813 else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size() + 1, 2)) 2814 Ws = Op->getOperand(1); 2815 else 2816 return SDValue(); 2817 2818 return DAG.getNode(MipsISD::PCKOD, SDLoc(Op), ResTy, Ws, Wt); 2819 } 2820 2821 // Lower VECTOR_SHUFFLE into VSHF. 2822 // 2823 // This mostly consists of converting the shuffle indices in Indices into a 2824 // BUILD_VECTOR and adding it as an operand to the resulting VSHF. There is 2825 // also code to eliminate unused operands of the VECTOR_SHUFFLE. For example, 2826 // if the type is v8i16 and all the indices are less than 8 then the second 2827 // operand is unused and can be replaced with anything. We choose to replace it 2828 // with the used operand since this reduces the number of instructions overall. 2829 static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy, 2830 SmallVector<int, 16> Indices, 2831 SelectionDAG &DAG) { 2832 SmallVector<SDValue, 16> Ops; 2833 SDValue Op0; 2834 SDValue Op1; 2835 EVT MaskVecTy = ResTy.changeVectorElementTypeToInteger(); 2836 EVT MaskEltTy = MaskVecTy.getVectorElementType(); 2837 bool Using1stVec = false; 2838 bool Using2ndVec = false; 2839 SDLoc DL(Op); 2840 int ResTyNumElts = ResTy.getVectorNumElements(); 2841 2842 for (int i = 0; i < ResTyNumElts; ++i) { 2843 // Idx == -1 means UNDEF 2844 int Idx = Indices[i]; 2845 2846 if (0 <= Idx && Idx < ResTyNumElts) 2847 Using1stVec = true; 2848 if (ResTyNumElts <= Idx && Idx < ResTyNumElts * 2) 2849 Using2ndVec = true; 2850 } 2851 2852 for (SmallVector<int, 16>::iterator I = Indices.begin(); I != Indices.end(); 2853 ++I) 2854 Ops.push_back(DAG.getTargetConstant(*I, DL, MaskEltTy)); 2855 2856 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops); 2857 2858 if (Using1stVec && Using2ndVec) { 2859 Op0 = Op->getOperand(0); 2860 Op1 = Op->getOperand(1); 2861 } else if (Using1stVec) 2862 Op0 = Op1 = Op->getOperand(0); 2863 else if (Using2ndVec) 2864 Op0 = Op1 = Op->getOperand(1); 2865 else 2866 llvm_unreachable("shuffle vector mask references neither vector operand?"); 2867 2868 // VECTOR_SHUFFLE concatenates the vectors in an vectorwise fashion. 2869 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11> 2870 // VSHF concatenates the vectors in a bitwise fashion: 2871 // <0b00, 0b01> + <0b10, 0b11> -> 2872 // 0b0100 + 0b1110 -> 0b01001110 2873 // <0b10, 0b11, 0b00, 0b01> 2874 // We must therefore swap the operands to get the correct result. 2875 return DAG.getNode(MipsISD::VSHF, DL, ResTy, MaskVec, Op1, Op0); 2876 } 2877 2878 // Lower VECTOR_SHUFFLE into one of a number of instructions depending on the 2879 // indices in the shuffle. 2880 SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, 2881 SelectionDAG &DAG) const { 2882 ShuffleVectorSDNode *Node = cast<ShuffleVectorSDNode>(Op); 2883 EVT ResTy = Op->getValueType(0); 2884 2885 if (!ResTy.is128BitVector()) 2886 return SDValue(); 2887 2888 int ResTyNumElts = ResTy.getVectorNumElements(); 2889 SmallVector<int, 16> Indices; 2890 2891 for (int i = 0; i < ResTyNumElts; ++i) 2892 Indices.push_back(Node->getMaskElt(i)); 2893 2894 // splati.[bhwd] is preferable to the others but is matched from 2895 // MipsISD::VSHF. 2896 if (isVECTOR_SHUFFLE_SPLATI(Op, ResTy, Indices, DAG)) 2897 return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG); 2898 SDValue Result; 2899 if ((Result = lowerVECTOR_SHUFFLE_ILVEV(Op, ResTy, Indices, DAG))) 2900 return Result; 2901 if ((Result = lowerVECTOR_SHUFFLE_ILVOD(Op, ResTy, Indices, DAG))) 2902 return Result; 2903 if ((Result = lowerVECTOR_SHUFFLE_ILVL(Op, ResTy, Indices, DAG))) 2904 return Result; 2905 if ((Result = lowerVECTOR_SHUFFLE_ILVR(Op, ResTy, Indices, DAG))) 2906 return Result; 2907 if ((Result = lowerVECTOR_SHUFFLE_PCKEV(Op, ResTy, Indices, DAG))) 2908 return Result; 2909 if ((Result = lowerVECTOR_SHUFFLE_PCKOD(Op, ResTy, Indices, DAG))) 2910 return Result; 2911 if ((Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG))) 2912 return Result; 2913 return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG); 2914 } 2915 2916 MachineBasicBlock * 2917 MipsSETargetLowering::emitBPOSGE32(MachineInstr &MI, 2918 MachineBasicBlock *BB) const { 2919 // $bb: 2920 // bposge32_pseudo $vr0 2921 // => 2922 // $bb: 2923 // bposge32 $tbb 2924 // $fbb: 2925 // li $vr2, 0 2926 // b $sink 2927 // $tbb: 2928 // li $vr1, 1 2929 // $sink: 2930 // $vr0 = phi($vr2, $fbb, $vr1, $tbb) 2931 2932 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 2933 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 2934 const TargetRegisterClass *RC = &Mips::GPR32RegClass; 2935 DebugLoc DL = MI.getDebugLoc(); 2936 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 2937 MachineFunction::iterator It = std::next(MachineFunction::iterator(BB)); 2938 MachineFunction *F = BB->getParent(); 2939 MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB); 2940 MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB); 2941 MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB); 2942 F->insert(It, FBB); 2943 F->insert(It, TBB); 2944 F->insert(It, Sink); 2945 2946 // Transfer the remainder of BB and its successor edges to Sink. 2947 Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), 2948 BB->end()); 2949 Sink->transferSuccessorsAndUpdatePHIs(BB); 2950 2951 // Add successors. 2952 BB->addSuccessor(FBB); 2953 BB->addSuccessor(TBB); 2954 FBB->addSuccessor(Sink); 2955 TBB->addSuccessor(Sink); 2956 2957 // Insert the real bposge32 instruction to $BB. 2958 BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB); 2959 // Insert the real bposge32c instruction to $BB. 2960 BuildMI(BB, DL, TII->get(Mips::BPOSGE32C_MMR3)).addMBB(TBB); 2961 2962 // Fill $FBB. 2963 unsigned VR2 = RegInfo.createVirtualRegister(RC); 2964 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2) 2965 .addReg(Mips::ZERO).addImm(0); 2966 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink); 2967 2968 // Fill $TBB. 2969 unsigned VR1 = RegInfo.createVirtualRegister(RC); 2970 BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1) 2971 .addReg(Mips::ZERO).addImm(1); 2972 2973 // Insert phi function to $Sink. 2974 BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI), 2975 MI.getOperand(0).getReg()) 2976 .addReg(VR2) 2977 .addMBB(FBB) 2978 .addReg(VR1) 2979 .addMBB(TBB); 2980 2981 MI.eraseFromParent(); // The pseudo instruction is gone now. 2982 return Sink; 2983 } 2984 2985 MachineBasicBlock *MipsSETargetLowering::emitMSACBranchPseudo( 2986 MachineInstr &MI, MachineBasicBlock *BB, unsigned BranchOp) const { 2987 // $bb: 2988 // vany_nonzero $rd, $ws 2989 // => 2990 // $bb: 2991 // bnz.b $ws, $tbb 2992 // b $fbb 2993 // $fbb: 2994 // li $rd1, 0 2995 // b $sink 2996 // $tbb: 2997 // li $rd2, 1 2998 // $sink: 2999 // $rd = phi($rd1, $fbb, $rd2, $tbb) 3000 3001 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3002 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3003 const TargetRegisterClass *RC = &Mips::GPR32RegClass; 3004 DebugLoc DL = MI.getDebugLoc(); 3005 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 3006 MachineFunction::iterator It = std::next(MachineFunction::iterator(BB)); 3007 MachineFunction *F = BB->getParent(); 3008 MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB); 3009 MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB); 3010 MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB); 3011 F->insert(It, FBB); 3012 F->insert(It, TBB); 3013 F->insert(It, Sink); 3014 3015 // Transfer the remainder of BB and its successor edges to Sink. 3016 Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), 3017 BB->end()); 3018 Sink->transferSuccessorsAndUpdatePHIs(BB); 3019 3020 // Add successors. 3021 BB->addSuccessor(FBB); 3022 BB->addSuccessor(TBB); 3023 FBB->addSuccessor(Sink); 3024 TBB->addSuccessor(Sink); 3025 3026 // Insert the real bnz.b instruction to $BB. 3027 BuildMI(BB, DL, TII->get(BranchOp)) 3028 .addReg(MI.getOperand(1).getReg()) 3029 .addMBB(TBB); 3030 3031 // Fill $FBB. 3032 unsigned RD1 = RegInfo.createVirtualRegister(RC); 3033 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), RD1) 3034 .addReg(Mips::ZERO).addImm(0); 3035 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink); 3036 3037 // Fill $TBB. 3038 unsigned RD2 = RegInfo.createVirtualRegister(RC); 3039 BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), RD2) 3040 .addReg(Mips::ZERO).addImm(1); 3041 3042 // Insert phi function to $Sink. 3043 BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI), 3044 MI.getOperand(0).getReg()) 3045 .addReg(RD1) 3046 .addMBB(FBB) 3047 .addReg(RD2) 3048 .addMBB(TBB); 3049 3050 MI.eraseFromParent(); // The pseudo instruction is gone now. 3051 return Sink; 3052 } 3053 3054 // Emit the COPY_FW pseudo instruction. 3055 // 3056 // copy_fw_pseudo $fd, $ws, n 3057 // => 3058 // copy_u_w $rt, $ws, $n 3059 // mtc1 $rt, $fd 3060 // 3061 // When n is zero, the equivalent operation can be performed with (potentially) 3062 // zero instructions due to register overlaps. This optimization is never valid 3063 // for lane 1 because it would require FR=0 mode which isn't supported by MSA. 3064 MachineBasicBlock * 3065 MipsSETargetLowering::emitCOPY_FW(MachineInstr &MI, 3066 MachineBasicBlock *BB) const { 3067 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3068 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3069 DebugLoc DL = MI.getDebugLoc(); 3070 unsigned Fd = MI.getOperand(0).getReg(); 3071 unsigned Ws = MI.getOperand(1).getReg(); 3072 unsigned Lane = MI.getOperand(2).getImm(); 3073 3074 if (Lane == 0) { 3075 unsigned Wt = Ws; 3076 if (!Subtarget.useOddSPReg()) { 3077 // We must copy to an even-numbered MSA register so that the 3078 // single-precision sub-register is also guaranteed to be even-numbered. 3079 Wt = RegInfo.createVirtualRegister(&Mips::MSA128WEvensRegClass); 3080 3081 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Wt).addReg(Ws); 3082 } 3083 3084 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo); 3085 } else { 3086 unsigned Wt = RegInfo.createVirtualRegister( 3087 Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass : 3088 &Mips::MSA128WEvensRegClass); 3089 3090 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wt).addReg(Ws).addImm(Lane); 3091 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo); 3092 } 3093 3094 MI.eraseFromParent(); // The pseudo instruction is gone now. 3095 return BB; 3096 } 3097 3098 // Emit the COPY_FD pseudo instruction. 3099 // 3100 // copy_fd_pseudo $fd, $ws, n 3101 // => 3102 // splati.d $wt, $ws, $n 3103 // copy $fd, $wt:sub_64 3104 // 3105 // When n is zero, the equivalent operation can be performed with (potentially) 3106 // zero instructions due to register overlaps. This optimization is always 3107 // valid because FR=1 mode which is the only supported mode in MSA. 3108 MachineBasicBlock * 3109 MipsSETargetLowering::emitCOPY_FD(MachineInstr &MI, 3110 MachineBasicBlock *BB) const { 3111 assert(Subtarget.isFP64bit()); 3112 3113 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3114 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3115 unsigned Fd = MI.getOperand(0).getReg(); 3116 unsigned Ws = MI.getOperand(1).getReg(); 3117 unsigned Lane = MI.getOperand(2).getImm() * 2; 3118 DebugLoc DL = MI.getDebugLoc(); 3119 3120 if (Lane == 0) 3121 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, 0, Mips::sub_64); 3122 else { 3123 unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); 3124 3125 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wt).addReg(Ws).addImm(1); 3126 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_64); 3127 } 3128 3129 MI.eraseFromParent(); // The pseudo instruction is gone now. 3130 return BB; 3131 } 3132 3133 // Emit the INSERT_FW pseudo instruction. 3134 // 3135 // insert_fw_pseudo $wd, $wd_in, $n, $fs 3136 // => 3137 // subreg_to_reg $wt:sub_lo, $fs 3138 // insve_w $wd[$n], $wd_in, $wt[0] 3139 MachineBasicBlock * 3140 MipsSETargetLowering::emitINSERT_FW(MachineInstr &MI, 3141 MachineBasicBlock *BB) const { 3142 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3143 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3144 DebugLoc DL = MI.getDebugLoc(); 3145 unsigned Wd = MI.getOperand(0).getReg(); 3146 unsigned Wd_in = MI.getOperand(1).getReg(); 3147 unsigned Lane = MI.getOperand(2).getImm(); 3148 unsigned Fs = MI.getOperand(3).getReg(); 3149 unsigned Wt = RegInfo.createVirtualRegister( 3150 Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass : 3151 &Mips::MSA128WEvensRegClass); 3152 3153 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt) 3154 .addImm(0) 3155 .addReg(Fs) 3156 .addImm(Mips::sub_lo); 3157 BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_W), Wd) 3158 .addReg(Wd_in) 3159 .addImm(Lane) 3160 .addReg(Wt) 3161 .addImm(0); 3162 3163 MI.eraseFromParent(); // The pseudo instruction is gone now. 3164 return BB; 3165 } 3166 3167 // Emit the INSERT_FD pseudo instruction. 3168 // 3169 // insert_fd_pseudo $wd, $fs, n 3170 // => 3171 // subreg_to_reg $wt:sub_64, $fs 3172 // insve_d $wd[$n], $wd_in, $wt[0] 3173 MachineBasicBlock * 3174 MipsSETargetLowering::emitINSERT_FD(MachineInstr &MI, 3175 MachineBasicBlock *BB) const { 3176 assert(Subtarget.isFP64bit()); 3177 3178 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3179 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3180 DebugLoc DL = MI.getDebugLoc(); 3181 unsigned Wd = MI.getOperand(0).getReg(); 3182 unsigned Wd_in = MI.getOperand(1).getReg(); 3183 unsigned Lane = MI.getOperand(2).getImm(); 3184 unsigned Fs = MI.getOperand(3).getReg(); 3185 unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); 3186 3187 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt) 3188 .addImm(0) 3189 .addReg(Fs) 3190 .addImm(Mips::sub_64); 3191 BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_D), Wd) 3192 .addReg(Wd_in) 3193 .addImm(Lane) 3194 .addReg(Wt) 3195 .addImm(0); 3196 3197 MI.eraseFromParent(); // The pseudo instruction is gone now. 3198 return BB; 3199 } 3200 3201 // Emit the INSERT_([BHWD]|F[WD])_VIDX pseudo instruction. 3202 // 3203 // For integer: 3204 // (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $rs) 3205 // => 3206 // (SLL $lanetmp1, $lane, <log2size) 3207 // (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1) 3208 // (INSERT_[BHWD], $wdtmp2, $wdtmp1, 0, $rs) 3209 // (NEG $lanetmp2, $lanetmp1) 3210 // (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2) 3211 // 3212 // For floating point: 3213 // (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $fs) 3214 // => 3215 // (SUBREG_TO_REG $wt, $fs, <subreg>) 3216 // (SLL $lanetmp1, $lane, <log2size) 3217 // (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1) 3218 // (INSVE_[WD], $wdtmp2, 0, $wdtmp1, 0) 3219 // (NEG $lanetmp2, $lanetmp1) 3220 // (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2) 3221 MachineBasicBlock *MipsSETargetLowering::emitINSERT_DF_VIDX( 3222 MachineInstr &MI, MachineBasicBlock *BB, unsigned EltSizeInBytes, 3223 bool IsFP) const { 3224 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3225 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3226 DebugLoc DL = MI.getDebugLoc(); 3227 unsigned Wd = MI.getOperand(0).getReg(); 3228 unsigned SrcVecReg = MI.getOperand(1).getReg(); 3229 unsigned LaneReg = MI.getOperand(2).getReg(); 3230 unsigned SrcValReg = MI.getOperand(3).getReg(); 3231 3232 const TargetRegisterClass *VecRC = nullptr; 3233 // FIXME: This should be true for N32 too. 3234 const TargetRegisterClass *GPRRC = 3235 Subtarget.isABI_N64() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; 3236 unsigned SubRegIdx = Subtarget.isABI_N64() ? Mips::sub_32 : 0; 3237 unsigned ShiftOp = Subtarget.isABI_N64() ? Mips::DSLL : Mips::SLL; 3238 unsigned EltLog2Size; 3239 unsigned InsertOp = 0; 3240 unsigned InsveOp = 0; 3241 switch (EltSizeInBytes) { 3242 default: 3243 llvm_unreachable("Unexpected size"); 3244 case 1: 3245 EltLog2Size = 0; 3246 InsertOp = Mips::INSERT_B; 3247 InsveOp = Mips::INSVE_B; 3248 VecRC = &Mips::MSA128BRegClass; 3249 break; 3250 case 2: 3251 EltLog2Size = 1; 3252 InsertOp = Mips::INSERT_H; 3253 InsveOp = Mips::INSVE_H; 3254 VecRC = &Mips::MSA128HRegClass; 3255 break; 3256 case 4: 3257 EltLog2Size = 2; 3258 InsertOp = Mips::INSERT_W; 3259 InsveOp = Mips::INSVE_W; 3260 VecRC = &Mips::MSA128WRegClass; 3261 break; 3262 case 8: 3263 EltLog2Size = 3; 3264 InsertOp = Mips::INSERT_D; 3265 InsveOp = Mips::INSVE_D; 3266 VecRC = &Mips::MSA128DRegClass; 3267 break; 3268 } 3269 3270 if (IsFP) { 3271 unsigned Wt = RegInfo.createVirtualRegister(VecRC); 3272 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt) 3273 .addImm(0) 3274 .addReg(SrcValReg) 3275 .addImm(EltSizeInBytes == 8 ? Mips::sub_64 : Mips::sub_lo); 3276 SrcValReg = Wt; 3277 } 3278 3279 // Convert the lane index into a byte index 3280 if (EltSizeInBytes != 1) { 3281 unsigned LaneTmp1 = RegInfo.createVirtualRegister(GPRRC); 3282 BuildMI(*BB, MI, DL, TII->get(ShiftOp), LaneTmp1) 3283 .addReg(LaneReg) 3284 .addImm(EltLog2Size); 3285 LaneReg = LaneTmp1; 3286 } 3287 3288 // Rotate bytes around so that the desired lane is element zero 3289 unsigned WdTmp1 = RegInfo.createVirtualRegister(VecRC); 3290 BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), WdTmp1) 3291 .addReg(SrcVecReg) 3292 .addReg(SrcVecReg) 3293 .addReg(LaneReg, 0, SubRegIdx); 3294 3295 unsigned WdTmp2 = RegInfo.createVirtualRegister(VecRC); 3296 if (IsFP) { 3297 // Use insve.df to insert to element zero 3298 BuildMI(*BB, MI, DL, TII->get(InsveOp), WdTmp2) 3299 .addReg(WdTmp1) 3300 .addImm(0) 3301 .addReg(SrcValReg) 3302 .addImm(0); 3303 } else { 3304 // Use insert.df to insert to element zero 3305 BuildMI(*BB, MI, DL, TII->get(InsertOp), WdTmp2) 3306 .addReg(WdTmp1) 3307 .addReg(SrcValReg) 3308 .addImm(0); 3309 } 3310 3311 // Rotate elements the rest of the way for a full rotation. 3312 // sld.df inteprets $rt modulo the number of columns so we only need to negate 3313 // the lane index to do this. 3314 unsigned LaneTmp2 = RegInfo.createVirtualRegister(GPRRC); 3315 BuildMI(*BB, MI, DL, TII->get(Subtarget.isABI_N64() ? Mips::DSUB : Mips::SUB), 3316 LaneTmp2) 3317 .addReg(Subtarget.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO) 3318 .addReg(LaneReg); 3319 BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), Wd) 3320 .addReg(WdTmp2) 3321 .addReg(WdTmp2) 3322 .addReg(LaneTmp2, 0, SubRegIdx); 3323 3324 MI.eraseFromParent(); // The pseudo instruction is gone now. 3325 return BB; 3326 } 3327 3328 // Emit the FILL_FW pseudo instruction. 3329 // 3330 // fill_fw_pseudo $wd, $fs 3331 // => 3332 // implicit_def $wt1 3333 // insert_subreg $wt2:subreg_lo, $wt1, $fs 3334 // splati.w $wd, $wt2[0] 3335 MachineBasicBlock * 3336 MipsSETargetLowering::emitFILL_FW(MachineInstr &MI, 3337 MachineBasicBlock *BB) const { 3338 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3339 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3340 DebugLoc DL = MI.getDebugLoc(); 3341 unsigned Wd = MI.getOperand(0).getReg(); 3342 unsigned Fs = MI.getOperand(1).getReg(); 3343 unsigned Wt1 = RegInfo.createVirtualRegister( 3344 Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass 3345 : &Mips::MSA128WEvensRegClass); 3346 unsigned Wt2 = RegInfo.createVirtualRegister( 3347 Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass 3348 : &Mips::MSA128WEvensRegClass); 3349 3350 BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1); 3351 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2) 3352 .addReg(Wt1) 3353 .addReg(Fs) 3354 .addImm(Mips::sub_lo); 3355 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wd).addReg(Wt2).addImm(0); 3356 3357 MI.eraseFromParent(); // The pseudo instruction is gone now. 3358 return BB; 3359 } 3360 3361 // Emit the FILL_FD pseudo instruction. 3362 // 3363 // fill_fd_pseudo $wd, $fs 3364 // => 3365 // implicit_def $wt1 3366 // insert_subreg $wt2:subreg_64, $wt1, $fs 3367 // splati.d $wd, $wt2[0] 3368 MachineBasicBlock * 3369 MipsSETargetLowering::emitFILL_FD(MachineInstr &MI, 3370 MachineBasicBlock *BB) const { 3371 assert(Subtarget.isFP64bit()); 3372 3373 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3374 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3375 DebugLoc DL = MI.getDebugLoc(); 3376 unsigned Wd = MI.getOperand(0).getReg(); 3377 unsigned Fs = MI.getOperand(1).getReg(); 3378 unsigned Wt1 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); 3379 unsigned Wt2 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); 3380 3381 BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1); 3382 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2) 3383 .addReg(Wt1) 3384 .addReg(Fs) 3385 .addImm(Mips::sub_64); 3386 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wd).addReg(Wt2).addImm(0); 3387 3388 MI.eraseFromParent(); // The pseudo instruction is gone now. 3389 return BB; 3390 } 3391 3392 // Emit the ST_F16_PSEDUO instruction to store a f16 value from an MSA 3393 // register. 3394 // 3395 // STF16 MSA128F16:$wd, mem_simm10:$addr 3396 // => 3397 // copy_u.h $rtemp,$wd[0] 3398 // sh $rtemp, $addr 3399 // 3400 // Safety: We can't use st.h & co as they would over write the memory after 3401 // the destination. It would require half floats be allocated 16 bytes(!) of 3402 // space. 3403 MachineBasicBlock * 3404 MipsSETargetLowering::emitST_F16_PSEUDO(MachineInstr &MI, 3405 MachineBasicBlock *BB) const { 3406 3407 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3408 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3409 DebugLoc DL = MI.getDebugLoc(); 3410 unsigned Ws = MI.getOperand(0).getReg(); 3411 unsigned Rt = MI.getOperand(1).getReg(); 3412 const MachineMemOperand &MMO = **MI.memoperands_begin(); 3413 unsigned Imm = MMO.getOffset(); 3414 3415 // Caution: A load via the GOT can expand to a GPR32 operand, a load via 3416 // spill and reload can expand as a GPR64 operand. Examine the 3417 // operand in detail and default to ABI. 3418 const TargetRegisterClass *RC = 3419 MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg()) 3420 : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass 3421 : &Mips::GPR64RegClass); 3422 const bool UsingMips32 = RC == &Mips::GPR32RegClass; 3423 unsigned Rs = RegInfo.createVirtualRegister(RC); 3424 3425 BuildMI(*BB, MI, DL, TII->get(Mips::COPY_U_H), Rs).addReg(Ws).addImm(0); 3426 BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::SH : Mips::SH64)) 3427 .addReg(Rs) 3428 .addReg(Rt) 3429 .addImm(Imm) 3430 .addMemOperand(BB->getParent()->getMachineMemOperand( 3431 &MMO, MMO.getOffset(), MMO.getSize())); 3432 3433 MI.eraseFromParent(); 3434 return BB; 3435 } 3436 3437 // Emit the LD_F16_PSEDUO instruction to load a f16 value into an MSA register. 3438 // 3439 // LD_F16 MSA128F16:$wd, mem_simm10:$addr 3440 // => 3441 // lh $rtemp, $addr 3442 // fill.h $wd, $rtemp 3443 // 3444 // Safety: We can't use ld.h & co as they over-read from the source. 3445 // Additionally, if the address is not modulo 16, 2 cases can occur: 3446 // a) Segmentation fault as the load instruction reads from a memory page 3447 // memory it's not supposed to. 3448 // b) The load crosses an implementation specific boundary, requiring OS 3449 // intervention. 3450 // 3451 MachineBasicBlock * 3452 MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI, 3453 MachineBasicBlock *BB) const { 3454 3455 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3456 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3457 DebugLoc DL = MI.getDebugLoc(); 3458 unsigned Wd = MI.getOperand(0).getReg(); 3459 3460 // Caution: A load via the GOT can expand to a GPR32 operand, a load via 3461 // spill and reload can expand as a GPR64 operand. Examine the 3462 // operand in detail and default to ABI. 3463 const TargetRegisterClass *RC = 3464 MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg()) 3465 : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass 3466 : &Mips::GPR64RegClass); 3467 3468 const bool UsingMips32 = RC == &Mips::GPR32RegClass; 3469 unsigned Rt = RegInfo.createVirtualRegister(RC); 3470 3471 MachineInstrBuilder MIB = 3472 BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::LH : Mips::LH64), Rt); 3473 for (unsigned i = 1; i < MI.getNumOperands(); i++) 3474 MIB.add(MI.getOperand(i)); 3475 3476 BuildMI(*BB, MI, DL, TII->get(Mips::FILL_H), Wd).addReg(Rt); 3477 3478 MI.eraseFromParent(); 3479 return BB; 3480 } 3481 3482 // Emit the FPROUND_PSEUDO instruction. 3483 // 3484 // Round an FGR64Opnd, FGR32Opnd to an f16. 3485 // 3486 // Safety: Cycle the operand through the GPRs so the result always ends up 3487 // the correct MSA register. 3488 // 3489 // FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fs 3490 // / FGR64Opnd:$Fs and MSA128F16:$Wd to the same physical register 3491 // (which they can be, as the MSA registers are defined to alias the 3492 // FPU's 64 bit and 32 bit registers) the result can be accessed using 3493 // the correct register class. That requires operands be tie-able across 3494 // register classes which have a sub/super register class relationship. 3495 // 3496 // For FPG32Opnd: 3497 // 3498 // FPROUND MSA128F16:$wd, FGR32Opnd:$fs 3499 // => 3500 // mfc1 $rtemp, $fs 3501 // fill.w $rtemp, $wtemp 3502 // fexdo.w $wd, $wtemp, $wtemp 3503 // 3504 // For FPG64Opnd on mips32r2+: 3505 // 3506 // FPROUND MSA128F16:$wd, FGR64Opnd:$fs 3507 // => 3508 // mfc1 $rtemp, $fs 3509 // fill.w $rtemp, $wtemp 3510 // mfhc1 $rtemp2, $fs 3511 // insert.w $wtemp[1], $rtemp2 3512 // insert.w $wtemp[3], $rtemp2 3513 // fexdo.w $wtemp2, $wtemp, $wtemp 3514 // fexdo.h $wd, $temp2, $temp2 3515 // 3516 // For FGR64Opnd on mips64r2+: 3517 // 3518 // FPROUND MSA128F16:$wd, FGR64Opnd:$fs 3519 // => 3520 // dmfc1 $rtemp, $fs 3521 // fill.d $rtemp, $wtemp 3522 // fexdo.w $wtemp2, $wtemp, $wtemp 3523 // fexdo.h $wd, $wtemp2, $wtemp2 3524 // 3525 // Safety note: As $wtemp is UNDEF, we may provoke a spurious exception if the 3526 // undef bits are "just right" and the exception enable bits are 3527 // set. By using fill.w to replicate $fs into all elements over 3528 // insert.w for one element, we avoid that potiential case. If 3529 // fexdo.[hw] causes an exception in, the exception is valid and it 3530 // occurs for all elements. 3531 // 3532 MachineBasicBlock * 3533 MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI, 3534 MachineBasicBlock *BB, 3535 bool IsFGR64) const { 3536 3537 // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous 3538 // here. It's technically doable to support MIPS32 here, but the ISA forbids 3539 // it. 3540 assert(Subtarget.hasMSA() && Subtarget.hasMips32r2()); 3541 3542 bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64; 3543 3544 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3545 DebugLoc DL = MI.getDebugLoc(); 3546 unsigned Wd = MI.getOperand(0).getReg(); 3547 unsigned Fs = MI.getOperand(1).getReg(); 3548 3549 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3550 unsigned Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 3551 const TargetRegisterClass *GPRRC = 3552 IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; 3553 unsigned MFC1Opc = IsFGR64onMips64 ? Mips::DMFC1 : Mips::MFC1; 3554 unsigned FILLOpc = IsFGR64onMips64 ? Mips::FILL_D : Mips::FILL_W; 3555 3556 // Perform the register class copy as mentioned above. 3557 unsigned Rtemp = RegInfo.createVirtualRegister(GPRRC); 3558 BuildMI(*BB, MI, DL, TII->get(MFC1Opc), Rtemp).addReg(Fs); 3559 BuildMI(*BB, MI, DL, TII->get(FILLOpc), Wtemp).addReg(Rtemp); 3560 unsigned WPHI = Wtemp; 3561 3562 if (!Subtarget.hasMips64() && IsFGR64) { 3563 unsigned Rtemp2 = RegInfo.createVirtualRegister(GPRRC); 3564 BuildMI(*BB, MI, DL, TII->get(Mips::MFHC1_D64), Rtemp2).addReg(Fs); 3565 unsigned Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 3566 unsigned Wtemp3 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 3567 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp2) 3568 .addReg(Wtemp) 3569 .addReg(Rtemp2) 3570 .addImm(1); 3571 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp3) 3572 .addReg(Wtemp2) 3573 .addReg(Rtemp2) 3574 .addImm(3); 3575 WPHI = Wtemp3; 3576 } 3577 3578 if (IsFGR64) { 3579 unsigned Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 3580 BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_W), Wtemp2) 3581 .addReg(WPHI) 3582 .addReg(WPHI); 3583 WPHI = Wtemp2; 3584 } 3585 3586 BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_H), Wd).addReg(WPHI).addReg(WPHI); 3587 3588 MI.eraseFromParent(); 3589 return BB; 3590 } 3591 3592 // Emit the FPEXTEND_PSEUDO instruction. 3593 // 3594 // Expand an f16 to either a FGR32Opnd or FGR64Opnd. 3595 // 3596 // Safety: Cycle the result through the GPRs so the result always ends up 3597 // the correct floating point register. 3598 // 3599 // FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fd 3600 // / FGR64Opnd:$Fd and MSA128F16:$Ws to the same physical register 3601 // (which they can be, as the MSA registers are defined to alias the 3602 // FPU's 64 bit and 32 bit registers) the result can be accessed using 3603 // the correct register class. That requires operands be tie-able across 3604 // register classes which have a sub/super register class relationship. I 3605 // haven't checked. 3606 // 3607 // For FGR32Opnd: 3608 // 3609 // FPEXTEND FGR32Opnd:$fd, MSA128F16:$ws 3610 // => 3611 // fexupr.w $wtemp, $ws 3612 // copy_s.w $rtemp, $ws[0] 3613 // mtc1 $rtemp, $fd 3614 // 3615 // For FGR64Opnd on Mips64: 3616 // 3617 // FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws 3618 // => 3619 // fexupr.w $wtemp, $ws 3620 // fexupr.d $wtemp2, $wtemp 3621 // copy_s.d $rtemp, $wtemp2s[0] 3622 // dmtc1 $rtemp, $fd 3623 // 3624 // For FGR64Opnd on Mips32: 3625 // 3626 // FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws 3627 // => 3628 // fexupr.w $wtemp, $ws 3629 // fexupr.d $wtemp2, $wtemp 3630 // copy_s.w $rtemp, $wtemp2[0] 3631 // mtc1 $rtemp, $ftemp 3632 // copy_s.w $rtemp2, $wtemp2[1] 3633 // $fd = mthc1 $rtemp2, $ftemp 3634 // 3635 MachineBasicBlock * 3636 MipsSETargetLowering::emitFPEXTEND_PSEUDO(MachineInstr &MI, 3637 MachineBasicBlock *BB, 3638 bool IsFGR64) const { 3639 3640 // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous 3641 // here. It's technically doable to support MIPS32 here, but the ISA forbids 3642 // it. 3643 assert(Subtarget.hasMSA() && Subtarget.hasMips32r2()); 3644 3645 bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64; 3646 bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64; 3647 3648 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3649 DebugLoc DL = MI.getDebugLoc(); 3650 unsigned Fd = MI.getOperand(0).getReg(); 3651 unsigned Ws = MI.getOperand(1).getReg(); 3652 3653 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3654 const TargetRegisterClass *GPRRC = 3655 IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; 3656 unsigned MTC1Opc = IsFGR64onMips64 ? Mips::DMTC1 : Mips::MTC1; 3657 unsigned COPYOpc = IsFGR64onMips64 ? Mips::COPY_S_D : Mips::COPY_S_W; 3658 3659 unsigned Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 3660 unsigned WPHI = Wtemp; 3661 3662 BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_W), Wtemp).addReg(Ws); 3663 if (IsFGR64) { 3664 WPHI = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); 3665 BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_D), WPHI).addReg(Wtemp); 3666 } 3667 3668 // Perform the safety regclass copy mentioned above. 3669 unsigned Rtemp = RegInfo.createVirtualRegister(GPRRC); 3670 unsigned FPRPHI = IsFGR64onMips32 3671 ? RegInfo.createVirtualRegister(&Mips::FGR64RegClass) 3672 : Fd; 3673 BuildMI(*BB, MI, DL, TII->get(COPYOpc), Rtemp).addReg(WPHI).addImm(0); 3674 BuildMI(*BB, MI, DL, TII->get(MTC1Opc), FPRPHI).addReg(Rtemp); 3675 3676 if (IsFGR64onMips32) { 3677 unsigned Rtemp2 = RegInfo.createVirtualRegister(GPRRC); 3678 BuildMI(*BB, MI, DL, TII->get(Mips::COPY_S_W), Rtemp2) 3679 .addReg(WPHI) 3680 .addImm(1); 3681 BuildMI(*BB, MI, DL, TII->get(Mips::MTHC1_D64), Fd) 3682 .addReg(FPRPHI) 3683 .addReg(Rtemp2); 3684 } 3685 3686 MI.eraseFromParent(); 3687 return BB; 3688 } 3689 3690 // Emit the FEXP2_W_1 pseudo instructions. 3691 // 3692 // fexp2_w_1_pseudo $wd, $wt 3693 // => 3694 // ldi.w $ws, 1 3695 // fexp2.w $wd, $ws, $wt 3696 MachineBasicBlock * 3697 MipsSETargetLowering::emitFEXP2_W_1(MachineInstr &MI, 3698 MachineBasicBlock *BB) const { 3699 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3700 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3701 const TargetRegisterClass *RC = &Mips::MSA128WRegClass; 3702 unsigned Ws1 = RegInfo.createVirtualRegister(RC); 3703 unsigned Ws2 = RegInfo.createVirtualRegister(RC); 3704 DebugLoc DL = MI.getDebugLoc(); 3705 3706 // Splat 1.0 into a vector 3707 BuildMI(*BB, MI, DL, TII->get(Mips::LDI_W), Ws1).addImm(1); 3708 BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_W), Ws2).addReg(Ws1); 3709 3710 // Emit 1.0 * fexp2(Wt) 3711 BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_W), MI.getOperand(0).getReg()) 3712 .addReg(Ws2) 3713 .addReg(MI.getOperand(1).getReg()); 3714 3715 MI.eraseFromParent(); // The pseudo instruction is gone now. 3716 return BB; 3717 } 3718 3719 // Emit the FEXP2_D_1 pseudo instructions. 3720 // 3721 // fexp2_d_1_pseudo $wd, $wt 3722 // => 3723 // ldi.d $ws, 1 3724 // fexp2.d $wd, $ws, $wt 3725 MachineBasicBlock * 3726 MipsSETargetLowering::emitFEXP2_D_1(MachineInstr &MI, 3727 MachineBasicBlock *BB) const { 3728 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3729 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3730 const TargetRegisterClass *RC = &Mips::MSA128DRegClass; 3731 unsigned Ws1 = RegInfo.createVirtualRegister(RC); 3732 unsigned Ws2 = RegInfo.createVirtualRegister(RC); 3733 DebugLoc DL = MI.getDebugLoc(); 3734 3735 // Splat 1.0 into a vector 3736 BuildMI(*BB, MI, DL, TII->get(Mips::LDI_D), Ws1).addImm(1); 3737 BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_D), Ws2).addReg(Ws1); 3738 3739 // Emit 1.0 * fexp2(Wt) 3740 BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_D), MI.getOperand(0).getReg()) 3741 .addReg(Ws2) 3742 .addReg(MI.getOperand(1).getReg()); 3743 3744 MI.eraseFromParent(); // The pseudo instruction is gone now. 3745 return BB; 3746 } 3747