//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPC.h"
#include "PPCCCState.h"
#include "PPCCallingConv.h"
#include "PPCFrameLowering.h"
#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCRegisterInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSymbolXCOFF.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <list>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "ppc-lowering"

static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);

static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableSCO("disable-ppc-sco",
cl::desc("disable sibling call optimization on ppc"), cl::Hidden);

static cl::opt<bool> EnableQuadPrecision("enable-ppc-quad-precision",
cl::desc("enable quad precision float support on ppc"), cl::Hidden);

STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");

static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);

static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);

// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;

PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
                                     const PPCSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
  // arguments are at least 4/8 bytes aligned.
  bool isPPC64 = Subtarget.isPPC64();
  setMinStackArgumentAlignment(isPPC64 ? 8 : 4);

  // Set up the register classes.
  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
  if (!useSoftFloat()) {
    if (hasSPE()) {
      addRegisterClass(MVT::f32, &PPC::SPE4RCRegClass);
      addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
    } else {
      addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
      addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
    }
  }

  // Match BITREVERSE to customized fast code sequence in the td file.
  setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
  setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);

  // Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended.
  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
  }

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PowerPC has pre-inc loads and stores.
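  // (These correspond to the update-form instructions such as lbzu/stwu,
  //  which write the computed effective address back into the base register.)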
  setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
  if (!Subtarget.hasSPE()) {
    setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
    setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
  }

  // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
  for (MVT VT : ScalarIntVTs) {
    setOperationAction(ISD::ADDC, VT, Legal);
    setOperationAction(ISD::ADDE, VT, Legal);
    setOperationAction(ISD::SUBC, VT, Legal);
    setOperationAction(ISD::SUBE, VT, Legal);
  }

  if (Subtarget.useCRBits()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

    if (isPPC64 || Subtarget.hasFPCVT()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
                         isPPC64 ? MVT::i64 : MVT::i32);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
                        isPPC64 ? MVT::i64 : MVT::i32);
    } else {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
    }

    // PowerPC does not support direct load/store of condition registers.
    setOperationAction(ISD::LOAD, MVT::i1, Custom);
    setOperationAction(ISD::STORE, MVT::i1, Custom);

    // FIXME: Remove this once the ANDI glue bug is fixed:
    if (ANDIGlueBug)
      setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);

    for (MVT VT : MVT::integer_valuetypes()) {
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
      setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
      setTruncStoreAction(VT, MVT::i1, Expand);
    }

    addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
  }

  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
  // PPC (the libcall is not available).
  setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);

  // We do not currently implement these libm ops for PowerPC.
  setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
  setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
  setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
  setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FREM, MVT::ppcf128, Expand);

  // PowerPC has no SREM/UREM instructions unless we are on P9.
  // On P9 we may use a hardware instruction to compute the remainder.
  // The instructions are not legalized directly because in the cases where the
  // result of both the remainder and the division is required it is more
  // efficient to compute the remainder from the result of the division rather
  // than use the remainder instruction.
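  // (ISA 3.0 provides modsw/moduw and modsd/modud for this.)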
  if (Subtarget.isISA3_0()) {
    setOperationAction(ISD::SREM, MVT::i32, Custom);
    setOperationAction(ISD::UREM, MVT::i32, Custom);
    setOperationAction(ISD::SREM, MVT::i64, Custom);
    setOperationAction(ISD::UREM, MVT::i64, Custom);
  } else {
    setOperationAction(ISD::SREM, MVT::i32, Expand);
    setOperationAction(ISD::UREM, MVT::i32, Expand);
    setOperationAction(ISD::SREM, MVT::i64, Expand);
    setOperationAction(ISD::UREM, MVT::i64, Expand);
  }

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod/pow
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FPOW , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);
  setOperationAction(ISD::FPOW , MVT::f32, Expand);
  if (Subtarget.hasSPE()) {
    setOperationAction(ISD::FMA , MVT::f64, Expand);
    setOperationAction(ISD::FMA , MVT::f32, Expand);
  } else {
    setOperationAction(ISD::FMA , MVT::f64, Legal);
    setOperationAction(ISD::FMA , MVT::f32, Legal);
  }

  setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);

  // If we're enabling GP optimizations, use hardware square root
  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
        Subtarget.hasFRE()))
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);

  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
        Subtarget.hasFRES()))
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  if (Subtarget.hasFCPSGN()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
  } else {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  }

  if (Subtarget.hasFPRND()) {
    setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
    setOperationAction(ISD::FROUND, MVT::f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
    setOperationAction(ISD::FROUND, MVT::f32, Legal);
  }

  // PowerPC does not have BSWAP, but we can use vector BSWAP instruction xxbrd
  // to speed up scalar BSWAP64.
  // CTPOP and CTTZ were introduced in P8 and P9, respectively.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  if (Subtarget.hasP9Vector())
    setOperationAction(ISD::BSWAP, MVT::i64, Custom);
  else
    setOperationAction(ISD::BSWAP, MVT::i64, Expand);
  if (Subtarget.isISA3_0()) {
    setOperationAction(ISD::CTTZ , MVT::i32, Legal);
    setOperationAction(ISD::CTTZ , MVT::i64, Legal);
  } else {
    setOperationAction(ISD::CTTZ , MVT::i32, Expand);
    setOperationAction(ISD::CTTZ , MVT::i64, Expand);
  }

  if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
    setOperationAction(ISD::CTPOP, MVT::i32, Legal);
    setOperationAction(ISD::CTPOP, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::CTPOP, MVT::i32, Expand);
    setOperationAction(ISD::CTPOP, MVT::i64, Expand);
  }

  // PowerPC does not have ROTR
  setOperationAction(ISD::ROTR, MVT::i32, Expand);
  setOperationAction(ISD::ROTR, MVT::i64, Expand);

  if (!Subtarget.useCRBits()) {
    // PowerPC does not have Select
    setOperationAction(ISD::SELECT, MVT::i32, Expand);
    setOperationAction(ISD::SELECT, MVT::i64, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Expand);
  }

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND which requires SetCC
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  if (Subtarget.hasSPE()) {
    // SPE has built-in conversions
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
  } else {
    // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

    // PowerPC does not have [U|S]INT_TO_FP
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
  }

  if (Subtarget.hasDirectMove() && isPPC64) {
    setOperationAction(ISD::BITCAST, MVT::f32, Legal);
    setOperationAction(ISD::BITCAST, MVT::i32, Legal);
    setOperationAction(ISD::BITCAST, MVT::i64, Legal);
    setOperationAction(ISD::BITCAST, MVT::f64, Legal);
  } else {
    setOperationAction(ISD::BITCAST, MVT::f32, Expand);
    setOperationAction(ISD::BITCAST, MVT::i32, Expand);
    setOperationAction(ISD::BITCAST, MVT::i64, Expand);
    setOperationAction(ISD::BITCAST, MVT::f64, Expand);
  }

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
  // SjLj exception handling but a light-weight setjmp/longjmp replacement to
  // support continuations, user-level threading, etc. As a result, no other
  // SjLj exception interfaces are implemented; please don't build your own
  // exception handling based on them.
  // LLVM/Clang supports zero-cost DWARF exception handling.
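  // (These nodes are what the @llvm.eh.sjlj.setjmp and @llvm.eh.sjlj.longjmp
  //  intrinsics are lowered to.)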
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
  setOperationAction(ISD::JumpTable, MVT::i64, Custom);

  // TRAP is legal.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // TRAMPOLINE is custom lowered.
  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  if (Subtarget.isSVR4ABI()) {
    if (isPPC64) {
      // VAARG always uses double-word chunks, so promote anything smaller.
      setOperationAction(ISD::VAARG, MVT::i1, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i8, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i16, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i32, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::Other, Expand);
    } else {
      // VAARG is custom lowered with the 32-bit SVR4 ABI.
      setOperationAction(ISD::VAARG, MVT::Other, Custom);
      setOperationAction(ISD::VAARG, MVT::i64, Custom);
    }
  } else
    setOperationAction(ISD::VAARG, MVT::Other, Expand);

  if (Subtarget.isSVR4ABI() && !isPPC64)
    // VACOPY is custom lowered with the 32-bit SVR4 ABI.
    setOperationAction(ISD::VACOPY, MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY, MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
  setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
  setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // To handle counter-based loop conditions.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);

  // Comparisons that require checking two conditions.
  if (Subtarget.hasSPE()) {
    setCondCodeAction(ISD::SETO, MVT::f32, Expand);
    setCondCodeAction(ISD::SETO, MVT::f64, Expand);
    setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
    setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
  }
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f64, Expand);

  if (Subtarget.has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    // This is just the low 32 bits of a (signed) fp->i64 conversion.
    // We cannot do this with Promote because i64 is not a legal type.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

    if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    if (Subtarget.hasSPE())
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
    else
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }

  // With the instructions enabled under FPCVT, we can do everything.
  if (Subtarget.hasFPCVT()) {
    if (Subtarget.has64BitSupport()) {
      setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
    }

    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  }

  if (Subtarget.use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly
    addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
    // 64-bit PowerPC wants to expand i128 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  }

  if (Subtarget.hasAltivec()) {
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (MVT VT : MVT::vector_valuetypes()) {
      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD, VT, Legal);
      setOperationAction(ISD::SUB, VT, Legal);

      // For v2i64, these are only valid with P8Vector. This is corrected after
      // the loop.
      setOperationAction(ISD::SMAX, VT, Legal);
      setOperationAction(ISD::SMIN, VT, Legal);
      setOperationAction(ISD::UMAX, VT, Legal);
      setOperationAction(ISD::UMIN, VT, Legal);

      if (Subtarget.hasVSX()) {
        setOperationAction(ISD::FMAXNUM, VT, Legal);
        setOperationAction(ISD::FMINNUM, VT, Legal);
      }

      // Vector instructions introduced in P8
      if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
        setOperationAction(ISD::CTPOP, VT, Legal);
        setOperationAction(ISD::CTLZ, VT, Legal);
      }
      else {
        setOperationAction(ISD::CTPOP, VT, Expand);
        setOperationAction(ISD::CTLZ, VT, Expand);
      }

      // Vector instructions introduced in P9
      if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
        setOperationAction(ISD::CTTZ, VT, Legal);
      else
        setOperationAction(ISD::CTTZ, VT, Expand);

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
      AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND , VT, Promote);
      AddPromotedToType (ISD::AND , VT, MVT::v4i32);
      setOperationAction(ISD::OR , VT, Promote);
      AddPromotedToType (ISD::OR , VT, MVT::v4i32);
      setOperationAction(ISD::XOR , VT, Promote);
      AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
      setOperationAction(ISD::LOAD , VT, Promote);
      AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::SELECT_CC, VT, Promote);
      AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
      setOperationAction(ISD::STORE, VT, Promote);
      AddPromotedToType (ISD::STORE, VT, MVT::v4i32);

      // No other operations are legal.
      setOperationAction(ISD::MUL , VT, Expand);
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::FDIV, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FNEG, VT, Expand);
      setOperationAction(ISD::FSQRT, VT, Expand);
      setOperationAction(ISD::FLOG, VT, Expand);
      setOperationAction(ISD::FLOG10, VT, Expand);
      setOperationAction(ISD::FLOG2, VT, Expand);
      setOperationAction(ISD::FEXP, VT, Expand);
      setOperationAction(ISD::FEXP2, VT, Expand);
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FABS, VT, Expand);
      setOperationAction(ISD::FFLOOR, VT, Expand);
      setOperationAction(ISD::FCEIL, VT, Expand);
      setOperationAction(ISD::FTRUNC, VT, Expand);
      setOperationAction(ISD::FRINT, VT, Expand);
      setOperationAction(ISD::FNEARBYINT, VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
      setOperationAction(ISD::MULHU, VT, Expand);
      setOperationAction(ISD::MULHS, VT, Expand);
      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
      setOperationAction(ISD::UDIVREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::BSWAP, VT, Expand);
      setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
      setOperationAction(ISD::ROTL, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      for (MVT InnerVT : MVT::vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
    }
    if (!Subtarget.hasP8Vector()) {
      setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
      setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
      setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
      setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
    }

    for (auto VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8})
      setOperationAction(ISD::ABS, VT, Custom);

    // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    // Vector truncates to sub-word integers that fit in an Altivec/VSX register
    // are cheap, so handle them before they get expanded to scalar.
    setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);

    setOperationAction(ISD::AND , MVT::v4i32, Legal);
    setOperationAction(ISD::OR , MVT::v4i32, Legal);
    setOperationAction(ISD::XOR , MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32,
                       Subtarget.useCRBits() ? Legal : Expand);
    setOperationAction(ISD::STORE , MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);

    // Without hasP8Altivec set, v2i64 SMAX isn't available.
    // But ABS custom lowering requires SMAX support.
    if (!Subtarget.hasP8Altivec())
      setOperationAction(ISD::ABS, MVT::v2i64, Expand);

    addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);

    if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    }

    if (Subtarget.hasP8Altivec())
      setOperationAction(ISD::MUL, MVT::v4i32, Legal);
    else
      setOperationAction(ISD::MUL, MVT::v4i32, Custom);

    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    // Altivec does not contain unordered floating-point compare instructions
    setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);

    if (Subtarget.hasVSX()) {
      setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
      if (Subtarget.hasP8Vector()) {
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
      }
      if (Subtarget.hasDirectMove() && isPPC64) {
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
      }
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);

      setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
      setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
      setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
      setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
      setOperationAction(ISD::FROUND, MVT::v2f64, Legal);

      setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
      setOperationAction(ISD::MUL, MVT::v2f64, Legal);
      setOperationAction(ISD::FMA, MVT::v2f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);

      // Share the Altivec comparison restrictions.
      setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);

      setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
      setOperationAction(ISD::STORE, MVT::v2f64, Legal);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);

      if (Subtarget.hasP8Vector())
        addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);

      addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);

      addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);

      if (Subtarget.hasP8Altivec()) {
        setOperationAction(ISD::SHL, MVT::v2i64, Legal);
        setOperationAction(ISD::SRA, MVT::v2i64, Legal);
        setOperationAction(ISD::SRL, MVT::v2i64, Legal);

        // 128 bit shifts can be accomplished via 3 instructions for SHL and
        // SRL, but not for SRA because of the instructions available:
        // VS{RL} and VS{RL}O. However, due to direct move costs, it's not
        // worth doing.
        setOperationAction(ISD::SHL, MVT::v1i128, Expand);
        setOperationAction(ISD::SRL, MVT::v1i128, Expand);
        setOperationAction(ISD::SRA, MVT::v1i128, Expand);

        setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
      }
      else {
        setOperationAction(ISD::SHL, MVT::v2i64, Expand);
        setOperationAction(ISD::SRA, MVT::v2i64, Expand);
        setOperationAction(ISD::SRL, MVT::v2i64, Expand);

        setOperationAction(ISD::SETCC, MVT::v2i64, Custom);

        // VSX v2i64 only supports non-arithmetic operations.
        setOperationAction(ISD::ADD, MVT::v2i64, Expand);
        setOperationAction(ISD::SUB, MVT::v2i64, Expand);
      }

      setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
      AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
      setOperationAction(ISD::STORE, MVT::v2i64, Promote);
      AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);

      setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);

      // Custom handling for partial vectors of integers converted to
      // floating point. We already have optimal handling for v2i32 through
      // the DAG combine, so those aren't necessary.
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);

      setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
      setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
      setOperationAction(ISD::FABS, MVT::v4f32, Legal);
      setOperationAction(ISD::FABS, MVT::v2f64, Legal);

      if (Subtarget.hasDirectMove())
        setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
      setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);

      addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
    }

    if (Subtarget.hasP8Altivec()) {
      addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
      addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
    }

    if (Subtarget.hasP9Vector()) {
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

      // 128 bit shifts can be accomplished via 3 instructions for SHL and
      // SRL, but not for SRA because of the instructions available:
      // VS{RL} and VS{RL}O.
      setOperationAction(ISD::SHL, MVT::v1i128, Legal);
      setOperationAction(ISD::SRL, MVT::v1i128, Legal);
      setOperationAction(ISD::SRA, MVT::v1i128, Expand);

      if (EnableQuadPrecision) {
        addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
        setOperationAction(ISD::FADD, MVT::f128, Legal);
        setOperationAction(ISD::FSUB, MVT::f128, Legal);
        setOperationAction(ISD::FDIV, MVT::f128, Legal);
        setOperationAction(ISD::FMUL, MVT::f128, Legal);
        setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
        // No extending loads to f128 on PPC.
        for (MVT FPT : MVT::fp_valuetypes())
          setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
        setOperationAction(ISD::FMA, MVT::f128, Legal);
        setCondCodeAction(ISD::SETULT, MVT::f128, Expand);
        setCondCodeAction(ISD::SETUGT, MVT::f128, Expand);
        setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand);
        setCondCodeAction(ISD::SETOGE, MVT::f128, Expand);
        setCondCodeAction(ISD::SETOLE, MVT::f128, Expand);
        setCondCodeAction(ISD::SETONE, MVT::f128, Expand);

        setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
        setOperationAction(ISD::FRINT, MVT::f128, Legal);
        setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
        setOperationAction(ISD::FCEIL, MVT::f128, Legal);
        setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
        setOperationAction(ISD::FROUND, MVT::f128, Legal);

        setOperationAction(ISD::SELECT, MVT::f128, Expand);
        setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
        setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
        setTruncStoreAction(MVT::f128, MVT::f64, Expand);
        setTruncStoreAction(MVT::f128, MVT::f32, Expand);
        setOperationAction(ISD::BITCAST, MVT::i128, Custom);
        // No implementation for these ops for PowerPC.
        setOperationAction(ISD::FSIN, MVT::f128, Expand);
        setOperationAction(ISD::FCOS, MVT::f128, Expand);
        setOperationAction(ISD::FPOW, MVT::f128, Expand);
        setOperationAction(ISD::FPOWI, MVT::f128, Expand);
        setOperationAction(ISD::FREM, MVT::f128, Expand);
      }
      setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);

    }

    if (Subtarget.hasP9Altivec()) {
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
    }
  }

  if (Subtarget.hasQPX()) {
    setOperationAction(ISD::FADD, MVT::v4f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
    setOperationAction(ISD::FREM, MVT::v4f64, Expand);

    setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal);
    setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand);

    setOperationAction(ISD::LOAD , MVT::v4f64, Custom);
    setOperationAction(ISD::STORE , MVT::v4f64, Custom);

    setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom);
    setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4f64, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal);
    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand);
    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom);

    setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal);
    setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand);

    setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal);
    setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);

    setOperationAction(ISD::FNEG , MVT::v4f64, Legal);
    setOperationAction(ISD::FABS , MVT::v4f64, Legal);
    setOperationAction(ISD::FSIN , MVT::v4f64, Expand);
    setOperationAction(ISD::FCOS , MVT::v4f64, Expand);
    setOperationAction(ISD::FPOW , MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG , MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand);
    setOperationAction(ISD::FEXP , MVT::v4f64, Expand);
    setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand);

    setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal);

    setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal);

    addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);

    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FREM, MVT::v4f32, Expand);

    setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
    setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand);

    setOperationAction(ISD::LOAD , MVT::v4f32, Custom);
    setOperationAction(ISD::STORE , MVT::v4f32, Custom);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal);
    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand);
    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal);
    setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand);

    setOperationAction(ISD::FNEG , MVT::v4f32, Legal);
    setOperationAction(ISD::FABS , MVT::v4f32, Legal);
    setOperationAction(ISD::FSIN , MVT::v4f32, Expand);
    setOperationAction(ISD::FCOS , MVT::v4f32, Expand);
    setOperationAction(ISD::FPOW , MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG , MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP , MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand);

    setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);

    setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal);

    addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);

    setOperationAction(ISD::AND , MVT::v4i1, Legal);
    setOperationAction(ISD::OR , MVT::v4i1, Legal);
    setOperationAction(ISD::XOR , MVT::v4i1, Legal);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4i1, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4i1, Legal);

    setOperationAction(ISD::LOAD , MVT::v4i1, Custom);
    setOperationAction(ISD::STORE , MVT::v4i1, Custom);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand);
    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);

    setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);

    addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);

    setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

    setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);

    // These need to set FE_INEXACT, and so cannot be vectorized here.
    setOperationAction(ISD::FRINT, MVT::v4f64, Expand);
    setOperationAction(ISD::FRINT, MVT::v4f32, Expand);

    if (TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    } else {
      setOperationAction(ISD::FDIV, MVT::v4f64, Expand);
      setOperationAction(ISD::FSQRT, MVT::v4f64, Expand);

      setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
    }
  }

  if (Subtarget.has64BitSupport())
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);

  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);

  if (!isPPC64) {
    setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
    setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
  }

  setBooleanContents(ZeroOrOneBooleanContent);

  if (Subtarget.hasAltivec()) {
    // Altivec instructions set fields to all zeros or all ones.
    setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  }

  if (!isPPC64) {
    // These libcalls are not available in 32-bit.
    setLibcallName(RTLIB::SHL_I128, nullptr);
    setLibcallName(RTLIB::SRL_I128, nullptr);
    setLibcallName(RTLIB::SRA_I128, nullptr);
  }

  setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::SHL);
  setTargetDAGCombine(ISD::SRA);
  setTargetDAGCombine(ISD::SRL);
  setTargetDAGCombine(ISD::MUL);
  setTargetDAGCombine(ISD::SINT_TO_FP);
  setTargetDAGCombine(ISD::BUILD_VECTOR);
  if (Subtarget.hasFPCVT())
    setTargetDAGCombine(ISD::UINT_TO_FP);
  setTargetDAGCombine(ISD::LOAD);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);
  if (Subtarget.useCRBits())
    setTargetDAGCombine(ISD::BRCOND);
  setTargetDAGCombine(ISD::BSWAP);
  setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
  setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
  setTargetDAGCombine(ISD::INTRINSIC_VOID);

  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  setTargetDAGCombine(ISD::TRUNCATE);

  if (Subtarget.useCRBits()) {
    setTargetDAGCombine(ISD::TRUNCATE);
    setTargetDAGCombine(ISD::SETCC);
    setTargetDAGCombine(ISD::SELECT_CC);
  }

  // Use reciprocal estimates.
  if (TM.Options.UnsafeFPMath) {
    setTargetDAGCombine(ISD::FDIV);
    setTargetDAGCombine(ISD::FSQRT);
  }

  if (Subtarget.hasP9Altivec()) {
    setTargetDAGCombine(ISD::ABS);
    setTargetDAGCombine(ISD::VSELECT);
  }

  // Darwin long double math library functions have $LDBL128 appended.
  if (Subtarget.isDarwin()) {
    setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
    setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
    setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
    setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
    setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
    setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
    setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
    setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
    setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
    setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
  }

  if (EnableQuadPrecision) {
    setLibcallName(RTLIB::LOG_F128, "logf128");
    setLibcallName(RTLIB::LOG2_F128, "log2f128");
    setLibcallName(RTLIB::LOG10_F128, "log10f128");
    setLibcallName(RTLIB::EXP_F128, "expf128");
    setLibcallName(RTLIB::EXP2_F128, "exp2f128");
    setLibcallName(RTLIB::SIN_F128, "sinf128");
    setLibcallName(RTLIB::COS_F128, "cosf128");
    setLibcallName(RTLIB::POW_F128, "powf128");
    setLibcallName(RTLIB::FMIN_F128, "fminf128");
    setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
    setLibcallName(RTLIB::POWI_F128, "__powikf2");
    setLibcallName(RTLIB::REM_F128, "fmodf128");
  }

  // With 32 condition bits, we don't need to sink (and duplicate) compares
  // aggressively in CodeGenPrep.
  if (Subtarget.useCRBits()) {
    setHasMultipleConditionRegisters();
    setJumpIsExpensive();
  }

  setMinFunctionAlignment(2);
  if (Subtarget.isDarwin())
    setPrefFunctionAlignment(4);

  switch (Subtarget.getDarwinDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_A2:
  case PPC::DIR_E500:
  case PPC::DIR_E500mc:
  case PPC::DIR_E5500:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8:
  case PPC::DIR_PWR9:
    setPrefFunctionAlignment(4);
    setPrefLoopAlignment(4);
    break;
  }

  if (Subtarget.enableMachineScheduler())
    setSchedulingPreference(Sched::Source);
  else
    setSchedulingPreference(Sched::Hybrid);

  computeRegisterProperties(STI.getRegisterInfo());

  // The Freescale cores do better with aggressive inlining of memcpy and
  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
  if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
      Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
    MaxStoresPerMemset = 32;
    MaxStoresPerMemsetOptSize = 16;
    MaxStoresPerMemcpy = 32;
    MaxStoresPerMemcpyOptSize = 8;
    MaxStoresPerMemmove = 32;
    MaxStoresPerMemmoveOptSize = 8;
  } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
    // The A2 also benefits from (very) aggressive inlining of memcpy and
    // friends. The overhead of a function call, even when warm, can be
    // over one hundred cycles.
    MaxStoresPerMemset = 128;
    MaxStoresPerMemcpy = 128;
    MaxStoresPerMemmove = 128;
    MaxLoadsPerMemcmp = 128;
  } else {
    MaxLoadsPerMemcmp = 8;
    MaxLoadsPerMemcmpOptSize = 4;
  }
}

/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
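/// Vector members of at least 128 bits raise the alignment to 16 (or to 32
/// for 256-bit-or-wider vectors when MaxMaxAlign allows it); array and struct
/// members are walked recursively.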
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
                             unsigned MaxMaxAlign) {
  if (MaxAlign == MaxMaxAlign)
    return;
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
      MaxAlign = 32;
    else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
      MaxAlign = 16;
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    unsigned EltAlign = 0;
    getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (auto *EltTy : STy->elements()) {
      unsigned EltAlign = 0;
      getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == MaxMaxAlign)
        break;
    }
  }
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
                                                  const DataLayout &DL) const {
  // Darwin passes everything on 4 byte boundary.
  if (Subtarget.isDarwin())
    return 4;

  // 16byte and wider vectors are passed on 16byte boundary.
  // The rest is 8 on PPC64 and 4 on PPC32 boundary.
  unsigned Align = Subtarget.isPPC64() ? 8 : 4;
  if (Subtarget.hasAltivec() || Subtarget.hasQPX())
    getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
  return Align;
}

unsigned PPCTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
                                                          CallingConv::ID CC,
                                                          EVT VT) const {
  if (Subtarget.hasSPE() && VT == MVT::f64)
    return 2;
  return PPCTargetLowering::getNumRegisters(Context, VT);
}

MVT PPCTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
                                                     CallingConv::ID CC,
                                                     EVT VT) const {
  if (Subtarget.hasSPE() && VT == MVT::f64)
    return MVT::i32;
  return PPCTargetLowering::getRegisterType(Context, VT);
}

bool PPCTargetLowering::useSoftFloat() const {
  return Subtarget.useSoftFloat();
}

bool PPCTargetLowering::hasSPE() const {
  return Subtarget.hasSPE();
}

const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((PPCISD::NodeType)Opcode) {
  case PPCISD::FIRST_NUMBER: break;
  case PPCISD::FSEL: return "PPCISD::FSEL";
  case PPCISD::FCFID: return "PPCISD::FCFID";
  case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
  case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
  case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
  case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
  case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
  case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
  case PPCISD::FP_TO_UINT_IN_VSR:
    return "PPCISD::FP_TO_UINT_IN_VSR";
  case PPCISD::FP_TO_SINT_IN_VSR:
    return "PPCISD::FP_TO_SINT_IN_VSR";
  case PPCISD::FRE: return "PPCISD::FRE";
  case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
  case PPCISD::STFIWX: return "PPCISD::STFIWX";
  case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM: return "PPCISD::VPERM";
  case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
  case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
  case PPCISD::XXREVERSE: return "PPCISD::XXREVERSE";
  case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
  case PPCISD::VECSHL: return "PPCISD::VECSHL";
  case PPCISD::CMPB: return "PPCISD::CMPB";
  case PPCISD::Hi: return "PPCISD::Hi";
  case PPCISD::Lo: return "PPCISD::Lo";
  case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
  case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
  case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
  case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
  case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
  case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL: return "PPCISD::SRL";
  case PPCISD::SRA: return "PPCISD::SRA";
  case PPCISD::SHL: return "PPCISD::SHL";
  case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
  case PPCISD::CALL: return "PPCISD::CALL";
  case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
  case PPCISD::MTCTR: return "PPCISD::MTCTR";
  case PPCISD::BCTRL: return "PPCISD::BCTRL";
  case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
  case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
  case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
  case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
  case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
  case PPCISD::MFVSR: return "PPCISD::MFVSR";
  case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
  case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
  case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
  case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
  case PPCISD::ANDIo_1_EQ_BIT: return "PPCISD::ANDIo_1_EQ_BIT";
  case PPCISD::ANDIo_1_GT_BIT: return "PPCISD::ANDIo_1_GT_BIT";
  case PPCISD::VCMP: return "PPCISD::VCMP";
  case PPCISD::VCMPo: return "PPCISD::VCMPo";
  case PPCISD::LBRX: return "PPCISD::LBRX";
  case PPCISD::STBRX: return "PPCISD::STBRX";
  case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
  case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
  case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
  case PPCISD::STXSIX: return "PPCISD::STXSIX";
  case PPCISD::VEXTS: return "PPCISD::VEXTS";
  case PPCISD::SExtVElems: return "PPCISD::SExtVElems";
  case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
  case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
  case PPCISD::ST_VSR_SCAL_INT:
    return "PPCISD::ST_VSR_SCAL_INT";
  case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
  case PPCISD::BDNZ: return "PPCISD::BDNZ";
  case PPCISD::BDZ: return "PPCISD::BDZ";
  case PPCISD::MFFS: return "PPCISD::MFFS";
  case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
  case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
  case PPCISD::CR6SET: return "PPCISD::CR6SET";
  case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
  case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
  case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
  case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
  case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
  case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
  case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
  case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
  case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
  case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
  case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L"; 1383 case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR"; 1384 case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR"; 1385 case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA"; 1386 case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L"; 1387 case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT"; 1388 case PPCISD::SC: return "PPCISD::SC"; 1389 case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB"; 1390 case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE"; 1391 case PPCISD::RFEBB: return "PPCISD::RFEBB"; 1392 case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD"; 1393 case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN"; 1394 case PPCISD::VABSD: return "PPCISD::VABSD"; 1395 case PPCISD::QVFPERM: return "PPCISD::QVFPERM"; 1396 case PPCISD::QVGPCI: return "PPCISD::QVGPCI"; 1397 case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI"; 1398 case PPCISD::QVESPLATI: return "PPCISD::QVESPLATI"; 1399 case PPCISD::QBFLT: return "PPCISD::QBFLT"; 1400 case PPCISD::QVLFSb: return "PPCISD::QVLFSb"; 1401 case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128"; 1402 case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI"; 1403 case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH"; 1404 case PPCISD::FP_EXTEND_LH: return "PPCISD::FP_EXTEND_LH"; 1405 } 1406 return nullptr; 1407 } 1408 1409 EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C, 1410 EVT VT) const { 1411 if (!VT.isVector()) 1412 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32; 1413 1414 if (Subtarget.hasQPX()) 1415 return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements()); 1416 1417 return VT.changeVectorElementTypeToInteger(); 1418 } 1419 1420 bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const { 1421 assert(VT.isFloatingPoint() && "Non-floating-point FMA?"); 1422 return true; 1423 } 1424 1425 //===----------------------------------------------------------------------===// 1426 // Node matching predicates, for use by the tblgen matching code. 1427 //===----------------------------------------------------------------------===// 1428 1429 /// isFloatingPointZero - Return true if this is 0.0 or -0.0. 1430 static bool isFloatingPointZero(SDValue Op) { 1431 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) 1432 return CFP->getValueAPF().isZero(); 1433 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { 1434 // Maybe this has already been legalized into the constant pool? 1435 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1))) 1436 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) 1437 return CFP->getValueAPF().isZero(); 1438 } 1439 return false; 1440 } 1441 1442 /// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return 1443 /// true if Op is undef or if it matches the specified value. 1444 static bool isConstantOrUndef(int Op, int Val) { 1445 return Op < 0 || Op == Val; 1446 } 1447 1448 /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a 1449 /// VPKUHUM instruction. 1450 /// The ShuffleKind distinguishes between big-endian operations with 1451 /// two different inputs (0), either-endian operations with two identical 1452 /// inputs (1), and little-endian operations with two different inputs (2). 1453 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). 
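/// As an illustration (not part of the original comment, derived from the
/// checks below): on a big-endian target, ShuffleKind 0 matches the byte mask
/// <1,3,5,...,31>, i.e. the odd bytes of the two concatenated inputs; on a
/// little-endian target, ShuffleKind 2 matches <0,2,4,...,30>; the unary form
/// (ShuffleKind 1) matches a mask whose two halves repeat the same eight
/// indices.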
1454 bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, 1455 SelectionDAG &DAG) { 1456 bool IsLE = DAG.getDataLayout().isLittleEndian(); 1457 if (ShuffleKind == 0) { 1458 if (IsLE) 1459 return false; 1460 for (unsigned i = 0; i != 16; ++i) 1461 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1)) 1462 return false; 1463 } else if (ShuffleKind == 2) { 1464 if (!IsLE) 1465 return false; 1466 for (unsigned i = 0; i != 16; ++i) 1467 if (!isConstantOrUndef(N->getMaskElt(i), i*2)) 1468 return false; 1469 } else if (ShuffleKind == 1) { 1470 unsigned j = IsLE ? 0 : 1; 1471 for (unsigned i = 0; i != 8; ++i) 1472 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) || 1473 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)) 1474 return false; 1475 } 1476 return true; 1477 } 1478 1479 /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a 1480 /// VPKUWUM instruction. 1481 /// The ShuffleKind distinguishes between big-endian operations with 1482 /// two different inputs (0), either-endian operations with two identical 1483 /// inputs (1), and little-endian operations with two different inputs (2). 1484 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). 1485 bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, 1486 SelectionDAG &DAG) { 1487 bool IsLE = DAG.getDataLayout().isLittleEndian(); 1488 if (ShuffleKind == 0) { 1489 if (IsLE) 1490 return false; 1491 for (unsigned i = 0; i != 16; i += 2) 1492 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) || 1493 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3)) 1494 return false; 1495 } else if (ShuffleKind == 2) { 1496 if (!IsLE) 1497 return false; 1498 for (unsigned i = 0; i != 16; i += 2) 1499 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) || 1500 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1)) 1501 return false; 1502 } else if (ShuffleKind == 1) { 1503 unsigned j = IsLE ? 0 : 2; 1504 for (unsigned i = 0; i != 8; i += 2) 1505 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) || 1506 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) || 1507 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) || 1508 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1)) 1509 return false; 1510 } 1511 return true; 1512 } 1513 1514 /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a 1515 /// VPKUDUM instruction, AND the VPKUDUM instruction exists for the 1516 /// current subtarget. 1517 /// 1518 /// The ShuffleKind distinguishes between big-endian operations with 1519 /// two different inputs (0), either-endian operations with two identical 1520 /// inputs (1), and little-endian operations with two different inputs (2). 1521 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). 
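/// As an illustration (not part of the original comment): with ShuffleKind 0
/// on a big-endian target this matches
/// <4,5,6,7, 12,13,14,15, 20,21,22,23, 28,29,30,31>, i.e. the low-order word
/// of each of the four doublewords of the concatenated inputs; the predicate
/// also fails early unless the subtarget has POWER8 vector support.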
1522 bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, 1523 SelectionDAG &DAG) { 1524 const PPCSubtarget& Subtarget = 1525 static_cast<const PPCSubtarget&>(DAG.getSubtarget()); 1526 if (!Subtarget.hasP8Vector()) 1527 return false; 1528 1529 bool IsLE = DAG.getDataLayout().isLittleEndian(); 1530 if (ShuffleKind == 0) { 1531 if (IsLE) 1532 return false; 1533 for (unsigned i = 0; i != 16; i += 4) 1534 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) || 1535 !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) || 1536 !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) || 1537 !isConstantOrUndef(N->getMaskElt(i+3), i*2+7)) 1538 return false; 1539 } else if (ShuffleKind == 2) { 1540 if (!IsLE) 1541 return false; 1542 for (unsigned i = 0; i != 16; i += 4) 1543 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) || 1544 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) || 1545 !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) || 1546 !isConstantOrUndef(N->getMaskElt(i+3), i*2+3)) 1547 return false; 1548 } else if (ShuffleKind == 1) { 1549 unsigned j = IsLE ? 0 : 4; 1550 for (unsigned i = 0; i != 8; i += 4) 1551 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) || 1552 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) || 1553 !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) || 1554 !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) || 1555 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) || 1556 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) || 1557 !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) || 1558 !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3)) 1559 return false; 1560 } 1561 return true; 1562 } 1563 1564 /// isVMerge - Common function, used to match vmrg* shuffles. 1565 /// 1566 static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, 1567 unsigned LHSStart, unsigned RHSStart) { 1568 if (N->getValueType(0) != MVT::v16i8) 1569 return false; 1570 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) && 1571 "Unsupported merge size!"); 1572 1573 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units 1574 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit 1575 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j), 1576 LHSStart+j+i*UnitSize) || 1577 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j), 1578 RHSStart+j+i*UnitSize)) 1579 return false; 1580 } 1581 return true; 1582 } 1583 1584 /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for 1585 /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes). 1586 /// The ShuffleKind distinguishes between big-endian merges with two 1587 /// different inputs (0), either-endian merges with two identical inputs (1), 1588 /// and little-endian merges with two different inputs (2). For the latter, 1589 /// the input operands are swapped (see PPCInstrAltivec.td). 
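/// As an illustration (not part of the original comment): for UnitSize 4 and
/// ShuffleKind 0 on a big-endian target this matches the vmrglw pattern
/// <8,9,10,11, 24,25,26,27, 12,13,14,15, 28,29,30,31>, which interleaves the
/// two low-order words of each input vector.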
1590 bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, 1591 unsigned ShuffleKind, SelectionDAG &DAG) { 1592 if (DAG.getDataLayout().isLittleEndian()) { 1593 if (ShuffleKind == 1) // unary 1594 return isVMerge(N, UnitSize, 0, 0); 1595 else if (ShuffleKind == 2) // swapped 1596 return isVMerge(N, UnitSize, 0, 16); 1597 else 1598 return false; 1599 } else { 1600 if (ShuffleKind == 1) // unary 1601 return isVMerge(N, UnitSize, 8, 8); 1602 else if (ShuffleKind == 0) // normal 1603 return isVMerge(N, UnitSize, 8, 24); 1604 else 1605 return false; 1606 } 1607 } 1608 1609 /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for 1610 /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes). 1611 /// The ShuffleKind distinguishes between big-endian merges with two 1612 /// different inputs (0), either-endian merges with two identical inputs (1), 1613 /// and little-endian merges with two different inputs (2). For the latter, 1614 /// the input operands are swapped (see PPCInstrAltivec.td). 1615 bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, 1616 unsigned ShuffleKind, SelectionDAG &DAG) { 1617 if (DAG.getDataLayout().isLittleEndian()) { 1618 if (ShuffleKind == 1) // unary 1619 return isVMerge(N, UnitSize, 8, 8); 1620 else if (ShuffleKind == 2) // swapped 1621 return isVMerge(N, UnitSize, 8, 24); 1622 else 1623 return false; 1624 } else { 1625 if (ShuffleKind == 1) // unary 1626 return isVMerge(N, UnitSize, 0, 0); 1627 else if (ShuffleKind == 0) // normal 1628 return isVMerge(N, UnitSize, 0, 16); 1629 else 1630 return false; 1631 } 1632 } 1633 1634 /** 1635 * Common function used to match vmrgew and vmrgow shuffles 1636 * 1637 * The indexOffset determines whether to look for even or odd words in 1638 * the shuffle mask. This is based on the endianness of the target 1639 * machine. 1640 * - Little Endian: 1641 * - Use offset of 0 to check for odd elements 1642 * - Use offset of 4 to check for even elements 1643 * - Big Endian: 1644 * - Use offset of 0 to check for even elements 1645 * - Use offset of 4 to check for odd elements 1646 * A detailed description of the vector element ordering for little endian and 1647 * big endian can be found at 1648 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html 1649 * "Targeting your applications - what little endian and big endian IBM XL C/C++ 1650 * compiler differences mean to you". 1651 * 1652 * The mask to the shuffle vector instruction specifies the indices of the 1653 * elements from the two input vectors to place in the result. The elements are 1654 * numbered in array-access order, starting with the first vector. These vectors 1655 * are always of type v16i8, thus each vector will contain 16 elements of size 1656 * 8 bits. More info on the shuffle vector can be found in the 1657 * http://llvm.org/docs/LangRef.html#shufflevector-instruction 1658 * Language Reference. 1659 * 1660 * The RHSStartValue indicates whether the same input vectors are used (unary) 1661 * or two different input vectors are used, based on the following: 1662 * - If the instruction uses the same vector for both inputs, the range of the 1663 * indices will be 0 to 15. In this case, the RHSStart value passed should 1664 * be 0. 1665 * - If the instruction has two different vectors then the range of the 1666 * indices will be 0 to 31.
In this case, the RHSStart value passed should 1667 * be 16 (indices 0-15 specify elements in the first vector while indices 16 1668 * to 31 specify elements in the second vector). 1669 * 1670 * \param[in] N The shuffle vector SD Node to analyze 1671 * \param[in] IndexOffset Specifies whether to look for even or odd elements 1672 * \param[in] RHSStartValue Specifies the starting index for the right-hand input 1673 * vector to the shuffle_vector instruction 1674 * \return true iff this shuffle vector represents an even or odd word merge 1675 */ 1676 static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset, 1677 unsigned RHSStartValue) { 1678 if (N->getValueType(0) != MVT::v16i8) 1679 return false; 1680 1681 for (unsigned i = 0; i < 2; ++i) 1682 for (unsigned j = 0; j < 4; ++j) 1683 if (!isConstantOrUndef(N->getMaskElt(i*4+j), 1684 i*RHSStartValue+j+IndexOffset) || 1685 !isConstantOrUndef(N->getMaskElt(i*4+j+8), 1686 i*RHSStartValue+j+IndexOffset+8)) 1687 return false; 1688 return true; 1689 } 1690 1691 /** 1692 * Determine if the specified shuffle mask is suitable for the vmrgew or 1693 * vmrgow instructions. 1694 * 1695 * \param[in] N The shuffle vector SD Node to analyze 1696 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false) 1697 * \param[in] ShuffleKind Identify the type of merge: 1698 * - 0 = big-endian merge with two different inputs; 1699 * - 1 = either-endian merge with two identical inputs; 1700 * - 2 = little-endian merge with two different inputs (inputs are swapped for 1701 * little-endian merges). 1702 * \param[in] DAG The current SelectionDAG 1703 * \return true iff this shuffle mask is suitable for the vmrgew or vmrgow instruction 1704 */ 1705 bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, 1706 unsigned ShuffleKind, SelectionDAG &DAG) { 1707 if (DAG.getDataLayout().isLittleEndian()) { 1708 unsigned indexOffset = CheckEven ? 4 : 0; 1709 if (ShuffleKind == 1) // Unary 1710 return isVMerge(N, indexOffset, 0); 1711 else if (ShuffleKind == 2) // swapped 1712 return isVMerge(N, indexOffset, 16); 1713 else 1714 return false; 1715 } 1716 else { 1717 unsigned indexOffset = CheckEven ? 0 : 4; 1718 if (ShuffleKind == 1) // Unary 1719 return isVMerge(N, indexOffset, 0); 1720 else if (ShuffleKind == 0) // Normal 1721 return isVMerge(N, indexOffset, 16); 1722 else 1723 return false; 1724 } 1725 return false; 1726 } 1727 1728 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift 1729 /// amount, otherwise return -1. 1730 /// The ShuffleKind distinguishes between big-endian operations with two 1731 /// different inputs (0), either-endian operations with two identical inputs 1732 /// (1), and little-endian operations with two different inputs (2). For the 1733 /// latter, the input operands are swapped (see PPCInstrAltivec.td). 1734 int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, 1735 SelectionDAG &DAG) { 1736 if (N->getValueType(0) != MVT::v16i8) 1737 return -1; 1738 1739 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); 1740 1741 // Find the first non-undef value in the shuffle mask. 1742 unsigned i; 1743 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i) 1744 /*search*/; 1745 1746 if (i == 16) return -1; // all undef. 1747 1748 // Otherwise, check to see if the rest of the elements are consecutively 1749 // numbered from this value.
1750 unsigned ShiftAmt = SVOp->getMaskElt(i); 1751 if (ShiftAmt < i) return -1; 1752 1753 ShiftAmt -= i; 1754 bool isLE = DAG.getDataLayout().isLittleEndian(); 1755 1756 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) { 1757 // Check the rest of the elements to see if they are consecutive. 1758 for (++i; i != 16; ++i) 1759 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i)) 1760 return -1; 1761 } else if (ShuffleKind == 1) { 1762 // Check the rest of the elements to see if they are consecutive. 1763 for (++i; i != 16; ++i) 1764 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15)) 1765 return -1; 1766 } else 1767 return -1; 1768 1769 if (isLE) 1770 ShiftAmt = 16 - ShiftAmt; 1771 1772 return ShiftAmt; 1773 } 1774 1775 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand 1776 /// specifies a splat of a single element that is suitable for input to 1777 /// VSPLTB/VSPLTH/VSPLTW. 1778 bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { 1779 assert(N->getValueType(0) == MVT::v16i8 && 1780 (EltSize == 1 || EltSize == 2 || EltSize == 4)); 1781 1782 // The consecutive indices need to specify an element, not part of two 1783 // different elements. So abandon ship early if this isn't the case. 1784 if (N->getMaskElt(0) % EltSize != 0) 1785 return false; 1786 1787 // This is a splat operation if each element of the permute is the same, and 1788 // if the value doesn't reference the second vector. 1789 unsigned ElementBase = N->getMaskElt(0); 1790 1791 // FIXME: Handle UNDEF elements too! 1792 if (ElementBase >= 16) 1793 return false; 1794 1795 // Check that the indices are consecutive, in the case of a multi-byte element 1796 // splatted with a v16i8 mask. 1797 for (unsigned i = 1; i != EltSize; ++i) 1798 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase)) 1799 return false; 1800 1801 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) { 1802 if (N->getMaskElt(i) < 0) continue; 1803 for (unsigned j = 0; j != EltSize; ++j) 1804 if (N->getMaskElt(i+j) != N->getMaskElt(j)) 1805 return false; 1806 } 1807 return true; 1808 } 1809 1810 /// Check that the mask is shuffling N byte elements. Within each N byte 1811 /// element of the mask, the indices could be either in increasing or 1812 /// decreasing order as long as they are consecutive. 1813 /// \param[in] N the shuffle vector SD Node to analyze 1814 /// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/ 1815 /// Word/DoubleWord/QuadWord). 1816 /// \param[in] StepLen the delta indices number among the N byte element, if 1817 /// the mask is in increasing/decreasing order then it is 1/-1. 1818 /// \return true iff the mask is shuffling N byte elements. 
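/// As an illustration (not part of the original comment): with Width == 4 and
/// StepLen == 1 the mask <4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11> is
/// accepted (each word starts at a multiple of 4 and its bytes increase by 1),
/// while with Width == 4 and StepLen == -1 a byte-reversed-word mask such as
/// <3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12> is accepted.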
1819 static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width, 1820 int StepLen) { 1821 assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) && 1822 "Unexpected element width."); 1823 assert((StepLen == 1 || StepLen == -1) && "Unexpected element width."); 1824 1825 unsigned NumOfElem = 16 / Width; 1826 unsigned MaskVal[16]; // Width is never greater than 16 1827 for (unsigned i = 0; i < NumOfElem; ++i) { 1828 MaskVal[0] = N->getMaskElt(i * Width); 1829 if ((StepLen == 1) && (MaskVal[0] % Width)) { 1830 return false; 1831 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) { 1832 return false; 1833 } 1834 1835 for (unsigned int j = 1; j < Width; ++j) { 1836 MaskVal[j] = N->getMaskElt(i * Width + j); 1837 if (MaskVal[j] != MaskVal[j-1] + StepLen) { 1838 return false; 1839 } 1840 } 1841 } 1842 1843 return true; 1844 } 1845 1846 bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, 1847 unsigned &InsertAtByte, bool &Swap, bool IsLE) { 1848 if (!isNByteElemShuffleMask(N, 4, 1)) 1849 return false; 1850 1851 // Now we look at mask elements 0,4,8,12 1852 unsigned M0 = N->getMaskElt(0) / 4; 1853 unsigned M1 = N->getMaskElt(4) / 4; 1854 unsigned M2 = N->getMaskElt(8) / 4; 1855 unsigned M3 = N->getMaskElt(12) / 4; 1856 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 }; 1857 unsigned BigEndianShifts[] = { 3, 0, 1, 2 }; 1858 1859 // Below, let H and L be arbitrary elements of the shuffle mask 1860 // where H is in the range [4,7] and L is in the range [0,3]. 1861 // H, 1, 2, 3 or L, 5, 6, 7 1862 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) || 1863 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) { 1864 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3]; 1865 InsertAtByte = IsLE ? 12 : 0; 1866 Swap = M0 < 4; 1867 return true; 1868 } 1869 // 0, H, 2, 3 or 4, L, 6, 7 1870 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) || 1871 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) { 1872 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3]; 1873 InsertAtByte = IsLE ? 8 : 4; 1874 Swap = M1 < 4; 1875 return true; 1876 } 1877 // 0, 1, H, 3 or 4, 5, L, 7 1878 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) || 1879 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) { 1880 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3]; 1881 InsertAtByte = IsLE ? 4 : 8; 1882 Swap = M2 < 4; 1883 return true; 1884 } 1885 // 0, 1, 2, H or 4, 5, 6, L 1886 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) || 1887 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) { 1888 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3]; 1889 InsertAtByte = IsLE ? 0 : 12; 1890 Swap = M3 < 4; 1891 return true; 1892 } 1893 1894 // If both vector operands for the shuffle are the same vector, the mask will 1895 // contain only elements from the first one and the second one will be undef. 1896 if (N->getOperand(1).isUndef()) { 1897 ShiftElts = 0; 1898 Swap = true; 1899 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1; 1900 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) { 1901 InsertAtByte = IsLE ? 12 : 0; 1902 return true; 1903 } 1904 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) { 1905 InsertAtByte = IsLE ? 8 : 4; 1906 return true; 1907 } 1908 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) { 1909 InsertAtByte = IsLE ? 4 : 8; 1910 return true; 1911 } 1912 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) { 1913 InsertAtByte = IsLE ? 
0 : 12; 1914 return true; 1915 } 1916 } 1917 1918 return false; 1919 } 1920 1921 bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, 1922 bool &Swap, bool IsLE) { 1923 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"); 1924 // Ensure each byte index of the word is consecutive. 1925 if (!isNByteElemShuffleMask(N, 4, 1)) 1926 return false; 1927 1928 // Now we look at mask elements 0,4,8,12, which are the beginning of words. 1929 unsigned M0 = N->getMaskElt(0) / 4; 1930 unsigned M1 = N->getMaskElt(4) / 4; 1931 unsigned M2 = N->getMaskElt(8) / 4; 1932 unsigned M3 = N->getMaskElt(12) / 4; 1933 1934 // If both vector operands for the shuffle are the same vector, the mask will 1935 // contain only elements from the first one and the second one will be undef. 1936 if (N->getOperand(1).isUndef()) { 1937 assert(M0 < 4 && "Indexing into an undef vector?"); 1938 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4) 1939 return false; 1940 1941 ShiftElts = IsLE ? (4 - M0) % 4 : M0; 1942 Swap = false; 1943 return true; 1944 } 1945 1946 // Ensure each word index of the ShuffleVector Mask is consecutive. 1947 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8) 1948 return false; 1949 1950 if (IsLE) { 1951 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) { 1952 // Input vectors don't need to be swapped if the leading element 1953 // of the result is one of the 3 left elements of the second vector 1954 // (or if there is no shift to be done at all). 1955 Swap = false; 1956 ShiftElts = (8 - M0) % 8; 1957 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) { 1958 // Input vectors need to be swapped if the leading element 1959 // of the result is one of the 3 left elements of the first vector 1960 // (or if we're shifting by 4 - thereby simply swapping the vectors). 1961 Swap = true; 1962 ShiftElts = (4 - M0) % 4; 1963 } 1964 1965 return true; 1966 } else { // BE 1967 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) { 1968 // Input vectors don't need to be swapped if the leading element 1969 // of the result is one of the 4 elements of the first vector. 1970 Swap = false; 1971 ShiftElts = M0; 1972 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) { 1973 // Input vectors need to be swapped if the leading element 1974 // of the result is one of the 4 elements of the right vector. 1975 Swap = true; 1976 ShiftElts = M0 - 4; 1977 } 1978 1979 return true; 1980 } 1981 } 1982 1983 bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) { 1984 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"); 1985 1986 if (!isNByteElemShuffleMask(N, Width, -1)) 1987 return false; 1988 1989 for (int i = 0; i < 16; i += Width) 1990 if (N->getMaskElt(i) != i + Width - 1) 1991 return false; 1992 1993 return true; 1994 } 1995 1996 bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) { 1997 return isXXBRShuffleMaskHelper(N, 2); 1998 } 1999 2000 bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) { 2001 return isXXBRShuffleMaskHelper(N, 4); 2002 } 2003 2004 bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) { 2005 return isXXBRShuffleMaskHelper(N, 8); 2006 } 2007 2008 bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) { 2009 return isXXBRShuffleMaskHelper(N, 16); 2010 } 2011 2012 /// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap 2013 /// if the inputs to the instruction should be swapped and set \p DM to the 2014 /// value for the immediate. 
2015 /// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI 2016 /// AND element 0 of the result comes from the first input (LE) or second input 2017 /// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered. 2018 /// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle 2019 /// mask. 2020 bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM, 2021 bool &Swap, bool IsLE) { 2022 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"); 2023 2024 // Ensure each byte index of the double word is consecutive. 2025 if (!isNByteElemShuffleMask(N, 8, 1)) 2026 return false; 2027 2028 unsigned M0 = N->getMaskElt(0) / 8; 2029 unsigned M1 = N->getMaskElt(8) / 8; 2030 assert(((M0 | M1) < 4) && "A mask element out of bounds?"); 2031 2032 // If both vector operands for the shuffle are the same vector, the mask will 2033 // contain only elements from the first one and the second one will be undef. 2034 if (N->getOperand(1).isUndef()) { 2035 if ((M0 | M1) < 2) { 2036 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1); 2037 Swap = false; 2038 return true; 2039 } else 2040 return false; 2041 } 2042 2043 if (IsLE) { 2044 if (M0 > 1 && M1 < 2) { 2045 Swap = false; 2046 } else if (M0 < 2 && M1 > 1) { 2047 M0 = (M0 + 2) % 4; 2048 M1 = (M1 + 2) % 4; 2049 Swap = true; 2050 } else 2051 return false; 2052 2053 // Note: if control flow comes here that means Swap is already set above 2054 DM = (((~M1) & 1) << 1) + ((~M0) & 1); 2055 return true; 2056 } else { // BE 2057 if (M0 < 2 && M1 > 1) { 2058 Swap = false; 2059 } else if (M0 > 1 && M1 < 2) { 2060 M0 = (M0 + 2) % 4; 2061 M1 = (M1 + 2) % 4; 2062 Swap = true; 2063 } else 2064 return false; 2065 2066 // Note: if control flow comes here that means Swap is already set above 2067 DM = (M0 << 1) + (M1 & 1); 2068 return true; 2069 } 2070 } 2071 2072 2073 /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the 2074 /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. 2075 unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize, 2076 SelectionDAG &DAG) { 2077 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); 2078 assert(isSplatShuffleMask(SVOp, EltSize)); 2079 if (DAG.getDataLayout().isLittleEndian()) 2080 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize); 2081 else 2082 return SVOp->getMaskElt(0) / EltSize; 2083 } 2084 2085 /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed 2086 /// by using a vspltis[bhw] instruction of the specified element size, return 2087 /// the constant being splatted. The ByteSize field indicates the number of 2088 /// bytes of each element [124] -> [bhw]. 2089 SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { 2090 SDValue OpVal(nullptr, 0); 2091 2092 // If ByteSize of the splat is bigger than the element size of the 2093 // build_vector, then we have a case where we are checking for a splat where 2094 // multiple elements of the buildvector are folded together into a single 2095 // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8). 2096 unsigned EltSize = 16/N->getNumOperands(); 2097 if (EltSize < ByteSize) { 2098 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval. 2099 SDValue UniquedVals[4]; 2100 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?"); 2101 2102 // See if all of the elements in the buildvector agree across. 
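// Illustrative example (not part of the original comment): for a v16i8 build
// vector whose operands repeat the pair <0, 1> and a query with ByteSize == 2,
// Multiple is 2 and UniquedVals becomes {0, 1}; the leading entry is zero and
// the least significant entry (1) fits the 5-bit immediate, so the constant 1
// is returned as the halfword splat value.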
2103 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 2104 if (N->getOperand(i).isUndef()) continue; 2105 // If the element isn't a constant, bail fully out. 2106 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue(); 2107 2108 if (!UniquedVals[i&(Multiple-1)].getNode()) 2109 UniquedVals[i&(Multiple-1)] = N->getOperand(i); 2110 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i)) 2111 return SDValue(); // no match. 2112 } 2113 2114 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains 2115 // either constant or undef values that are identical for each chunk. See 2116 // if these chunks can form into a larger vspltis*. 2117 2118 // Check to see if all of the leading entries are either 0 or -1. If 2119 // neither, then this won't fit into the immediate field. 2120 bool LeadingZero = true; 2121 bool LeadingOnes = true; 2122 for (unsigned i = 0; i != Multiple-1; ++i) { 2123 if (!UniquedVals[i].getNode()) continue; // Must have been undefs. 2124 2125 LeadingZero &= isNullConstant(UniquedVals[i]); 2126 LeadingOnes &= isAllOnesConstant(UniquedVals[i]); 2127 } 2128 // Finally, check the least significant entry. 2129 if (LeadingZero) { 2130 if (!UniquedVals[Multiple-1].getNode()) 2131 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef 2132 int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue(); 2133 if (Val < 16) // 0,0,0,4 -> vspltisw(4) 2134 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32); 2135 } 2136 if (LeadingOnes) { 2137 if (!UniquedVals[Multiple-1].getNode()) 2138 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef 2139 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue(); 2140 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2) 2141 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32); 2142 } 2143 2144 return SDValue(); 2145 } 2146 2147 // Check to see if this buildvec has a single non-undef value in its elements. 2148 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 2149 if (N->getOperand(i).isUndef()) continue; 2150 if (!OpVal.getNode()) 2151 OpVal = N->getOperand(i); 2152 else if (OpVal != N->getOperand(i)) 2153 return SDValue(); 2154 } 2155 2156 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def. 2157 2158 unsigned ValSizeInBytes = EltSize; 2159 uint64_t Value = 0; 2160 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) { 2161 Value = CN->getZExtValue(); 2162 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) { 2163 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!"); 2164 Value = FloatToBits(CN->getValueAPF().convertToFloat()); 2165 } 2166 2167 // If the splat value is larger than the element value, then we can never do 2168 // this splat. The only case that we could fit the replicated bits into our 2169 // immediate field for would be zero, and we prefer to use vxor for it. 2170 if (ValSizeInBytes < ByteSize) return SDValue(); 2171 2172 // If the element value is larger than the splat value, check if it consists 2173 // of a repeated bit pattern of size ByteSize. 2174 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8)) 2175 return SDValue(); 2176 2177 // Properly sign extend the value. 2178 int MaskVal = SignExtend32(Value, ByteSize * 8); 2179 2180 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros. 
2181 if (MaskVal == 0) return SDValue(); 2182 2183 // Finally, if this value fits in a 5 bit sext field, return it 2184 if (SignExtend32<5>(MaskVal) == MaskVal) 2185 return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32); 2186 return SDValue(); 2187 } 2188 2189 /// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift 2190 /// amount, otherwise return -1. 2191 int PPC::isQVALIGNIShuffleMask(SDNode *N) { 2192 EVT VT = N->getValueType(0); 2193 if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1) 2194 return -1; 2195 2196 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); 2197 2198 // Find the first non-undef value in the shuffle mask. 2199 unsigned i; 2200 for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i) 2201 /*search*/; 2202 2203 if (i == 4) return -1; // all undef. 2204 2205 // Otherwise, check to see if the rest of the elements are consecutively 2206 // numbered from this value. 2207 unsigned ShiftAmt = SVOp->getMaskElt(i); 2208 if (ShiftAmt < i) return -1; 2209 ShiftAmt -= i; 2210 2211 // Check the rest of the elements to see if they are consecutive. 2212 for (++i; i != 4; ++i) 2213 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i)) 2214 return -1; 2215 2216 return ShiftAmt; 2217 } 2218 2219 //===----------------------------------------------------------------------===// 2220 // Addressing Mode Selection 2221 //===----------------------------------------------------------------------===// 2222 2223 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit 2224 /// or 64-bit immediate, and if the value can be accurately represented as a 2225 /// sign extension from a 16-bit value. If so, this returns true and the 2226 /// immediate. 2227 bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) { 2228 if (!isa<ConstantSDNode>(N)) 2229 return false; 2230 2231 Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue(); 2232 if (N->getValueType(0) == MVT::i32) 2233 return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue(); 2234 else 2235 return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue(); 2236 } 2237 bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) { 2238 return isIntS16Immediate(Op.getNode(), Imm); 2239 } 2240 2241 /// SelectAddressRegReg - Given the specified addressed, check to see if it 2242 /// can be represented as an indexed [r+r] operation. Returns false if it 2243 /// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is 2244 /// non-zero and N can be represented by a base register plus a signed 16-bit 2245 /// displacement, make a more precise judgement by checking (displacement % \p 2246 /// EncodingAlignment). 2247 bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base, 2248 SDValue &Index, SelectionDAG &DAG, 2249 unsigned EncodingAlignment) const { 2250 int16_t imm = 0; 2251 if (N.getOpcode() == ISD::ADD) { 2252 if (isIntS16Immediate(N.getOperand(1), imm) && 2253 (!EncodingAlignment || !(imm % EncodingAlignment))) 2254 return false; // r+i 2255 if (N.getOperand(1).getOpcode() == PPCISD::Lo) 2256 return false; // r+i 2257 2258 Base = N.getOperand(0); 2259 Index = N.getOperand(1); 2260 return true; 2261 } else if (N.getOpcode() == ISD::OR) { 2262 if (isIntS16Immediate(N.getOperand(1), imm) && 2263 (!EncodingAlignment || !(imm % EncodingAlignment))) 2264 return false; // r+i can fold it if we can. 
2265 2266 // If this is an or of disjoint bitfields, we can codegen this as an add 2267 // (for better address arithmetic) if the LHS and RHS of the OR are provably 2268 // disjoint. 2269 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0)); 2270 2271 if (LHSKnown.Zero.getBoolValue()) { 2272 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1)); 2273 // If all of the bits are known zero on the LHS or RHS, the add won't 2274 // carry. 2275 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) { 2276 Base = N.getOperand(0); 2277 Index = N.getOperand(1); 2278 return true; 2279 } 2280 } 2281 } 2282 2283 return false; 2284 } 2285 2286 // If we happen to be doing an i64 load or store into a stack slot that has 2287 // less than a 4-byte alignment, then the frame-index elimination may need to 2288 // use an indexed load or store instruction (because the offset may not be a 2289 // multiple of 4). The extra register needed to hold the offset comes from the 2290 // register scavenger, and it is possible that the scavenger will need to use 2291 // an emergency spill slot. As a result, we need to make sure that a spill slot 2292 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned 2293 // stack slot. 2294 static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) { 2295 // FIXME: This does not handle the LWA case. 2296 if (VT != MVT::i64) 2297 return; 2298 2299 // NOTE: We'll exclude negative FIs here, which come from argument 2300 // lowering, because there are no known test cases triggering this problem 2301 // using packed structures (or similar). We can remove this exclusion if 2302 // we find such a test case. The reason why this is so test-case driven is 2303 // because this entire 'fixup' is only to prevent crashes (from the 2304 // register scavenger) on not-really-valid inputs. For example, if we have: 2305 // %a = alloca i1 2306 // %b = bitcast i1* %a to i64* 2307 // store i64* a, i64 b 2308 // then the store should really be marked as 'align 1', but is not. If it 2309 // were marked as 'align 1' then the indexed form would have been 2310 // instruction-selected initially, and the problem this 'fixup' is preventing 2311 // won't happen regardless. 2312 if (FrameIdx < 0) 2313 return; 2314 2315 MachineFunction &MF = DAG.getMachineFunction(); 2316 MachineFrameInfo &MFI = MF.getFrameInfo(); 2317 2318 unsigned Align = MFI.getObjectAlignment(FrameIdx); 2319 if (Align >= 4) 2320 return; 2321 2322 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 2323 FuncInfo->setHasNonRISpills(); 2324 } 2325 2326 /// Returns true if the address N can be represented by a base register plus 2327 /// a signed 16-bit displacement [r+imm], and if it is not better 2328 /// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept 2329 /// displacements that are multiples of that value. 2330 bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, 2331 SDValue &Base, 2332 SelectionDAG &DAG, 2333 unsigned EncodingAlignment) const { 2334 // FIXME dl should come from parent load or store, not from address 2335 SDLoc dl(N); 2336 // If this can be more profitably realized as r+r, fail. 
2337 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment)) 2338 return false; 2339 2340 if (N.getOpcode() == ISD::ADD) { 2341 int16_t imm = 0; 2342 if (isIntS16Immediate(N.getOperand(1), imm) && 2343 (!EncodingAlignment || (imm % EncodingAlignment) == 0)) { 2344 Disp = DAG.getTargetConstant(imm, dl, N.getValueType()); 2345 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) { 2346 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); 2347 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); 2348 } else { 2349 Base = N.getOperand(0); 2350 } 2351 return true; // [r+i] 2352 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) { 2353 // Match LOAD (ADD (X, Lo(G))). 2354 assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue() 2355 && "Cannot handle constant offsets yet!"); 2356 Disp = N.getOperand(1).getOperand(0); // The global address. 2357 assert(Disp.getOpcode() == ISD::TargetGlobalAddress || 2358 Disp.getOpcode() == ISD::TargetGlobalTLSAddress || 2359 Disp.getOpcode() == ISD::TargetConstantPool || 2360 Disp.getOpcode() == ISD::TargetJumpTable); 2361 Base = N.getOperand(0); 2362 return true; // [&g+r] 2363 } 2364 } else if (N.getOpcode() == ISD::OR) { 2365 int16_t imm = 0; 2366 if (isIntS16Immediate(N.getOperand(1), imm) && 2367 (!EncodingAlignment || (imm % EncodingAlignment) == 0)) { 2368 // If this is an or of disjoint bitfields, we can codegen this as an add 2369 // (for better address arithmetic) if the LHS and RHS of the OR are 2370 // provably disjoint. 2371 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0)); 2372 2373 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) { 2374 // If all of the bits are known zero on the LHS or RHS, the add won't 2375 // carry. 2376 if (FrameIndexSDNode *FI = 2377 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) { 2378 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); 2379 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); 2380 } else { 2381 Base = N.getOperand(0); 2382 } 2383 Disp = DAG.getTargetConstant(imm, dl, N.getValueType()); 2384 return true; 2385 } 2386 } 2387 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) { 2388 // Loading from a constant address. 2389 2390 // If this address fits entirely in a 16-bit sext immediate field, codegen 2391 // this as "d, 0" 2392 int16_t Imm; 2393 if (isIntS16Immediate(CN, Imm) && 2394 (!EncodingAlignment || (Imm % EncodingAlignment) == 0)) { 2395 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0)); 2396 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, 2397 CN->getValueType(0)); 2398 return true; 2399 } 2400 2401 // Handle 32-bit sext immediates with LIS + addr mode. 2402 if ((CN->getValueType(0) == MVT::i32 || 2403 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) && 2404 (!EncodingAlignment || (CN->getZExtValue() % EncodingAlignment) == 0)) { 2405 int Addr = (int)CN->getZExtValue(); 2406 2407 // Otherwise, break this down into an LIS + disp. 2408 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32); 2409 2410 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl, 2411 MVT::i32); 2412 unsigned Opc = CN->getValueType(0) == MVT::i32 ? 
PPC::LIS : PPC::LIS8; 2413 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0); 2414 return true; 2415 } 2416 } 2417 2418 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout())); 2419 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) { 2420 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); 2421 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); 2422 } else 2423 Base = N; 2424 return true; // [r+0] 2425 } 2426 2427 /// SelectAddressRegRegOnly - Given the specified addressed, force it to be 2428 /// represented as an indexed [r+r] operation. 2429 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, 2430 SDValue &Index, 2431 SelectionDAG &DAG) const { 2432 // Check to see if we can easily represent this as an [r+r] address. This 2433 // will fail if it thinks that the address is more profitably represented as 2434 // reg+imm, e.g. where imm = 0. 2435 if (SelectAddressRegReg(N, Base, Index, DAG)) 2436 return true; 2437 2438 // If the address is the result of an add, we will utilize the fact that the 2439 // address calculation includes an implicit add. However, we can reduce 2440 // register pressure if we do not materialize a constant just for use as the 2441 // index register. We only get rid of the add if it is not an add of a 2442 // value and a 16-bit signed constant and both have a single use. 2443 int16_t imm = 0; 2444 if (N.getOpcode() == ISD::ADD && 2445 (!isIntS16Immediate(N.getOperand(1), imm) || 2446 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) { 2447 Base = N.getOperand(0); 2448 Index = N.getOperand(1); 2449 return true; 2450 } 2451 2452 // Otherwise, do it the hard way, using R0 as the base register. 2453 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, 2454 N.getValueType()); 2455 Index = N; 2456 return true; 2457 } 2458 2459 /// Returns true if we should use a direct load into vector instruction 2460 /// (such as lxsd or lfd), instead of a load into gpr + direct move sequence. 2461 static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) { 2462 2463 // If there are any other uses other than scalar to vector, then we should 2464 // keep it as a scalar load -> direct move pattern to prevent multiple 2465 // loads. 2466 LoadSDNode *LD = dyn_cast<LoadSDNode>(N); 2467 if (!LD) 2468 return false; 2469 2470 EVT MemVT = LD->getMemoryVT(); 2471 if (!MemVT.isSimple()) 2472 return false; 2473 switch(MemVT.getSimpleVT().SimpleTy) { 2474 case MVT::i64: 2475 break; 2476 case MVT::i32: 2477 if (!ST.hasP8Vector()) 2478 return false; 2479 break; 2480 case MVT::i16: 2481 case MVT::i8: 2482 if (!ST.hasP9Vector()) 2483 return false; 2484 break; 2485 default: 2486 return false; 2487 } 2488 2489 SDValue LoadedVal(N, 0); 2490 if (!LoadedVal.hasOneUse()) 2491 return false; 2492 2493 for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end(); 2494 UI != UE; ++UI) 2495 if (UI.getUse().get().getResNo() == 0 && 2496 UI->getOpcode() != ISD::SCALAR_TO_VECTOR) 2497 return false; 2498 2499 return true; 2500 } 2501 2502 /// getPreIndexedAddressParts - returns true by value, base pointer and 2503 /// offset pointer and addressing mode by reference if the node's address 2504 /// can be legally represented as pre-indexed load / store address. 
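/// Illustrative sketch (not part of the original comment): for a load whose
/// address is (add r3, 16), this would typically report Base = r3,
/// Offset = 16 and AM = ISD::PRE_INC, allowing an update-form instruction such
/// as lwzu/ldu to be selected, subject to the alignment and type checks below.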
2505 bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, 2506 SDValue &Offset, 2507 ISD::MemIndexedMode &AM, 2508 SelectionDAG &DAG) const { 2509 if (DisablePPCPreinc) return false; 2510 2511 bool isLoad = true; 2512 SDValue Ptr; 2513 EVT VT; 2514 unsigned Alignment; 2515 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 2516 Ptr = LD->getBasePtr(); 2517 VT = LD->getMemoryVT(); 2518 Alignment = LD->getAlignment(); 2519 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 2520 Ptr = ST->getBasePtr(); 2521 VT = ST->getMemoryVT(); 2522 Alignment = ST->getAlignment(); 2523 isLoad = false; 2524 } else 2525 return false; 2526 2527 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector 2528 // instructions because we can fold these into a more efficient instruction 2529 // instead, (such as LXSD). 2530 if (isLoad && usePartialVectorLoads(N, Subtarget)) { 2531 return false; 2532 } 2533 2534 // PowerPC doesn't have preinc load/store instructions for vectors (except 2535 // for QPX, which does have preinc r+r forms). 2536 if (VT.isVector()) { 2537 if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) { 2538 return false; 2539 } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) { 2540 AM = ISD::PRE_INC; 2541 return true; 2542 } 2543 } 2544 2545 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) { 2546 // Common code will reject creating a pre-inc form if the base pointer 2547 // is a frame index, or if N is a store and the base pointer is either 2548 // the same as or a predecessor of the value being stored. Check for 2549 // those situations here, and try with swapped Base/Offset instead. 2550 bool Swap = false; 2551 2552 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base)) 2553 Swap = true; 2554 else if (!isLoad) { 2555 SDValue Val = cast<StoreSDNode>(N)->getValue(); 2556 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode())) 2557 Swap = true; 2558 } 2559 2560 if (Swap) 2561 std::swap(Base, Offset); 2562 2563 AM = ISD::PRE_INC; 2564 return true; 2565 } 2566 2567 // LDU/STU can only handle immediates that are a multiple of 4. 2568 if (VT != MVT::i64) { 2569 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 0)) 2570 return false; 2571 } else { 2572 // LDU/STU need an address with at least 4-byte alignment. 2573 if (Alignment < 4) 2574 return false; 2575 2576 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 4)) 2577 return false; 2578 } 2579 2580 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 2581 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of 2582 // sext i32 to i64 when addr mode is r+i. 2583 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 && 2584 LD->getExtensionType() == ISD::SEXTLOAD && 2585 isa<ConstantSDNode>(Offset)) 2586 return false; 2587 } 2588 2589 AM = ISD::PRE_INC; 2590 return true; 2591 } 2592 2593 //===----------------------------------------------------------------------===// 2594 // LowerOperation implementation 2595 //===----------------------------------------------------------------------===// 2596 2597 /// Return true if we should reference labels using a PICBase, set the HiOpFlags 2598 /// and LoOpFlags to the target MO flags. 2599 static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, 2600 unsigned &HiOpFlags, unsigned &LoOpFlags, 2601 const GlobalValue *GV = nullptr) { 2602 HiOpFlags = PPCII::MO_HA; 2603 LoOpFlags = PPCII::MO_LO; 2604 2605 // Don't use the pic base if not in PIC relocation model. 
2606 if (IsPIC) { 2607 HiOpFlags |= PPCII::MO_PIC_FLAG; 2608 LoOpFlags |= PPCII::MO_PIC_FLAG; 2609 } 2610 2611 // If this is a reference to a global value that requires a non-lazy-ptr, make 2612 // sure that instruction lowering adds it. 2613 if (GV && Subtarget.hasLazyResolverStub(GV)) { 2614 HiOpFlags |= PPCII::MO_NLP_FLAG; 2615 LoOpFlags |= PPCII::MO_NLP_FLAG; 2616 2617 if (GV->hasHiddenVisibility()) { 2618 HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG; 2619 LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG; 2620 } 2621 } 2622 } 2623 2624 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, 2625 SelectionDAG &DAG) { 2626 SDLoc DL(HiPart); 2627 EVT PtrVT = HiPart.getValueType(); 2628 SDValue Zero = DAG.getConstant(0, DL, PtrVT); 2629 2630 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero); 2631 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero); 2632 2633 // With PIC, the first instruction is actually "GR+hi(&G)". 2634 if (isPIC) 2635 Hi = DAG.getNode(ISD::ADD, DL, PtrVT, 2636 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi); 2637 2638 // Generate non-pic code that has direct accesses to the constant pool. 2639 // The address of the global is just (hi(&g)+lo(&g)). 2640 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo); 2641 } 2642 2643 static void setUsesTOCBasePtr(MachineFunction &MF) { 2644 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 2645 FuncInfo->setUsesTOCBasePtr(); 2646 } 2647 2648 static void setUsesTOCBasePtr(SelectionDAG &DAG) { 2649 setUsesTOCBasePtr(DAG.getMachineFunction()); 2650 } 2651 2652 static SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, bool Is64Bit, 2653 SDValue GA) { 2654 EVT VT = Is64Bit ? MVT::i64 : MVT::i32; 2655 SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) : 2656 DAG.getNode(PPCISD::GlobalBaseReg, dl, VT); 2657 2658 SDValue Ops[] = { GA, Reg }; 2659 return DAG.getMemIntrinsicNode( 2660 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT, 2661 MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0, 2662 MachineMemOperand::MOLoad); 2663 } 2664 2665 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, 2666 SelectionDAG &DAG) const { 2667 EVT PtrVT = Op.getValueType(); 2668 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 2669 const Constant *C = CP->getConstVal(); 2670 2671 // 64-bit SVR4 ABI code is always position-independent. 2672 // The actual address of the GlobalValue is stored in the TOC. 2673 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { 2674 setUsesTOCBasePtr(DAG); 2675 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0); 2676 return getTOCEntry(DAG, SDLoc(CP), true, GA); 2677 } 2678 2679 unsigned MOHiFlag, MOLoFlag; 2680 bool IsPIC = isPositionIndependent(); 2681 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag); 2682 2683 if (IsPIC && Subtarget.isSVR4ABI()) { 2684 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 2685 PPCII::MO_PIC_FLAG); 2686 return getTOCEntry(DAG, SDLoc(CP), false, GA); 2687 } 2688 2689 SDValue CPIHi = 2690 DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag); 2691 SDValue CPILo = 2692 DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag); 2693 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG); 2694 } 2695 2696 // For 64-bit PowerPC, prefer the more compact relative encodings. 2697 // This trades 32 bits per jump table entry for one or two instructions 2698 // on the jump site. 
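// Illustrative note (not part of the original comment): with
// EK_LabelDifference32 each table entry is a 32-bit label difference rather
// than a full 64-bit pointer, and the jump site reconstructs the target by
// adding the entry to the relocation base before branching via the count
// register.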
2699 unsigned PPCTargetLowering::getJumpTableEncoding() const { 2700 if (isJumpTableRelative()) 2701 return MachineJumpTableInfo::EK_LabelDifference32; 2702 2703 return TargetLowering::getJumpTableEncoding(); 2704 } 2705 2706 bool PPCTargetLowering::isJumpTableRelative() const { 2707 if (Subtarget.isPPC64()) 2708 return true; 2709 return TargetLowering::isJumpTableRelative(); 2710 } 2711 2712 SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table, 2713 SelectionDAG &DAG) const { 2714 if (!Subtarget.isPPC64()) 2715 return TargetLowering::getPICJumpTableRelocBase(Table, DAG); 2716 2717 switch (getTargetMachine().getCodeModel()) { 2718 case CodeModel::Small: 2719 case CodeModel::Medium: 2720 return TargetLowering::getPICJumpTableRelocBase(Table, DAG); 2721 default: 2722 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(), 2723 getPointerTy(DAG.getDataLayout())); 2724 } 2725 } 2726 2727 const MCExpr * 2728 PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF, 2729 unsigned JTI, 2730 MCContext &Ctx) const { 2731 if (!Subtarget.isPPC64()) 2732 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx); 2733 2734 switch (getTargetMachine().getCodeModel()) { 2735 case CodeModel::Small: 2736 case CodeModel::Medium: 2737 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx); 2738 default: 2739 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx); 2740 } 2741 } 2742 2743 SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { 2744 EVT PtrVT = Op.getValueType(); 2745 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); 2746 2747 // 64-bit SVR4 ABI code is always position-independent. 2748 // The actual address of the GlobalValue is stored in the TOC. 2749 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { 2750 setUsesTOCBasePtr(DAG); 2751 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); 2752 return getTOCEntry(DAG, SDLoc(JT), true, GA); 2753 } 2754 2755 unsigned MOHiFlag, MOLoFlag; 2756 bool IsPIC = isPositionIndependent(); 2757 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag); 2758 2759 if (IsPIC && Subtarget.isSVR4ABI()) { 2760 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, 2761 PPCII::MO_PIC_FLAG); 2762 return getTOCEntry(DAG, SDLoc(GA), false, GA); 2763 } 2764 2765 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag); 2766 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag); 2767 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG); 2768 } 2769 2770 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, 2771 SelectionDAG &DAG) const { 2772 EVT PtrVT = Op.getValueType(); 2773 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op); 2774 const BlockAddress *BA = BASDN->getBlockAddress(); 2775 2776 // 64-bit SVR4 ABI code is always position-independent. 2777 // The actual BlockAddress is stored in the TOC. 
2778 if (Subtarget.isSVR4ABI() && 2779 (Subtarget.isPPC64() || isPositionIndependent())) { 2780 if (Subtarget.isPPC64()) 2781 setUsesTOCBasePtr(DAG); 2782 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()); 2783 return getTOCEntry(DAG, SDLoc(BASDN), Subtarget.isPPC64(), GA); 2784 } 2785 2786 unsigned MOHiFlag, MOLoFlag; 2787 bool IsPIC = isPositionIndependent(); 2788 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag); 2789 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag); 2790 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag); 2791 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG); 2792 } 2793 2794 SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, 2795 SelectionDAG &DAG) const { 2796 // FIXME: TLS addresses currently use medium model code sequences, 2797 // which is the most useful form. Eventually support for small and 2798 // large models could be added if users need it, at the cost of 2799 // additional complexity. 2800 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); 2801 if (DAG.getTarget().useEmulatedTLS()) 2802 return LowerToTLSEmulatedModel(GA, DAG); 2803 2804 SDLoc dl(GA); 2805 const GlobalValue *GV = GA->getGlobal(); 2806 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2807 bool is64bit = Subtarget.isPPC64(); 2808 const Module *M = DAG.getMachineFunction().getFunction().getParent(); 2809 PICLevel::Level picLevel = M->getPICLevel(); 2810 2811 const TargetMachine &TM = getTargetMachine(); 2812 TLSModel::Model Model = TM.getTLSModel(GV); 2813 2814 if (Model == TLSModel::LocalExec) { 2815 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 2816 PPCII::MO_TPREL_HA); 2817 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 2818 PPCII::MO_TPREL_LO); 2819 SDValue TLSReg = is64bit ? 
DAG.getRegister(PPC::X13, MVT::i64) 2820 : DAG.getRegister(PPC::R2, MVT::i32); 2821 2822 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg); 2823 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi); 2824 } 2825 2826 if (Model == TLSModel::InitialExec) { 2827 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); 2828 SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 2829 PPCII::MO_TLS); 2830 SDValue GOTPtr; 2831 if (is64bit) { 2832 setUsesTOCBasePtr(DAG); 2833 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); 2834 GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, 2835 PtrVT, GOTReg, TGA); 2836 } else { 2837 if (!TM.isPositionIndependent()) 2838 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT); 2839 else if (picLevel == PICLevel::SmallPIC) 2840 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT); 2841 else 2842 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); 2843 } 2844 SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, 2845 PtrVT, TGA, GOTPtr); 2846 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS); 2847 } 2848 2849 if (Model == TLSModel::GeneralDynamic) { 2850 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); 2851 SDValue GOTPtr; 2852 if (is64bit) { 2853 setUsesTOCBasePtr(DAG); 2854 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); 2855 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT, 2856 GOTReg, TGA); 2857 } else { 2858 if (picLevel == PICLevel::SmallPIC) 2859 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT); 2860 else 2861 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); 2862 } 2863 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT, 2864 GOTPtr, TGA, TGA); 2865 } 2866 2867 if (Model == TLSModel::LocalDynamic) { 2868 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); 2869 SDValue GOTPtr; 2870 if (is64bit) { 2871 setUsesTOCBasePtr(DAG); 2872 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); 2873 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT, 2874 GOTReg, TGA); 2875 } else { 2876 if (picLevel == PICLevel::SmallPIC) 2877 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT); 2878 else 2879 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); 2880 } 2881 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl, 2882 PtrVT, GOTPtr, TGA, TGA); 2883 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, 2884 PtrVT, TLSAddr, TGA); 2885 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA); 2886 } 2887 2888 llvm_unreachable("Unknown TLS model!"); 2889 } 2890 2891 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, 2892 SelectionDAG &DAG) const { 2893 EVT PtrVT = Op.getValueType(); 2894 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); 2895 SDLoc DL(GSDN); 2896 const GlobalValue *GV = GSDN->getGlobal(); 2897 2898 // 64-bit SVR4 ABI code is always position-independent. 2899 // The actual address of the GlobalValue is stored in the TOC. 
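// The cases below mirror LowerJumpTable and LowerBlockAddress above: 64-bit
// SVR4 always goes through a TOC entry, 32-bit SVR4 PIC uses a TOC/GOT entry
// tagged with MO_PIC_FLAG, and everything else materializes the address as a
// Hi/Lo pair via LowerLabelRef, with one extra load at the end if the target
// flags mark the symbol as reached through a non-lazy pointer (MO_NLP_FLAG).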
2900 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { 2901 setUsesTOCBasePtr(DAG); 2902 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset()); 2903 return getTOCEntry(DAG, DL, true, GA); 2904 } 2905 2906 unsigned MOHiFlag, MOLoFlag; 2907 bool IsPIC = isPositionIndependent(); 2908 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV); 2909 2910 if (IsPIC && Subtarget.isSVR4ABI()) { 2911 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 2912 GSDN->getOffset(), 2913 PPCII::MO_PIC_FLAG); 2914 return getTOCEntry(DAG, DL, false, GA); 2915 } 2916 2917 SDValue GAHi = 2918 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag); 2919 SDValue GALo = 2920 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag); 2921 2922 SDValue Ptr = LowerLabelRef(GAHi, GALo, IsPIC, DAG); 2923 2924 // If the global reference is actually to a non-lazy-pointer, we have to do an 2925 // extra load to get the address of the global. 2926 if (MOHiFlag & PPCII::MO_NLP_FLAG) 2927 Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo()); 2928 return Ptr; 2929 } 2930 2931 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { 2932 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); 2933 SDLoc dl(Op); 2934 2935 if (Op.getValueType() == MVT::v2i64) { 2936 // When the operands themselves are v2i64 values, we need to do something 2937 // special because VSX has no underlying comparison operations for these. 2938 if (Op.getOperand(0).getValueType() == MVT::v2i64) { 2939 // Equality can be handled by casting to the legal type for Altivec 2940 // comparisons, everything else needs to be expanded. 2941 if (CC == ISD::SETEQ || CC == ISD::SETNE) { 2942 return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, 2943 DAG.getSetCC(dl, MVT::v4i32, 2944 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)), 2945 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)), 2946 CC)); 2947 } 2948 2949 return SDValue(); 2950 } 2951 2952 // We handle most of these in the usual way. 2953 return Op; 2954 } 2955 2956 // If we're comparing for equality to zero, expose the fact that this is 2957 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can 2958 // fold the new nodes. 2959 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG)) 2960 return V; 2961 2962 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 2963 // Leave comparisons against 0 and -1 alone for now, since they're usually 2964 // optimized. FIXME: revisit this when we can custom lower all setcc 2965 // optimizations. 2966 if (C->isAllOnesValue() || C->isNullValue()) 2967 return SDValue(); 2968 } 2969 2970 // If we have an integer seteq/setne, turn it into a compare against zero 2971 // by xor'ing the rhs with the lhs, which is faster than setting a 2972 // condition register, reading it back out, and masking the correct bit. The 2973 // normal approach here uses sub to do this instead of xor. Using xor exposes 2974 // the result to other bit-twiddling opportunities. 
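// For example, (seteq i32 %a, %b) becomes (seteq (xor %a, %b), 0). The
// equality-with-zero form can then be matched as the ctlz/srl idiom mentioned
// above: cntlzw yields 32 only for a zero input, and shifting that right by 5
// produces the desired boolean result. (Illustrative; the exact machine
// sequence depends on what the DAG combiner and the patterns select.)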
2975 EVT LHSVT = Op.getOperand(0).getValueType(); 2976 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { 2977 EVT VT = Op.getValueType(); 2978 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0), 2979 Op.getOperand(1)); 2980 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC); 2981 } 2982 return SDValue(); 2983 } 2984 2985 SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { 2986 SDNode *Node = Op.getNode(); 2987 EVT VT = Node->getValueType(0); 2988 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2989 SDValue InChain = Node->getOperand(0); 2990 SDValue VAListPtr = Node->getOperand(1); 2991 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); 2992 SDLoc dl(Node); 2993 2994 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only"); 2995 2996 // gpr_index 2997 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, 2998 VAListPtr, MachinePointerInfo(SV), MVT::i8); 2999 InChain = GprIndex.getValue(1); 3000 3001 if (VT == MVT::i64) { 3002 // Check if GprIndex is even 3003 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex, 3004 DAG.getConstant(1, dl, MVT::i32)); 3005 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd, 3006 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE); 3007 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex, 3008 DAG.getConstant(1, dl, MVT::i32)); 3009 // Align GprIndex to be even if it isn't 3010 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne, 3011 GprIndex); 3012 } 3013 3014 // fpr index is 1 byte after gpr 3015 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, 3016 DAG.getConstant(1, dl, MVT::i32)); 3017 3018 // fpr 3019 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, 3020 FprPtr, MachinePointerInfo(SV), MVT::i8); 3021 InChain = FprIndex.getValue(1); 3022 3023 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, 3024 DAG.getConstant(8, dl, MVT::i32)); 3025 3026 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, 3027 DAG.getConstant(4, dl, MVT::i32)); 3028 3029 // areas 3030 SDValue OverflowArea = 3031 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo()); 3032 InChain = OverflowArea.getValue(1); 3033 3034 SDValue RegSaveArea = 3035 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo()); 3036 InChain = RegSaveArea.getValue(1); 3037 3038 // select overflow_area if index > 8 3039 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex, 3040 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT); 3041 3042 // adjustment constant gpr_index * 4/8 3043 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32, 3044 VT.isInteger() ? GprIndex : FprIndex, 3045 DAG.getConstant(VT.isInteger() ? 4 : 8, dl, 3046 MVT::i32)); 3047 3048 // OurReg = RegSaveArea + RegConstant 3049 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea, 3050 RegConstant); 3051 3052 // Floating types are 32 bytes into RegSaveArea 3053 if (VT.isFloatingPoint()) 3054 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg, 3055 DAG.getConstant(32, dl, MVT::i32)); 3056 3057 // increase {f,g}pr_index by 1 (or 2 if VT is i64) 3058 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32, 3059 VT.isInteger() ? GprIndex : FprIndex, 3060 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl, 3061 MVT::i32)); 3062 3063 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1, 3064 VT.isInteger() ? 
VAListPtr : FprPtr, 3065 MachinePointerInfo(SV), MVT::i8); 3066 3067 // determine if we should load from reg_save_area or overflow_area 3068 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea); 3069 3070 // increase overflow_area by 4/8 if gpr/fpr > 8 3071 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea, 3072 DAG.getConstant(VT.isInteger() ? 4 : 8, 3073 dl, MVT::i32)); 3074 3075 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea, 3076 OverflowAreaPlusN); 3077 3078 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr, 3079 MachinePointerInfo(), MVT::i32); 3080 3081 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo()); 3082 } 3083 3084 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { 3085 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only"); 3086 3087 // We have to copy the entire va_list struct: 3088 // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte 3089 return DAG.getMemcpy(Op.getOperand(0), Op, 3090 Op.getOperand(1), Op.getOperand(2), 3091 DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, false, true, 3092 false, MachinePointerInfo(), MachinePointerInfo()); 3093 } 3094 3095 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, 3096 SelectionDAG &DAG) const { 3097 return Op.getOperand(0); 3098 } 3099 3100 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, 3101 SelectionDAG &DAG) const { 3102 SDValue Chain = Op.getOperand(0); 3103 SDValue Trmp = Op.getOperand(1); // trampoline 3104 SDValue FPtr = Op.getOperand(2); // nested function 3105 SDValue Nest = Op.getOperand(3); // 'nest' parameter value 3106 SDLoc dl(Op); 3107 3108 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3109 bool isPPC64 = (PtrVT == MVT::i64); 3110 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); 3111 3112 TargetLowering::ArgListTy Args; 3113 TargetLowering::ArgListEntry Entry; 3114 3115 Entry.Ty = IntPtrTy; 3116 Entry.Node = Trmp; Args.push_back(Entry); 3117 3118 // TrampSize == (isPPC64 ? 48 : 40); 3119 Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl, 3120 isPPC64 ? MVT::i64 : MVT::i32); 3121 Args.push_back(Entry); 3122 3123 Entry.Node = FPtr; Args.push_back(Entry); 3124 Entry.Node = Nest; Args.push_back(Entry); 3125 3126 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) 3127 TargetLowering::CallLoweringInfo CLI(DAG); 3128 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee( 3129 CallingConv::C, Type::getVoidTy(*DAG.getContext()), 3130 DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args)); 3131 3132 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); 3133 return CallResult.second; 3134 } 3135 3136 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { 3137 MachineFunction &MF = DAG.getMachineFunction(); 3138 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 3139 EVT PtrVT = getPointerTy(MF.getDataLayout()); 3140 3141 SDLoc dl(Op); 3142 3143 if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) { 3144 // vastart just stores the address of the VarArgsFrameIndex slot into the 3145 // memory location argument. 3146 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 3147 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 3148 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), 3149 MachinePointerInfo(SV)); 3150 } 3151 3152 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct. 
3153 // We suppose the given va_list is already allocated. 3154 // 3155 // typedef struct { 3156 // char gpr; /* index into the array of 8 GPRs 3157 // * stored in the register save area 3158 // * gpr=0 corresponds to r3, 3159 // * gpr=1 to r4, etc. 3160 // */ 3161 // char fpr; /* index into the array of 8 FPRs 3162 // * stored in the register save area 3163 // * fpr=0 corresponds to f1, 3164 // * fpr=1 to f2, etc. 3165 // */ 3166 // char *overflow_arg_area; 3167 // /* location on stack that holds 3168 // * the next overflow argument 3169 // */ 3170 // char *reg_save_area; 3171 // /* where r3:r10 and f1:f8 (if saved) 3172 // * are stored 3173 // */ 3174 // } va_list[1]; 3175 3176 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32); 3177 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32); 3178 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(), 3179 PtrVT); 3180 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), 3181 PtrVT); 3182 3183 uint64_t FrameOffset = PtrVT.getSizeInBits()/8; 3184 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT); 3185 3186 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1; 3187 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT); 3188 3189 uint64_t FPROffset = 1; 3190 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT); 3191 3192 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 3193 3194 // Store first byte : number of int regs 3195 SDValue firstStore = 3196 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1), 3197 MachinePointerInfo(SV), MVT::i8); 3198 uint64_t nextOffset = FPROffset; 3199 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1), 3200 ConstFPROffset); 3201 3202 // Store second byte : number of float regs 3203 SDValue secondStore = 3204 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, 3205 MachinePointerInfo(SV, nextOffset), MVT::i8); 3206 nextOffset += StackOffset; 3207 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset); 3208 3209 // Store second word : arguments given on stack 3210 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, 3211 MachinePointerInfo(SV, nextOffset)); 3212 nextOffset += FrameOffset; 3213 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset); 3214 3215 // Store third word : arguments given in registers 3216 return DAG.getStore(thirdStore, dl, FR, nextPtr, 3217 MachinePointerInfo(SV, nextOffset)); 3218 } 3219 3220 /// FPR - The set of FP registers that should be allocated for arguments, 3221 /// on Darwin. 3222 static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, 3223 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10, 3224 PPC::F11, PPC::F12, PPC::F13}; 3225 3226 /// QFPR - The set of QPX registers that should be allocated for arguments. 3227 static const MCPhysReg QFPR[] = { 3228 PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7, 3229 PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13}; 3230 3231 /// CalculateStackSlotSize - Calculates the size reserved for this argument on 3232 /// the stack. 3233 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, 3234 unsigned PtrByteSize) { 3235 unsigned ArgSize = ArgVT.getStoreSize(); 3236 if (Flags.isByVal()) 3237 ArgSize = Flags.getByValSize(); 3238 3239 // Round up to multiples of the pointer size, except for array members, 3240 // which are always packed. 
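// Worked example: with PtrByteSize == 8, a 12-byte by-value aggregate
// reserves 16 bytes of argument space, while a 4-byte element of a split
// float array (marked isInConsecutiveRegs) keeps its 4-byte size so the
// array members stay packed.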
3241 if (!Flags.isInConsecutiveRegs()) 3242 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 3243 3244 return ArgSize; 3245 } 3246 3247 /// CalculateStackSlotAlignment - Calculates the alignment of this argument 3248 /// on the stack. 3249 static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, 3250 ISD::ArgFlagsTy Flags, 3251 unsigned PtrByteSize) { 3252 unsigned Align = PtrByteSize; 3253 3254 // Altivec parameters are padded to a 16 byte boundary. 3255 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || 3256 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || 3257 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 || 3258 ArgVT == MVT::v1i128 || ArgVT == MVT::f128) 3259 Align = 16; 3260 // QPX vector types stored in double-precision are padded to a 32 byte 3261 // boundary. 3262 else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1) 3263 Align = 32; 3264 3265 // ByVal parameters are aligned as requested. 3266 if (Flags.isByVal()) { 3267 unsigned BVAlign = Flags.getByValAlign(); 3268 if (BVAlign > PtrByteSize) { 3269 if (BVAlign % PtrByteSize != 0) 3270 llvm_unreachable( 3271 "ByVal alignment is not a multiple of the pointer size"); 3272 3273 Align = BVAlign; 3274 } 3275 } 3276 3277 // Array members are always packed to their original alignment. 3278 if (Flags.isInConsecutiveRegs()) { 3279 // If the array member was split into multiple registers, the first 3280 // needs to be aligned to the size of the full type. (Except for 3281 // ppcf128, which is only aligned as its f64 components.) 3282 if (Flags.isSplit() && OrigVT != MVT::ppcf128) 3283 Align = OrigVT.getStoreSize(); 3284 else 3285 Align = ArgVT.getStoreSize(); 3286 } 3287 3288 return Align; 3289 } 3290 3291 /// CalculateStackSlotUsed - Return whether this argument will use its 3292 /// stack slot (instead of being passed in registers). ArgOffset, 3293 /// AvailableFPRs, and AvailableVRs must hold the current argument 3294 /// position, and will be updated to account for this argument. 3295 static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, 3296 ISD::ArgFlagsTy Flags, 3297 unsigned PtrByteSize, 3298 unsigned LinkageSize, 3299 unsigned ParamAreaSize, 3300 unsigned &ArgOffset, 3301 unsigned &AvailableFPRs, 3302 unsigned &AvailableVRs, bool HasQPX) { 3303 bool UseMemory = false; 3304 3305 // Respect alignment of argument on the stack. 3306 unsigned Align = 3307 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); 3308 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; 3309 // If there's no space left in the argument save area, we must 3310 // use memory (this check also catches zero-sized arguments). 3311 if (ArgOffset >= LinkageSize + ParamAreaSize) 3312 UseMemory = true; 3313 3314 // Allocate argument on the stack. 3315 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); 3316 if (Flags.isInConsecutiveRegsLast()) 3317 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 3318 // If we overran the argument save area, we must use memory 3319 // (this check catches arguments passed partially in memory) 3320 if (ArgOffset > LinkageSize + ParamAreaSize) 3321 UseMemory = true; 3322 3323 // However, if the argument is actually passed in an FPR or a VR, 3324 // we don't use memory after all. 3325 if (!Flags.isByVal()) { 3326 if (ArgVT == MVT::f32 || ArgVT == MVT::f64 || 3327 // QPX registers overlap with the scalar FP registers. 
3328 (HasQPX && (ArgVT == MVT::v4f32 || 3329 ArgVT == MVT::v4f64 || 3330 ArgVT == MVT::v4i1))) 3331 if (AvailableFPRs > 0) { 3332 --AvailableFPRs; 3333 return false; 3334 } 3335 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || 3336 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || 3337 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 || 3338 ArgVT == MVT::v1i128 || ArgVT == MVT::f128) 3339 if (AvailableVRs > 0) { 3340 --AvailableVRs; 3341 return false; 3342 } 3343 } 3344 3345 return UseMemory; 3346 } 3347 3348 /// EnsureStackAlignment - Round stack frame size up from NumBytes to 3349 /// ensure minimum alignment required for target. 3350 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, 3351 unsigned NumBytes) { 3352 unsigned TargetAlign = Lowering->getStackAlignment(); 3353 unsigned AlignMask = TargetAlign - 1; 3354 NumBytes = (NumBytes + AlignMask) & ~AlignMask; 3355 return NumBytes; 3356 } 3357 3358 SDValue PPCTargetLowering::LowerFormalArguments( 3359 SDValue Chain, CallingConv::ID CallConv, bool isVarArg, 3360 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 3361 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 3362 if (Subtarget.isSVR4ABI()) { 3363 if (Subtarget.isPPC64()) 3364 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, 3365 dl, DAG, InVals); 3366 else 3367 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, 3368 dl, DAG, InVals); 3369 } else { 3370 return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, 3371 dl, DAG, InVals); 3372 } 3373 } 3374 3375 SDValue PPCTargetLowering::LowerFormalArguments_32SVR4( 3376 SDValue Chain, CallingConv::ID CallConv, bool isVarArg, 3377 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 3378 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 3379 3380 // 32-bit SVR4 ABI Stack Frame Layout: 3381 // +-----------------------------------+ 3382 // +--> | Back chain | 3383 // | +-----------------------------------+ 3384 // | | Floating-point register save area | 3385 // | +-----------------------------------+ 3386 // | | General register save area | 3387 // | +-----------------------------------+ 3388 // | | CR save word | 3389 // | +-----------------------------------+ 3390 // | | VRSAVE save word | 3391 // | +-----------------------------------+ 3392 // | | Alignment padding | 3393 // | +-----------------------------------+ 3394 // | | Vector register save area | 3395 // | +-----------------------------------+ 3396 // | | Local variable space | 3397 // | +-----------------------------------+ 3398 // | | Parameter list area | 3399 // | +-----------------------------------+ 3400 // | | LR save word | 3401 // | +-----------------------------------+ 3402 // SP--> +--- | Back chain | 3403 // +-----------------------------------+ 3404 // 3405 // Specifications: 3406 // System V Application Binary Interface PowerPC Processor Supplement 3407 // AltiVec Technology Programming Interface Manual 3408 3409 MachineFunction &MF = DAG.getMachineFunction(); 3410 MachineFrameInfo &MFI = MF.getFrameInfo(); 3411 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 3412 3413 EVT PtrVT = getPointerTy(MF.getDataLayout()); 3414 // Potential tail calls could cause overwriting of argument stack slots. 3415 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && 3416 (CallConv == CallingConv::Fast)); 3417 unsigned PtrByteSize = 4; 3418 3419 // Assign locations to all of the incoming arguments. 
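// The fixed part of the 32-bit SVR4 convention assigns integer arguments to
// r3-r10 and floating-point arguments to f1-f8 (or to the SPE/VSX register
// classes selected below); anything CC_PPC32_SVR4 cannot fit in registers is
// given a memory location that we turn into a fixed frame object further
// down.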
3420 SmallVector<CCValAssign, 16> ArgLocs; 3421 PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, 3422 *DAG.getContext()); 3423 3424 // Reserve space for the linkage area on the stack. 3425 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); 3426 CCInfo.AllocateStack(LinkageSize, PtrByteSize); 3427 if (useSoftFloat() || hasSPE()) 3428 CCInfo.PreAnalyzeFormalArguments(Ins); 3429 3430 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4); 3431 CCInfo.clearWasPPCF128(); 3432 3433 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 3434 CCValAssign &VA = ArgLocs[i]; 3435 3436 // Arguments stored in registers. 3437 if (VA.isRegLoc()) { 3438 const TargetRegisterClass *RC; 3439 EVT ValVT = VA.getValVT(); 3440 3441 switch (ValVT.getSimpleVT().SimpleTy) { 3442 default: 3443 llvm_unreachable("ValVT not supported by formal arguments Lowering"); 3444 case MVT::i1: 3445 case MVT::i32: 3446 RC = &PPC::GPRCRegClass; 3447 break; 3448 case MVT::f32: 3449 if (Subtarget.hasP8Vector()) 3450 RC = &PPC::VSSRCRegClass; 3451 else if (Subtarget.hasSPE()) 3452 RC = &PPC::SPE4RCRegClass; 3453 else 3454 RC = &PPC::F4RCRegClass; 3455 break; 3456 case MVT::f64: 3457 if (Subtarget.hasVSX()) 3458 RC = &PPC::VSFRCRegClass; 3459 else if (Subtarget.hasSPE()) 3460 RC = &PPC::SPERCRegClass; 3461 else 3462 RC = &PPC::F8RCRegClass; 3463 break; 3464 case MVT::v16i8: 3465 case MVT::v8i16: 3466 case MVT::v4i32: 3467 RC = &PPC::VRRCRegClass; 3468 break; 3469 case MVT::v4f32: 3470 RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass; 3471 break; 3472 case MVT::v2f64: 3473 case MVT::v2i64: 3474 RC = &PPC::VRRCRegClass; 3475 break; 3476 case MVT::v4f64: 3477 RC = &PPC::QFRCRegClass; 3478 break; 3479 case MVT::v4i1: 3480 RC = &PPC::QBRCRegClass; 3481 break; 3482 } 3483 3484 // Transform the arguments stored in physical registers into virtual ones. 3485 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 3486 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, 3487 ValVT == MVT::i1 ? MVT::i32 : ValVT); 3488 3489 if (ValVT == MVT::i1) 3490 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue); 3491 3492 InVals.push_back(ArgValue); 3493 } else { 3494 // Argument stored in memory. 3495 assert(VA.isMemLoc()); 3496 3497 // Get the extended size of the argument type in stack 3498 unsigned ArgSize = VA.getLocVT().getStoreSize(); 3499 // Get the actual size of the argument type 3500 unsigned ObjSize = VA.getValVT().getStoreSize(); 3501 unsigned ArgOffset = VA.getLocMemOffset(); 3502 // Stack objects in PPC32 are right justified. 3503 ArgOffset += ArgSize - ObjSize; 3504 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable); 3505 3506 // Create load nodes to retrieve arguments from the stack. 3507 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3508 InVals.push_back( 3509 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo())); 3510 } 3511 } 3512 3513 // Assign locations to all of the incoming aggregate by value arguments. 3514 // Aggregates passed by value are stored in the local variable space of the 3515 // caller's stack frame, right above the parameter list area. 3516 SmallVector<CCValAssign, 16> ByValArgLocs; 3517 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(), 3518 ByValArgLocs, *DAG.getContext()); 3519 3520 // Reserve stack space for the allocations in CCInfo. 
3521 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); 3522 3523 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal); 3524 3525 // Area that is at least reserved in the caller of this function. 3526 unsigned MinReservedArea = CCByValInfo.getNextStackOffset(); 3527 MinReservedArea = std::max(MinReservedArea, LinkageSize); 3528 3529 // Set the size that is at least reserved in caller of this function. Tail 3530 // call optimized function's reserved stack space needs to be aligned so that 3531 // taking the difference between two stack areas will result in an aligned 3532 // stack. 3533 MinReservedArea = 3534 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea); 3535 FuncInfo->setMinReservedArea(MinReservedArea); 3536 3537 SmallVector<SDValue, 8> MemOps; 3538 3539 // If the function takes variable number of arguments, make a frame index for 3540 // the start of the first vararg value... for expansion of llvm.va_start. 3541 if (isVarArg) { 3542 static const MCPhysReg GPArgRegs[] = { 3543 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 3544 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 3545 }; 3546 const unsigned NumGPArgRegs = array_lengthof(GPArgRegs); 3547 3548 static const MCPhysReg FPArgRegs[] = { 3549 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, 3550 PPC::F8 3551 }; 3552 unsigned NumFPArgRegs = array_lengthof(FPArgRegs); 3553 3554 if (useSoftFloat() || hasSPE()) 3555 NumFPArgRegs = 0; 3556 3557 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs)); 3558 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs)); 3559 3560 // Make room for NumGPArgRegs and NumFPArgRegs. 3561 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 + 3562 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8; 3563 3564 FuncInfo->setVarArgsStackOffset( 3565 MFI.CreateFixedObject(PtrVT.getSizeInBits()/8, 3566 CCInfo.getNextStackOffset(), true)); 3567 3568 FuncInfo->setVarArgsFrameIndex(MFI.CreateStackObject(Depth, 8, false)); 3569 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 3570 3571 // The fixed integer arguments of a variadic function are stored to the 3572 // VarArgsFrameIndex on the stack so that they may be loaded by 3573 // dereferencing the result of va_next. 3574 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) { 3575 // Get an existing live-in vreg, or add a new one. 3576 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]); 3577 if (!VReg) 3578 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass); 3579 3580 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 3581 SDValue Store = 3582 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo()); 3583 MemOps.push_back(Store); 3584 // Increment the address by four for the next argument to store 3585 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT); 3586 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 3587 } 3588 3589 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6 3590 // is set. 3591 // The double arguments are stored to the VarArgsFrameIndex 3592 // on the stack. 3593 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) { 3594 // Get an existing live-in vreg, or add a new one. 
3595 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]); 3596 if (!VReg) 3597 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass); 3598 3599 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64); 3600 SDValue Store = 3601 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo()); 3602 MemOps.push_back(Store); 3603 // Increment the address by eight for the next argument to store 3604 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl, 3605 PtrVT); 3606 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 3607 } 3608 } 3609 3610 if (!MemOps.empty()) 3611 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); 3612 3613 return Chain; 3614 } 3615 3616 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote 3617 // value to MVT::i64 and then truncate to the correct register size. 3618 SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, 3619 EVT ObjectVT, SelectionDAG &DAG, 3620 SDValue ArgVal, 3621 const SDLoc &dl) const { 3622 if (Flags.isSExt()) 3623 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal, 3624 DAG.getValueType(ObjectVT)); 3625 else if (Flags.isZExt()) 3626 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal, 3627 DAG.getValueType(ObjectVT)); 3628 3629 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal); 3630 } 3631 3632 SDValue PPCTargetLowering::LowerFormalArguments_64SVR4( 3633 SDValue Chain, CallingConv::ID CallConv, bool isVarArg, 3634 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 3635 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 3636 // TODO: add description of PPC stack frame format, or at least some docs. 3637 // 3638 bool isELFv2ABI = Subtarget.isELFv2ABI(); 3639 bool isLittleEndian = Subtarget.isLittleEndian(); 3640 MachineFunction &MF = DAG.getMachineFunction(); 3641 MachineFrameInfo &MFI = MF.getFrameInfo(); 3642 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 3643 3644 assert(!(CallConv == CallingConv::Fast && isVarArg) && 3645 "fastcc not supported on varargs functions"); 3646 3647 EVT PtrVT = getPointerTy(MF.getDataLayout()); 3648 // Potential tail calls could cause overwriting of argument stack slots. 3649 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && 3650 (CallConv == CallingConv::Fast)); 3651 unsigned PtrByteSize = 8; 3652 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); 3653 3654 static const MCPhysReg GPR[] = { 3655 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 3656 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 3657 }; 3658 static const MCPhysReg VR[] = { 3659 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 3660 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 3661 }; 3662 3663 const unsigned Num_GPR_Regs = array_lengthof(GPR); 3664 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13; 3665 const unsigned Num_VR_Regs = array_lengthof(VR); 3666 const unsigned Num_QFPR_Regs = Num_FPR_Regs; 3667 3668 // Do a first pass over the arguments to determine whether the ABI 3669 // guarantees that our caller has allocated the parameter save area 3670 // on its stack frame. In the ELFv1 ABI, this is always the case; 3671 // in the ELFv2 ABI, it is true if this is a vararg function or if 3672 // any parameter is located in a stack slot. 
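// For example, under ELFv2 a function whose arguments all travel in
// registers is not guaranteed a parameter save area by its caller, so
// HasParameterArea starts out false and is only set if the scan below finds
// an argument that needs a stack slot; under ELFv1 the caller always
// allocates the full area (ParamAreaSize below, 8 doublewords).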
3673 3674 bool HasParameterArea = !isELFv2ABI || isVarArg; 3675 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize; 3676 unsigned NumBytes = LinkageSize; 3677 unsigned AvailableFPRs = Num_FPR_Regs; 3678 unsigned AvailableVRs = Num_VR_Regs; 3679 for (unsigned i = 0, e = Ins.size(); i != e; ++i) { 3680 if (Ins[i].Flags.isNest()) 3681 continue; 3682 3683 if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags, 3684 PtrByteSize, LinkageSize, ParamAreaSize, 3685 NumBytes, AvailableFPRs, AvailableVRs, 3686 Subtarget.hasQPX())) 3687 HasParameterArea = true; 3688 } 3689 3690 // Add DAG nodes to load the arguments or copy them out of registers. On 3691 // entry to a function on PPC, the arguments start after the linkage area, 3692 // although the first ones are often in registers. 3693 3694 unsigned ArgOffset = LinkageSize; 3695 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 3696 unsigned &QFPR_idx = FPR_idx; 3697 SmallVector<SDValue, 8> MemOps; 3698 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin(); 3699 unsigned CurArgIdx = 0; 3700 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { 3701 SDValue ArgVal; 3702 bool needsLoad = false; 3703 EVT ObjectVT = Ins[ArgNo].VT; 3704 EVT OrigVT = Ins[ArgNo].ArgVT; 3705 unsigned ObjSize = ObjectVT.getStoreSize(); 3706 unsigned ArgSize = ObjSize; 3707 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; 3708 if (Ins[ArgNo].isOrigArg()) { 3709 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx); 3710 CurArgIdx = Ins[ArgNo].getOrigArgIndex(); 3711 } 3712 // We re-align the argument offset for each argument, except when using the 3713 // fast calling convention, when we need to make sure we do that only when 3714 // we'll actually use a stack slot. 3715 unsigned CurArgOffset, Align; 3716 auto ComputeArgOffset = [&]() { 3717 /* Respect alignment of argument on the stack. */ 3718 Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize); 3719 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; 3720 CurArgOffset = ArgOffset; 3721 }; 3722 3723 if (CallConv != CallingConv::Fast) { 3724 ComputeArgOffset(); 3725 3726 /* Compute GPR index associated with argument offset. */ 3727 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; 3728 GPR_idx = std::min(GPR_idx, Num_GPR_Regs); 3729 } 3730 3731 // FIXME the codegen can be much improved in some cases. 3732 // We do not have to keep everything in memory. 3733 if (Flags.isByVal()) { 3734 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit"); 3735 3736 if (CallConv == CallingConv::Fast) 3737 ComputeArgOffset(); 3738 3739 // ObjSize is the true size, ArgSize rounded up to multiple of registers. 3740 ObjSize = Flags.getByValSize(); 3741 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 3742 // Empty aggregate parameters do not take up registers. Examples: 3743 // struct { } a; 3744 // union { } b; 3745 // int c[0]; 3746 // etc. However, we have to provide a place-holder in InVals, so 3747 // pretend we have an 8-byte item at the current address for that 3748 // purpose. 3749 if (!ObjSize) { 3750 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true); 3751 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3752 InVals.push_back(FIN); 3753 continue; 3754 } 3755 3756 // Create a stack object covering all stack doublewords occupied 3757 // by the argument. 
If the argument is (fully or partially) on 3758 // the stack, or if the argument is fully in registers but the 3759 // caller has allocated the parameter save anyway, we can refer 3760 // directly to the caller's stack frame. Otherwise, create a 3761 // local copy in our own frame. 3762 int FI; 3763 if (HasParameterArea || 3764 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize) 3765 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true); 3766 else 3767 FI = MFI.CreateStackObject(ArgSize, Align, false); 3768 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3769 3770 // Handle aggregates smaller than 8 bytes. 3771 if (ObjSize < PtrByteSize) { 3772 // The value of the object is its address, which differs from the 3773 // address of the enclosing doubleword on big-endian systems. 3774 SDValue Arg = FIN; 3775 if (!isLittleEndian) { 3776 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT); 3777 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff); 3778 } 3779 InVals.push_back(Arg); 3780 3781 if (GPR_idx != Num_GPR_Regs) { 3782 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); 3783 FuncInfo->addLiveInAttr(VReg, Flags); 3784 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 3785 SDValue Store; 3786 3787 if (ObjSize==1 || ObjSize==2 || ObjSize==4) { 3788 EVT ObjType = (ObjSize == 1 ? MVT::i8 : 3789 (ObjSize == 2 ? MVT::i16 : MVT::i32)); 3790 Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg, 3791 MachinePointerInfo(&*FuncArg), ObjType); 3792 } else { 3793 // For sizes that don't fit a truncating store (3, 5, 6, 7), 3794 // store the whole register as-is to the parameter save area 3795 // slot. 3796 Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 3797 MachinePointerInfo(&*FuncArg)); 3798 } 3799 3800 MemOps.push_back(Store); 3801 } 3802 // Whether we copied from a register or not, advance the offset 3803 // into the parameter save area by a full doubleword. 3804 ArgOffset += PtrByteSize; 3805 continue; 3806 } 3807 3808 // The value of the object is its address, which is the address of 3809 // its first stack doubleword. 3810 InVals.push_back(FIN); 3811 3812 // Store whatever pieces of the object are in registers to memory. 3813 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { 3814 if (GPR_idx == Num_GPR_Regs) 3815 break; 3816 3817 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 3818 FuncInfo->addLiveInAttr(VReg, Flags); 3819 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 3820 SDValue Addr = FIN; 3821 if (j) { 3822 SDValue Off = DAG.getConstant(j, dl, PtrVT); 3823 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off); 3824 } 3825 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr, 3826 MachinePointerInfo(&*FuncArg, j)); 3827 MemOps.push_back(Store); 3828 ++GPR_idx; 3829 } 3830 ArgOffset += ArgSize; 3831 continue; 3832 } 3833 3834 switch (ObjectVT.getSimpleVT().SimpleTy) { 3835 default: llvm_unreachable("Unhandled argument type!"); 3836 case MVT::i1: 3837 case MVT::i32: 3838 case MVT::i64: 3839 if (Flags.isNest()) { 3840 // The 'nest' parameter, if any, is passed in R11. 3841 unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass); 3842 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); 3843 3844 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) 3845 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); 3846 3847 break; 3848 } 3849 3850 // These can be scalar arguments or elements of an integer array type 3851 // passed directly. 
Clang may use those instead of "byval" aggregate 3852 // types to avoid forcing arguments to memory unnecessarily. 3853 if (GPR_idx != Num_GPR_Regs) { 3854 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); 3855 FuncInfo->addLiveInAttr(VReg, Flags); 3856 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); 3857 3858 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) 3859 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote 3860 // value to MVT::i64 and then truncate to the correct register size. 3861 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); 3862 } else { 3863 if (CallConv == CallingConv::Fast) 3864 ComputeArgOffset(); 3865 3866 needsLoad = true; 3867 ArgSize = PtrByteSize; 3868 } 3869 if (CallConv != CallingConv::Fast || needsLoad) 3870 ArgOffset += 8; 3871 break; 3872 3873 case MVT::f32: 3874 case MVT::f64: 3875 // These can be scalar arguments or elements of a float array type 3876 // passed directly. The latter are used to implement ELFv2 homogenous 3877 // float aggregates. 3878 if (FPR_idx != Num_FPR_Regs) { 3879 unsigned VReg; 3880 3881 if (ObjectVT == MVT::f32) 3882 VReg = MF.addLiveIn(FPR[FPR_idx], 3883 Subtarget.hasP8Vector() 3884 ? &PPC::VSSRCRegClass 3885 : &PPC::F4RCRegClass); 3886 else 3887 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX() 3888 ? &PPC::VSFRCRegClass 3889 : &PPC::F8RCRegClass); 3890 3891 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 3892 ++FPR_idx; 3893 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) { 3894 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8 3895 // once we support fp <-> gpr moves. 3896 3897 // This can only ever happen in the presence of f32 array types, 3898 // since otherwise we never run out of FPRs before running out 3899 // of GPRs. 3900 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); 3901 FuncInfo->addLiveInAttr(VReg, Flags); 3902 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); 3903 3904 if (ObjectVT == MVT::f32) { 3905 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0)) 3906 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal, 3907 DAG.getConstant(32, dl, MVT::i32)); 3908 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal); 3909 } 3910 3911 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal); 3912 } else { 3913 if (CallConv == CallingConv::Fast) 3914 ComputeArgOffset(); 3915 3916 needsLoad = true; 3917 } 3918 3919 // When passing an array of floats, the array occupies consecutive 3920 // space in the argument area; only round up to the next doubleword 3921 // at the end of the array. Otherwise, each float takes 8 bytes. 3922 if (CallConv != CallingConv::Fast || needsLoad) { 3923 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize; 3924 ArgOffset += ArgSize; 3925 if (Flags.isInConsecutiveRegsLast()) 3926 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 3927 } 3928 break; 3929 case MVT::v4f32: 3930 case MVT::v4i32: 3931 case MVT::v8i16: 3932 case MVT::v16i8: 3933 case MVT::v2f64: 3934 case MVT::v2i64: 3935 case MVT::v1i128: 3936 case MVT::f128: 3937 if (!Subtarget.hasQPX()) { 3938 // These can be scalar arguments or elements of a vector array type 3939 // passed directly. The latter are used to implement ELFv2 homogenous 3940 // vector aggregates. 
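// For instance, a homogeneous aggregate of two vector members would likely
// reach this point as two separate formal arguments marked
// isInConsecutiveRegs, each taking one of the twelve registers V2-V13 listed
// in VR above. (Illustrative; the exact splitting is decided by the
// front end and the caller's argument lowering.)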
3941 if (VR_idx != Num_VR_Regs) { 3942 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); 3943 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 3944 ++VR_idx; 3945 } else { 3946 if (CallConv == CallingConv::Fast) 3947 ComputeArgOffset(); 3948 needsLoad = true; 3949 } 3950 if (CallConv != CallingConv::Fast || needsLoad) 3951 ArgOffset += 16; 3952 break; 3953 } // not QPX 3954 3955 assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 && 3956 "Invalid QPX parameter type"); 3957 LLVM_FALLTHROUGH; 3958 3959 case MVT::v4f64: 3960 case MVT::v4i1: 3961 // QPX vectors are treated like their scalar floating-point subregisters 3962 // (except that they're larger). 3963 unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32; 3964 if (QFPR_idx != Num_QFPR_Regs) { 3965 const TargetRegisterClass *RC; 3966 switch (ObjectVT.getSimpleVT().SimpleTy) { 3967 case MVT::v4f64: RC = &PPC::QFRCRegClass; break; 3968 case MVT::v4f32: RC = &PPC::QSRCRegClass; break; 3969 default: RC = &PPC::QBRCRegClass; break; 3970 } 3971 3972 unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC); 3973 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 3974 ++QFPR_idx; 3975 } else { 3976 if (CallConv == CallingConv::Fast) 3977 ComputeArgOffset(); 3978 needsLoad = true; 3979 } 3980 if (CallConv != CallingConv::Fast || needsLoad) 3981 ArgOffset += Sz; 3982 break; 3983 } 3984 3985 // We need to load the argument to a virtual register if we determined 3986 // above that we ran out of physical registers of the appropriate type. 3987 if (needsLoad) { 3988 if (ObjSize < ArgSize && !isLittleEndian) 3989 CurArgOffset += ArgSize - ObjSize; 3990 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable); 3991 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3992 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo()); 3993 } 3994 3995 InVals.push_back(ArgVal); 3996 } 3997 3998 // Area that is at least reserved in the caller of this function. 3999 unsigned MinReservedArea; 4000 if (HasParameterArea) 4001 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize); 4002 else 4003 MinReservedArea = LinkageSize; 4004 4005 // Set the size that is at least reserved in caller of this function. Tail 4006 // call optimized functions' reserved stack space needs to be aligned so that 4007 // taking the difference between two stack areas will result in an aligned 4008 // stack. 4009 MinReservedArea = 4010 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea); 4011 FuncInfo->setMinReservedArea(MinReservedArea); 4012 4013 // If the function takes variable number of arguments, make a frame index for 4014 // the start of the first vararg value... for expansion of llvm.va_start. 4015 if (isVarArg) { 4016 int Depth = ArgOffset; 4017 4018 FuncInfo->setVarArgsFrameIndex( 4019 MFI.CreateFixedObject(PtrByteSize, Depth, true)); 4020 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 4021 4022 // If this function is vararg, store any remaining integer argument regs 4023 // to their spots on the stack so that they may be loaded by dereferencing 4024 // the result of va_next. 
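// For example, in a vararg function with a single fixed pointer argument,
// ArgOffset is LinkageSize + 8 at this point, so the loop below starts at
// GPR_idx == 1 and spills X4 through X10 into their home doublewords at
// offsets LinkageSize + 8 through LinkageSize + 56 of the parameter save
// area. (Illustrative; the actual offsets depend on the fixed argument list.)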
4025 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4026 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4027 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4028 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4029 SDValue Store =
4030 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4031 MemOps.push_back(Store);
4032 // Increment the address by the pointer size for the next argument to store.
4033 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4034 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4035 }
4036 }
4037
4038 if (!MemOps.empty())
4039 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4040
4041 return Chain;
4042 }
4043
4044 SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
4045 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4046 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4047 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4048 // TODO: add description of PPC stack frame format, or at least some docs.
4049 //
4050 MachineFunction &MF = DAG.getMachineFunction();
4051 MachineFrameInfo &MFI = MF.getFrameInfo();
4052 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4053
4054 EVT PtrVT = getPointerTy(MF.getDataLayout());
4055 bool isPPC64 = PtrVT == MVT::i64;
4056 // Potential tail calls could cause overwriting of argument stack slots.
4057 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4058 (CallConv == CallingConv::Fast));
4059 unsigned PtrByteSize = isPPC64 ? 8 : 4;
4060 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4061 unsigned ArgOffset = LinkageSize;
4062 // Area that is at least reserved in the caller of this function.
4063 unsigned MinReservedArea = ArgOffset;
4064
4065 static const MCPhysReg GPR_32[] = { // 32-bit registers.
4066 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4067 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4068 };
4069 static const MCPhysReg GPR_64[] = { // 64-bit registers.
4070 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4071 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4072 };
4073 static const MCPhysReg VR[] = {
4074 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4075 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4076 };
4077
4078 const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
4079 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4080 const unsigned Num_VR_Regs = array_lengthof(VR);
4081
4082 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4083
4084 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
4085
4086 // In 32-bit non-varargs functions, the stack space for vectors is after the
4087 // stack space for non-vectors. We do not use this space unless we have
4088 // too many vectors to fit in registers, something that only occurs in
4089 // constructed examples, but we have to walk the arglist to figure
4090 // that out. For the pathological case, compute VecArgOffset as the
4091 // start of the vector parameter area. Computing VecArgOffset is the
4092 // entire point of the following loop.
4093 unsigned VecArgOffset = ArgOffset;
4094 if (!isVarArg && !isPPC64) {
4095 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
4096 ++ArgNo) {
4097 EVT ObjectVT = Ins[ArgNo].VT;
4098 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4099
4100 if (Flags.isByVal()) {
4101 // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of regs.
4102 unsigned ObjSize = Flags.getByValSize(); 4103 unsigned ArgSize = 4104 ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 4105 VecArgOffset += ArgSize; 4106 continue; 4107 } 4108 4109 switch(ObjectVT.getSimpleVT().SimpleTy) { 4110 default: llvm_unreachable("Unhandled argument type!"); 4111 case MVT::i1: 4112 case MVT::i32: 4113 case MVT::f32: 4114 VecArgOffset += 4; 4115 break; 4116 case MVT::i64: // PPC64 4117 case MVT::f64: 4118 // FIXME: We are guaranteed to be !isPPC64 at this point. 4119 // Does MVT::i64 apply? 4120 VecArgOffset += 8; 4121 break; 4122 case MVT::v4f32: 4123 case MVT::v4i32: 4124 case MVT::v8i16: 4125 case MVT::v16i8: 4126 // Nothing to do, we're only looking at Nonvector args here. 4127 break; 4128 } 4129 } 4130 } 4131 // We've found where the vector parameter area in memory is. Skip the 4132 // first 12 parameters; these don't use that memory. 4133 VecArgOffset = ((VecArgOffset+15)/16)*16; 4134 VecArgOffset += 12*16; 4135 4136 // Add DAG nodes to load the arguments or copy them out of registers. On 4137 // entry to a function on PPC, the arguments start after the linkage area, 4138 // although the first ones are often in registers. 4139 4140 SmallVector<SDValue, 8> MemOps; 4141 unsigned nAltivecParamsAtEnd = 0; 4142 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin(); 4143 unsigned CurArgIdx = 0; 4144 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { 4145 SDValue ArgVal; 4146 bool needsLoad = false; 4147 EVT ObjectVT = Ins[ArgNo].VT; 4148 unsigned ObjSize = ObjectVT.getSizeInBits()/8; 4149 unsigned ArgSize = ObjSize; 4150 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; 4151 if (Ins[ArgNo].isOrigArg()) { 4152 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx); 4153 CurArgIdx = Ins[ArgNo].getOrigArgIndex(); 4154 } 4155 unsigned CurArgOffset = ArgOffset; 4156 4157 // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary. 4158 if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 || 4159 ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) { 4160 if (isVarArg || isPPC64) { 4161 MinReservedArea = ((MinReservedArea+15)/16)*16; 4162 MinReservedArea += CalculateStackSlotSize(ObjectVT, 4163 Flags, 4164 PtrByteSize); 4165 } else nAltivecParamsAtEnd++; 4166 } else 4167 // Calculate min reserved area. 4168 MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT, 4169 Flags, 4170 PtrByteSize); 4171 4172 // FIXME the codegen can be much improved in some cases. 4173 // We do not have to keep everything in memory. 4174 if (Flags.isByVal()) { 4175 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit"); 4176 4177 // ObjSize is the true size, ArgSize rounded up to multiple of registers. 4178 ObjSize = Flags.getByValSize(); 4179 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 4180 // Objects of size 1 and 2 are right justified, everything else is 4181 // left justified. This means the memory address is adjusted forwards. 4182 if (ObjSize==1 || ObjSize==2) { 4183 CurArgOffset = CurArgOffset + (4 - ObjSize); 4184 } 4185 // The value of the object is its address. 
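// (On 32-bit Darwin this nudges a 1- or 2-byte aggregate to the tail of its
// 4-byte slot, which is where the big-endian caller actually placed the
// bytes.)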
4186 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true); 4187 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 4188 InVals.push_back(FIN); 4189 if (ObjSize==1 || ObjSize==2) { 4190 if (GPR_idx != Num_GPR_Regs) { 4191 unsigned VReg; 4192 if (isPPC64) 4193 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 4194 else 4195 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 4196 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 4197 EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16; 4198 SDValue Store = 4199 DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, 4200 MachinePointerInfo(&*FuncArg), ObjType); 4201 MemOps.push_back(Store); 4202 ++GPR_idx; 4203 } 4204 4205 ArgOffset += PtrByteSize; 4206 4207 continue; 4208 } 4209 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { 4210 // Store whatever pieces of the object are in registers 4211 // to memory. ArgOffset will be the address of the beginning 4212 // of the object. 4213 if (GPR_idx != Num_GPR_Regs) { 4214 unsigned VReg; 4215 if (isPPC64) 4216 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 4217 else 4218 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 4219 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true); 4220 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 4221 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 4222 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 4223 MachinePointerInfo(&*FuncArg, j)); 4224 MemOps.push_back(Store); 4225 ++GPR_idx; 4226 ArgOffset += PtrByteSize; 4227 } else { 4228 ArgOffset += ArgSize - (ArgOffset-CurArgOffset); 4229 break; 4230 } 4231 } 4232 continue; 4233 } 4234 4235 switch (ObjectVT.getSimpleVT().SimpleTy) { 4236 default: llvm_unreachable("Unhandled argument type!"); 4237 case MVT::i1: 4238 case MVT::i32: 4239 if (!isPPC64) { 4240 if (GPR_idx != Num_GPR_Regs) { 4241 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 4242 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); 4243 4244 if (ObjectVT == MVT::i1) 4245 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal); 4246 4247 ++GPR_idx; 4248 } else { 4249 needsLoad = true; 4250 ArgSize = PtrByteSize; 4251 } 4252 // All int arguments reserve stack space in the Darwin ABI. 4253 ArgOffset += PtrByteSize; 4254 break; 4255 } 4256 LLVM_FALLTHROUGH; 4257 case MVT::i64: // PPC64 4258 if (GPR_idx != Num_GPR_Regs) { 4259 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 4260 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); 4261 4262 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) 4263 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote 4264 // value to MVT::i64 and then truncate to the correct register size. 4265 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); 4266 4267 ++GPR_idx; 4268 } else { 4269 needsLoad = true; 4270 ArgSize = PtrByteSize; 4271 } 4272 // All int arguments reserve stack space in the Darwin ABI. 4273 ArgOffset += 8; 4274 break; 4275 4276 case MVT::f32: 4277 case MVT::f64: 4278 // Every 4 bytes of argument space consumes one of the GPRs available for 4279 // argument passing. 
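// For example, a double passed early in the list on 32-bit Darwin lands in
// an FPR but also shadows two GPR argument slots (say r3 and r4), which is
// what the GPR_idx increments below model; on 64-bit Darwin it shadows a
// single GPR.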
4280 if (GPR_idx != Num_GPR_Regs) { 4281 ++GPR_idx; 4282 if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64) 4283 ++GPR_idx; 4284 } 4285 if (FPR_idx != Num_FPR_Regs) { 4286 unsigned VReg; 4287 4288 if (ObjectVT == MVT::f32) 4289 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass); 4290 else 4291 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass); 4292 4293 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 4294 ++FPR_idx; 4295 } else { 4296 needsLoad = true; 4297 } 4298 4299 // All FP arguments reserve stack space in the Darwin ABI. 4300 ArgOffset += isPPC64 ? 8 : ObjSize; 4301 break; 4302 case MVT::v4f32: 4303 case MVT::v4i32: 4304 case MVT::v8i16: 4305 case MVT::v16i8: 4306 // Note that vector arguments in registers don't reserve stack space, 4307 // except in varargs functions. 4308 if (VR_idx != Num_VR_Regs) { 4309 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); 4310 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 4311 if (isVarArg) { 4312 while ((ArgOffset % 16) != 0) { 4313 ArgOffset += PtrByteSize; 4314 if (GPR_idx != Num_GPR_Regs) 4315 GPR_idx++; 4316 } 4317 ArgOffset += 16; 4318 GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64? 4319 } 4320 ++VR_idx; 4321 } else { 4322 if (!isVarArg && !isPPC64) { 4323 // Vectors go after all the nonvectors. 4324 CurArgOffset = VecArgOffset; 4325 VecArgOffset += 16; 4326 } else { 4327 // Vectors are aligned. 4328 ArgOffset = ((ArgOffset+15)/16)*16; 4329 CurArgOffset = ArgOffset; 4330 ArgOffset += 16; 4331 } 4332 needsLoad = true; 4333 } 4334 break; 4335 } 4336 4337 // We need to load the argument to a virtual register if we determined above 4338 // that we ran out of physical registers of the appropriate type. 4339 if (needsLoad) { 4340 int FI = MFI.CreateFixedObject(ObjSize, 4341 CurArgOffset + (ArgSize - ObjSize), 4342 isImmutable); 4343 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 4344 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo()); 4345 } 4346 4347 InVals.push_back(ArgVal); 4348 } 4349 4350 // Allow for Altivec parameters at the end, if needed. 4351 if (nAltivecParamsAtEnd) { 4352 MinReservedArea = ((MinReservedArea+15)/16)*16; 4353 MinReservedArea += 16*nAltivecParamsAtEnd; 4354 } 4355 4356 // Area that is at least reserved in the caller of this function. 4357 MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize); 4358 4359 // Set the size that is at least reserved in caller of this function. Tail 4360 // call optimized functions' reserved stack space needs to be aligned so that 4361 // taking the difference between two stack areas will result in an aligned 4362 // stack. 4363 MinReservedArea = 4364 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea); 4365 FuncInfo->setMinReservedArea(MinReservedArea); 4366 4367 // If the function takes variable number of arguments, make a frame index for 4368 // the start of the first vararg value... for expansion of llvm.va_start. 4369 if (isVarArg) { 4370 int Depth = ArgOffset; 4371 4372 FuncInfo->setVarArgsFrameIndex( 4373 MFI.CreateFixedObject(PtrVT.getSizeInBits()/8, 4374 Depth, true)); 4375 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 4376 4377 // If this function is vararg, store any remaining integer argument regs 4378 // to their spots on the stack so that they may be loaded by dereferencing 4379 // the result of va_next. 
4380 for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) { 4381 unsigned VReg; 4382 4383 if (isPPC64) 4384 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 4385 else 4386 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 4387 4388 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 4389 SDValue Store = 4390 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo()); 4391 MemOps.push_back(Store); 4392 // Increment the address by four for the next argument to store 4393 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT); 4394 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 4395 } 4396 } 4397 4398 if (!MemOps.empty()) 4399 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); 4400 4401 return Chain; 4402 } 4403 4404 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be 4405 /// adjusted to accommodate the arguments for the tail call. 4406 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall, 4407 unsigned ParamSize) { 4408 4409 if (!isTailCall) return 0; 4410 4411 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>(); 4412 unsigned CallerMinReservedArea = FI->getMinReservedArea(); 4413 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize; 4414 // Remember only if the new adjustment is bigger. 4415 if (SPDiff < FI->getTailCallSPDelta()) 4416 FI->setTailCallSPDelta(SPDiff); 4417 4418 return SPDiff; 4419 } 4420 4421 static bool isFunctionGlobalAddress(SDValue Callee); 4422 4423 static bool 4424 callsShareTOCBase(const Function *Caller, SDValue Callee, 4425 const TargetMachine &TM) { 4426 // Need a GlobalValue to determine if a Caller and Callee share the same 4427 // TOCBase. 4428 const GlobalValue *GV = nullptr; 4429 4430 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 4431 GV = G->getGlobal(); 4432 } else if (MCSymbolSDNode *M = dyn_cast<MCSymbolSDNode>(Callee)) { 4433 // On AIX only, we replace GlobalAddressSDNode with MCSymbolSDNode for 4434 // the callee of a direct function call. The MCSymbolSDNode contains the 4435 // MCSymbol for the function entry point. 4436 const auto *S = cast<MCSymbolXCOFF>(M->getMCSymbol()); 4437 GV = S->getGlobalValue(); 4438 } 4439 4440 // If we failed to get a GlobalValue, then pessimistically assume they do not 4441 // share a TOCBase. 4442 if (!GV) 4443 return false; 4444 4445 // The medium and large code models are expected to provide a sufficiently 4446 // large TOC to satisfy all data addressing needs of a module with a 4447 // single TOC. Since each module is addressed through a single TOC, we 4448 // only need to check that caller and callee don't cross DSO boundaries. 4449 if (CodeModel::Medium == TM.getCodeModel() || 4450 CodeModel::Large == TM.getCodeModel()) 4451 return TM.shouldAssumeDSOLocal(*Caller->getParent(), GV); 4452 4453 // Otherwise we need to ensure callee and caller are in the same section, 4454 // since the linker may allocate multiple TOCs, and we don't know which 4455 // sections will belong to the same TOC base. 4456 4457 if (!GV->isStrongDefinitionForLinker()) 4458 return false; 4459 4460 // Any explicitly-specified sections and section prefixes must also match. 4461 // Also, if we're using -ffunction-sections, then each function is always in 4462 // a different section (the same is true for COMDAT functions).
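  // For instance (illustrative only): if the callee were defined with an
  // explicit placement such as
  //   __attribute__((section(".text.startup"))) void callee() { ... }
  // while the caller remains in the default .text section, the section
  // comparison below fails and we conservatively assume distinct TOC bases.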
4463 if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() || 4464 GV->getSection() != Caller->getSection()) 4465 return false; 4466 if (const auto *F = dyn_cast<Function>(GV)) { 4467 if (F->getSectionPrefix() != Caller->getSectionPrefix()) 4468 return false; 4469 } 4470 4471 // If the callee might be interposed, then we can't assume the ultimate call 4472 // target will be in the same section. Even in cases where we can assume that 4473 // interposition won't happen, in any case where the linker might insert a 4474 // stub to allow for interposition, we must generate code as though 4475 // interposition might occur. To understand why this matters, consider a 4476 // situation where: a -> b -> c where the arrows indicate calls. b and c are 4477 // in the same section, but a is in a different module (i.e. has a different 4478 // TOC base pointer). If the linker allows for interposition between b and c, 4479 // then it will generate a stub for the call edge between b and c which will 4480 // save the TOC pointer into the designated stack slot allocated by b. If we 4481 // return true here, and therefore allow a tail call between b and c, that 4482 // stack slot won't exist and the b -> c stub will end up saving b'c TOC base 4483 // pointer into the stack slot allocated by a (where the a -> b stub saved 4484 // a's TOC base pointer). If we're not considering a tail call, but rather, 4485 // whether a nop is needed after the call instruction in b, because the linker 4486 // will insert a stub, it might complain about a missing nop if we omit it 4487 // (although many don't complain in this case). 4488 if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV)) 4489 return false; 4490 4491 return true; 4492 } 4493 4494 static bool 4495 needStackSlotPassParameters(const PPCSubtarget &Subtarget, 4496 const SmallVectorImpl<ISD::OutputArg> &Outs) { 4497 assert(Subtarget.isSVR4ABI() && Subtarget.isPPC64()); 4498 4499 const unsigned PtrByteSize = 8; 4500 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); 4501 4502 static const MCPhysReg GPR[] = { 4503 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 4504 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 4505 }; 4506 static const MCPhysReg VR[] = { 4507 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 4508 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 4509 }; 4510 4511 const unsigned NumGPRs = array_lengthof(GPR); 4512 const unsigned NumFPRs = 13; 4513 const unsigned NumVRs = array_lengthof(VR); 4514 const unsigned ParamAreaSize = NumGPRs * PtrByteSize; 4515 4516 unsigned NumBytes = LinkageSize; 4517 unsigned AvailableFPRs = NumFPRs; 4518 unsigned AvailableVRs = NumVRs; 4519 4520 for (const ISD::OutputArg& Param : Outs) { 4521 if (Param.Flags.isNest()) continue; 4522 4523 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, 4524 PtrByteSize, LinkageSize, ParamAreaSize, 4525 NumBytes, AvailableFPRs, AvailableVRs, 4526 Subtarget.hasQPX())) 4527 return true; 4528 } 4529 return false; 4530 } 4531 4532 static bool 4533 hasSameArgumentList(const Function *CallerFn, ImmutableCallSite CS) { 4534 if (CS.arg_size() != CallerFn->arg_size()) 4535 return false; 4536 4537 ImmutableCallSite::arg_iterator CalleeArgIter = CS.arg_begin(); 4538 ImmutableCallSite::arg_iterator CalleeArgEnd = CS.arg_end(); 4539 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin(); 4540 4541 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) { 4542 const Value* CalleeArg = *CalleeArgIter; 4543 const Value* CallerArg 
= &(*CallerArgIter); 4544 if (CalleeArg == CallerArg) 4545 continue; 4546 4547 // e.g. @caller([4 x i64] %a, [4 x i64] %b) { 4548 // tail call @callee([4 x i64] undef, [4 x i64] %b) 4549 // } 4550 // 1st argument of callee is undef and has the same type as caller. 4551 if (CalleeArg->getType() == CallerArg->getType() && 4552 isa<UndefValue>(CalleeArg)) 4553 continue; 4554 4555 return false; 4556 } 4557 4558 return true; 4559 } 4560 4561 // Returns true if TCO is possible between the caller's and callee's 4562 // calling conventions. 4563 static bool 4564 areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, 4565 CallingConv::ID CalleeCC) { 4566 // Tail calls are possible with fastcc and ccc. 4567 auto isTailCallableCC = [] (CallingConv::ID CC){ 4568 return CC == CallingConv::C || CC == CallingConv::Fast; 4569 }; 4570 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC)) 4571 return false; 4572 4573 // We can safely tail call both fastcc and ccc callees from a C calling 4574 // convention caller. If the caller is fastcc, we may have less stack space 4575 // than a non-fastcc caller with the same signature so disable tail-calls in 4576 // that case. 4577 return CallerCC == CallingConv::C || CallerCC == CalleeCC; 4578 } 4579 4580 bool 4581 PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4( 4582 SDValue Callee, 4583 CallingConv::ID CalleeCC, 4584 ImmutableCallSite CS, 4585 bool isVarArg, 4586 const SmallVectorImpl<ISD::OutputArg> &Outs, 4587 const SmallVectorImpl<ISD::InputArg> &Ins, 4588 SelectionDAG& DAG) const { 4589 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt; 4590 4591 if (DisableSCO && !TailCallOpt) return false; 4592 4593 // Variadic argument functions are not supported. 4594 if (isVarArg) return false; 4595 4596 auto &Caller = DAG.getMachineFunction().getFunction(); 4597 // Check that the calling conventions are compatible for TCO. 4598 if (!areCallingConvEligibleForTCO_64SVR4(Caller.getCallingConv(), CalleeCC)) 4599 return false; 4600 4601 // A caller with any byval parameter is not supported. 4602 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); })) 4603 return false; 4604 4605 // A callee with any byval parameter is not supported either. 4606 // Note: This is a quick workaround, because in some cases, e.g. 4607 // caller's stack size > callee's stack size, we are still able to apply 4608 // sibling call optimization. For example, gcc is able to do SCO for caller1 4609 // in the following example, but not for caller2. 4610 // struct test { 4611 // long int a; 4612 // char ary[56]; 4613 // } gTest; 4614 // __attribute__((noinline)) int callee(struct test v, struct test *b) { 4615 // b->a = v.a; 4616 // return 0; 4617 // } 4618 // void caller1(struct test a, struct test c, struct test *b) { 4619 // callee(gTest, b); } 4620 // void caller2(struct test *b) { callee(gTest, b); } 4621 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); })) 4622 return false; 4623 4624 // If the callee and caller use different calling conventions, we cannot pass 4625 // parameters on the stack since the offsets for the parameter area may differ.
4626 if (Caller.getCallingConv() != CalleeCC && 4627 needStackSlotPassParameters(Subtarget, Outs)) 4628 return false; 4629 4630 // No TCO/SCO on indirect calls because the caller has to restore its TOC. 4631 if (!isFunctionGlobalAddress(Callee) && 4632 !isa<ExternalSymbolSDNode>(Callee)) 4633 return false; 4634 4635 // If the caller and callee potentially have different TOC bases then we 4636 // cannot tail call since we need to restore the TOC pointer after the call. 4637 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977 4638 if (!callsShareTOCBase(&Caller, Callee, getTargetMachine())) 4639 return false; 4640 4641 // TCO allows altering callee ABI, so we don't have to check further. 4642 if (CalleeCC == CallingConv::Fast && TailCallOpt) 4643 return true; 4644 4645 if (DisableSCO) return false; 4646 4647 // If the callee uses the same argument list as the caller, we can apply SCO 4648 // in this case. If not, we need to check whether the callee needs stack 4649 // space for passing arguments. 4650 if (!hasSameArgumentList(&Caller, CS) && 4651 needStackSlotPassParameters(Subtarget, Outs)) { 4652 return false; 4653 } 4654 4655 return true; 4656 } 4657 4658 /// IsEligibleForTailCallOptimization - Check whether the call is eligible 4659 /// for tail call optimization. Targets which want to do tail call 4660 /// optimization should implement this function. 4661 bool 4662 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, 4663 CallingConv::ID CalleeCC, 4664 bool isVarArg, 4665 const SmallVectorImpl<ISD::InputArg> &Ins, 4666 SelectionDAG& DAG) const { 4667 if (!getTargetMachine().Options.GuaranteedTailCallOpt) 4668 return false; 4669 4670 // Variable argument functions are not supported. 4671 if (isVarArg) 4672 return false; 4673 4674 MachineFunction &MF = DAG.getMachineFunction(); 4675 CallingConv::ID CallerCC = MF.getFunction().getCallingConv(); 4676 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) { 4677 // Functions containing byval parameters are not supported. 4678 for (unsigned i = 0; i != Ins.size(); i++) { 4679 ISD::ArgFlagsTy Flags = Ins[i].Flags; 4680 if (Flags.isByVal()) return false; 4681 } 4682 4683 // Non-PIC/GOT tail calls are supported. 4684 if (getTargetMachine().getRelocationModel() != Reloc::PIC_) 4685 return true; 4686 4687 // At the moment we can only do local tail calls (in the same module, hidden 4688 // or protected) if we are generating PIC. 4689 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 4690 return G->getGlobal()->hasHiddenVisibility() 4691 || G->getGlobal()->hasProtectedVisibility(); 4692 } 4693 4694 return false; 4695 } 4696 4697 /// isBLACompatibleAddress - Return the immediate to use if the specified 4698 /// 32-bit value is representable in the immediate field of a BxA instruction. 4699 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) { 4700 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); 4701 if (!C) return nullptr; 4702 4703 int Addr = C->getZExtValue(); 4704 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero. 4705 SignExtend32<26>(Addr) != Addr) 4706 return nullptr; // Top 6 bits have to be sext of immediate.
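  // Worked example (illustrative): Addr = 0x01FFFFFC has its low 2 bits clear
  // and sign-extends from bit 25 to itself, so it is accepted and encoded
  // below as 0x007FFFFF (Addr >> 2); Addr = 0x02000000 is rejected because
  // bit 25 is the sign bit of the 26-bit field.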
4707 4708 return DAG 4709 .getConstant( 4710 (int)C->getZExtValue() >> 2, SDLoc(Op), 4711 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout())) 4712 .getNode(); 4713 } 4714 4715 namespace { 4716 4717 struct TailCallArgumentInfo { 4718 SDValue Arg; 4719 SDValue FrameIdxOp; 4720 int FrameIdx = 0; 4721 4722 TailCallArgumentInfo() = default; 4723 }; 4724 4725 } // end anonymous namespace 4726 4727 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot. 4728 static void StoreTailCallArgumentsToStackSlot( 4729 SelectionDAG &DAG, SDValue Chain, 4730 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs, 4731 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) { 4732 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) { 4733 SDValue Arg = TailCallArgs[i].Arg; 4734 SDValue FIN = TailCallArgs[i].FrameIdxOp; 4735 int FI = TailCallArgs[i].FrameIdx; 4736 // Store relative to framepointer. 4737 MemOpChains.push_back(DAG.getStore( 4738 Chain, dl, Arg, FIN, 4739 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI))); 4740 } 4741 } 4742 4743 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to 4744 /// the appropriate stack slot for the tail call optimized function call. 4745 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, 4746 SDValue OldRetAddr, SDValue OldFP, 4747 int SPDiff, const SDLoc &dl) { 4748 if (SPDiff) { 4749 // Calculate the new stack slot for the return address. 4750 MachineFunction &MF = DAG.getMachineFunction(); 4751 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); 4752 const PPCFrameLowering *FL = Subtarget.getFrameLowering(); 4753 bool isPPC64 = Subtarget.isPPC64(); 4754 int SlotSize = isPPC64 ? 8 : 4; 4755 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset(); 4756 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize, 4757 NewRetAddrLoc, true); 4758 EVT VT = isPPC64 ? MVT::i64 : MVT::i32; 4759 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); 4760 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx, 4761 MachinePointerInfo::getFixedStack(MF, NewRetAddr)); 4762 4763 // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack 4764 // slot as the FP is never overwritten. 4765 if (Subtarget.isDarwinABI()) { 4766 int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset(); 4767 int NewFPIdx = MF.getFrameInfo().CreateFixedObject(SlotSize, NewFPLoc, 4768 true); 4769 SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); 4770 Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx, 4771 MachinePointerInfo::getFixedStack( 4772 DAG.getMachineFunction(), NewFPIdx)); 4773 } 4774 } 4775 return Chain; 4776 } 4777 4778 /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate 4779 /// the position of the argument. 4780 static void 4781 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, 4782 SDValue Arg, int SPDiff, unsigned ArgOffset, 4783 SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) { 4784 int Offset = ArgOffset + SPDiff; 4785 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8; 4786 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true); 4787 EVT VT = isPPC64 ? 
MVT::i64 : MVT::i32; 4788 SDValue FIN = DAG.getFrameIndex(FI, VT); 4789 TailCallArgumentInfo Info; 4790 Info.Arg = Arg; 4791 Info.FrameIdxOp = FIN; 4792 Info.FrameIdx = FI; 4793 TailCallArguments.push_back(Info); 4794 } 4795 4796 /// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address 4797 /// stack slot. Returns the chain as result and the loaded frame pointers in 4798 /// LROpOut/FPOpout. Used when tail calling. 4799 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr( 4800 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut, 4801 SDValue &FPOpOut, const SDLoc &dl) const { 4802 if (SPDiff) { 4803 // Load the LR and FP stack slot for later adjusting. 4804 EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32; 4805 LROpOut = getReturnAddrFrameIndex(DAG); 4806 LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo()); 4807 Chain = SDValue(LROpOut.getNode(), 1); 4808 4809 // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack 4810 // slot as the FP is never overwritten. 4811 if (Subtarget.isDarwinABI()) { 4812 FPOpOut = getFramePointerFrameIndex(DAG); 4813 FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo()); 4814 Chain = SDValue(FPOpOut.getNode(), 1); 4815 } 4816 } 4817 return Chain; 4818 } 4819 4820 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified 4821 /// by "Src" to address "Dst" of size "Size". Alignment information is 4822 /// specified by the specific parameter attribute. The copy will be passed as 4823 /// a byval function parameter. 4824 /// Sometimes what we are copying is the end of a larger object, the part that 4825 /// does not fit in registers. 4826 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, 4827 SDValue Chain, ISD::ArgFlagsTy Flags, 4828 SelectionDAG &DAG, const SDLoc &dl) { 4829 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32); 4830 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), 4831 false, false, false, MachinePointerInfo(), 4832 MachinePointerInfo()); 4833 } 4834 4835 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of 4836 /// tail calls. 4837 static void LowerMemOpCallTo( 4838 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, 4839 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, 4840 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains, 4841 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) { 4842 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); 4843 if (!isTailCall) { 4844 if (isVector) { 4845 SDValue StackPtr; 4846 if (isPPC64) 4847 StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 4848 else 4849 StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 4850 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, 4851 DAG.getConstant(ArgOffset, dl, PtrVT)); 4852 } 4853 MemOpChains.push_back( 4854 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo())); 4855 // Calculate and remember argument location. 
4856 } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset, 4857 TailCallArguments); 4858 } 4859 4860 static void 4861 PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, 4862 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, 4863 SDValue FPOp, 4864 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) { 4865 // Emit a sequence of copyto/copyfrom virtual registers for arguments that 4866 // might overwrite each other in case of tail call optimization. 4867 SmallVector<SDValue, 8> MemOpChains2; 4868 // Do not flag preceding copytoreg stuff together with the following stuff. 4869 InFlag = SDValue(); 4870 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments, 4871 MemOpChains2, dl); 4872 if (!MemOpChains2.empty()) 4873 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2); 4874 4875 // Store the return address to the appropriate stack slot. 4876 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl); 4877 4878 // Emit callseq_end just before tailcall node. 4879 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), 4880 DAG.getIntPtrConstant(0, dl, true), InFlag, dl); 4881 InFlag = Chain.getValue(1); 4882 } 4883 4884 // Is this global address that of a function that can be called by name? (as 4885 // opposed to something that must hold a descriptor for an indirect call). 4886 static bool isFunctionGlobalAddress(SDValue Callee) { 4887 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 4888 if (Callee.getOpcode() == ISD::GlobalTLSAddress || 4889 Callee.getOpcode() == ISD::TargetGlobalTLSAddress) 4890 return false; 4891 4892 return G->getGlobal()->getValueType()->isFunctionTy(); 4893 } 4894 4895 return false; 4896 } 4897 4898 static unsigned 4899 PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, 4900 SDValue CallSeqStart, const SDLoc &dl, int SPDiff, bool isTailCall, 4901 bool isPatchPoint, bool hasNest, 4902 SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass, 4903 SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys, 4904 ImmutableCallSite CS, const PPCSubtarget &Subtarget) { 4905 bool isPPC64 = Subtarget.isPPC64(); 4906 bool isSVR4ABI = Subtarget.isSVR4ABI(); 4907 bool isELFv2ABI = Subtarget.isELFv2ABI(); 4908 bool isAIXABI = Subtarget.isAIXABI(); 4909 4910 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); 4911 NodeTys.push_back(MVT::Other); // Returns a chain 4912 NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use. 4913 4914 unsigned CallOpc = PPCISD::CALL; 4915 4916 bool needIndirectCall = true; 4917 if (!isSVR4ABI || !isPPC64) 4918 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) { 4919 // If this is an absolute destination address, use the munged value. 4920 Callee = SDValue(Dest, 0); 4921 needIndirectCall = false; 4922 } 4923 4924 // PC-relative references to external symbols should go through $stub, unless 4925 // we're building with the leopard linker or later, which automatically 4926 // synthesizes these stubs. 
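  // On ELF the analogous mechanism is the PLT: for example (illustrative),
  // a 32-bit ELF PIC call to an external function such as memcpy is given
  // the PPCII::MO_PLT flag below so that the branch is emitted against
  // memcpy@plt rather than the raw symbol.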
4927 const TargetMachine &TM = DAG.getTarget(); 4928 MachineFunction &MF = DAG.getMachineFunction(); 4929 const Module *Mod = MF.getFunction().getParent(); 4930 const GlobalValue *GV = nullptr; 4931 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) 4932 GV = G->getGlobal(); 4933 bool Local = TM.shouldAssumeDSOLocal(*Mod, GV); 4934 bool UsePlt = !Local && Subtarget.isTargetELF() && !isPPC64; 4935 4936 if (isFunctionGlobalAddress(Callee)) { 4937 GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee); 4938 4939 if (TM.getTargetTriple().isOSAIX()) { 4940 // Direct function calls reference the symbol for the function's entry 4941 // point, which is named by inserting a "." before the function's 4942 // C-linkage name. 4943 auto &Context = MF.getMMI().getContext(); 4944 MCSymbol *S = Context.getOrCreateSymbol(Twine(".") + 4945 Twine(G->getGlobal()->getName())); 4946 cast<MCSymbolXCOFF>(S)->setGlobalValue(GV); 4947 Callee = DAG.getMCSymbol(S, PtrVT); 4948 } else { 4949 // A call to a TLS address is actually an indirect call to a 4950 // thread-specific pointer. 4951 unsigned OpFlags = 0; 4952 if (UsePlt) 4953 OpFlags = PPCII::MO_PLT; 4954 4955 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, 4956 // every direct call is) turn it into a TargetGlobalAddress / 4957 // TargetExternalSymbol node so that legalize doesn't hack it. 4958 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, 4959 Callee.getValueType(), 0, OpFlags); 4960 } 4961 needIndirectCall = false; 4962 } 4963 4964 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 4965 unsigned char OpFlags = 0; 4966 4967 if (UsePlt) 4968 OpFlags = PPCII::MO_PLT; 4969 4970 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(), 4971 OpFlags); 4972 needIndirectCall = false; 4973 } 4974 4975 if (isPatchPoint) { 4976 // We'll form an invalid direct call when lowering a patchpoint; the full 4977 // sequence for an indirect call is complicated, and many of the 4978 // instructions introduced might have side effects (and, thus, can't be 4979 // removed later). The call itself will be removed as soon as the 4980 // argument/return lowering is complete, so the fact that it has the wrong 4981 // kind of operands should not really matter. 4982 needIndirectCall = false; 4983 } 4984 4985 if (needIndirectCall) { 4986 // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair 4987 // to do the call, we can't use PPCISD::CALL. 4988 SDValue MTCTROps[] = {Chain, Callee, InFlag}; 4989 4990 if (isSVR4ABI && isPPC64 && !isELFv2ABI) { 4991 // Function pointers in the 64-bit SVR4 ABI do not point to the function 4992 // entry point, but to the function descriptor (the function entry point 4993 // address is part of the function descriptor though). 4994 // The function descriptor is a three doubleword structure with the 4995 // following fields: function entry point, TOC base address and 4996 // environment pointer. 4997 // Thus for a call through a function pointer, the following actions need 4998 // to be performed: 4999 // 1. Save the TOC of the caller in the TOC save area of its stack 5000 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()). 5001 // 2. Load the address of the function entry point from the function 5002 // descriptor. 5003 // 3. Load the TOC of the callee from the function descriptor into r2. 5004 // 4. Load the environment pointer from the function descriptor into 5005 // r11. 5006 // 5. Branch to the function entry point address. 5007 // 6. 
On return of the callee, the TOC of the caller needs to be 5008 // restored (this is done in FinishCall()). 5009 // 5010 // The loads are scheduled at the beginning of the call sequence, and the 5011 // register copies are flagged together to ensure that no other 5012 // operations can be scheduled in between. E.g. without flagging the 5013 // copies together, a TOC access in the caller could be scheduled between 5014 // the assignment of the callee TOC and the branch to the callee, which 5015 // results in the TOC access going through the TOC of the callee instead 5016 // of going through the TOC of the caller, which leads to incorrect code. 5017 5018 // Load the address of the function entry point from the function 5019 // descriptor. 5020 SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1); 5021 if (LDChain.getValueType() == MVT::Glue) 5022 LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2); 5023 5024 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors() 5025 ? (MachineMemOperand::MODereferenceable | 5026 MachineMemOperand::MOInvariant) 5027 : MachineMemOperand::MONone; 5028 5029 MachinePointerInfo MPI(CS ? CS.getCalledValue() : nullptr); 5030 SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI, 5031 /* Alignment = */ 8, MMOFlags); 5032 5033 // Load environment pointer into r11. 5034 SDValue PtrOff = DAG.getIntPtrConstant(16, dl); 5035 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff); 5036 SDValue LoadEnvPtr = 5037 DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, MPI.getWithOffset(16), 5038 /* Alignment = */ 8, MMOFlags); 5039 5040 SDValue TOCOff = DAG.getIntPtrConstant(8, dl); 5041 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff); 5042 SDValue TOCPtr = 5043 DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, MPI.getWithOffset(8), 5044 /* Alignment = */ 8, MMOFlags); 5045 5046 setUsesTOCBasePtr(DAG); 5047 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr, 5048 InFlag); 5049 Chain = TOCVal.getValue(0); 5050 InFlag = TOCVal.getValue(1); 5051 5052 // If the function call has an explicit 'nest' parameter, it takes the 5053 // place of the environment pointer. 5054 if (!hasNest) { 5055 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr, 5056 InFlag); 5057 5058 Chain = EnvVal.getValue(0); 5059 InFlag = EnvVal.getValue(1); 5060 } 5061 5062 MTCTROps[0] = Chain; 5063 MTCTROps[1] = LoadFuncPtr; 5064 MTCTROps[2] = InFlag; 5065 } 5066 5067 Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, 5068 makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2)); 5069 InFlag = Chain.getValue(1); 5070 5071 NodeTys.clear(); 5072 NodeTys.push_back(MVT::Other); 5073 NodeTys.push_back(MVT::Glue); 5074 Ops.push_back(Chain); 5075 CallOpc = PPCISD::BCTRL; 5076 Callee.setNode(nullptr); 5077 // Add use of X11 (holding environment pointer) 5078 if (isSVR4ABI && isPPC64 && !isELFv2ABI && !hasNest) 5079 Ops.push_back(DAG.getRegister(PPC::X11, PtrVT)); 5080 // Add CTR register as callee so a bctr can be emitted later. 5081 if (isTailCall) 5082 Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT)); 5083 } 5084 5085 // If this is a direct call, pass the chain and the callee. 5086 if (Callee.getNode()) { 5087 Ops.push_back(Chain); 5088 Ops.push_back(Callee); 5089 } 5090 // If this is a tail call add stack pointer delta. 5091 if (isTailCall) 5092 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32)); 5093 5094 // Add argument registers to the end of the list so that they are known live 5095 // into the call. 
5096 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 5097 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 5098 RegsToPass[i].second.getValueType())); 5099 5100 // All calls, in the AIX ABI and 64-bit ELF ABIs, need the TOC register 5101 // live into the call. 5102 // We do need to reserve R2/X2 to appease the verifier for the PATCHPOINT. 5103 if ((isSVR4ABI && isPPC64) || isAIXABI) { 5104 setUsesTOCBasePtr(DAG); 5105 5106 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is 5107 // no way to mark dependencies as implicit here. 5108 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter. 5109 if (!isPatchPoint) 5110 Ops.push_back(DAG.getRegister(isPPC64 ? PPC::X2 5111 : PPC::R2, PtrVT)); 5112 } 5113 5114 return CallOpc; 5115 } 5116 5117 SDValue PPCTargetLowering::LowerCallResult( 5118 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, 5119 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 5120 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 5121 SmallVector<CCValAssign, 16> RVLocs; 5122 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, 5123 *DAG.getContext()); 5124 5125 CCRetInfo.AnalyzeCallResult( 5126 Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold) 5127 ? RetCC_PPC_Cold 5128 : RetCC_PPC); 5129 5130 // Copy all of the result registers out of their specified physreg. 5131 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { 5132 CCValAssign &VA = RVLocs[i]; 5133 assert(VA.isRegLoc() && "Can only return in registers!"); 5134 5135 SDValue Val = DAG.getCopyFromReg(Chain, dl, 5136 VA.getLocReg(), VA.getLocVT(), InFlag); 5137 Chain = Val.getValue(1); 5138 InFlag = Val.getValue(2); 5139 5140 switch (VA.getLocInfo()) { 5141 default: llvm_unreachable("Unknown loc info!"); 5142 case CCValAssign::Full: break; 5143 case CCValAssign::AExt: 5144 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 5145 break; 5146 case CCValAssign::ZExt: 5147 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val, 5148 DAG.getValueType(VA.getValVT())); 5149 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 5150 break; 5151 case CCValAssign::SExt: 5152 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val, 5153 DAG.getValueType(VA.getValVT())); 5154 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 5155 break; 5156 } 5157 5158 InVals.push_back(Val); 5159 } 5160 5161 return Chain; 5162 } 5163 5164 SDValue PPCTargetLowering::FinishCall( 5165 CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg, 5166 bool isPatchPoint, bool hasNest, SelectionDAG &DAG, 5167 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue InFlag, 5168 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff, 5169 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins, 5170 SmallVectorImpl<SDValue> &InVals, ImmutableCallSite CS) const { 5171 std::vector<EVT> NodeTys; 5172 SmallVector<SDValue, 8> Ops; 5173 unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl, 5174 SPDiff, isTailCall, isPatchPoint, hasNest, 5175 RegsToPass, Ops, NodeTys, CS, Subtarget); 5176 5177 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls 5178 if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64()) 5179 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32)); 5180 5181 // When performing tail call optimization the callee pops its arguments off 5182 // the stack. 
Account for this here so these bytes can be pushed back on in 5183 // PPCFrameLowering::eliminateCallFramePseudoInstr. 5184 int BytesCalleePops = 5185 (CallConv == CallingConv::Fast && 5186 getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0; 5187 5188 // Add a register mask operand representing the call-preserved registers. 5189 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 5190 const uint32_t *Mask = 5191 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv); 5192 assert(Mask && "Missing call preserved mask for calling convention"); 5193 Ops.push_back(DAG.getRegisterMask(Mask)); 5194 5195 if (InFlag.getNode()) 5196 Ops.push_back(InFlag); 5197 5198 // Emit tail call. 5199 if (isTailCall) { 5200 assert(((Callee.getOpcode() == ISD::Register && 5201 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) || 5202 Callee.getOpcode() == ISD::TargetExternalSymbol || 5203 Callee.getOpcode() == ISD::TargetGlobalAddress || 5204 isa<ConstantSDNode>(Callee)) && 5205 "Expecting an global address, external symbol, absolute value or register"); 5206 5207 DAG.getMachineFunction().getFrameInfo().setHasTailCall(); 5208 return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops); 5209 } 5210 5211 // Add a NOP immediately after the branch instruction when using the 64-bit 5212 // SVR4 or the AIX ABI. 5213 // At link time, if caller and callee are in a different module and 5214 // thus have a different TOC, the call will be replaced with a call to a stub 5215 // function which saves the current TOC, loads the TOC of the callee and 5216 // branches to the callee. The NOP will be replaced with a load instruction 5217 // which restores the TOC of the caller from the TOC save slot of the current 5218 // stack frame. If caller and callee belong to the same module (and have the 5219 // same TOC), the NOP will remain unchanged, or become some other NOP. 5220 5221 MachineFunction &MF = DAG.getMachineFunction(); 5222 if (!isTailCall && !isPatchPoint && 5223 ((Subtarget.isSVR4ABI() && Subtarget.isPPC64()) || 5224 Subtarget.isAIXABI())) { 5225 if (CallOpc == PPCISD::BCTRL) { 5226 if (Subtarget.isAIXABI()) 5227 report_fatal_error("Indirect call on AIX is not implemented."); 5228 5229 // This is a call through a function pointer. 5230 // Restore the caller TOC from the save area into R2. 5231 // See PrepareCall() for more information about calls through function 5232 // pointers in the 64-bit SVR4 ABI. 5233 // We are using a target-specific load with r2 hard coded, because the 5234 // result of a target-independent load would never go directly into r2, 5235 // since r2 is a reserved register (which prevents the register allocator 5236 // from allocating it), resulting in an additional register being 5237 // allocated and an unnecessary move instruction being generated. 5238 CallOpc = PPCISD::BCTRL_LOAD_TOC; 5239 5240 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 5241 SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT); 5242 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); 5243 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl); 5244 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff); 5245 5246 // The address needs to go after the chain input but before the flag (or 5247 // any other variadic arguments). 5248 Ops.insert(std::next(Ops.begin()), AddTOC); 5249 } else if (CallOpc == PPCISD::CALL && 5250 !callsShareTOCBase(&MF.getFunction(), Callee, DAG.getTarget())) { 5251 // Otherwise insert NOP for non-local calls. 
5252 CallOpc = PPCISD::CALL_NOP; 5253 } 5254 } 5255 5256 Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); 5257 InFlag = Chain.getValue(1); 5258 5259 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), 5260 DAG.getIntPtrConstant(BytesCalleePops, dl, true), 5261 InFlag, dl); 5262 if (!Ins.empty()) 5263 InFlag = Chain.getValue(1); 5264 5265 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, 5266 Ins, dl, DAG, InVals); 5267 } 5268 5269 SDValue 5270 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, 5271 SmallVectorImpl<SDValue> &InVals) const { 5272 SelectionDAG &DAG = CLI.DAG; 5273 SDLoc &dl = CLI.DL; 5274 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 5275 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 5276 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 5277 SDValue Chain = CLI.Chain; 5278 SDValue Callee = CLI.Callee; 5279 bool &isTailCall = CLI.IsTailCall; 5280 CallingConv::ID CallConv = CLI.CallConv; 5281 bool isVarArg = CLI.IsVarArg; 5282 bool isPatchPoint = CLI.IsPatchPoint; 5283 ImmutableCallSite CS = CLI.CS; 5284 5285 if (isTailCall) { 5286 if (Subtarget.useLongCalls() && !(CS && CS.isMustTailCall())) 5287 isTailCall = false; 5288 else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) 5289 isTailCall = 5290 IsEligibleForTailCallOptimization_64SVR4(Callee, CallConv, CS, 5291 isVarArg, Outs, Ins, DAG); 5292 else 5293 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, 5294 Ins, DAG); 5295 if (isTailCall) { 5296 ++NumTailCalls; 5297 if (!getTargetMachine().Options.GuaranteedTailCallOpt) 5298 ++NumSiblingCalls; 5299 5300 assert(isa<GlobalAddressSDNode>(Callee) && 5301 "Callee should be an llvm::Function object."); 5302 LLVM_DEBUG( 5303 const GlobalValue *GV = 5304 cast<GlobalAddressSDNode>(Callee)->getGlobal(); 5305 const unsigned Width = 5306 80 - strlen("TCO caller: ") - strlen(", callee linkage: 0, 0"); 5307 dbgs() << "TCO caller: " 5308 << left_justify(DAG.getMachineFunction().getName(), Width) 5309 << ", callee linkage: " << GV->getVisibility() << ", " 5310 << GV->getLinkage() << "\n"); 5311 } 5312 } 5313 5314 if (!isTailCall && CS && CS.isMustTailCall()) 5315 report_fatal_error("failed to perform tail call elimination on a call " 5316 "site marked musttail"); 5317 5318 // When long calls (i.e. indirect calls) are always used, calls are always 5319 // made via function pointer. If we have a function name, first translate it 5320 // into a pointer. 
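  // Illustrative sketch: with long calls enabled (e.g. through the
  // "longcall" target feature), a direct call such as foo() is lowered by
  // first materializing the address of foo via LowerGlobalAddress below and
  // then emitting the usual indirect-call sequence through the CTR.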
5321 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) && 5322 !isTailCall) 5323 Callee = LowerGlobalAddress(Callee, DAG); 5324 5325 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) 5326 return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg, 5327 isTailCall, isPatchPoint, Outs, OutVals, Ins, 5328 dl, DAG, InVals, CS); 5329 5330 if (Subtarget.isSVR4ABI()) 5331 return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg, 5332 isTailCall, isPatchPoint, Outs, OutVals, Ins, 5333 dl, DAG, InVals, CS); 5334 5335 if (Subtarget.isAIXABI()) 5336 return LowerCall_AIX(Chain, Callee, CallConv, isVarArg, 5337 isTailCall, isPatchPoint, Outs, OutVals, Ins, 5338 dl, DAG, InVals, CS); 5339 5340 return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg, 5341 isTailCall, isPatchPoint, Outs, OutVals, Ins, 5342 dl, DAG, InVals, CS); 5343 } 5344 5345 SDValue PPCTargetLowering::LowerCall_32SVR4( 5346 SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, 5347 bool isTailCall, bool isPatchPoint, 5348 const SmallVectorImpl<ISD::OutputArg> &Outs, 5349 const SmallVectorImpl<SDValue> &OutVals, 5350 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 5351 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, 5352 ImmutableCallSite CS) const { 5353 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description 5354 // of the 32-bit SVR4 ABI stack frame layout. 5355 5356 assert((CallConv == CallingConv::C || 5357 CallConv == CallingConv::Cold || 5358 CallConv == CallingConv::Fast) && "Unknown calling convention!"); 5359 5360 unsigned PtrByteSize = 4; 5361 5362 MachineFunction &MF = DAG.getMachineFunction(); 5363 5364 // Mark this function as potentially containing a function that contains a 5365 // tail call. As a consequence the frame pointer will be used for dynamicalloc 5366 // and restoring the callers stack pointer in this functions epilog. This is 5367 // done because by tail calling the called function might overwrite the value 5368 // in this function's (MF) stack pointer stack slot 0(SP). 5369 if (getTargetMachine().Options.GuaranteedTailCallOpt && 5370 CallConv == CallingConv::Fast) 5371 MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); 5372 5373 // Count how many bytes are to be pushed on the stack, including the linkage 5374 // area, parameter list area and the part of the local variable space which 5375 // contains copies of aggregates which are passed by value. 5376 5377 // Assign locations to all of the outgoing arguments. 5378 SmallVector<CCValAssign, 16> ArgLocs; 5379 PPCCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext()); 5380 5381 // Reserve space for the linkage area on the stack. 5382 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(), 5383 PtrByteSize); 5384 if (useSoftFloat()) 5385 CCInfo.PreAnalyzeCallOperands(Outs); 5386 5387 if (isVarArg) { 5388 // Handle fixed and variable vector arguments differently. 5389 // Fixed vector arguments go into registers as long as registers are 5390 // available. Variable vector arguments always go into memory. 
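    // For example (illustrative): given a variadic callee such as
    //   void f(vector int a, ...);
    // a call f(v1, v2) routes the fixed argument v1 through CC_PPC32_SVR4
    // (so it can land in a vector register), while the variadic v2 goes
    // through CC_PPC32_SVR4_VarArg and always receives a memory location.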
5391 unsigned NumArgs = Outs.size(); 5392 5393 for (unsigned i = 0; i != NumArgs; ++i) { 5394 MVT ArgVT = Outs[i].VT; 5395 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 5396 bool Result; 5397 5398 if (Outs[i].IsFixed) { 5399 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, 5400 CCInfo); 5401 } else { 5402 Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full, 5403 ArgFlags, CCInfo); 5404 } 5405 5406 if (Result) { 5407 #ifndef NDEBUG 5408 errs() << "Call operand #" << i << " has unhandled type " 5409 << EVT(ArgVT).getEVTString() << "\n"; 5410 #endif 5411 llvm_unreachable(nullptr); 5412 } 5413 } 5414 } else { 5415 // All arguments are treated the same. 5416 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4); 5417 } 5418 CCInfo.clearWasPPCF128(); 5419 5420 // Assign locations to all of the outgoing aggregate by value arguments. 5421 SmallVector<CCValAssign, 16> ByValArgLocs; 5422 CCState CCByValInfo(CallConv, isVarArg, MF, ByValArgLocs, *DAG.getContext()); 5423 5424 // Reserve stack space for the allocations in CCInfo. 5425 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); 5426 5427 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal); 5428 5429 // Size of the linkage area, parameter list area and the part of the local 5430 // space variable where copies of aggregates which are passed by value are 5431 // stored. 5432 unsigned NumBytes = CCByValInfo.getNextStackOffset(); 5433 5434 // Calculate by how many bytes the stack has to be adjusted in case of tail 5435 // call optimization. 5436 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); 5437 5438 // Adjust the stack pointer for the new arguments... 5439 // These operations are automatically eliminated by the prolog/epilog pass 5440 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl); 5441 SDValue CallSeqStart = Chain; 5442 5443 // Load the return address and frame pointer so it can be moved somewhere else 5444 // later. 5445 SDValue LROp, FPOp; 5446 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl); 5447 5448 // Set up a copy of the stack pointer for use loading and storing any 5449 // arguments that may not fit in the registers available for argument 5450 // passing. 5451 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 5452 5453 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 5454 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 5455 SmallVector<SDValue, 8> MemOpChains; 5456 5457 bool seenFloatArg = false; 5458 // Walk the register/memloc assignments, inserting copies/loads. 5459 for (unsigned i = 0, j = 0, e = ArgLocs.size(); 5460 i != e; 5461 ++i) { 5462 CCValAssign &VA = ArgLocs[i]; 5463 SDValue Arg = OutVals[i]; 5464 ISD::ArgFlagsTy Flags = Outs[i].Flags; 5465 5466 if (Flags.isByVal()) { 5467 // Argument is an aggregate which is passed by value, thus we need to 5468 // create a copy of it in the local variable space of the current stack 5469 // frame (which is the stack frame of the caller) and pass the address of 5470 // this copy to the callee. 5471 assert((j < ByValArgLocs.size()) && "Index out of bounds!"); 5472 CCValAssign &ByValVA = ByValArgLocs[j++]; 5473 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!"); 5474 5475 // Memory reserved in the local variable space of the callers stack frame. 
5476 unsigned LocMemOffset = ByValVA.getLocMemOffset(); 5477 5478 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); 5479 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()), 5480 StackPtr, PtrOff); 5481 5482 // Create a copy of the argument in the local area of the current 5483 // stack frame. 5484 SDValue MemcpyCall = 5485 CreateCopyOfByValArgument(Arg, PtrOff, 5486 CallSeqStart.getNode()->getOperand(0), 5487 Flags, DAG, dl); 5488 5489 // This must go outside the CALLSEQ_START..END. 5490 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0, 5491 SDLoc(MemcpyCall)); 5492 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), 5493 NewCallSeqStart.getNode()); 5494 Chain = CallSeqStart = NewCallSeqStart; 5495 5496 // Pass the address of the aggregate copy on the stack either in a 5497 // physical register or in the parameter list area of the current stack 5498 // frame to the callee. 5499 Arg = PtrOff; 5500 } 5501 5502 // When useCRBits() is true, there can be i1 arguments. 5503 // It is because getRegisterType(MVT::i1) => MVT::i1, 5504 // and for other integer types getRegisterType() => MVT::i32. 5505 // Extend i1 and ensure callee will get i32. 5506 if (Arg.getValueType() == MVT::i1) 5507 Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, 5508 dl, MVT::i32, Arg); 5509 5510 if (VA.isRegLoc()) { 5511 seenFloatArg |= VA.getLocVT().isFloatingPoint(); 5512 // Put argument in a physical register. 5513 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); 5514 } else { 5515 // Put argument in the parameter list area of the current stack frame. 5516 assert(VA.isMemLoc()); 5517 unsigned LocMemOffset = VA.getLocMemOffset(); 5518 5519 if (!isTailCall) { 5520 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); 5521 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()), 5522 StackPtr, PtrOff); 5523 5524 MemOpChains.push_back( 5525 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo())); 5526 } else { 5527 // Calculate and remember argument location. 5528 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset, 5529 TailCallArguments); 5530 } 5531 } 5532 } 5533 5534 if (!MemOpChains.empty()) 5535 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); 5536 5537 // Build a sequence of copy-to-reg nodes chained together with token chain 5538 // and flag operands which copy the outgoing args into the appropriate regs. 5539 SDValue InFlag; 5540 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 5541 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 5542 RegsToPass[i].second, InFlag); 5543 InFlag = Chain.getValue(1); 5544 } 5545 5546 // Set CR bit 6 to true if this is a vararg call with floating args passed in 5547 // registers. 5548 if (isVarArg) { 5549 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); 5550 SDValue Ops[] = { Chain, InFlag }; 5551 5552 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, 5553 dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 
2 : 1)); 5554 5555 InFlag = Chain.getValue(1); 5556 } 5557 5558 if (isTailCall) 5559 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp, 5560 TailCallArguments); 5561 5562 return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, 5563 /* unused except on PPC64 ELFv1 */ false, DAG, 5564 RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, 5565 NumBytes, Ins, InVals, CS); 5566 } 5567 5568 // Copy an argument into memory, being careful to do this outside the 5569 // call sequence for the call to which the argument belongs. 5570 SDValue PPCTargetLowering::createMemcpyOutsideCallSeq( 5571 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags, 5572 SelectionDAG &DAG, const SDLoc &dl) const { 5573 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff, 5574 CallSeqStart.getNode()->getOperand(0), 5575 Flags, DAG, dl); 5576 // The MEMCPY must go outside the CALLSEQ_START..END. 5577 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1); 5578 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0, 5579 SDLoc(MemcpyCall)); 5580 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), 5581 NewCallSeqStart.getNode()); 5582 return NewCallSeqStart; 5583 } 5584 5585 SDValue PPCTargetLowering::LowerCall_64SVR4( 5586 SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, 5587 bool isTailCall, bool isPatchPoint, 5588 const SmallVectorImpl<ISD::OutputArg> &Outs, 5589 const SmallVectorImpl<SDValue> &OutVals, 5590 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 5591 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, 5592 ImmutableCallSite CS) const { 5593 bool isELFv2ABI = Subtarget.isELFv2ABI(); 5594 bool isLittleEndian = Subtarget.isLittleEndian(); 5595 unsigned NumOps = Outs.size(); 5596 bool hasNest = false; 5597 bool IsSibCall = false; 5598 5599 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 5600 unsigned PtrByteSize = 8; 5601 5602 MachineFunction &MF = DAG.getMachineFunction(); 5603 5604 if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt) 5605 IsSibCall = true; 5606 5607 // Mark this function as potentially containing a function that contains a 5608 // tail call. As a consequence the frame pointer will be used for dynamicalloc 5609 // and restoring the callers stack pointer in this functions epilog. This is 5610 // done because by tail calling the called function might overwrite the value 5611 // in this function's (MF) stack pointer stack slot 0(SP). 5612 if (getTargetMachine().Options.GuaranteedTailCallOpt && 5613 CallConv == CallingConv::Fast) 5614 MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); 5615 5616 assert(!(CallConv == CallingConv::Fast && isVarArg) && 5617 "fastcc not supported on varargs functions"); 5618 5619 // Count how many bytes are to be pushed on the stack, including the linkage 5620 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes 5621 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage 5622 // area is 32 bytes reserved space for [SP][CR][LR][TOC]. 
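  // As a doubleword sketch of the offsets implied by the layout above
  // (illustrative):
  //   ELFv1: 0 back chain, 8 CR save, 16 LR save, 24/32 reserved,
  //          40 TOC save  -> 48 bytes
  //   ELFv2: 0 back chain, 8 CR save, 16 LR save, 24 TOC save -> 32 bytes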
5623 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); 5624 unsigned NumBytes = LinkageSize; 5625 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 5626 unsigned &QFPR_idx = FPR_idx; 5627 5628 static const MCPhysReg GPR[] = { 5629 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 5630 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 5631 }; 5632 static const MCPhysReg VR[] = { 5633 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 5634 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 5635 }; 5636 5637 const unsigned NumGPRs = array_lengthof(GPR); 5638 const unsigned NumFPRs = useSoftFloat() ? 0 : 13; 5639 const unsigned NumVRs = array_lengthof(VR); 5640 const unsigned NumQFPRs = NumFPRs; 5641 5642 // On ELFv2, we can avoid allocating the parameter area if all the arguments 5643 // can be passed to the callee in registers. 5644 // For the fast calling convention, there is another check below. 5645 // Note: We should keep consistent with LowerFormalArguments_64SVR4() 5646 bool HasParameterArea = !isELFv2ABI || isVarArg || CallConv == CallingConv::Fast; 5647 if (!HasParameterArea) { 5648 unsigned ParamAreaSize = NumGPRs * PtrByteSize; 5649 unsigned AvailableFPRs = NumFPRs; 5650 unsigned AvailableVRs = NumVRs; 5651 unsigned NumBytesTmp = NumBytes; 5652 for (unsigned i = 0; i != NumOps; ++i) { 5653 if (Outs[i].Flags.isNest()) continue; 5654 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags, 5655 PtrByteSize, LinkageSize, ParamAreaSize, 5656 NumBytesTmp, AvailableFPRs, AvailableVRs, 5657 Subtarget.hasQPX())) 5658 HasParameterArea = true; 5659 } 5660 } 5661 5662 // When using the fast calling convention, we don't provide backing for 5663 // arguments that will be in registers. 5664 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0; 5665 5666 // Avoid allocating parameter area for fastcc functions if all the arguments 5667 // can be passed in the registers. 5668 if (CallConv == CallingConv::Fast) 5669 HasParameterArea = false; 5670 5671 // Add up all the space actually used. 5672 for (unsigned i = 0; i != NumOps; ++i) { 5673 ISD::ArgFlagsTy Flags = Outs[i].Flags; 5674 EVT ArgVT = Outs[i].VT; 5675 EVT OrigVT = Outs[i].ArgVT; 5676 5677 if (Flags.isNest()) 5678 continue; 5679 5680 if (CallConv == CallingConv::Fast) { 5681 if (Flags.isByVal()) { 5682 NumGPRsUsed += (Flags.getByValSize()+7)/8; 5683 if (NumGPRsUsed > NumGPRs) 5684 HasParameterArea = true; 5685 } else { 5686 switch (ArgVT.getSimpleVT().SimpleTy) { 5687 default: llvm_unreachable("Unexpected ValueType for argument!"); 5688 case MVT::i1: 5689 case MVT::i32: 5690 case MVT::i64: 5691 if (++NumGPRsUsed <= NumGPRs) 5692 continue; 5693 break; 5694 case MVT::v4i32: 5695 case MVT::v8i16: 5696 case MVT::v16i8: 5697 case MVT::v2f64: 5698 case MVT::v2i64: 5699 case MVT::v1i128: 5700 case MVT::f128: 5701 if (++NumVRsUsed <= NumVRs) 5702 continue; 5703 break; 5704 case MVT::v4f32: 5705 // When using QPX, this is handled like a FP register, otherwise, it 5706 // is an Altivec register. 5707 if (Subtarget.hasQPX()) { 5708 if (++NumFPRsUsed <= NumFPRs) 5709 continue; 5710 } else { 5711 if (++NumVRsUsed <= NumVRs) 5712 continue; 5713 } 5714 break; 5715 case MVT::f32: 5716 case MVT::f64: 5717 case MVT::v4f64: // QPX 5718 case MVT::v4i1: // QPX 5719 if (++NumFPRsUsed <= NumFPRs) 5720 continue; 5721 break; 5722 } 5723 HasParameterArea = true; 5724 } 5725 } 5726 5727 /* Respect alignment of argument on the stack. 
*/ 5728 unsigned Align = 5729 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); 5730 NumBytes = ((NumBytes + Align - 1) / Align) * Align; 5731 5732 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); 5733 if (Flags.isInConsecutiveRegsLast()) 5734 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 5735 } 5736 5737 unsigned NumBytesActuallyUsed = NumBytes; 5738 5739 // In the old ELFv1 ABI, 5740 // the prolog code of the callee may store up to 8 GPR argument registers to 5741 // the stack, allowing va_start to index over them in memory if it is varargs. 5742 // Because we cannot tell if this is needed on the caller side, we have to 5743 // conservatively assume that it is needed. As such, make sure we have at 5744 // least enough stack space for the caller to store the 8 GPRs. 5745 // In the ELFv2 ABI, we allocate the parameter area iff a callee 5746 // really requires memory operands, e.g. a vararg function. 5747 if (HasParameterArea) 5748 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize); 5749 else 5750 NumBytes = LinkageSize; 5751 5752 // Tail call needs the stack to be aligned. 5753 if (getTargetMachine().Options.GuaranteedTailCallOpt && 5754 CallConv == CallingConv::Fast) 5755 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes); 5756 5757 int SPDiff = 0; 5758 5759 // Calculate by how many bytes the stack has to be adjusted in case of tail 5760 // call optimization. 5761 if (!IsSibCall) 5762 SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); 5763 5764 // To protect arguments on the stack from being clobbered in a tail call, 5765 // force all the loads to happen before doing any other lowering. 5766 if (isTailCall) 5767 Chain = DAG.getStackArgumentTokenFactor(Chain); 5768 5769 // Adjust the stack pointer for the new arguments... 5770 // These operations are automatically eliminated by the prolog/epilog pass 5771 if (!IsSibCall) 5772 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl); 5773 SDValue CallSeqStart = Chain; 5774 5775 // Load the return address and frame pointer so they can be moved somewhere 5776 // else later. 5777 SDValue LROp, FPOp; 5778 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl); 5779 5780 // Set up a copy of the stack pointer for use loading and storing any 5781 // arguments that may not fit in the registers available for argument 5782 // passing. 5783 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 5784 5785 // Figure out which arguments are going to go in registers, and which in 5786 // memory. Also, if this is a vararg function, floating point arguments 5787 // must be stored to our stack, and loaded into integer regs as well, if 5788 // any integer regs are available for argument passing. 5789 unsigned ArgOffset = LinkageSize; 5790 5791 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 5792 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 5793 5794 SmallVector<SDValue, 8> MemOpChains; 5795 for (unsigned i = 0; i != NumOps; ++i) { 5796 SDValue Arg = OutVals[i]; 5797 ISD::ArgFlagsTy Flags = Outs[i].Flags; 5798 EVT ArgVT = Outs[i].VT; 5799 EVT OrigVT = Outs[i].ArgVT; 5800 5801 // PtrOff will be used to store the current argument to the stack if a 5802 // register cannot be found for it. 5803 SDValue PtrOff; 5804 5805 // We re-align the argument offset for each argument, except when using the 5806 // fast calling convention, when we need to make sure we do that only when 5807 // we'll actually use a stack slot.
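    // For example (illustrative): a v4i32 argument requires 16-byte
    // alignment, so an ArgOffset of 40 is rounded up to 48 by the lambda
    // below before the argument's stack slot (or matching GPR index) is
    // computed.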
5808 auto ComputePtrOff = [&]() { 5809 /* Respect alignment of argument on the stack. */ 5810 unsigned Align = 5811 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); 5812 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; 5813 5814 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType()); 5815 5816 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 5817 }; 5818 5819 if (CallConv != CallingConv::Fast) { 5820 ComputePtrOff(); 5821 5822 /* Compute GPR index associated with argument offset. */ 5823 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; 5824 GPR_idx = std::min(GPR_idx, NumGPRs); 5825 } 5826 5827 // Promote integers to 64-bit values. 5828 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) { 5829 // FIXME: Should this use ANY_EXTEND if neither sext nor zext? 5830 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 5831 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); 5832 } 5833 5834 // FIXME memcpy is used way more than necessary. Correctness first. 5835 // Note: "by value" is code for passing a structure by value, not 5836 // basic types. 5837 if (Flags.isByVal()) { 5838 // Note: Size includes alignment padding, so 5839 // struct x { short a; char b; } 5840 // will have Size = 4. With #pragma pack(1), it will have Size = 3. 5841 // These are the proper values we need for right-justifying the 5842 // aggregate in a parameter register. 5843 unsigned Size = Flags.getByValSize(); 5844 5845 // An empty aggregate parameter takes up no storage and no 5846 // registers. 5847 if (Size == 0) 5848 continue; 5849 5850 if (CallConv == CallingConv::Fast) 5851 ComputePtrOff(); 5852 5853 // All aggregates smaller than 8 bytes must be passed right-justified. 5854 if (Size==1 || Size==2 || Size==4) { 5855 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32); 5856 if (GPR_idx != NumGPRs) { 5857 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, 5858 MachinePointerInfo(), VT); 5859 MemOpChains.push_back(Load.getValue(1)); 5860 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5861 5862 ArgOffset += PtrByteSize; 5863 continue; 5864 } 5865 } 5866 5867 if (GPR_idx == NumGPRs && Size < 8) { 5868 SDValue AddPtr = PtrOff; 5869 if (!isLittleEndian) { 5870 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl, 5871 PtrOff.getValueType()); 5872 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 5873 } 5874 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 5875 CallSeqStart, 5876 Flags, DAG, dl); 5877 ArgOffset += PtrByteSize; 5878 continue; 5879 } 5880 // Copy entire object into memory. There are cases where gcc-generated 5881 // code assumes it is there, even if it could be put entirely into 5882 // registers. (This is not what the doc says.) 5883 5884 // FIXME: The above statement is likely due to a misunderstanding of the 5885 // documents. All arguments must be copied into the parameter area BY 5886 // THE CALLEE in the event that the callee takes the address of any 5887 // formal argument. That has not yet been implemented. However, it is 5888 // reasonable to use the stack area as a staging area for the register 5889 // load. 5890 5891 // Skip this for small aggregates, as we will use the same slot for a 5892 // right-justified copy, below. 5893 if (Size >= 8) 5894 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, 5895 CallSeqStart, 5896 Flags, DAG, dl); 5897 5898 // When a register is available, pass a small aggregate right-justified. 
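// For example, a 3-byte aggregate on a big-endian target is memcpy'd to
// PtrOff + (8 - 3) so that the doubleword load below leaves its bytes in
// the low-order end of the GPR; on little-endian targets no displacement
// is needed.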
5899 if (Size < 8 && GPR_idx != NumGPRs) { 5900 // The easiest way to get this right-justified in a register 5901 // is to copy the structure into the rightmost portion of a 5902 // local variable slot, then load the whole slot into the 5903 // register. 5904 // FIXME: The memcpy seems to produce pretty awful code for 5905 // small aggregates, particularly for packed ones. 5906 // FIXME: It would be preferable to use the slot in the 5907 // parameter save area instead of a new local variable. 5908 SDValue AddPtr = PtrOff; 5909 if (!isLittleEndian) { 5910 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType()); 5911 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 5912 } 5913 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 5914 CallSeqStart, 5915 Flags, DAG, dl); 5916 5917 // Load the slot into the register. 5918 SDValue Load = 5919 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo()); 5920 MemOpChains.push_back(Load.getValue(1)); 5921 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5922 5923 // Done with this argument. 5924 ArgOffset += PtrByteSize; 5925 continue; 5926 } 5927 5928 // For aggregates larger than PtrByteSize, copy the pieces of the 5929 // object that fit into registers from the parameter save area. 5930 for (unsigned j=0; j<Size; j+=PtrByteSize) { 5931 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType()); 5932 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); 5933 if (GPR_idx != NumGPRs) { 5934 SDValue Load = 5935 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo()); 5936 MemOpChains.push_back(Load.getValue(1)); 5937 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5938 ArgOffset += PtrByteSize; 5939 } else { 5940 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize; 5941 break; 5942 } 5943 } 5944 continue; 5945 } 5946 5947 switch (Arg.getSimpleValueType().SimpleTy) { 5948 default: llvm_unreachable("Unexpected ValueType for argument!"); 5949 case MVT::i1: 5950 case MVT::i32: 5951 case MVT::i64: 5952 if (Flags.isNest()) { 5953 // The 'nest' parameter, if any, is passed in R11. 5954 RegsToPass.push_back(std::make_pair(PPC::X11, Arg)); 5955 hasNest = true; 5956 break; 5957 } 5958 5959 // These can be scalar arguments or elements of an integer array type 5960 // passed directly. Clang may use those instead of "byval" aggregate 5961 // types to avoid forcing arguments to memory unnecessarily. 5962 if (GPR_idx != NumGPRs) { 5963 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); 5964 } else { 5965 if (CallConv == CallingConv::Fast) 5966 ComputePtrOff(); 5967 5968 assert(HasParameterArea && 5969 "Parameter area must exist to pass an argument in memory."); 5970 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5971 true, isTailCall, false, MemOpChains, 5972 TailCallArguments, dl); 5973 if (CallConv == CallingConv::Fast) 5974 ArgOffset += PtrByteSize; 5975 } 5976 if (CallConv != CallingConv::Fast) 5977 ArgOffset += PtrByteSize; 5978 break; 5979 case MVT::f32: 5980 case MVT::f64: { 5981 // These can be scalar arguments or elements of a float array type 5982 // passed directly. The latter are used to implement ELFv2 homogenous 5983 // float aggregates. 5984 5985 // Named arguments go into FPRs first, and once they overflow, the 5986 // remaining arguments go into GPRs and then the parameter save area. 5987 // Unnamed arguments for vararg functions always go to GPRs and 5988 // then the parameter save area. 
For now, put all arguments to vararg 5989 // routines always in both locations (FPR *and* GPR or stack slot). 5990 bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs; 5991 bool NeededLoad = false; 5992 5993 // First load the argument into the next available FPR. 5994 if (FPR_idx != NumFPRs) 5995 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); 5996 5997 // Next, load the argument into GPR or stack slot if needed. 5998 if (!NeedGPROrStack) 5999 ; 6000 else if (GPR_idx != NumGPRs && CallConv != CallingConv::Fast) { 6001 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8 6002 // once we support fp <-> gpr moves. 6003 6004 // In the non-vararg case, this can only ever happen in the 6005 // presence of f32 array types, since otherwise we never run 6006 // out of FPRs before running out of GPRs. 6007 SDValue ArgVal; 6008 6009 // Double values are always passed in a single GPR. 6010 if (Arg.getValueType() != MVT::f32) { 6011 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg); 6012 6013 // Non-array float values are extended and passed in a GPR. 6014 } else if (!Flags.isInConsecutiveRegs()) { 6015 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); 6016 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal); 6017 6018 // If we have an array of floats, we collect every odd element 6019 // together with its predecessor into one GPR. 6020 } else if (ArgOffset % PtrByteSize != 0) { 6021 SDValue Lo, Hi; 6022 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]); 6023 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); 6024 if (!isLittleEndian) 6025 std::swap(Lo, Hi); 6026 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); 6027 6028 // The final element, if even, goes into the first half of a GPR. 6029 } else if (Flags.isInConsecutiveRegsLast()) { 6030 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); 6031 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal); 6032 if (!isLittleEndian) 6033 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal, 6034 DAG.getConstant(32, dl, MVT::i32)); 6035 6036 // Non-final even elements are skipped; they will be handled 6037 // together the with subsequent argument on the next go-around. 6038 } else 6039 ArgVal = SDValue(); 6040 6041 if (ArgVal.getNode()) 6042 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal)); 6043 } else { 6044 if (CallConv == CallingConv::Fast) 6045 ComputePtrOff(); 6046 6047 // Single-precision floating-point values are mapped to the 6048 // second (rightmost) word of the stack doubleword. 6049 if (Arg.getValueType() == MVT::f32 && 6050 !isLittleEndian && !Flags.isInConsecutiveRegs()) { 6051 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType()); 6052 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); 6053 } 6054 6055 assert(HasParameterArea && 6056 "Parameter area must exist to pass an argument in memory."); 6057 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 6058 true, isTailCall, false, MemOpChains, 6059 TailCallArguments, dl); 6060 6061 NeededLoad = true; 6062 } 6063 // When passing an array of floats, the array occupies consecutive 6064 // space in the argument area; only round up to the next doubleword 6065 // at the end of the array. Otherwise, each float takes 8 bytes. 6066 if (CallConv != CallingConv::Fast || NeededLoad) { 6067 ArgOffset += (Arg.getValueType() == MVT::f32 && 6068 Flags.isInConsecutiveRegs()) ? 
4 : 8; 6069 if (Flags.isInConsecutiveRegsLast()) 6070 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 6071 } 6072 break; 6073 } 6074 case MVT::v4f32: 6075 case MVT::v4i32: 6076 case MVT::v8i16: 6077 case MVT::v16i8: 6078 case MVT::v2f64: 6079 case MVT::v2i64: 6080 case MVT::v1i128: 6081 case MVT::f128: 6082 if (!Subtarget.hasQPX()) { 6083 // These can be scalar arguments or elements of a vector array type 6084 // passed directly. The latter are used to implement ELFv2 homogenous 6085 // vector aggregates. 6086 6087 // For a varargs call, named arguments go into VRs or on the stack as 6088 // usual; unnamed arguments always go to the stack or the corresponding 6089 // GPRs when within range. For now, we always put the value in both 6090 // locations (or even all three). 6091 if (isVarArg) { 6092 assert(HasParameterArea && 6093 "Parameter area must exist if we have a varargs call."); 6094 // We could elide this store in the case where the object fits 6095 // entirely in R registers. Maybe later. 6096 SDValue Store = 6097 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()); 6098 MemOpChains.push_back(Store); 6099 if (VR_idx != NumVRs) { 6100 SDValue Load = 6101 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo()); 6102 MemOpChains.push_back(Load.getValue(1)); 6103 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); 6104 } 6105 ArgOffset += 16; 6106 for (unsigned i=0; i<16; i+=PtrByteSize) { 6107 if (GPR_idx == NumGPRs) 6108 break; 6109 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, 6110 DAG.getConstant(i, dl, PtrVT)); 6111 SDValue Load = 6112 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo()); 6113 MemOpChains.push_back(Load.getValue(1)); 6114 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 6115 } 6116 break; 6117 } 6118 6119 // Non-varargs Altivec params go into VRs or on the stack. 6120 if (VR_idx != NumVRs) { 6121 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg)); 6122 } else { 6123 if (CallConv == CallingConv::Fast) 6124 ComputePtrOff(); 6125 6126 assert(HasParameterArea && 6127 "Parameter area must exist to pass an argument in memory."); 6128 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 6129 true, isTailCall, true, MemOpChains, 6130 TailCallArguments, dl); 6131 if (CallConv == CallingConv::Fast) 6132 ArgOffset += 16; 6133 } 6134 6135 if (CallConv != CallingConv::Fast) 6136 ArgOffset += 16; 6137 break; 6138 } // not QPX 6139 6140 assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 && 6141 "Invalid QPX parameter type"); 6142 6143 LLVM_FALLTHROUGH; 6144 case MVT::v4f64: 6145 case MVT::v4i1: { 6146 bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32; 6147 if (isVarArg) { 6148 assert(HasParameterArea && 6149 "Parameter area must exist if we have a varargs call."); 6150 // We could elide this store in the case where the object fits 6151 // entirely in R registers. Maybe later. 6152 SDValue Store = 6153 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()); 6154 MemOpChains.push_back(Store); 6155 if (QFPR_idx != NumQFPRs) { 6156 SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl, Store, 6157 PtrOff, MachinePointerInfo()); 6158 MemOpChains.push_back(Load.getValue(1)); 6159 RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load)); 6160 } 6161 ArgOffset += (IsF32 ? 16 : 32); 6162 for (unsigned i = 0; i < (IsF32 ? 
16U : 32U); i += PtrByteSize) { 6163 if (GPR_idx == NumGPRs) 6164 break; 6165 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, 6166 DAG.getConstant(i, dl, PtrVT)); 6167 SDValue Load = 6168 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo()); 6169 MemOpChains.push_back(Load.getValue(1)); 6170 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 6171 } 6172 break; 6173 } 6174 6175 // Non-varargs QPX params go into registers or on the stack. 6176 if (QFPR_idx != NumQFPRs) { 6177 RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg)); 6178 } else { 6179 if (CallConv == CallingConv::Fast) 6180 ComputePtrOff(); 6181 6182 assert(HasParameterArea && 6183 "Parameter area must exist to pass an argument in memory."); 6184 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 6185 true, isTailCall, true, MemOpChains, 6186 TailCallArguments, dl); 6187 if (CallConv == CallingConv::Fast) 6188 ArgOffset += (IsF32 ? 16 : 32); 6189 } 6190 6191 if (CallConv != CallingConv::Fast) 6192 ArgOffset += (IsF32 ? 16 : 32); 6193 break; 6194 } 6195 } 6196 } 6197 6198 assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) && 6199 "mismatch in size of parameter area"); 6200 (void)NumBytesActuallyUsed; 6201 6202 if (!MemOpChains.empty()) 6203 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); 6204 6205 // Check if this is an indirect call (MTCTR/BCTRL). 6206 // See PrepareCall() for more information about calls through function 6207 // pointers in the 64-bit SVR4 ABI. 6208 if (!isTailCall && !isPatchPoint && 6209 !isFunctionGlobalAddress(Callee) && 6210 !isa<ExternalSymbolSDNode>(Callee)) { 6211 // Load r2 into a virtual register and store it to the TOC save area. 6212 setUsesTOCBasePtr(DAG); 6213 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64); 6214 // TOC save area offset. 6215 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); 6216 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl); 6217 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 6218 Chain = DAG.getStore( 6219 Val.getValue(1), dl, Val, AddPtr, 6220 MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset)); 6221 // In the ELFv2 ABI, R12 must contain the address of an indirect callee. 6222 // This does not mean the MTCTR instruction must use R12; it's easier 6223 // to model this as an extra parameter, so do that. 6224 if (isELFv2ABI && !isPatchPoint) 6225 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee)); 6226 } 6227 6228 // Build a sequence of copy-to-reg nodes chained together with token chain 6229 // and flag operands which copy the outgoing args into the appropriate regs. 
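// The glue value threads each CopyToReg into the next one, keeping the
// register copies together so they feed directly into the call node built
// by FinishCall.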
6230 SDValue InFlag; 6231 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 6232 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 6233 RegsToPass[i].second, InFlag); 6234 InFlag = Chain.getValue(1); 6235 } 6236 6237 if (isTailCall && !IsSibCall) 6238 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp, 6239 TailCallArguments); 6240 6241 return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, hasNest, 6242 DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee, 6243 SPDiff, NumBytes, Ins, InVals, CS); 6244 } 6245 6246 SDValue PPCTargetLowering::LowerCall_Darwin( 6247 SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, 6248 bool isTailCall, bool isPatchPoint, 6249 const SmallVectorImpl<ISD::OutputArg> &Outs, 6250 const SmallVectorImpl<SDValue> &OutVals, 6251 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 6252 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, 6253 ImmutableCallSite CS) const { 6254 unsigned NumOps = Outs.size(); 6255 6256 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 6257 bool isPPC64 = PtrVT == MVT::i64; 6258 unsigned PtrByteSize = isPPC64 ? 8 : 4; 6259 6260 MachineFunction &MF = DAG.getMachineFunction(); 6261 6262 // Mark this function as potentially containing a function that contains a 6263 // tail call. As a consequence the frame pointer will be used for dynamicalloc 6264 // and restoring the callers stack pointer in this functions epilog. This is 6265 // done because by tail calling the called function might overwrite the value 6266 // in this function's (MF) stack pointer stack slot 0(SP). 6267 if (getTargetMachine().Options.GuaranteedTailCallOpt && 6268 CallConv == CallingConv::Fast) 6269 MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); 6270 6271 // Count how many bytes are to be pushed on the stack, including the linkage 6272 // area, and parameter passing area. We start with 24/48 bytes, which is 6273 // prereserved space for [SP][CR][LR][3 x unused]. 6274 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); 6275 unsigned NumBytes = LinkageSize; 6276 6277 // Add up all the space actually used. 6278 // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually 6279 // they all go in registers, but we must reserve stack space for them for 6280 // possible use by the caller. In varargs or 64-bit calls, parameters are 6281 // assigned stack space in order, with padding so Altivec parameters are 6282 // 16-byte aligned. 6283 unsigned nAltivecParamsAtEnd = 0; 6284 for (unsigned i = 0; i != NumOps; ++i) { 6285 ISD::ArgFlagsTy Flags = Outs[i].Flags; 6286 EVT ArgVT = Outs[i].VT; 6287 // Varargs Altivec parameters are padded to a 16 byte boundary. 6288 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || 6289 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || 6290 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) { 6291 if (!isVarArg && !isPPC64) { 6292 // Non-varargs Altivec parameters go after all the non-Altivec 6293 // parameters; handle those later so we know how much padding we need. 6294 nAltivecParamsAtEnd++; 6295 continue; 6296 } 6297 // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary. 6298 NumBytes = ((NumBytes+15)/16)*16; 6299 } 6300 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); 6301 } 6302 6303 // Allow for Altivec parameters at the end, if needed. 
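// For example, with two Altivec parameters deferred to the end
// (nAltivecParamsAtEnd == 2), NumBytes is first rounded up to a 16-byte
// boundary and then grows by 2 * 16 == 32 bytes.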
6304 if (nAltivecParamsAtEnd) { 6305 NumBytes = ((NumBytes+15)/16)*16; 6306 NumBytes += 16*nAltivecParamsAtEnd; 6307 } 6308 6309 // The prolog code of the callee may store up to 8 GPR argument registers to 6310 // the stack, allowing va_start to index over them in memory if its varargs. 6311 // Because we cannot tell if this is needed on the caller side, we have to 6312 // conservatively assume that it is needed. As such, make sure we have at 6313 // least enough stack space for the caller to store the 8 GPRs. 6314 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize); 6315 6316 // Tail call needs the stack to be aligned. 6317 if (getTargetMachine().Options.GuaranteedTailCallOpt && 6318 CallConv == CallingConv::Fast) 6319 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes); 6320 6321 // Calculate by how many bytes the stack has to be adjusted in case of tail 6322 // call optimization. 6323 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); 6324 6325 // To protect arguments on the stack from being clobbered in a tail call, 6326 // force all the loads to happen before doing any other lowering. 6327 if (isTailCall) 6328 Chain = DAG.getStackArgumentTokenFactor(Chain); 6329 6330 // Adjust the stack pointer for the new arguments... 6331 // These operations are automatically eliminated by the prolog/epilog pass 6332 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl); 6333 SDValue CallSeqStart = Chain; 6334 6335 // Load the return address and frame pointer so it can be move somewhere else 6336 // later. 6337 SDValue LROp, FPOp; 6338 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl); 6339 6340 // Set up a copy of the stack pointer for use loading and storing any 6341 // arguments that may not fit in the registers available for argument 6342 // passing. 6343 SDValue StackPtr; 6344 if (isPPC64) 6345 StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 6346 else 6347 StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 6348 6349 // Figure out which arguments are going to go in registers, and which in 6350 // memory. Also, if this is a vararg function, floating point operations 6351 // must be stored to our stack, and loaded into integer regs as well, if 6352 // any integer regs are available for argument passing. 6353 unsigned ArgOffset = LinkageSize; 6354 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 6355 6356 static const MCPhysReg GPR_32[] = { // 32-bit registers. 6357 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 6358 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 6359 }; 6360 static const MCPhysReg GPR_64[] = { // 64-bit registers. 6361 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 6362 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 6363 }; 6364 static const MCPhysReg VR[] = { 6365 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 6366 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 6367 }; 6368 const unsigned NumGPRs = array_lengthof(GPR_32); 6369 const unsigned NumFPRs = 13; 6370 const unsigned NumVRs = array_lengthof(VR); 6371 6372 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32; 6373 6374 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 6375 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 6376 6377 SmallVector<SDValue, 8> MemOpChains; 6378 for (unsigned i = 0; i != NumOps; ++i) { 6379 SDValue Arg = OutVals[i]; 6380 ISD::ArgFlagsTy Flags = Outs[i].Flags; 6381 6382 // PtrOff will be used to store the current argument to the stack if a 6383 // register cannot be found for it. 
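// Unlike the fast-calling-convention case in the 64-bit SVR4 lowering
// above, the stack offset of every Darwin argument is known up front, so
// PtrOff is formed directly from ArgOffset for each argument.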
6384 SDValue PtrOff; 6385 6386 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType()); 6387 6388 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 6389 6390 // On PPC64, promote integers to 64-bit values. 6391 if (isPPC64 && Arg.getValueType() == MVT::i32) { 6392 // FIXME: Should this use ANY_EXTEND if neither sext nor zext? 6393 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 6394 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); 6395 } 6396 6397 // FIXME memcpy is used way more than necessary. Correctness first. 6398 // Note: "by value" is code for passing a structure by value, not 6399 // basic types. 6400 if (Flags.isByVal()) { 6401 unsigned Size = Flags.getByValSize(); 6402 // Very small objects are passed right-justified. Everything else is 6403 // passed left-justified. 6404 if (Size==1 || Size==2) { 6405 EVT VT = (Size==1) ? MVT::i8 : MVT::i16; 6406 if (GPR_idx != NumGPRs) { 6407 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, 6408 MachinePointerInfo(), VT); 6409 MemOpChains.push_back(Load.getValue(1)); 6410 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 6411 6412 ArgOffset += PtrByteSize; 6413 } else { 6414 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl, 6415 PtrOff.getValueType()); 6416 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 6417 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 6418 CallSeqStart, 6419 Flags, DAG, dl); 6420 ArgOffset += PtrByteSize; 6421 } 6422 continue; 6423 } 6424 // Copy entire object into memory. There are cases where gcc-generated 6425 // code assumes it is there, even if it could be put entirely into 6426 // registers. (This is not what the doc says.) 6427 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, 6428 CallSeqStart, 6429 Flags, DAG, dl); 6430 6431 // For small aggregates (Darwin only) and aggregates >= PtrByteSize, 6432 // copy the pieces of the object that fit into registers from the 6433 // parameter save area. 
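// For example, a 20-byte aggregate with PtrByteSize == 8 and three free
// GPRs is forwarded as three doubleword loads from the argument's memory,
// advancing ArgOffset by 24.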
6434 for (unsigned j=0; j<Size; j+=PtrByteSize) { 6435 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType()); 6436 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); 6437 if (GPR_idx != NumGPRs) { 6438 SDValue Load = 6439 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo()); 6440 MemOpChains.push_back(Load.getValue(1)); 6441 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 6442 ArgOffset += PtrByteSize; 6443 } else { 6444 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize; 6445 break; 6446 } 6447 } 6448 continue; 6449 } 6450 6451 switch (Arg.getSimpleValueType().SimpleTy) { 6452 default: llvm_unreachable("Unexpected ValueType for argument!"); 6453 case MVT::i1: 6454 case MVT::i32: 6455 case MVT::i64: 6456 if (GPR_idx != NumGPRs) { 6457 if (Arg.getValueType() == MVT::i1) 6458 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg); 6459 6460 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); 6461 } else { 6462 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 6463 isPPC64, isTailCall, false, MemOpChains, 6464 TailCallArguments, dl); 6465 } 6466 ArgOffset += PtrByteSize; 6467 break; 6468 case MVT::f32: 6469 case MVT::f64: 6470 if (FPR_idx != NumFPRs) { 6471 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); 6472 6473 if (isVarArg) { 6474 SDValue Store = 6475 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()); 6476 MemOpChains.push_back(Store); 6477 6478 // Float varargs are always shadowed in available integer registers 6479 if (GPR_idx != NumGPRs) { 6480 SDValue Load = 6481 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo()); 6482 MemOpChains.push_back(Load.getValue(1)); 6483 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 6484 } 6485 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){ 6486 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType()); 6487 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); 6488 SDValue Load = 6489 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo()); 6490 MemOpChains.push_back(Load.getValue(1)); 6491 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 6492 } 6493 } else { 6494 // If we have any FPRs remaining, we may also have GPRs remaining. 6495 // Args passed in FPRs consume either 1 (f32) or 2 (f64) available 6496 // GPRs. 6497 if (GPR_idx != NumGPRs) 6498 ++GPR_idx; 6499 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && 6500 !isPPC64) // PPC64 has 64-bit GPR's obviously :) 6501 ++GPR_idx; 6502 } 6503 } else 6504 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 6505 isPPC64, isTailCall, false, MemOpChains, 6506 TailCallArguments, dl); 6507 if (isPPC64) 6508 ArgOffset += 8; 6509 else 6510 ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8; 6511 break; 6512 case MVT::v4f32: 6513 case MVT::v4i32: 6514 case MVT::v8i16: 6515 case MVT::v16i8: 6516 if (isVarArg) { 6517 // These go aligned on the stack, or in the corresponding R registers 6518 // when within range. The Darwin PPC ABI doc claims they also go in 6519 // V registers; in fact gcc does this only for arguments that are 6520 // prototyped, not for those that match the ... We do it for all 6521 // arguments, seems to work. 6522 while (ArgOffset % 16 !=0) { 6523 ArgOffset += PtrByteSize; 6524 if (GPR_idx != NumGPRs) 6525 GPR_idx++; 6526 } 6527 // We could elide this store in the case where the object fits 6528 // entirely in R registers. Maybe later. 
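// Recompute PtrOff from the now 16-byte aligned ArgOffset, store the
// vector there, and shadow it in a VR and in any remaining GPRs so a
// varargs callee can find it in either place.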
6529 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, 6530 DAG.getConstant(ArgOffset, dl, PtrVT)); 6531 SDValue Store = 6532 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()); 6533 MemOpChains.push_back(Store); 6534 if (VR_idx != NumVRs) { 6535 SDValue Load = 6536 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo()); 6537 MemOpChains.push_back(Load.getValue(1)); 6538 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); 6539 } 6540 ArgOffset += 16; 6541 for (unsigned i=0; i<16; i+=PtrByteSize) { 6542 if (GPR_idx == NumGPRs) 6543 break; 6544 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, 6545 DAG.getConstant(i, dl, PtrVT)); 6546 SDValue Load = 6547 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo()); 6548 MemOpChains.push_back(Load.getValue(1)); 6549 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 6550 } 6551 break; 6552 } 6553 6554 // Non-varargs Altivec params generally go in registers, but have 6555 // stack space allocated at the end. 6556 if (VR_idx != NumVRs) { 6557 // Doesn't have GPR space allocated. 6558 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg)); 6559 } else if (nAltivecParamsAtEnd==0) { 6560 // We are emitting Altivec params in order. 6561 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 6562 isPPC64, isTailCall, true, MemOpChains, 6563 TailCallArguments, dl); 6564 ArgOffset += 16; 6565 } 6566 break; 6567 } 6568 } 6569 // If all Altivec parameters fit in registers, as they usually do, 6570 // they get stack space following the non-Altivec parameters. We 6571 // don't track this here because nobody below needs it. 6572 // If there are more Altivec parameters than fit in registers emit 6573 // the stores here. 6574 if (!isVarArg && nAltivecParamsAtEnd > NumVRs) { 6575 unsigned j = 0; 6576 // Offset is aligned; skip 1st 12 params which go in V registers. 6577 ArgOffset = ((ArgOffset+15)/16)*16; 6578 ArgOffset += 12*16; 6579 for (unsigned i = 0; i != NumOps; ++i) { 6580 SDValue Arg = OutVals[i]; 6581 EVT ArgType = Outs[i].VT; 6582 if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 || 6583 ArgType==MVT::v8i16 || ArgType==MVT::v16i8) { 6584 if (++j > NumVRs) { 6585 SDValue PtrOff; 6586 // We are emitting Altivec params in order. 6587 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 6588 isPPC64, isTailCall, true, MemOpChains, 6589 TailCallArguments, dl); 6590 ArgOffset += 16; 6591 } 6592 } 6593 } 6594 } 6595 6596 if (!MemOpChains.empty()) 6597 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); 6598 6599 // On Darwin, R12 must contain the address of an indirect callee. This does 6600 // not mean the MTCTR instruction must use R12; it's easier to model this as 6601 // an extra parameter, so do that. 6602 if (!isTailCall && 6603 !isFunctionGlobalAddress(Callee) && 6604 !isa<ExternalSymbolSDNode>(Callee) && 6605 !isBLACompatibleAddress(Callee, DAG)) 6606 RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 : 6607 PPC::R12), Callee)); 6608 6609 // Build a sequence of copy-to-reg nodes chained together with token chain 6610 // and flag operands which copy the outgoing args into the appropriate regs. 
6611 SDValue InFlag; 6612 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 6613 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 6614 RegsToPass[i].second, InFlag); 6615 InFlag = Chain.getValue(1); 6616 } 6617 6618 if (isTailCall) 6619 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp, 6620 TailCallArguments); 6621 6622 return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, 6623 /* unused except on PPC64 ELFv1 */ false, DAG, 6624 RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, 6625 NumBytes, Ins, InVals, CS); 6626 } 6627 6628 6629 SDValue PPCTargetLowering::LowerCall_AIX( 6630 SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, 6631 bool isTailCall, bool isPatchPoint, 6632 const SmallVectorImpl<ISD::OutputArg> &Outs, 6633 const SmallVectorImpl<SDValue> &OutVals, 6634 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 6635 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, 6636 ImmutableCallSite CS) const { 6637 6638 assert((CallConv == CallingConv::C || CallConv == CallingConv::Fast) && 6639 "Unimplemented calling convention!"); 6640 if (isVarArg || isPatchPoint) 6641 report_fatal_error("This call type is unimplemented on AIX."); 6642 6643 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 6644 bool isPPC64 = PtrVT == MVT::i64; 6645 unsigned PtrByteSize = isPPC64 ? 8 : 4; 6646 unsigned NumOps = Outs.size(); 6647 6648 6649 // Count how many bytes are to be pushed on the stack, including the linkage 6650 // area, parameter list area. 6651 // On XCOFF, we start with 24/48, which is reserved space for 6652 // [SP][CR][LR][2 x reserved][TOC]. 6653 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); 6654 6655 // The prolog code of the callee may store up to 8 GPR argument registers to 6656 // the stack, allowing va_start to index over them in memory if the callee 6657 // is variadic. 6658 // Because we cannot tell if this is needed on the caller side, we have to 6659 // conservatively assume that it is needed. As such, make sure we have at 6660 // least enough stack space for the caller to store the 8 GPRs. 6661 unsigned NumBytes = LinkageSize + 8 * PtrByteSize; 6662 6663 // Adjust the stack pointer for the new arguments... 6664 // These operations are automatically eliminated by the prolog/epilog 6665 // inserter pass. 6666 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl); 6667 SDValue CallSeqStart = Chain; 6668 6669 static const MCPhysReg GPR_32[] = { // 32-bit registers. 6670 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 6671 PPC::R7, PPC::R8, PPC::R9, PPC::R10 6672 }; 6673 static const MCPhysReg GPR_64[] = { // 64-bit registers. 6674 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 6675 PPC::X7, PPC::X8, PPC::X9, PPC::X10 6676 }; 6677 6678 const unsigned NumGPRs = isPPC64 ? array_lengthof(GPR_64) 6679 : array_lengthof(GPR_32); 6680 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32; 6681 unsigned GPR_idx = 0; 6682 6683 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 6684 6685 if (isTailCall) 6686 report_fatal_error("Handling of tail call is unimplemented!"); 6687 int SPDiff = 0; 6688 6689 for (unsigned i = 0; i != NumOps; ++i) { 6690 SDValue Arg = OutVals[i]; 6691 ISD::ArgFlagsTy Flags = Outs[i].Flags; 6692 6693 // Promote integers if needed. 6694 if (Arg.getValueType() == MVT::i1 || 6695 (isPPC64 && Arg.getValueType() == MVT::i32)) { 6696 unsigned ExtOp = Flags.isSExt() ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 6697 Arg = DAG.getNode(ExtOp, dl, PtrVT, Arg); 6698 } 6699 6700 // Note: "by value" is code for passing a structure by value, not 6701 // basic types. 6702 if (Flags.isByVal()) 6703 report_fatal_error("Passing structure by value is unimplemented!"); 6704 6705 switch (Arg.getSimpleValueType().SimpleTy) { 6706 default: llvm_unreachable("Unexpected ValueType for argument!"); 6707 case MVT::i1: 6708 case MVT::i32: 6709 case MVT::i64: 6710 if (GPR_idx != NumGPRs) 6711 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); 6712 else 6713 report_fatal_error("Handling of placing parameters on the stack is " 6714 "unimplemented!"); 6715 break; 6716 case MVT::f32: 6717 case MVT::f64: 6718 case MVT::v4f32: 6719 case MVT::v4i32: 6720 case MVT::v8i16: 6721 case MVT::v16i8: 6722 case MVT::v2f64: 6723 case MVT::v2i64: 6724 case MVT::v1i128: 6725 case MVT::f128: 6726 case MVT::v4f64: 6727 case MVT::v4i1: 6728 report_fatal_error("Handling of this parameter type is unimplemented!"); 6729 } 6730 } 6731 6732 if (!isFunctionGlobalAddress(Callee) && 6733 !isa<ExternalSymbolSDNode>(Callee)) 6734 report_fatal_error("Handling of indirect call is unimplemented!"); 6735 6736 // Build a sequence of copy-to-reg nodes chained together with token chain 6737 // and flag operands which copy the outgoing args into the appropriate regs. 6738 SDValue InFlag; 6739 for (auto Reg : RegsToPass) { 6740 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InFlag); 6741 InFlag = Chain.getValue(1); 6742 } 6743 6744 return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, 6745 /* unused except on PPC64 ELFv1 */ false, DAG, 6746 RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, 6747 NumBytes, Ins, InVals, CS); 6748 } 6749 6750 bool 6751 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv, 6752 MachineFunction &MF, bool isVarArg, 6753 const SmallVectorImpl<ISD::OutputArg> &Outs, 6754 LLVMContext &Context) const { 6755 SmallVector<CCValAssign, 16> RVLocs; 6756 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); 6757 return CCInfo.CheckReturn( 6758 Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold) 6759 ? RetCC_PPC_Cold 6760 : RetCC_PPC); 6761 } 6762 6763 SDValue 6764 PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, 6765 bool isVarArg, 6766 const SmallVectorImpl<ISD::OutputArg> &Outs, 6767 const SmallVectorImpl<SDValue> &OutVals, 6768 const SDLoc &dl, SelectionDAG &DAG) const { 6769 SmallVector<CCValAssign, 16> RVLocs; 6770 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, 6771 *DAG.getContext()); 6772 CCInfo.AnalyzeReturn(Outs, 6773 (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold) 6774 ? RetCC_PPC_Cold 6775 : RetCC_PPC); 6776 6777 SDValue Flag; 6778 SmallVector<SDValue, 4> RetOps(1, Chain); 6779 6780 // Copy the result values into the output registers. 
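// Each return value is extended as its CCValAssign dictates, copied into
// its assigned physical register with a glued CopyToReg, and the register
// is also listed as an operand of the final RET_FLAG node.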
6781 for (unsigned i = 0; i != RVLocs.size(); ++i) { 6782 CCValAssign &VA = RVLocs[i]; 6783 assert(VA.isRegLoc() && "Can only return in registers!"); 6784 6785 SDValue Arg = OutVals[i]; 6786 6787 switch (VA.getLocInfo()) { 6788 default: llvm_unreachable("Unknown loc info!"); 6789 case CCValAssign::Full: break; 6790 case CCValAssign::AExt: 6791 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); 6792 break; 6793 case CCValAssign::ZExt: 6794 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); 6795 break; 6796 case CCValAssign::SExt: 6797 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); 6798 break; 6799 } 6800 6801 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); 6802 Flag = Chain.getValue(1); 6803 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 6804 } 6805 6806 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo(); 6807 const MCPhysReg *I = 6808 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction()); 6809 if (I) { 6810 for (; *I; ++I) { 6811 6812 if (PPC::G8RCRegClass.contains(*I)) 6813 RetOps.push_back(DAG.getRegister(*I, MVT::i64)); 6814 else if (PPC::F8RCRegClass.contains(*I)) 6815 RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64))); 6816 else if (PPC::CRRCRegClass.contains(*I)) 6817 RetOps.push_back(DAG.getRegister(*I, MVT::i1)); 6818 else if (PPC::VRRCRegClass.contains(*I)) 6819 RetOps.push_back(DAG.getRegister(*I, MVT::Other)); 6820 else 6821 llvm_unreachable("Unexpected register class in CSRsViaCopy!"); 6822 } 6823 } 6824 6825 RetOps[0] = Chain; // Update chain. 6826 6827 // Add the flag if we have it. 6828 if (Flag.getNode()) 6829 RetOps.push_back(Flag); 6830 6831 return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps); 6832 } 6833 6834 SDValue 6835 PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op, 6836 SelectionDAG &DAG) const { 6837 SDLoc dl(Op); 6838 6839 // Get the correct type for integers. 6840 EVT IntVT = Op.getValueType(); 6841 6842 // Get the inputs. 6843 SDValue Chain = Op.getOperand(0); 6844 SDValue FPSIdx = getFramePointerFrameIndex(DAG); 6845 // Build a DYNAREAOFFSET node. 6846 SDValue Ops[2] = {Chain, FPSIdx}; 6847 SDVTList VTs = DAG.getVTList(IntVT); 6848 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops); 6849 } 6850 6851 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, 6852 SelectionDAG &DAG) const { 6853 // When we pop the dynamic allocation we need to restore the SP link. 6854 SDLoc dl(Op); 6855 6856 // Get the correct type for pointers. 6857 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 6858 6859 // Construct the stack pointer operand. 6860 bool isPPC64 = Subtarget.isPPC64(); 6861 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1; 6862 SDValue StackPtr = DAG.getRegister(SP, PtrVT); 6863 6864 // Get the operands for the STACKRESTORE. 6865 SDValue Chain = Op.getOperand(0); 6866 SDValue SaveSP = Op.getOperand(1); 6867 6868 // Load the old link SP. 6869 SDValue LoadLinkSP = 6870 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo()); 6871 6872 // Restore the stack pointer. 6873 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP); 6874 6875 // Store the old link SP. 6876 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo()); 6877 } 6878 6879 SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const { 6880 MachineFunction &MF = DAG.getMachineFunction(); 6881 bool isPPC64 = Subtarget.isPPC64(); 6882 EVT PtrVT = getPointerTy(MF.getDataLayout()); 6883 6884 // Get current frame pointer save index. 
The users of this index will be 6885 // primarily DYNALLOC instructions. 6886 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 6887 int RASI = FI->getReturnAddrSaveIndex(); 6888 6889 // If the return address save index hasn't been defined yet. 6890 if (!RASI) { 6891 // Find the fixed offset of the return address save area. 6892 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset(); 6893 // Allocate the frame index for the return address save area. 6894 RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false); 6895 // Save the result. 6896 FI->setReturnAddrSaveIndex(RASI); 6897 } 6898 return DAG.getFrameIndex(RASI, PtrVT); 6899 } 6900 6901 SDValue 6902 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { 6903 MachineFunction &MF = DAG.getMachineFunction(); 6904 bool isPPC64 = Subtarget.isPPC64(); 6905 EVT PtrVT = getPointerTy(MF.getDataLayout()); 6906 6907 // Get current frame pointer save index. The users of this index will be 6908 // primarily DYNALLOC instructions. 6909 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 6910 int FPSI = FI->getFramePointerSaveIndex(); 6911 6912 // If the frame pointer save index hasn't been defined yet. 6913 if (!FPSI) { 6914 // Find the fixed offset of the frame pointer save area. 6915 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset(); 6916 // Allocate the frame index for the frame pointer save area. 6917 FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true); 6918 // Save the result. 6919 FI->setFramePointerSaveIndex(FPSI); 6920 } 6921 return DAG.getFrameIndex(FPSI, PtrVT); 6922 } 6923 6924 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, 6925 SelectionDAG &DAG) const { 6926 // Get the inputs. 6927 SDValue Chain = Op.getOperand(0); 6928 SDValue Size = Op.getOperand(1); 6929 SDLoc dl(Op); 6930 6931 // Get the correct type for pointers. 6932 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 6933 // Negate the size. 6934 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT, 6935 DAG.getConstant(0, dl, PtrVT), Size); 6936 // Construct a node for the frame pointer save index. 6937 SDValue FPSIdx = getFramePointerFrameIndex(DAG); 6938 // Build a DYNALLOC node. 6939 SDValue Ops[3] = { Chain, NegSize, FPSIdx }; 6940 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other); 6941 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops); 6942 } 6943 6944 SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op, 6945 SelectionDAG &DAG) const { 6946 MachineFunction &MF = DAG.getMachineFunction(); 6947 6948 bool isPPC64 = Subtarget.isPPC64(); 6949 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 6950 6951 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ?
8 : 4, 0, false); 6952 return DAG.getFrameIndex(FI, PtrVT); 6953 } 6954 6955 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op, 6956 SelectionDAG &DAG) const { 6957 SDLoc DL(Op); 6958 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL, 6959 DAG.getVTList(MVT::i32, MVT::Other), 6960 Op.getOperand(0), Op.getOperand(1)); 6961 } 6962 6963 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op, 6964 SelectionDAG &DAG) const { 6965 SDLoc DL(Op); 6966 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other, 6967 Op.getOperand(0), Op.getOperand(1)); 6968 } 6969 6970 SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { 6971 if (Op.getValueType().isVector()) 6972 return LowerVectorLoad(Op, DAG); 6973 6974 assert(Op.getValueType() == MVT::i1 && 6975 "Custom lowering only for i1 loads"); 6976 6977 // First, load 8 bits into 32 bits, then truncate to 1 bit. 6978 6979 SDLoc dl(Op); 6980 LoadSDNode *LD = cast<LoadSDNode>(Op); 6981 6982 SDValue Chain = LD->getChain(); 6983 SDValue BasePtr = LD->getBasePtr(); 6984 MachineMemOperand *MMO = LD->getMemOperand(); 6985 6986 SDValue NewLD = 6987 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain, 6988 BasePtr, MVT::i8, MMO); 6989 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD); 6990 6991 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) }; 6992 return DAG.getMergeValues(Ops, dl); 6993 } 6994 6995 SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { 6996 if (Op.getOperand(1).getValueType().isVector()) 6997 return LowerVectorStore(Op, DAG); 6998 6999 assert(Op.getOperand(1).getValueType() == MVT::i1 && 7000 "Custom lowering only for i1 stores"); 7001 7002 // First, zero extend to 32 bits, then use a truncating store to 8 bits. 7003 7004 SDLoc dl(Op); 7005 StoreSDNode *ST = cast<StoreSDNode>(Op); 7006 7007 SDValue Chain = ST->getChain(); 7008 SDValue BasePtr = ST->getBasePtr(); 7009 SDValue Value = ST->getValue(); 7010 MachineMemOperand *MMO = ST->getMemOperand(); 7011 7012 Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()), 7013 Value); 7014 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO); 7015 } 7016 7017 // FIXME: Remove this once the ANDI glue bug is fixed: 7018 SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { 7019 assert(Op.getValueType() == MVT::i1 && 7020 "Custom lowering only for i1 results"); 7021 7022 SDLoc DL(Op); 7023 return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1, 7024 Op.getOperand(0)); 7025 } 7026 7027 SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op, 7028 SelectionDAG &DAG) const { 7029 7030 // Implements a vector truncate that fits in a vector register as a shuffle. 7031 // We want to legalize vector truncates down to where the source fits in 7032 // a vector register (and target is therefore smaller than vector register 7033 // size). At that point legalization will try to custom lower the sub-legal 7034 // result and get here - where we can contain the truncate as a single target 7035 // operation. 
7036 7037 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows: 7038 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2> 7039 // 7040 // We will implement it for big-endian ordering as this (where x denotes 7041 // undefined): 7042 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to 7043 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u> 7044 // 7045 // The same operation in little-endian ordering will be: 7046 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to 7047 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1> 7048 7049 assert(Op.getValueType().isVector() && "Vector type expected."); 7050 7051 SDLoc DL(Op); 7052 SDValue N1 = Op.getOperand(0); 7053 unsigned SrcSize = N1.getValueType().getSizeInBits(); 7054 assert(SrcSize <= 128 && "Source must fit in an Altivec/VSX vector"); 7055 SDValue WideSrc = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL); 7056 7057 EVT TrgVT = Op.getValueType(); 7058 unsigned TrgNumElts = TrgVT.getVectorNumElements(); 7059 EVT EltVT = TrgVT.getVectorElementType(); 7060 unsigned WideNumElts = 128 / EltVT.getSizeInBits(); 7061 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts); 7062 7063 // First list the elements we want to keep. 7064 unsigned SizeMult = SrcSize / TrgVT.getSizeInBits(); 7065 SmallVector<int, 16> ShuffV; 7066 if (Subtarget.isLittleEndian()) 7067 for (unsigned i = 0; i < TrgNumElts; ++i) 7068 ShuffV.push_back(i * SizeMult); 7069 else 7070 for (unsigned i = 1; i <= TrgNumElts; ++i) 7071 ShuffV.push_back(i * SizeMult - 1); 7072 7073 // Populate the remaining elements with undefs. 7074 for (unsigned i = TrgNumElts; i < WideNumElts; ++i) 7075 // ShuffV.push_back(i + WideNumElts); 7076 ShuffV.push_back(WideNumElts + 1); 7077 7078 SDValue Conv = DAG.getNode(ISD::BITCAST, DL, WideVT, WideSrc); 7079 return DAG.getVectorShuffle(WideVT, DL, Conv, DAG.getUNDEF(WideVT), ShuffV); 7080 } 7081 7082 /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when 7083 /// possible. 7084 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { 7085 // Not FP? Not a fsel. 7086 if (!Op.getOperand(0).getValueType().isFloatingPoint() || 7087 !Op.getOperand(2).getValueType().isFloatingPoint()) 7088 return Op; 7089 7090 // We might be able to do better than this under some circumstances, but in 7091 // general, fsel-based lowering of select is a finite-math-only optimization. 7092 // For more information, see section F.3 of the 2.06 ISA specification. 7093 if (!DAG.getTarget().Options.NoInfsFPMath || 7094 !DAG.getTarget().Options.NoNaNsFPMath) 7095 return Op; 7096 // TODO: Propagate flags from the select rather than global settings. 7097 SDNodeFlags Flags; 7098 Flags.setNoInfs(true); 7099 Flags.setNoNaNs(true); 7100 7101 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); 7102 7103 EVT ResVT = Op.getValueType(); 7104 EVT CmpVT = Op.getOperand(0).getValueType(); 7105 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 7106 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3); 7107 SDLoc dl(Op); 7108 7109 // If the RHS of the comparison is a 0.0, we don't need to do the 7110 // subtraction at all. 7111 SDValue Sel1; 7112 if (isFloatingPointZero(RHS)) 7113 switch (CC) { 7114 default: break; // SETUO etc aren't handled by fsel. 
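// fsel chooses its second operand when the first is greater than or equal
// to zero, so equality against 0.0 is synthesized from two fsels below:
// the inner one requires LHS >= 0 and the outer one, applied to -LHS,
// requires LHS <= 0.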
7115 case ISD::SETNE: 7116 std::swap(TV, FV); 7117 LLVM_FALLTHROUGH; 7118 case ISD::SETEQ: 7119 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 7120 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); 7121 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV); 7122 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits 7123 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1); 7124 return DAG.getNode(PPCISD::FSEL, dl, ResVT, 7125 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV); 7126 case ISD::SETULT: 7127 case ISD::SETLT: 7128 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt 7129 LLVM_FALLTHROUGH; 7130 case ISD::SETOGE: 7131 case ISD::SETGE: 7132 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 7133 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); 7134 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV); 7135 case ISD::SETUGT: 7136 case ISD::SETGT: 7137 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt 7138 LLVM_FALLTHROUGH; 7139 case ISD::SETOLE: 7140 case ISD::SETLE: 7141 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 7142 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); 7143 return DAG.getNode(PPCISD::FSEL, dl, ResVT, 7144 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV); 7145 } 7146 7147 SDValue Cmp; 7148 switch (CC) { 7149 default: break; // SETUO etc aren't handled by fsel. 7150 case ISD::SETNE: 7151 std::swap(TV, FV); 7152 LLVM_FALLTHROUGH; 7153 case ISD::SETEQ: 7154 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags); 7155 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 7156 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 7157 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 7158 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits 7159 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1); 7160 return DAG.getNode(PPCISD::FSEL, dl, ResVT, 7161 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV); 7162 case ISD::SETULT: 7163 case ISD::SETLT: 7164 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags); 7165 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 7166 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 7167 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); 7168 case ISD::SETOGE: 7169 case ISD::SETGE: 7170 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags); 7171 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 7172 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 7173 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 7174 case ISD::SETUGT: 7175 case ISD::SETGT: 7176 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags); 7177 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 7178 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 7179 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); 7180 case ISD::SETOLE: 7181 case ISD::SETLE: 7182 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags); 7183 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 7184 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 7185 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 7186 } 7187 return Op; 7188 } 7189 7190 void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, 7191 SelectionDAG &DAG, 7192 const SDLoc &dl) const { 7193 assert(Op.getOperand(0).getValueType().isFloatingPoint()); 7194 SDValue Src = Op.getOperand(0); 7195 if (Src.getValueType() == 
MVT::f32) 7196 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); 7197 7198 SDValue Tmp; 7199 switch (Op.getSimpleValueType().SimpleTy) { 7200 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); 7201 case MVT::i32: 7202 Tmp = DAG.getNode( 7203 Op.getOpcode() == ISD::FP_TO_SINT 7204 ? PPCISD::FCTIWZ 7205 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ), 7206 dl, MVT::f64, Src); 7207 break; 7208 case MVT::i64: 7209 assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) && 7210 "i64 FP_TO_UINT is supported only with FPCVT"); 7211 Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ : 7212 PPCISD::FCTIDUZ, 7213 dl, MVT::f64, Src); 7214 break; 7215 } 7216 7217 // Convert the FP value to an int value through memory. 7218 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() && 7219 (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()); 7220 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64); 7221 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex(); 7222 MachinePointerInfo MPI = 7223 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); 7224 7225 // Emit a store to the stack slot. 7226 SDValue Chain; 7227 if (i32Stack) { 7228 MachineFunction &MF = DAG.getMachineFunction(); 7229 MachineMemOperand *MMO = 7230 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4); 7231 SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr }; 7232 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, 7233 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO); 7234 } else 7235 Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, MPI); 7236 7237 // Result is a load from the stack slot. If loading 4 bytes, make sure to 7238 // add in a bias on big endian. 7239 if (Op.getValueType() == MVT::i32 && !i32Stack) { 7240 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, 7241 DAG.getConstant(4, dl, FIPtr.getValueType())); 7242 MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4); 7243 } 7244 7245 RLI.Chain = Chain; 7246 RLI.Ptr = FIPtr; 7247 RLI.MPI = MPI; 7248 } 7249 7250 /// Custom lowers floating point to integer conversions to use 7251 /// the direct move instructions available in ISA 2.07 to avoid the 7252 /// need for load/store combinations. 7253 SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op, 7254 SelectionDAG &DAG, 7255 const SDLoc &dl) const { 7256 assert(Op.getOperand(0).getValueType().isFloatingPoint()); 7257 SDValue Src = Op.getOperand(0); 7258 7259 if (Src.getValueType() == MVT::f32) 7260 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); 7261 7262 SDValue Tmp; 7263 switch (Op.getSimpleValueType().SimpleTy) { 7264 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); 7265 case MVT::i32: 7266 Tmp = DAG.getNode( 7267 Op.getOpcode() == ISD::FP_TO_SINT 7268 ? PPCISD::FCTIWZ 7269 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ), 7270 dl, MVT::f64, Src); 7271 Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp); 7272 break; 7273 case MVT::i64: 7274 assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) && 7275 "i64 FP_TO_UINT is supported only with FPCVT"); 7276 Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? 
PPCISD::FCTIDZ : 7277 PPCISD::FCTIDUZ, 7278 dl, MVT::f64, Src); 7279 Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp); 7280 break; 7281 } 7282 return Tmp; 7283 } 7284 7285 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, 7286 const SDLoc &dl) const { 7287 7288 // FP to INT conversions are legal for f128. 7289 if (EnableQuadPrecision && (Op->getOperand(0).getValueType() == MVT::f128)) 7290 return Op; 7291 7292 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on 7293 // PPC (the libcall is not available). 7294 if (Op.getOperand(0).getValueType() == MVT::ppcf128) { 7295 if (Op.getValueType() == MVT::i32) { 7296 if (Op.getOpcode() == ISD::FP_TO_SINT) { 7297 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, 7298 MVT::f64, Op.getOperand(0), 7299 DAG.getIntPtrConstant(0, dl)); 7300 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, 7301 MVT::f64, Op.getOperand(0), 7302 DAG.getIntPtrConstant(1, dl)); 7303 7304 // Add the two halves of the long double in round-to-zero mode. 7305 SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi); 7306 7307 // Now use a smaller FP_TO_SINT. 7308 return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res); 7309 } 7310 if (Op.getOpcode() == ISD::FP_TO_UINT) { 7311 const uint64_t TwoE31[] = {0x41e0000000000000LL, 0}; 7312 APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31)); 7313 SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128); 7314 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X 7315 // FIXME: generated code sucks. 7316 // TODO: Are there fast-math-flags to propagate to this FSUB? 7317 SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, 7318 Op.getOperand(0), Tmp); 7319 True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True); 7320 True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, 7321 DAG.getConstant(0x80000000, dl, MVT::i32)); 7322 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, 7323 Op.getOperand(0)); 7324 return DAG.getSelectCC(dl, Op.getOperand(0), Tmp, True, False, 7325 ISD::SETGE); 7326 } 7327 } 7328 7329 return SDValue(); 7330 } 7331 7332 if (Subtarget.hasDirectMove() && Subtarget.isPPC64()) 7333 return LowerFP_TO_INTDirectMove(Op, DAG, dl); 7334 7335 ReuseLoadInfo RLI; 7336 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl); 7337 7338 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI, 7339 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges); 7340 } 7341 7342 // We're trying to insert a regular store, S, and then a load, L. If the 7343 // incoming value, O, is a load, we might just be able to have our load use the 7344 // address used by O. However, we don't know if anything else will store to 7345 // that address before we can load from it. To prevent this situation, we need 7346 // to insert our load, L, into the chain as a peer of O. To do this, we give L 7347 // the same chain operand as O, we create a token factor from the chain results 7348 // of O and L, and we replace all uses of O's chain result with that token 7349 // factor (see spliceIntoChain below for this last part). 
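// Illustrative sketch of the scheme above (the names here are made up): given
//   O = load @A, chain Ch
// we emit our new load with the same chain operand,
//   L = load @A, chain Ch
// then create TF = TokenFactor(O.chain, L.chain) and replace all former uses
// of O's chain result with TF, so anything that was ordered after O is now
// also ordered after L.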
7350 bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT, 7351 ReuseLoadInfo &RLI, 7352 SelectionDAG &DAG, 7353 ISD::LoadExtType ET) const { 7354 SDLoc dl(Op); 7355 if (ET == ISD::NON_EXTLOAD && 7356 (Op.getOpcode() == ISD::FP_TO_UINT || 7357 Op.getOpcode() == ISD::FP_TO_SINT) && 7358 isOperationLegalOrCustom(Op.getOpcode(), 7359 Op.getOperand(0).getValueType())) { 7360 7361 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl); 7362 return true; 7363 } 7364 7365 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op); 7366 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() || 7367 LD->isNonTemporal()) 7368 return false; 7369 if (LD->getMemoryVT() != MemVT) 7370 return false; 7371 7372 RLI.Ptr = LD->getBasePtr(); 7373 if (LD->isIndexed() && !LD->getOffset().isUndef()) { 7374 assert(LD->getAddressingMode() == ISD::PRE_INC && 7375 "Non-pre-inc AM on PPC?"); 7376 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr, 7377 LD->getOffset()); 7378 } 7379 7380 RLI.Chain = LD->getChain(); 7381 RLI.MPI = LD->getPointerInfo(); 7382 RLI.IsDereferenceable = LD->isDereferenceable(); 7383 RLI.IsInvariant = LD->isInvariant(); 7384 RLI.Alignment = LD->getAlignment(); 7385 RLI.AAInfo = LD->getAAInfo(); 7386 RLI.Ranges = LD->getRanges(); 7387 7388 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1); 7389 return true; 7390 } 7391 7392 // Given the head of the old chain, ResChain, insert a token factor containing 7393 // it and NewResChain, and make users of ResChain now be users of that token 7394 // factor. 7395 // TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead. 7396 void PPCTargetLowering::spliceIntoChain(SDValue ResChain, 7397 SDValue NewResChain, 7398 SelectionDAG &DAG) const { 7399 if (!ResChain) 7400 return; 7401 7402 SDLoc dl(NewResChain); 7403 7404 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 7405 NewResChain, DAG.getUNDEF(MVT::Other)); 7406 assert(TF.getNode() != NewResChain.getNode() && 7407 "A new TF really is required here"); 7408 7409 DAG.ReplaceAllUsesOfValueWith(ResChain, TF); 7410 DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain); 7411 } 7412 7413 /// Analyze profitability of direct move 7414 /// prefer float load to int load plus direct move 7415 /// when there is no integer use of int load 7416 bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const { 7417 SDNode *Origin = Op.getOperand(0).getNode(); 7418 if (Origin->getOpcode() != ISD::LOAD) 7419 return true; 7420 7421 // If there is no LXSIBZX/LXSIHZX, like Power8, 7422 // prefer direct move if the memory size is 1 or 2 bytes. 7423 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand(); 7424 if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2) 7425 return true; 7426 7427 for (SDNode::use_iterator UI = Origin->use_begin(), 7428 UE = Origin->use_end(); 7429 UI != UE; ++UI) { 7430 7431 // Only look at the users of the loaded value. 7432 if (UI.getUse().get().getResNo() != 0) 7433 continue; 7434 7435 if (UI->getOpcode() != ISD::SINT_TO_FP && 7436 UI->getOpcode() != ISD::UINT_TO_FP) 7437 return true; 7438 } 7439 7440 return false; 7441 } 7442 7443 /// Custom lowers integer to floating point conversions to use 7444 /// the direct move instructions available in ISA 2.07 to avoid the 7445 /// need for load/store combinations. 
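/// For example (illustrative), a signed i64 -> f64 conversion becomes a
/// GPR-to-VSR move (PPCISD::MTVSRA) followed by PPCISD::FCFID, rather than a
/// store/reload through a stack slot.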
7446 SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op, 7447 SelectionDAG &DAG, 7448 const SDLoc &dl) const { 7449 assert((Op.getValueType() == MVT::f32 || 7450 Op.getValueType() == MVT::f64) && 7451 "Invalid floating point type as target of conversion"); 7452 assert(Subtarget.hasFPCVT() && 7453 "Int to FP conversions with direct moves require FPCVT"); 7454 SDValue FP; 7455 SDValue Src = Op.getOperand(0); 7456 bool SinglePrec = Op.getValueType() == MVT::f32; 7457 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32; 7458 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP; 7459 unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) : 7460 (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU); 7461 7462 if (WordInt) { 7463 FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ, 7464 dl, MVT::f64, Src); 7465 FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP); 7466 } 7467 else { 7468 FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src); 7469 FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP); 7470 } 7471 7472 return FP; 7473 } 7474 7475 static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) { 7476 7477 EVT VecVT = Vec.getValueType(); 7478 assert(VecVT.isVector() && "Expected a vector type."); 7479 assert(VecVT.getSizeInBits() < 128 && "Vector is already full width."); 7480 7481 EVT EltVT = VecVT.getVectorElementType(); 7482 unsigned WideNumElts = 128 / EltVT.getSizeInBits(); 7483 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts); 7484 7485 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements(); 7486 SmallVector<SDValue, 16> Ops(NumConcat); 7487 Ops[0] = Vec; 7488 SDValue UndefVec = DAG.getUNDEF(VecVT); 7489 for (unsigned i = 1; i < NumConcat; ++i) 7490 Ops[i] = UndefVec; 7491 7492 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops); 7493 } 7494 7495 SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG, 7496 const SDLoc &dl) const { 7497 7498 unsigned Opc = Op.getOpcode(); 7499 assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP) && 7500 "Unexpected conversion type"); 7501 assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) && 7502 "Supports conversions to v2f64/v4f32 only."); 7503 7504 bool SignedConv = Opc == ISD::SINT_TO_FP; 7505 bool FourEltRes = Op.getValueType() == MVT::v4f32; 7506 7507 SDValue Wide = widenVec(DAG, Op.getOperand(0), dl); 7508 EVT WideVT = Wide.getValueType(); 7509 unsigned WideNumElts = WideVT.getVectorNumElements(); 7510 MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64; 7511 7512 SmallVector<int, 16> ShuffV; 7513 for (unsigned i = 0; i < WideNumElts; ++i) 7514 ShuffV.push_back(i + WideNumElts); 7515 7516 int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2; 7517 int SaveElts = FourEltRes ? 4 : 2; 7518 if (Subtarget.isLittleEndian()) 7519 for (int i = 0; i < SaveElts; i++) 7520 ShuffV[i * Stride] = i; 7521 else 7522 for (int i = 1; i <= SaveElts; i++) 7523 ShuffV[i * Stride - 1] = i - 1; 7524 7525 SDValue ShuffleSrc2 = 7526 SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT); 7527 SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV); 7528 unsigned ExtendOp = 7529 SignedConv ? 
(unsigned)PPCISD::SExtVElems : (unsigned)ISD::BITCAST; 7530 7531 SDValue Extend; 7532 if (!Subtarget.hasP9Altivec() && SignedConv) { 7533 Arrange = DAG.getBitcast(IntermediateVT, Arrange); 7534 Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange, 7535 DAG.getValueType(Op.getOperand(0).getValueType())); 7536 } else 7537 Extend = DAG.getNode(ExtendOp, dl, IntermediateVT, Arrange); 7538 7539 return DAG.getNode(Opc, dl, Op.getValueType(), Extend); 7540 } 7541 7542 SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, 7543 SelectionDAG &DAG) const { 7544 SDLoc dl(Op); 7545 7546 EVT InVT = Op.getOperand(0).getValueType(); 7547 EVT OutVT = Op.getValueType(); 7548 if (OutVT.isVector() && OutVT.isFloatingPoint() && 7549 isOperationCustom(Op.getOpcode(), InVT)) 7550 return LowerINT_TO_FPVector(Op, DAG, dl); 7551 7552 // Conversions to f128 are legal. 7553 if (EnableQuadPrecision && (Op.getValueType() == MVT::f128)) 7554 return Op; 7555 7556 if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) { 7557 if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64) 7558 return SDValue(); 7559 7560 SDValue Value = Op.getOperand(0); 7561 // The values are now known to be -1 (false) or 1 (true). To convert this 7562 // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5). 7563 // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5 7564 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value); 7565 7566 SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64); 7567 7568 Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); 7569 7570 if (Op.getValueType() != MVT::v4f64) 7571 Value = DAG.getNode(ISD::FP_ROUND, dl, 7572 Op.getValueType(), Value, 7573 DAG.getIntPtrConstant(1, dl)); 7574 return Value; 7575 } 7576 7577 // Don't handle ppc_fp128 here; let it be lowered to a libcall. 7578 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) 7579 return SDValue(); 7580 7581 if (Op.getOperand(0).getValueType() == MVT::i1) 7582 return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0), 7583 DAG.getConstantFP(1.0, dl, Op.getValueType()), 7584 DAG.getConstantFP(0.0, dl, Op.getValueType())); 7585 7586 // If we have direct moves, we can do all the conversion, skip the store/load 7587 // however, without FPCVT we can't do most conversions. 7588 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) && 7589 Subtarget.isPPC64() && Subtarget.hasFPCVT()) 7590 return LowerINT_TO_FPDirectMove(Op, DAG, dl); 7591 7592 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) && 7593 "UINT_TO_FP is supported only with FPCVT"); 7594 7595 // If we have FCFIDS, then use it when converting to single-precision. 7596 // Otherwise, convert to double-precision and then round. 7597 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) 7598 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS 7599 : PPCISD::FCFIDS) 7600 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU 7601 : PPCISD::FCFID); 7602 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) 7603 ? MVT::f32 7604 : MVT::f64; 7605 7606 if (Op.getOperand(0).getValueType() == MVT::i64) { 7607 SDValue SINT = Op.getOperand(0); 7608 // When converting to single-precision, we actually need to convert 7609 // to double-precision first and then round to single-precision. 7610 // To avoid double-rounding effects during that operation, we have 7611 // to prepare the input operand. 
Bits that might be truncated when 7612 // converting to double-precision are replaced by a bit that won't 7613 // be lost at this stage, but is below the single-precision rounding 7614 // position. 7615 // 7616 // However, if -enable-unsafe-fp-math is in effect, accept double 7617 // rounding to avoid the extra overhead. 7618 if (Op.getValueType() == MVT::f32 && 7619 !Subtarget.hasFPCVT() && 7620 !DAG.getTarget().Options.UnsafeFPMath) { 7621 7622 // Twiddle input to make sure the low 11 bits are zero. (If this 7623 // is the case, we are guaranteed the value will fit into the 53 bit 7624 // mantissa of an IEEE double-precision value without rounding.) 7625 // If any of those low 11 bits were not zero originally, make sure 7626 // bit 12 (value 2048) is set instead, so that the final rounding 7627 // to single-precision gets the correct result. 7628 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64, 7629 SINT, DAG.getConstant(2047, dl, MVT::i64)); 7630 Round = DAG.getNode(ISD::ADD, dl, MVT::i64, 7631 Round, DAG.getConstant(2047, dl, MVT::i64)); 7632 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT); 7633 Round = DAG.getNode(ISD::AND, dl, MVT::i64, 7634 Round, DAG.getConstant(-2048, dl, MVT::i64)); 7635 7636 // However, we cannot use that value unconditionally: if the magnitude 7637 // of the input value is small, the bit-twiddling we did above might 7638 // end up visibly changing the output. Fortunately, in that case, we 7639 // don't need to twiddle bits since the original input will convert 7640 // exactly to double-precision floating-point already. Therefore, 7641 // construct a conditional to use the original value if the top 11 7642 // bits are all sign-bit copies, and use the rounded value computed 7643 // above otherwise. 7644 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64, 7645 SINT, DAG.getConstant(53, dl, MVT::i32)); 7646 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64, 7647 Cond, DAG.getConstant(1, dl, MVT::i64)); 7648 Cond = DAG.getSetCC(dl, MVT::i32, 7649 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT); 7650 7651 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT); 7652 } 7653 7654 ReuseLoadInfo RLI; 7655 SDValue Bits; 7656 7657 MachineFunction &MF = DAG.getMachineFunction(); 7658 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) { 7659 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI, 7660 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges); 7661 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); 7662 } else if (Subtarget.hasLFIWAX() && 7663 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) { 7664 MachineMemOperand *MMO = 7665 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, 7666 RLI.Alignment, RLI.AAInfo, RLI.Ranges); 7667 SDValue Ops[] = { RLI.Chain, RLI.Ptr }; 7668 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl, 7669 DAG.getVTList(MVT::f64, MVT::Other), 7670 Ops, MVT::i32, MMO); 7671 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); 7672 } else if (Subtarget.hasFPCVT() && 7673 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) { 7674 MachineMemOperand *MMO = 7675 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, 7676 RLI.Alignment, RLI.AAInfo, RLI.Ranges); 7677 SDValue Ops[] = { RLI.Chain, RLI.Ptr }; 7678 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl, 7679 DAG.getVTList(MVT::f64, MVT::Other), 7680 Ops, MVT::i32, MMO); 7681 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); 7682 } else if (((Subtarget.hasLFIWAX() && 7683 SINT.getOpcode() == ISD::SIGN_EXTEND) || 7684 
(Subtarget.hasFPCVT() && 7685 SINT.getOpcode() == ISD::ZERO_EXTEND)) && 7686 SINT.getOperand(0).getValueType() == MVT::i32) { 7687 MachineFrameInfo &MFI = MF.getFrameInfo(); 7688 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 7689 7690 int FrameIdx = MFI.CreateStackObject(4, 4, false); 7691 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 7692 7693 SDValue Store = 7694 DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx, 7695 MachinePointerInfo::getFixedStack( 7696 DAG.getMachineFunction(), FrameIdx)); 7697 7698 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 && 7699 "Expected an i32 store"); 7700 7701 RLI.Ptr = FIdx; 7702 RLI.Chain = Store; 7703 RLI.MPI = 7704 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); 7705 RLI.Alignment = 4; 7706 7707 MachineMemOperand *MMO = 7708 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, 7709 RLI.Alignment, RLI.AAInfo, RLI.Ranges); 7710 SDValue Ops[] = { RLI.Chain, RLI.Ptr }; 7711 Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ? 7712 PPCISD::LFIWZX : PPCISD::LFIWAX, 7713 dl, DAG.getVTList(MVT::f64, MVT::Other), 7714 Ops, MVT::i32, MMO); 7715 } else 7716 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT); 7717 7718 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits); 7719 7720 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) 7721 FP = DAG.getNode(ISD::FP_ROUND, dl, 7722 MVT::f32, FP, DAG.getIntPtrConstant(0, dl)); 7723 return FP; 7724 } 7725 7726 assert(Op.getOperand(0).getValueType() == MVT::i32 && 7727 "Unhandled INT_TO_FP type in custom expander!"); 7728 // Since we only generate this in 64-bit mode, we can take advantage of 7729 // 64-bit registers. In particular, sign extend the input value into the 7730 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack 7731 // then lfd it and fcfid it. 7732 MachineFunction &MF = DAG.getMachineFunction(); 7733 MachineFrameInfo &MFI = MF.getFrameInfo(); 7734 EVT PtrVT = getPointerTy(MF.getDataLayout()); 7735 7736 SDValue Ld; 7737 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) { 7738 ReuseLoadInfo RLI; 7739 bool ReusingLoad; 7740 if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI, 7741 DAG))) { 7742 int FrameIdx = MFI.CreateStackObject(4, 4, false); 7743 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 7744 7745 SDValue Store = 7746 DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx, 7747 MachinePointerInfo::getFixedStack( 7748 DAG.getMachineFunction(), FrameIdx)); 7749 7750 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 && 7751 "Expected an i32 store"); 7752 7753 RLI.Ptr = FIdx; 7754 RLI.Chain = Store; 7755 RLI.MPI = 7756 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); 7757 RLI.Alignment = 4; 7758 } 7759 7760 MachineMemOperand *MMO = 7761 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, 7762 RLI.Alignment, RLI.AAInfo, RLI.Ranges); 7763 SDValue Ops[] = { RLI.Chain, RLI.Ptr }; 7764 Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ? 
                                     PPCISD::LFIWZX : PPCISD::LFIWAX,
                                 dl, DAG.getVTList(MVT::f64, MVT::Other),
                                 Ops, MVT::i32, MMO);
    if (ReusingLoad)
      spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
  } else {
    assert(Subtarget.isPPC64() &&
           "i32->FP without LFIWAX supported only on PPC64");

    int FrameIdx = MFI.CreateStackObject(8, 8, false);
    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

    SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
                                Op.getOperand(0));

    // STD the extended value into the stack slot.
    SDValue Store = DAG.getStore(
        DAG.getEntryNode(), dl, Ext64, FIdx,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));

    // Load the value as a double.
    Ld = DAG.getLoad(
        MVT::f64, dl, Store, FIdx,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
  }

  // FCFID it and return it.
  SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
    FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
                     DAG.getIntPtrConstant(0, dl));
  return FP;
}

SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc dl(Op);
  /*
   The rounding mode is in bits 30:31 of FPSCR, and has the following
   settings:
     00 Round to nearest
     01 Round to 0
     10 Round to +inf
     11 Round to -inf

   FLT_ROUNDS, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf

   To perform the conversion, we do:
     ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
   e.g. FPSCR rounding bits 01 (round to 0) give (1 ^ ((~1 & 0x3) >> 1)) = 0.
  */

  MachineFunction &MF = DAG.getMachineFunction();
  EVT VT = Op.getValueType();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Save FP Control Word to register
  EVT NodeTys[] = {
    MVT::f64,    // return register
    MVT::Glue    // unused in this context
  };
  SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None);

  // Save FP register to stack slot
  int SSFI = MF.getFrameInfo().CreateStackObject(8, 8, false);
  SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, StackSlot,
                               MachinePointerInfo());

  // Load FP Control Word from low 32 bits of stack slot.
  SDValue Four = DAG.getConstant(4, dl, PtrVT);
  SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
  SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo());

  // Transform as necessary
  SDValue CWD1 =
    DAG.getNode(ISD::AND, dl, MVT::i32,
                CWD, DAG.getConstant(3, dl, MVT::i32));
  SDValue CWD2 =
    DAG.getNode(ISD::SRL, dl, MVT::i32,
                DAG.getNode(ISD::AND, dl, MVT::i32,
                            DAG.getNode(ISD::XOR, dl, MVT::i32,
                                        CWD, DAG.getConstant(3, dl, MVT::i32)),
                            DAG.getConstant(3, dl, MVT::i32)),
                DAG.getConstant(1, dl, MVT::i32));

  SDValue RetVal =
    DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);

  return DAG.getNode((VT.getSizeInBits() < 16 ?
7859 ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal); 7860 } 7861 7862 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const { 7863 EVT VT = Op.getValueType(); 7864 unsigned BitWidth = VT.getSizeInBits(); 7865 SDLoc dl(Op); 7866 assert(Op.getNumOperands() == 3 && 7867 VT == Op.getOperand(1).getValueType() && 7868 "Unexpected SHL!"); 7869 7870 // Expand into a bunch of logical ops. Note that these ops 7871 // depend on the PPC behavior for oversized shift amounts. 7872 SDValue Lo = Op.getOperand(0); 7873 SDValue Hi = Op.getOperand(1); 7874 SDValue Amt = Op.getOperand(2); 7875 EVT AmtVT = Amt.getValueType(); 7876 7877 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 7878 DAG.getConstant(BitWidth, dl, AmtVT), Amt); 7879 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt); 7880 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1); 7881 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3); 7882 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 7883 DAG.getConstant(-BitWidth, dl, AmtVT)); 7884 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5); 7885 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); 7886 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt); 7887 SDValue OutOps[] = { OutLo, OutHi }; 7888 return DAG.getMergeValues(OutOps, dl); 7889 } 7890 7891 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const { 7892 EVT VT = Op.getValueType(); 7893 SDLoc dl(Op); 7894 unsigned BitWidth = VT.getSizeInBits(); 7895 assert(Op.getNumOperands() == 3 && 7896 VT == Op.getOperand(1).getValueType() && 7897 "Unexpected SRL!"); 7898 7899 // Expand into a bunch of logical ops. Note that these ops 7900 // depend on the PPC behavior for oversized shift amounts. 7901 SDValue Lo = Op.getOperand(0); 7902 SDValue Hi = Op.getOperand(1); 7903 SDValue Amt = Op.getOperand(2); 7904 EVT AmtVT = Amt.getValueType(); 7905 7906 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 7907 DAG.getConstant(BitWidth, dl, AmtVT), Amt); 7908 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); 7909 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); 7910 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); 7911 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 7912 DAG.getConstant(-BitWidth, dl, AmtVT)); 7913 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5); 7914 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); 7915 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt); 7916 SDValue OutOps[] = { OutLo, OutHi }; 7917 return DAG.getMergeValues(OutOps, dl); 7918 } 7919 7920 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const { 7921 SDLoc dl(Op); 7922 EVT VT = Op.getValueType(); 7923 unsigned BitWidth = VT.getSizeInBits(); 7924 assert(Op.getNumOperands() == 3 && 7925 VT == Op.getOperand(1).getValueType() && 7926 "Unexpected SRA!"); 7927 7928 // Expand into a bunch of logical ops, followed by a select_cc. 
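  // Roughly (illustrative): for shift amounts below BitWidth the result is
  //   Hi' = Hi >>arith Amt,  Lo' = (Lo >>logic Amt) | (Hi << (BitWidth - Amt))
  // and for larger amounts it is
  //   Hi' = sign fill,       Lo' = Hi >>arith (Amt - BitWidth).
  // The select_cc on (Amt - BitWidth) below picks the right Lo value; Hi
  // relies on the PPC behavior for oversized shift amounts.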
7929 SDValue Lo = Op.getOperand(0); 7930 SDValue Hi = Op.getOperand(1); 7931 SDValue Amt = Op.getOperand(2); 7932 EVT AmtVT = Amt.getValueType(); 7933 7934 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 7935 DAG.getConstant(BitWidth, dl, AmtVT), Amt); 7936 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); 7937 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); 7938 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); 7939 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 7940 DAG.getConstant(-BitWidth, dl, AmtVT)); 7941 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5); 7942 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt); 7943 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT), 7944 Tmp4, Tmp6, ISD::SETLE); 7945 SDValue OutOps[] = { OutLo, OutHi }; 7946 return DAG.getMergeValues(OutOps, dl); 7947 } 7948 7949 //===----------------------------------------------------------------------===// 7950 // Vector related lowering. 7951 // 7952 7953 /// BuildSplatI - Build a canonical splati of Val with an element size of 7954 /// SplatSize. Cast the result to VT. 7955 static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT, 7956 SelectionDAG &DAG, const SDLoc &dl) { 7957 assert(Val >= -16 && Val <= 15 && "vsplti is out of range!"); 7958 7959 static const MVT VTys[] = { // canonical VT to use for each size. 7960 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32 7961 }; 7962 7963 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1]; 7964 7965 // Force vspltis[hw] -1 to vspltisb -1 to canonicalize. 7966 if (Val == -1) 7967 SplatSize = 1; 7968 7969 EVT CanonicalVT = VTys[SplatSize-1]; 7970 7971 // Build a canonical splat for this value. 7972 return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT)); 7973 } 7974 7975 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the 7976 /// specified intrinsic ID. 7977 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, 7978 const SDLoc &dl, EVT DestVT = MVT::Other) { 7979 if (DestVT == MVT::Other) DestVT = Op.getValueType(); 7980 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 7981 DAG.getConstant(IID, dl, MVT::i32), Op); 7982 } 7983 7984 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the 7985 /// specified intrinsic ID. 7986 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS, 7987 SelectionDAG &DAG, const SDLoc &dl, 7988 EVT DestVT = MVT::Other) { 7989 if (DestVT == MVT::Other) DestVT = LHS.getValueType(); 7990 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 7991 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS); 7992 } 7993 7994 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the 7995 /// specified intrinsic ID. 7996 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1, 7997 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl, 7998 EVT DestVT = MVT::Other) { 7999 if (DestVT == MVT::Other) DestVT = Op0.getValueType(); 8000 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 8001 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2); 8002 } 8003 8004 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified 8005 /// amount. The result has the specified value type. 8006 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, 8007 SelectionDAG &DAG, const SDLoc &dl) { 8008 // Force LHS/RHS to be the right type. 
8009 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS); 8010 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS); 8011 8012 int Ops[16]; 8013 for (unsigned i = 0; i != 16; ++i) 8014 Ops[i] = i + Amt; 8015 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops); 8016 return DAG.getNode(ISD::BITCAST, dl, VT, T); 8017 } 8018 8019 /// Do we have an efficient pattern in a .td file for this node? 8020 /// 8021 /// \param V - pointer to the BuildVectorSDNode being matched 8022 /// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves? 8023 /// 8024 /// There are some patterns where it is beneficial to keep a BUILD_VECTOR 8025 /// node as a BUILD_VECTOR node rather than expanding it. The patterns where 8026 /// the opposite is true (expansion is beneficial) are: 8027 /// - The node builds a vector out of integers that are not 32 or 64-bits 8028 /// - The node builds a vector out of constants 8029 /// - The node is a "load-and-splat" 8030 /// In all other cases, we will choose to keep the BUILD_VECTOR. 8031 static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, 8032 bool HasDirectMove, 8033 bool HasP8Vector) { 8034 EVT VecVT = V->getValueType(0); 8035 bool RightType = VecVT == MVT::v2f64 || 8036 (HasP8Vector && VecVT == MVT::v4f32) || 8037 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32)); 8038 if (!RightType) 8039 return false; 8040 8041 bool IsSplat = true; 8042 bool IsLoad = false; 8043 SDValue Op0 = V->getOperand(0); 8044 8045 // This function is called in a block that confirms the node is not a constant 8046 // splat. So a constant BUILD_VECTOR here means the vector is built out of 8047 // different constants. 8048 if (V->isConstant()) 8049 return false; 8050 for (int i = 0, e = V->getNumOperands(); i < e; ++i) { 8051 if (V->getOperand(i).isUndef()) 8052 return false; 8053 // We want to expand nodes that represent load-and-splat even if the 8054 // loaded value is a floating point truncation or conversion to int. 8055 if (V->getOperand(i).getOpcode() == ISD::LOAD || 8056 (V->getOperand(i).getOpcode() == ISD::FP_ROUND && 8057 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) || 8058 (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT && 8059 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) || 8060 (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT && 8061 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD)) 8062 IsLoad = true; 8063 // If the operands are different or the input is not a load and has more 8064 // uses than just this BV node, then it isn't a splat. 8065 if (V->getOperand(i) != Op0 || 8066 (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode()))) 8067 IsSplat = false; 8068 } 8069 return !(IsSplat && IsLoad); 8070 } 8071 8072 // Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128. 8073 SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const { 8074 8075 SDLoc dl(Op); 8076 SDValue Op0 = Op->getOperand(0); 8077 8078 if (!EnableQuadPrecision || 8079 (Op.getValueType() != MVT::f128 ) || 8080 (Op0.getOpcode() != ISD::BUILD_PAIR) || 8081 (Op0.getOperand(0).getValueType() != MVT::i64) || 8082 (Op0.getOperand(1).getValueType() != MVT::i64)) 8083 return SDValue(); 8084 8085 return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0), 8086 Op0.getOperand(1)); 8087 } 8088 8089 // If this is a case we can't handle, return null and let the default 8090 // expansion code take care of it. If we CAN select this case, and if it 8091 // selects to a single instruction, return Op. 
Otherwise, if we can codegen 8092 // this case more efficiently than a constant pool load, lower it to the 8093 // sequence of ops that should be used. 8094 SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, 8095 SelectionDAG &DAG) const { 8096 SDLoc dl(Op); 8097 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); 8098 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR"); 8099 8100 if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) { 8101 // We first build an i32 vector, load it into a QPX register, 8102 // then convert it to a floating-point vector and compare it 8103 // to a zero vector to get the boolean result. 8104 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); 8105 int FrameIdx = MFI.CreateStackObject(16, 16, false); 8106 MachinePointerInfo PtrInfo = 8107 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); 8108 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 8109 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 8110 8111 assert(BVN->getNumOperands() == 4 && 8112 "BUILD_VECTOR for v4i1 does not have 4 operands"); 8113 8114 bool IsConst = true; 8115 for (unsigned i = 0; i < 4; ++i) { 8116 if (BVN->getOperand(i).isUndef()) continue; 8117 if (!isa<ConstantSDNode>(BVN->getOperand(i))) { 8118 IsConst = false; 8119 break; 8120 } 8121 } 8122 8123 if (IsConst) { 8124 Constant *One = 8125 ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0); 8126 Constant *NegOne = 8127 ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0); 8128 8129 Constant *CV[4]; 8130 for (unsigned i = 0; i < 4; ++i) { 8131 if (BVN->getOperand(i).isUndef()) 8132 CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext())); 8133 else if (isNullConstant(BVN->getOperand(i))) 8134 CV[i] = NegOne; 8135 else 8136 CV[i] = One; 8137 } 8138 8139 Constant *CP = ConstantVector::get(CV); 8140 SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()), 8141 16 /* alignment */); 8142 8143 SDValue Ops[] = {DAG.getEntryNode(), CPIdx}; 8144 SDVTList VTs = DAG.getVTList({MVT::v4i1, /*chain*/ MVT::Other}); 8145 return DAG.getMemIntrinsicNode( 8146 PPCISD::QVLFSb, dl, VTs, Ops, MVT::v4f32, 8147 MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); 8148 } 8149 8150 SmallVector<SDValue, 4> Stores; 8151 for (unsigned i = 0; i < 4; ++i) { 8152 if (BVN->getOperand(i).isUndef()) continue; 8153 8154 unsigned Offset = 4*i; 8155 SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType()); 8156 Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx); 8157 8158 unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize(); 8159 if (StoreSize > 4) { 8160 Stores.push_back( 8161 DAG.getTruncStore(DAG.getEntryNode(), dl, BVN->getOperand(i), Idx, 8162 PtrInfo.getWithOffset(Offset), MVT::i32)); 8163 } else { 8164 SDValue StoreValue = BVN->getOperand(i); 8165 if (StoreSize < 4) 8166 StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue); 8167 8168 Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, StoreValue, Idx, 8169 PtrInfo.getWithOffset(Offset))); 8170 } 8171 } 8172 8173 SDValue StoreChain; 8174 if (!Stores.empty()) 8175 StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); 8176 else 8177 StoreChain = DAG.getEntryNode(); 8178 8179 // Now load from v4i32 into the QPX register; this will extend it to 8180 // v4i64 but not yet convert it to a floating point. Nevertheless, this 8181 // is typed as v4f64 because the QPX register integer states are not 8182 // explicitly represented. 
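    // In other words (illustrative), the non-constant path is: store the i32
    // elements to the stack slot, qvlfiwz the slot as v4i32, qvfcfidu to get
    // a v4f64, then a SETEQ against a 0.0 vector to form the v4i1 result.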
8183 8184 SDValue Ops[] = {StoreChain, 8185 DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, dl, MVT::i32), 8186 FIdx}; 8187 SDVTList VTs = DAG.getVTList({MVT::v4f64, /*chain*/ MVT::Other}); 8188 8189 SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, 8190 dl, VTs, Ops, MVT::v4i32, PtrInfo); 8191 LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, 8192 DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, dl, MVT::i32), 8193 LoadedVect); 8194 8195 SDValue FPZeros = DAG.getConstantFP(0.0, dl, MVT::v4f64); 8196 8197 return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ); 8198 } 8199 8200 // All other QPX vectors are handled by generic code. 8201 if (Subtarget.hasQPX()) 8202 return SDValue(); 8203 8204 // Check if this is a splat of a constant value. 8205 APInt APSplatBits, APSplatUndef; 8206 unsigned SplatBitSize; 8207 bool HasAnyUndefs; 8208 if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, 8209 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) || 8210 SplatBitSize > 32) { 8211 // BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be 8212 // lowered to VSX instructions under certain conditions. 8213 // Without VSX, there is no pattern more efficient than expanding the node. 8214 if (Subtarget.hasVSX() && 8215 haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(), 8216 Subtarget.hasP8Vector())) 8217 return Op; 8218 return SDValue(); 8219 } 8220 8221 unsigned SplatBits = APSplatBits.getZExtValue(); 8222 unsigned SplatUndef = APSplatUndef.getZExtValue(); 8223 unsigned SplatSize = SplatBitSize / 8; 8224 8225 // First, handle single instruction cases. 8226 8227 // All zeros? 8228 if (SplatBits == 0) { 8229 // Canonicalize all zero vectors to be v4i32. 8230 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) { 8231 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32); 8232 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z); 8233 } 8234 return Op; 8235 } 8236 8237 // We have XXSPLTIB for constant splats one byte wide 8238 if (Subtarget.hasP9Vector() && SplatSize == 1) { 8239 // This is a splat of 1-byte elements with some elements potentially undef. 8240 // Rather than trying to match undef in the SDAG patterns, ensure that all 8241 // elements are the same constant. 8242 if (HasAnyUndefs || ISD::isBuildVectorAllOnes(BVN)) { 8243 SmallVector<SDValue, 16> Ops(16, DAG.getConstant(SplatBits, 8244 dl, MVT::i32)); 8245 SDValue NewBV = DAG.getBuildVector(MVT::v16i8, dl, Ops); 8246 if (Op.getValueType() != MVT::v16i8) 8247 return DAG.getBitcast(Op.getValueType(), NewBV); 8248 return NewBV; 8249 } 8250 8251 // BuildVectorSDNode::isConstantSplat() is actually pretty smart. It'll 8252 // detect that constant splats like v8i16: 0xABAB are really just splats 8253 // of a 1-byte constant. In this case, we need to convert the node to a 8254 // splat of v16i8 and a bitcast. 8255 if (Op.getValueType() != MVT::v16i8) 8256 return DAG.getBitcast(Op.getValueType(), 8257 DAG.getConstant(SplatBits, dl, MVT::v16i8)); 8258 8259 return Op; 8260 } 8261 8262 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw]. 8263 int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >> 8264 (32-SplatBitSize)); 8265 if (SextVal >= -16 && SextVal <= 15) 8266 return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl); 8267 8268 // Two instruction sequences. 
8269 8270 // If this value is in the range [-32,30] and is even, use: 8271 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2) 8272 // If this value is in the range [17,31] and is odd, use: 8273 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16) 8274 // If this value is in the range [-31,-17] and is odd, use: 8275 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16) 8276 // Note the last two are three-instruction sequences. 8277 if (SextVal >= -32 && SextVal <= 31) { 8278 // To avoid having these optimizations undone by constant folding, 8279 // we convert to a pseudo that will be expanded later into one of 8280 // the above forms. 8281 SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32); 8282 EVT VT = (SplatSize == 1 ? MVT::v16i8 : 8283 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32)); 8284 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32); 8285 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize); 8286 if (VT == Op.getValueType()) 8287 return RetVal; 8288 else 8289 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal); 8290 } 8291 8292 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is 8293 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important 8294 // for fneg/fabs. 8295 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) { 8296 // Make -1 and vspltisw -1: 8297 SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl); 8298 8299 // Make the VSLW intrinsic, computing 0x8000_0000. 8300 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV, 8301 OnesV, DAG, dl); 8302 8303 // xor by OnesV to invert it. 8304 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV); 8305 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 8306 } 8307 8308 // Check to see if this is a wide variety of vsplti*, binop self cases. 8309 static const signed char SplatCsts[] = { 8310 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, 8311 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16 8312 }; 8313 8314 for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) { 8315 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for 8316 // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1' 8317 int i = SplatCsts[idx]; 8318 8319 // Figure out what shift amount will be used by altivec if shifted by i in 8320 // this splat size. 8321 unsigned TypeShiftAmt = i & (SplatBitSize-1); 8322 8323 // vsplti + shl self. 8324 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) { 8325 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 8326 static const unsigned IIDs[] = { // Intrinsic to use for each size. 8327 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0, 8328 Intrinsic::ppc_altivec_vslw 8329 }; 8330 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 8331 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 8332 } 8333 8334 // vsplti + srl self. 8335 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { 8336 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 8337 static const unsigned IIDs[] = { // Intrinsic to use for each size. 8338 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0, 8339 Intrinsic::ppc_altivec_vsrw 8340 }; 8341 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 8342 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 8343 } 8344 8345 // vsplti + sra self. 
8346 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { 8347 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 8348 static const unsigned IIDs[] = { // Intrinsic to use for each size. 8349 Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0, 8350 Intrinsic::ppc_altivec_vsraw 8351 }; 8352 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 8353 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 8354 } 8355 8356 // vsplti + rol self. 8357 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) | 8358 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) { 8359 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 8360 static const unsigned IIDs[] = { // Intrinsic to use for each size. 8361 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0, 8362 Intrinsic::ppc_altivec_vrlw 8363 }; 8364 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 8365 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 8366 } 8367 8368 // t = vsplti c, result = vsldoi t, t, 1 8369 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) { 8370 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 8371 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1; 8372 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl); 8373 } 8374 // t = vsplti c, result = vsldoi t, t, 2 8375 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) { 8376 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 8377 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2; 8378 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl); 8379 } 8380 // t = vsplti c, result = vsldoi t, t, 3 8381 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) { 8382 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 8383 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3; 8384 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl); 8385 } 8386 } 8387 8388 return SDValue(); 8389 } 8390 8391 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit 8392 /// the specified operations to build the shuffle. 
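/// The code below decodes each 32-bit entry as follows: bits [29:26] give the
/// operation (OP_* in the enum), while bits [25:13] and [12:0] give the table
/// entries used to recursively build the left and right shuffle operands.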
8393 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, 8394 SDValue RHS, SelectionDAG &DAG, 8395 const SDLoc &dl) { 8396 unsigned OpNum = (PFEntry >> 26) & 0x0F; 8397 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); 8398 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); 8399 8400 enum { 8401 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> 8402 OP_VMRGHW, 8403 OP_VMRGLW, 8404 OP_VSPLTISW0, 8405 OP_VSPLTISW1, 8406 OP_VSPLTISW2, 8407 OP_VSPLTISW3, 8408 OP_VSLDOI4, 8409 OP_VSLDOI8, 8410 OP_VSLDOI12 8411 }; 8412 8413 if (OpNum == OP_COPY) { 8414 if (LHSID == (1*9+2)*9+3) return LHS; 8415 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); 8416 return RHS; 8417 } 8418 8419 SDValue OpLHS, OpRHS; 8420 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); 8421 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); 8422 8423 int ShufIdxs[16]; 8424 switch (OpNum) { 8425 default: llvm_unreachable("Unknown i32 permute!"); 8426 case OP_VMRGHW: 8427 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3; 8428 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19; 8429 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7; 8430 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23; 8431 break; 8432 case OP_VMRGLW: 8433 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11; 8434 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27; 8435 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15; 8436 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31; 8437 break; 8438 case OP_VSPLTISW0: 8439 for (unsigned i = 0; i != 16; ++i) 8440 ShufIdxs[i] = (i&3)+0; 8441 break; 8442 case OP_VSPLTISW1: 8443 for (unsigned i = 0; i != 16; ++i) 8444 ShufIdxs[i] = (i&3)+4; 8445 break; 8446 case OP_VSPLTISW2: 8447 for (unsigned i = 0; i != 16; ++i) 8448 ShufIdxs[i] = (i&3)+8; 8449 break; 8450 case OP_VSPLTISW3: 8451 for (unsigned i = 0; i != 16; ++i) 8452 ShufIdxs[i] = (i&3)+12; 8453 break; 8454 case OP_VSLDOI4: 8455 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl); 8456 case OP_VSLDOI8: 8457 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl); 8458 case OP_VSLDOI12: 8459 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl); 8460 } 8461 EVT VT = OpLHS.getValueType(); 8462 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS); 8463 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS); 8464 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs); 8465 return DAG.getNode(ISD::BITCAST, dl, VT, T); 8466 } 8467 8468 /// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled 8469 /// by the VINSERTB instruction introduced in ISA 3.0, else just return default 8470 /// SDValue. 8471 SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N, 8472 SelectionDAG &DAG) const { 8473 const unsigned BytesInVector = 16; 8474 bool IsLE = Subtarget.isLittleEndian(); 8475 SDLoc dl(N); 8476 SDValue V1 = N->getOperand(0); 8477 SDValue V2 = N->getOperand(1); 8478 unsigned ShiftElts = 0, InsertAtByte = 0; 8479 bool Swap = false; 8480 8481 // Shifts required to get the byte we want at element 7. 
  unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
                                   0, 15, 14, 13, 12, 11, 10, 9};
  unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
                                1, 2, 3, 4, 5, 6, 7, 8};

  ArrayRef<int> Mask = N->getMask();
  int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};

  // For each mask element, find out if we're just inserting something
  // from V2 into V1 or vice versa.
  // Possible permutations inserting an element from V2 into V1:
  //   X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
  //   0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
  //   ...
  //   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
  // Inserting from V1 into V2 will be similar, except mask range will be
  // [16,31].

  bool FoundCandidate = false;
  // If both vector operands for the shuffle are the same vector, the mask
  // will contain only elements from the first one and the second one will be
  // undef.
  unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
  // Go through the mask of bytes to find an element that's being moved
  // from one vector to the other.
  for (unsigned i = 0; i < BytesInVector; ++i) {
    unsigned CurrentElement = Mask[i];
    // If 2nd operand is undefined, we should only look for element 7 in the
    // Mask.
    if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
      continue;

    bool OtherElementsInOrder = true;
    // Examine the other elements in the Mask to see if they're in original
    // order.
    for (unsigned j = 0; j < BytesInVector; ++j) {
      if (j == i)
        continue;
      // If CurrentElement is from V1 [0,15], then we expect the rest of the
      // Mask to be from V2 [16,31] and vice versa. Unless the 2nd operand is
      // undefined, in which case we always assume we're picking from the 1st
      // operand.
      int MaskOffset =
          (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
      if (Mask[j] != OriginalOrder[j] + MaskOffset) {
        OtherElementsInOrder = false;
        break;
      }
    }
    // If other elements are in original order, we record the number of shifts
    // we need to get the element we want into element 7. Also record which
    // byte in the vector we should insert into.
    if (OtherElementsInOrder) {
      // If 2nd operand is undefined, we assume no shifts and no swapping.
      if (V2.isUndef()) {
        ShiftElts = 0;
        Swap = false;
      } else {
        // Only need the last 4 bits for shifts because operands will be
        // swapped if CurrentElement is >= 2^4.
        ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
                         : BigEndianShifts[CurrentElement & 0xF];
        Swap = CurrentElement < BytesInVector;
      }
      InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
      FoundCandidate = true;
      break;
    }
  }

  if (!FoundCandidate)
    return SDValue();

  // Candidate found, construct the proper SDAG sequence with VINSERTB,
  // optionally with VECSHL if shift is required.
8555 if (Swap) 8556 std::swap(V1, V2); 8557 if (V2.isUndef()) 8558 V2 = V1; 8559 if (ShiftElts) { 8560 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2, 8561 DAG.getConstant(ShiftElts, dl, MVT::i32)); 8562 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl, 8563 DAG.getConstant(InsertAtByte, dl, MVT::i32)); 8564 } 8565 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2, 8566 DAG.getConstant(InsertAtByte, dl, MVT::i32)); 8567 } 8568 8569 /// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled 8570 /// by the VINSERTH instruction introduced in ISA 3.0, else just return default 8571 /// SDValue. 8572 SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N, 8573 SelectionDAG &DAG) const { 8574 const unsigned NumHalfWords = 8; 8575 const unsigned BytesInVector = NumHalfWords * 2; 8576 // Check that the shuffle is on half-words. 8577 if (!isNByteElemShuffleMask(N, 2, 1)) 8578 return SDValue(); 8579 8580 bool IsLE = Subtarget.isLittleEndian(); 8581 SDLoc dl(N); 8582 SDValue V1 = N->getOperand(0); 8583 SDValue V2 = N->getOperand(1); 8584 unsigned ShiftElts = 0, InsertAtByte = 0; 8585 bool Swap = false; 8586 8587 // Shifts required to get the half-word we want at element 3. 8588 unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5}; 8589 unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4}; 8590 8591 uint32_t Mask = 0; 8592 uint32_t OriginalOrderLow = 0x1234567; 8593 uint32_t OriginalOrderHigh = 0x89ABCDEF; 8594 // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a 8595 // 32-bit space, only need 4-bit nibbles per element. 8596 for (unsigned i = 0; i < NumHalfWords; ++i) { 8597 unsigned MaskShift = (NumHalfWords - 1 - i) * 4; 8598 Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift); 8599 } 8600 8601 // For each mask element, find out if we're just inserting something 8602 // from V2 into V1 or vice versa. Possible permutations inserting an element 8603 // from V2 into V1: 8604 // X, 1, 2, 3, 4, 5, 6, 7 8605 // 0, X, 2, 3, 4, 5, 6, 7 8606 // 0, 1, X, 3, 4, 5, 6, 7 8607 // 0, 1, 2, X, 4, 5, 6, 7 8608 // 0, 1, 2, 3, X, 5, 6, 7 8609 // 0, 1, 2, 3, 4, X, 6, 7 8610 // 0, 1, 2, 3, 4, 5, X, 7 8611 // 0, 1, 2, 3, 4, 5, 6, X 8612 // Inserting from V1 into V2 will be similar, except mask range will be [8,15]. 8613 8614 bool FoundCandidate = false; 8615 // Go through the mask of half-words to find an element that's being moved 8616 // from one vector to the other. 8617 for (unsigned i = 0; i < NumHalfWords; ++i) { 8618 unsigned MaskShift = (NumHalfWords - 1 - i) * 4; 8619 uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF; 8620 uint32_t MaskOtherElts = ~(0xF << MaskShift); 8621 uint32_t TargetOrder = 0x0; 8622 8623 // If both vector operands for the shuffle are the same vector, the mask 8624 // will contain only elements from the first one and the second one will be 8625 // undef. 8626 if (V2.isUndef()) { 8627 ShiftElts = 0; 8628 unsigned VINSERTHSrcElem = IsLE ? 4 : 3; 8629 TargetOrder = OriginalOrderLow; 8630 Swap = false; 8631 // Skip if not the correct element or mask of other elements don't equal 8632 // to our expected order. 8633 if (MaskOneElt == VINSERTHSrcElem && 8634 (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) { 8635 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2; 8636 FoundCandidate = true; 8637 break; 8638 } 8639 } else { // If both operands are defined. 8640 // Target order is [8,15] if the current mask is between [0,7]. 8641 TargetOrder = 8642 (MaskOneElt < NumHalfWords) ? 
OriginalOrderHigh : OriginalOrderLow; 8643 // Skip if mask of other elements don't equal our expected order. 8644 if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) { 8645 // We only need the last 3 bits for the number of shifts. 8646 ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7] 8647 : BigEndianShifts[MaskOneElt & 0x7]; 8648 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2; 8649 Swap = MaskOneElt < NumHalfWords; 8650 FoundCandidate = true; 8651 break; 8652 } 8653 } 8654 } 8655 8656 if (!FoundCandidate) 8657 return SDValue(); 8658 8659 // Candidate found, construct the proper SDAG sequence with VINSERTH, 8660 // optionally with VECSHL if shift is required. 8661 if (Swap) 8662 std::swap(V1, V2); 8663 if (V2.isUndef()) 8664 V2 = V1; 8665 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1); 8666 if (ShiftElts) { 8667 // Double ShiftElts because we're left shifting on v16i8 type. 8668 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2, 8669 DAG.getConstant(2 * ShiftElts, dl, MVT::i32)); 8670 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl); 8671 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2, 8672 DAG.getConstant(InsertAtByte, dl, MVT::i32)); 8673 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins); 8674 } 8675 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2); 8676 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2, 8677 DAG.getConstant(InsertAtByte, dl, MVT::i32)); 8678 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins); 8679 } 8680 8681 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this 8682 /// is a shuffle we can handle in a single instruction, return it. Otherwise, 8683 /// return the code it can be lowered into. Worst case, it can always be 8684 /// lowered into a vperm. 
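/// The single-instruction forms tried below include, depending on subtarget
/// features: XXINSERTW, VINSERTH/VINSERTB, XXSLDWI, XXPERMDI, the XXBR*
/// byte reversals, vector splats, and the 8-byte swap.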
8685 SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, 8686 SelectionDAG &DAG) const { 8687 SDLoc dl(Op); 8688 SDValue V1 = Op.getOperand(0); 8689 SDValue V2 = Op.getOperand(1); 8690 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); 8691 EVT VT = Op.getValueType(); 8692 bool isLittleEndian = Subtarget.isLittleEndian(); 8693 8694 unsigned ShiftElts, InsertAtByte; 8695 bool Swap = false; 8696 if (Subtarget.hasP9Vector() && 8697 PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap, 8698 isLittleEndian)) { 8699 if (Swap) 8700 std::swap(V1, V2); 8701 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); 8702 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2); 8703 if (ShiftElts) { 8704 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2, 8705 DAG.getConstant(ShiftElts, dl, MVT::i32)); 8706 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl, 8707 DAG.getConstant(InsertAtByte, dl, MVT::i32)); 8708 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins); 8709 } 8710 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2, 8711 DAG.getConstant(InsertAtByte, dl, MVT::i32)); 8712 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins); 8713 } 8714 8715 if (Subtarget.hasP9Altivec()) { 8716 SDValue NewISDNode; 8717 if ((NewISDNode = lowerToVINSERTH(SVOp, DAG))) 8718 return NewISDNode; 8719 8720 if ((NewISDNode = lowerToVINSERTB(SVOp, DAG))) 8721 return NewISDNode; 8722 } 8723 8724 if (Subtarget.hasVSX() && 8725 PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) { 8726 if (Swap) 8727 std::swap(V1, V2); 8728 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); 8729 SDValue Conv2 = 8730 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2); 8731 8732 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2, 8733 DAG.getConstant(ShiftElts, dl, MVT::i32)); 8734 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl); 8735 } 8736 8737 if (Subtarget.hasVSX() && 8738 PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) { 8739 if (Swap) 8740 std::swap(V1, V2); 8741 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1); 8742 SDValue Conv2 = 8743 DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? 
V1 : V2); 8744 8745 SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2, 8746 DAG.getConstant(ShiftElts, dl, MVT::i32)); 8747 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI); 8748 } 8749 8750 if (Subtarget.hasP9Vector()) { 8751 if (PPC::isXXBRHShuffleMask(SVOp)) { 8752 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1); 8753 SDValue ReveHWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v8i16, Conv); 8754 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord); 8755 } else if (PPC::isXXBRWShuffleMask(SVOp)) { 8756 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); 8757 SDValue ReveWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v4i32, Conv); 8758 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord); 8759 } else if (PPC::isXXBRDShuffleMask(SVOp)) { 8760 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1); 8761 SDValue ReveDWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Conv); 8762 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord); 8763 } else if (PPC::isXXBRQShuffleMask(SVOp)) { 8764 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1); 8765 SDValue ReveQWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v1i128, Conv); 8766 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord); 8767 } 8768 } 8769 8770 if (Subtarget.hasVSX()) { 8771 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) { 8772 int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG); 8773 8774 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); 8775 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv, 8776 DAG.getConstant(SplatIdx, dl, MVT::i32)); 8777 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat); 8778 } 8779 8780 // Left shifts of 8 bytes are actually swaps. Convert accordingly. 8781 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) { 8782 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1); 8783 SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv); 8784 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap); 8785 } 8786 } 8787 8788 if (Subtarget.hasQPX()) { 8789 if (VT.getVectorNumElements() != 4) 8790 return SDValue(); 8791 8792 if (V2.isUndef()) V2 = V1; 8793 8794 int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp); 8795 if (AlignIdx != -1) { 8796 return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2, 8797 DAG.getConstant(AlignIdx, dl, MVT::i32)); 8798 } else if (SVOp->isSplat()) { 8799 int SplatIdx = SVOp->getSplatIndex(); 8800 if (SplatIdx >= 4) { 8801 std::swap(V1, V2); 8802 SplatIdx -= 4; 8803 } 8804 8805 return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1, 8806 DAG.getConstant(SplatIdx, dl, MVT::i32)); 8807 } 8808 8809 // Lower this into a qvgpci/qvfperm pair. 8810 8811 // Compute the qvgpci literal 8812 unsigned idx = 0; 8813 for (unsigned i = 0; i < 4; ++i) { 8814 int m = SVOp->getMaskElt(i); 8815 unsigned mm = m >= 0 ? (unsigned) m : i; 8816 idx |= mm << (3-i)*3; 8817 } 8818 8819 SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64, 8820 DAG.getConstant(idx, dl, MVT::i32)); 8821 return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3); 8822 } 8823 8824 // Cases that are handled by instructions that take permute immediates 8825 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be 8826 // selected by the instruction selector. 
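// For example, with an undef second operand a v16i8 mask of all 5s is a
// byte splat that isSplatShuffleMask(SVOp, 1) accepts, and the selector can
// emit a single vspltb for it; returning Op unchanged below preserves that
// opportunity.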
8827 if (V2.isUndef()) { 8828 if (PPC::isSplatShuffleMask(SVOp, 1) || 8829 PPC::isSplatShuffleMask(SVOp, 2) || 8830 PPC::isSplatShuffleMask(SVOp, 4) || 8831 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) || 8832 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) || 8833 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 || 8834 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) || 8835 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) || 8836 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) || 8837 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) || 8838 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) || 8839 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) || 8840 (Subtarget.hasP8Altivec() && ( 8841 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) || 8842 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) || 8843 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) { 8844 return Op; 8845 } 8846 } 8847 8848 // Altivec has a variety of "shuffle immediates" that take two vector inputs 8849 // and produce a fixed permutation. If any of these match, do not lower to 8850 // VPERM. 8851 unsigned int ShuffleKind = isLittleEndian ? 2 : 0; 8852 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) || 8853 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) || 8854 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 || 8855 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) || 8856 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) || 8857 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) || 8858 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) || 8859 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) || 8860 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) || 8861 (Subtarget.hasP8Altivec() && ( 8862 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) || 8863 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) || 8864 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG)))) 8865 return Op; 8866 8867 // Check to see if this is a shuffle of 4-byte values. If so, we can use our 8868 // perfect shuffle table to emit an optimal matching sequence. 8869 ArrayRef<int> PermMask = SVOp->getMask(); 8870 8871 unsigned PFIndexes[4]; 8872 bool isFourElementShuffle = true; 8873 for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number 8874 unsigned EltNo = 8; // Start out undef. 8875 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte. 8876 if (PermMask[i*4+j] < 0) 8877 continue; // Undef, ignore it. 8878 8879 unsigned ByteSource = PermMask[i*4+j]; 8880 if ((ByteSource & 3) != j) { 8881 isFourElementShuffle = false; 8882 break; 8883 } 8884 8885 if (EltNo == 8) { 8886 EltNo = ByteSource/4; 8887 } else if (EltNo != ByteSource/4) { 8888 isFourElementShuffle = false; 8889 break; 8890 } 8891 } 8892 PFIndexes[i] = EltNo; 8893 } 8894 8895 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the 8896 // perfect shuffle vector to determine if it is cost effective to do this as 8897 // discrete instructions, or whether we should use a vperm. 8898 // For now, we skip this for little endian until such time as we have a 8899 // little-endian perfect shuffle table. 8900 if (isFourElementShuffle && !isLittleEndian) { 8901 // Compute the index in the perfect shuffle table. 8902 unsigned PFTableIndex = 8903 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 8904 8905 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 8906 unsigned Cost = (PFEntry >> 30); 8907 8908 // Determining when to avoid vperm is tricky. Many things affect the cost 8909 // of vperm, particularly how many times the perm mask needs to be computed. 
8910 // For example, if the perm mask can be hoisted out of a loop or is already 8911 // used (perhaps because there are multiple permutes with the same shuffle 8912 // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of 8913 // the loop requires an extra register. 8914 // 8915 // As a compromise, we only emit discrete instructions if the shuffle can be 8916 // generated in 3 or fewer operations. When we have loop information 8917 // available, if this block is within a loop, we should avoid using vperm 8918 // for 3-operation perms and use a constant pool load instead. 8919 if (Cost < 3) 8920 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); 8921 } 8922 8923 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant 8924 // vector that will get spilled to the constant pool. 8925 if (V2.isUndef()) V2 = V1; 8926 8927 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except 8928 // that it is in input element units, not in bytes. Convert now. 8929 8930 // For little endian, the order of the input vectors is reversed, and 8931 // the permutation mask is complemented with respect to 31. This is 8932 // necessary to produce proper semantics with the big-endian-biased vperm 8933 // instruction. 8934 EVT EltVT = V1.getValueType().getVectorElementType(); 8935 unsigned BytesPerElement = EltVT.getSizeInBits()/8; 8936 8937 SmallVector<SDValue, 16> ResultMask; 8938 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) { 8939 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i]; 8940 8941 for (unsigned j = 0; j != BytesPerElement; ++j) 8942 if (isLittleEndian) 8943 ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j), 8944 dl, MVT::i32)); 8945 else 8946 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl, 8947 MVT::i32)); 8948 } 8949 8950 SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask); 8951 if (isLittleEndian) 8952 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), 8953 V2, V1, VPermMask); 8954 else 8955 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), 8956 V1, V2, VPermMask); 8957 } 8958 8959 /// getVectorCompareInfo - Given an intrinsic, return false if it is not a 8960 /// vector comparison. If it is, return true and fill in Opc/isDot with 8961 /// information about the intrinsic. 8962 static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, 8963 bool &isDot, const PPCSubtarget &Subtarget) { 8964 unsigned IntrinsicID = 8965 cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue(); 8966 CompareOpc = -1; 8967 isDot = false; 8968 switch (IntrinsicID) { 8969 default: 8970 return false; 8971 // Comparison predicates. 
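// The CompareOpc values below are the extended-opcode fields of the
// corresponding compare instructions (e.g. 6 for vcmpequb, 966 for vcmpbfp,
// 99 for xvcmpeqdp); the "_p" predicate intrinsics select the dot (record)
// form, which also updates CR6.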
8972 case Intrinsic::ppc_altivec_vcmpbfp_p: 8973 CompareOpc = 966; 8974 isDot = true; 8975 break; 8976 case Intrinsic::ppc_altivec_vcmpeqfp_p: 8977 CompareOpc = 198; 8978 isDot = true; 8979 break; 8980 case Intrinsic::ppc_altivec_vcmpequb_p: 8981 CompareOpc = 6; 8982 isDot = true; 8983 break; 8984 case Intrinsic::ppc_altivec_vcmpequh_p: 8985 CompareOpc = 70; 8986 isDot = true; 8987 break; 8988 case Intrinsic::ppc_altivec_vcmpequw_p: 8989 CompareOpc = 134; 8990 isDot = true; 8991 break; 8992 case Intrinsic::ppc_altivec_vcmpequd_p: 8993 if (Subtarget.hasP8Altivec()) { 8994 CompareOpc = 199; 8995 isDot = true; 8996 } else 8997 return false; 8998 break; 8999 case Intrinsic::ppc_altivec_vcmpneb_p: 9000 case Intrinsic::ppc_altivec_vcmpneh_p: 9001 case Intrinsic::ppc_altivec_vcmpnew_p: 9002 case Intrinsic::ppc_altivec_vcmpnezb_p: 9003 case Intrinsic::ppc_altivec_vcmpnezh_p: 9004 case Intrinsic::ppc_altivec_vcmpnezw_p: 9005 if (Subtarget.hasP9Altivec()) { 9006 switch (IntrinsicID) { 9007 default: 9008 llvm_unreachable("Unknown comparison intrinsic."); 9009 case Intrinsic::ppc_altivec_vcmpneb_p: 9010 CompareOpc = 7; 9011 break; 9012 case Intrinsic::ppc_altivec_vcmpneh_p: 9013 CompareOpc = 71; 9014 break; 9015 case Intrinsic::ppc_altivec_vcmpnew_p: 9016 CompareOpc = 135; 9017 break; 9018 case Intrinsic::ppc_altivec_vcmpnezb_p: 9019 CompareOpc = 263; 9020 break; 9021 case Intrinsic::ppc_altivec_vcmpnezh_p: 9022 CompareOpc = 327; 9023 break; 9024 case Intrinsic::ppc_altivec_vcmpnezw_p: 9025 CompareOpc = 391; 9026 break; 9027 } 9028 isDot = true; 9029 } else 9030 return false; 9031 break; 9032 case Intrinsic::ppc_altivec_vcmpgefp_p: 9033 CompareOpc = 454; 9034 isDot = true; 9035 break; 9036 case Intrinsic::ppc_altivec_vcmpgtfp_p: 9037 CompareOpc = 710; 9038 isDot = true; 9039 break; 9040 case Intrinsic::ppc_altivec_vcmpgtsb_p: 9041 CompareOpc = 774; 9042 isDot = true; 9043 break; 9044 case Intrinsic::ppc_altivec_vcmpgtsh_p: 9045 CompareOpc = 838; 9046 isDot = true; 9047 break; 9048 case Intrinsic::ppc_altivec_vcmpgtsw_p: 9049 CompareOpc = 902; 9050 isDot = true; 9051 break; 9052 case Intrinsic::ppc_altivec_vcmpgtsd_p: 9053 if (Subtarget.hasP8Altivec()) { 9054 CompareOpc = 967; 9055 isDot = true; 9056 } else 9057 return false; 9058 break; 9059 case Intrinsic::ppc_altivec_vcmpgtub_p: 9060 CompareOpc = 518; 9061 isDot = true; 9062 break; 9063 case Intrinsic::ppc_altivec_vcmpgtuh_p: 9064 CompareOpc = 582; 9065 isDot = true; 9066 break; 9067 case Intrinsic::ppc_altivec_vcmpgtuw_p: 9068 CompareOpc = 646; 9069 isDot = true; 9070 break; 9071 case Intrinsic::ppc_altivec_vcmpgtud_p: 9072 if (Subtarget.hasP8Altivec()) { 9073 CompareOpc = 711; 9074 isDot = true; 9075 } else 9076 return false; 9077 break; 9078 9079 // VSX predicate comparisons use the same infrastructure 9080 case Intrinsic::ppc_vsx_xvcmpeqdp_p: 9081 case Intrinsic::ppc_vsx_xvcmpgedp_p: 9082 case Intrinsic::ppc_vsx_xvcmpgtdp_p: 9083 case Intrinsic::ppc_vsx_xvcmpeqsp_p: 9084 case Intrinsic::ppc_vsx_xvcmpgesp_p: 9085 case Intrinsic::ppc_vsx_xvcmpgtsp_p: 9086 if (Subtarget.hasVSX()) { 9087 switch (IntrinsicID) { 9088 case Intrinsic::ppc_vsx_xvcmpeqdp_p: 9089 CompareOpc = 99; 9090 break; 9091 case Intrinsic::ppc_vsx_xvcmpgedp_p: 9092 CompareOpc = 115; 9093 break; 9094 case Intrinsic::ppc_vsx_xvcmpgtdp_p: 9095 CompareOpc = 107; 9096 break; 9097 case Intrinsic::ppc_vsx_xvcmpeqsp_p: 9098 CompareOpc = 67; 9099 break; 9100 case Intrinsic::ppc_vsx_xvcmpgesp_p: 9101 CompareOpc = 83; 9102 break; 9103 case Intrinsic::ppc_vsx_xvcmpgtsp_p: 9104 CompareOpc = 75; 
9105 break; 9106 } 9107 isDot = true; 9108 } else 9109 return false; 9110 break; 9111 9112 // Normal Comparisons. 9113 case Intrinsic::ppc_altivec_vcmpbfp: 9114 CompareOpc = 966; 9115 break; 9116 case Intrinsic::ppc_altivec_vcmpeqfp: 9117 CompareOpc = 198; 9118 break; 9119 case Intrinsic::ppc_altivec_vcmpequb: 9120 CompareOpc = 6; 9121 break; 9122 case Intrinsic::ppc_altivec_vcmpequh: 9123 CompareOpc = 70; 9124 break; 9125 case Intrinsic::ppc_altivec_vcmpequw: 9126 CompareOpc = 134; 9127 break; 9128 case Intrinsic::ppc_altivec_vcmpequd: 9129 if (Subtarget.hasP8Altivec()) 9130 CompareOpc = 199; 9131 else 9132 return false; 9133 break; 9134 case Intrinsic::ppc_altivec_vcmpneb: 9135 case Intrinsic::ppc_altivec_vcmpneh: 9136 case Intrinsic::ppc_altivec_vcmpnew: 9137 case Intrinsic::ppc_altivec_vcmpnezb: 9138 case Intrinsic::ppc_altivec_vcmpnezh: 9139 case Intrinsic::ppc_altivec_vcmpnezw: 9140 if (Subtarget.hasP9Altivec()) 9141 switch (IntrinsicID) { 9142 default: 9143 llvm_unreachable("Unknown comparison intrinsic."); 9144 case Intrinsic::ppc_altivec_vcmpneb: 9145 CompareOpc = 7; 9146 break; 9147 case Intrinsic::ppc_altivec_vcmpneh: 9148 CompareOpc = 71; 9149 break; 9150 case Intrinsic::ppc_altivec_vcmpnew: 9151 CompareOpc = 135; 9152 break; 9153 case Intrinsic::ppc_altivec_vcmpnezb: 9154 CompareOpc = 263; 9155 break; 9156 case Intrinsic::ppc_altivec_vcmpnezh: 9157 CompareOpc = 327; 9158 break; 9159 case Intrinsic::ppc_altivec_vcmpnezw: 9160 CompareOpc = 391; 9161 break; 9162 } 9163 else 9164 return false; 9165 break; 9166 case Intrinsic::ppc_altivec_vcmpgefp: 9167 CompareOpc = 454; 9168 break; 9169 case Intrinsic::ppc_altivec_vcmpgtfp: 9170 CompareOpc = 710; 9171 break; 9172 case Intrinsic::ppc_altivec_vcmpgtsb: 9173 CompareOpc = 774; 9174 break; 9175 case Intrinsic::ppc_altivec_vcmpgtsh: 9176 CompareOpc = 838; 9177 break; 9178 case Intrinsic::ppc_altivec_vcmpgtsw: 9179 CompareOpc = 902; 9180 break; 9181 case Intrinsic::ppc_altivec_vcmpgtsd: 9182 if (Subtarget.hasP8Altivec()) 9183 CompareOpc = 967; 9184 else 9185 return false; 9186 break; 9187 case Intrinsic::ppc_altivec_vcmpgtub: 9188 CompareOpc = 518; 9189 break; 9190 case Intrinsic::ppc_altivec_vcmpgtuh: 9191 CompareOpc = 582; 9192 break; 9193 case Intrinsic::ppc_altivec_vcmpgtuw: 9194 CompareOpc = 646; 9195 break; 9196 case Intrinsic::ppc_altivec_vcmpgtud: 9197 if (Subtarget.hasP8Altivec()) 9198 CompareOpc = 711; 9199 else 9200 return false; 9201 break; 9202 } 9203 return true; 9204 } 9205 9206 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom 9207 /// lower, do it, otherwise return null. 9208 SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, 9209 SelectionDAG &DAG) const { 9210 unsigned IntrinsicID = 9211 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 9212 9213 SDLoc dl(Op); 9214 9215 if (IntrinsicID == Intrinsic::thread_pointer) { 9216 // Reads the thread pointer register, used for __builtin_thread_pointer. 9217 if (Subtarget.isPPC64()) 9218 return DAG.getRegister(PPC::X13, MVT::i64); 9219 return DAG.getRegister(PPC::R2, MVT::i32); 9220 } 9221 9222 // If this is a lowered altivec predicate compare, CompareOpc is set to the 9223 // opcode number of the comparison. 9224 int CompareOpc; 9225 bool isDot; 9226 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget)) 9227 return SDValue(); // Don't custom lower most intrinsics. 9228 9229 // If this is a non-dot comparison, make the VCMP node and we are done. 
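// For the dot forms we instead emit a VCMPo glued to an MFOCRF of CR6 and
// extract the requested bit further below: after MFOCRF, CR6 occupies bits
// 7..4 of the GPR (LT=7, GT=6, EQ=5, SO=4), so the shift amount
// 8 - (3 - BitNo) moves EQ (BitNo 0) or LT (BitNo 2) down to bit 0.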
9230 if (!isDot) { 9231 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(), 9232 Op.getOperand(1), Op.getOperand(2), 9233 DAG.getConstant(CompareOpc, dl, MVT::i32)); 9234 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp); 9235 } 9236 9237 // Create the PPCISD altivec 'dot' comparison node. 9238 SDValue Ops[] = { 9239 Op.getOperand(2), // LHS 9240 Op.getOperand(3), // RHS 9241 DAG.getConstant(CompareOpc, dl, MVT::i32) 9242 }; 9243 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue }; 9244 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops); 9245 9246 // Now that we have the comparison, emit a copy from the CR to a GPR. 9247 // This is flagged to the above dot comparison. 9248 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32, 9249 DAG.getRegister(PPC::CR6, MVT::i32), 9250 CompNode.getValue(1)); 9251 9252 // Unpack the result based on how the target uses it. 9253 unsigned BitNo; // Bit # of CR6. 9254 bool InvertBit; // Invert result? 9255 switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) { 9256 default: // Can't happen, don't crash on invalid number though. 9257 case 0: // Return the value of the EQ bit of CR6. 9258 BitNo = 0; InvertBit = false; 9259 break; 9260 case 1: // Return the inverted value of the EQ bit of CR6. 9261 BitNo = 0; InvertBit = true; 9262 break; 9263 case 2: // Return the value of the LT bit of CR6. 9264 BitNo = 2; InvertBit = false; 9265 break; 9266 case 3: // Return the inverted value of the LT bit of CR6. 9267 BitNo = 2; InvertBit = true; 9268 break; 9269 } 9270 9271 // Shift the bit into the low position. 9272 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags, 9273 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32)); 9274 // Isolate the bit. 9275 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags, 9276 DAG.getConstant(1, dl, MVT::i32)); 9277 9278 // If we are supposed to, toggle the bit. 9279 if (InvertBit) 9280 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags, 9281 DAG.getConstant(1, dl, MVT::i32)); 9282 return Flags; 9283 } 9284 9285 SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op, 9286 SelectionDAG &DAG) const { 9287 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to 9288 // the beginning of the argument list. 9289 int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1; 9290 SDLoc DL(Op); 9291 switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) { 9292 case Intrinsic::ppc_cfence: { 9293 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument."); 9294 assert(Subtarget.isPPC64() && "Only 64-bit is supported for now."); 9295 return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other, 9296 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, 9297 Op.getOperand(ArgStart + 1)), 9298 Op.getOperand(0)), 9299 0); 9300 } 9301 default: 9302 break; 9303 } 9304 return SDValue(); 9305 } 9306 9307 SDValue PPCTargetLowering::LowerREM(SDValue Op, SelectionDAG &DAG) const { 9308 // Check for a DIV with the same operands as this REM. 9309 for (auto UI : Op.getOperand(1)->uses()) { 9310 if ((Op.getOpcode() == ISD::SREM && UI->getOpcode() == ISD::SDIV) || 9311 (Op.getOpcode() == ISD::UREM && UI->getOpcode() == ISD::UDIV)) 9312 if (UI->getOperand(0) == Op.getOperand(0) && 9313 UI->getOperand(1) == Op.getOperand(1)) 9314 return SDValue(); 9315 } 9316 return Op; 9317 } 9318 9319 // Lower scalar BSWAP64 to xxbrd. 
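// The sequence built below is, roughly: MTVSRDD to splat the i64 into both
// doublewords of a VSX register, XXBRD to byte-reverse each doubleword, and
// MFVSRD (via EXTRACT_VECTOR_ELT) to move one doubleword back to a GPR:
// element 1 on little-endian, element 0 on big-endian.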
9320 SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const { 9321 SDLoc dl(Op); 9322 // MTVSRDD 9323 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0), 9324 Op.getOperand(0)); 9325 // XXBRD 9326 Op = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Op); 9327 // MFVSRD 9328 int VectorIndex = 0; 9329 if (Subtarget.isLittleEndian()) 9330 VectorIndex = 1; 9331 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op, 9332 DAG.getTargetConstant(VectorIndex, dl, MVT::i32)); 9333 return Op; 9334 } 9335 9336 // ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be 9337 // compared to a value that is atomically loaded (atomic loads zero-extend). 9338 SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op, 9339 SelectionDAG &DAG) const { 9340 assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP && 9341 "Expecting an atomic compare-and-swap here."); 9342 SDLoc dl(Op); 9343 auto *AtomicNode = cast<AtomicSDNode>(Op.getNode()); 9344 EVT MemVT = AtomicNode->getMemoryVT(); 9345 if (MemVT.getSizeInBits() >= 32) 9346 return Op; 9347 9348 SDValue CmpOp = Op.getOperand(2); 9349 // If this is already correctly zero-extended, leave it alone. 9350 auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits()); 9351 if (DAG.MaskedValueIsZero(CmpOp, HighBits)) 9352 return Op; 9353 9354 // Clear the high bits of the compare operand. 9355 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1; 9356 SDValue NewCmpOp = 9357 DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp, 9358 DAG.getConstant(MaskVal, dl, MVT::i32)); 9359 9360 // Replace the existing compare operand with the properly zero-extended one. 9361 SmallVector<SDValue, 4> Ops; 9362 for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++) 9363 Ops.push_back(AtomicNode->getOperand(i)); 9364 Ops[2] = NewCmpOp; 9365 MachineMemOperand *MMO = AtomicNode->getMemOperand(); 9366 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other); 9367 auto NodeTy = 9368 (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16; 9369 return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO); 9370 } 9371 9372 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, 9373 SelectionDAG &DAG) const { 9374 SDLoc dl(Op); 9375 // Create a stack slot that is 16-byte aligned. 9376 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); 9377 int FrameIdx = MFI.CreateStackObject(16, 16, false); 9378 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 9379 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 9380 9381 // Store the input value into Value#0 of the stack slot. 9382 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx, 9383 MachinePointerInfo()); 9384 // Load it out. 9385 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo()); 9386 } 9387 9388 SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, 9389 SelectionDAG &DAG) const { 9390 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && 9391 "Should only be called for ISD::INSERT_VECTOR_ELT"); 9392 9393 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 9394 // We have legal lowering for constant indices but not for variable ones. 9395 if (!C) 9396 return SDValue(); 9397 9398 EVT VT = Op.getValueType(); 9399 SDLoc dl(Op); 9400 SDValue V1 = Op.getOperand(0); 9401 SDValue V2 = Op.getOperand(1); 9402 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types. 
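// As a worked example (assuming v8i16 on little-endian): inserting into
// element 2 gives InsertAtByte = 2 * 2 = 4, and the little-endian
// correction below turns that into (16 - 2) - 4 = 10, the byte offset the
// VECINSERT (VINSERTH-style) node expects.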
9403 if (VT == MVT::v8i16 || VT == MVT::v16i8) { 9404 SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2); 9405 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8; 9406 unsigned InsertAtElement = C->getZExtValue(); 9407 unsigned InsertAtByte = InsertAtElement * BytesInEachElement; 9408 if (Subtarget.isLittleEndian()) { 9409 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte; 9410 } 9411 return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz, 9412 DAG.getConstant(InsertAtByte, dl, MVT::i32)); 9413 } 9414 return Op; 9415 } 9416 9417 SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, 9418 SelectionDAG &DAG) const { 9419 SDLoc dl(Op); 9420 SDNode *N = Op.getNode(); 9421 9422 assert(N->getOperand(0).getValueType() == MVT::v4i1 && 9423 "Unknown extract_vector_elt type"); 9424 9425 SDValue Value = N->getOperand(0); 9426 9427 // The first part of this is like the store lowering except that we don't 9428 // need to track the chain. 9429 9430 // The values are now known to be -1 (false) or 1 (true). To convert this 9431 // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5). 9432 // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5 9433 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value); 9434 9435 // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to 9436 // understand how to form the extending load. 9437 SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64); 9438 9439 Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); 9440 9441 // Now convert to an integer and store. 9442 Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, 9443 DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32), 9444 Value); 9445 9446 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); 9447 int FrameIdx = MFI.CreateStackObject(16, 16, false); 9448 MachinePointerInfo PtrInfo = 9449 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); 9450 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 9451 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 9452 9453 SDValue StoreChain = DAG.getEntryNode(); 9454 SDValue Ops[] = {StoreChain, 9455 DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32), 9456 Value, FIdx}; 9457 SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other); 9458 9459 StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, 9460 dl, VTs, Ops, MVT::v4i32, PtrInfo); 9461 9462 // Extract the value requested. 9463 unsigned Offset = 4*cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 9464 SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType()); 9465 Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx); 9466 9467 SDValue IntVal = 9468 DAG.getLoad(MVT::i32, dl, StoreChain, Idx, PtrInfo.getWithOffset(Offset)); 9469 9470 if (!Subtarget.useCRBits()) 9471 return IntVal; 9472 9473 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal); 9474 } 9475 9476 /// Lowering for QPX v4i1 loads 9477 SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op, 9478 SelectionDAG &DAG) const { 9479 SDLoc dl(Op); 9480 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode()); 9481 SDValue LoadChain = LN->getChain(); 9482 SDValue BasePtr = LN->getBasePtr(); 9483 9484 if (Op.getValueType() == MVT::v4f64 || 9485 Op.getValueType() == MVT::v4f32) { 9486 EVT MemVT = LN->getMemoryVT(); 9487 unsigned Alignment = LN->getAlignment(); 9488 9489 // If this load is properly aligned, then it is legal. 
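// Otherwise it is scalarized below: four scalar (possibly extending) loads
// are emitted at Stride-byte offsets, their chains are joined with a
// TokenFactor, and the result is rebuilt with BUILD_VECTOR; for a
// pre-increment load, only the first element carries the index update.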
9490 if (Alignment >= MemVT.getStoreSize()) 9491 return Op; 9492 9493 EVT ScalarVT = Op.getValueType().getScalarType(), 9494 ScalarMemVT = MemVT.getScalarType(); 9495 unsigned Stride = ScalarMemVT.getStoreSize(); 9496 9497 SDValue Vals[4], LoadChains[4]; 9498 for (unsigned Idx = 0; Idx < 4; ++Idx) { 9499 SDValue Load; 9500 if (ScalarVT != ScalarMemVT) 9501 Load = DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain, 9502 BasePtr, 9503 LN->getPointerInfo().getWithOffset(Idx * Stride), 9504 ScalarMemVT, MinAlign(Alignment, Idx * Stride), 9505 LN->getMemOperand()->getFlags(), LN->getAAInfo()); 9506 else 9507 Load = DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr, 9508 LN->getPointerInfo().getWithOffset(Idx * Stride), 9509 MinAlign(Alignment, Idx * Stride), 9510 LN->getMemOperand()->getFlags(), LN->getAAInfo()); 9511 9512 if (Idx == 0 && LN->isIndexed()) { 9513 assert(LN->getAddressingMode() == ISD::PRE_INC && 9514 "Unknown addressing mode on vector load"); 9515 Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(), 9516 LN->getAddressingMode()); 9517 } 9518 9519 Vals[Idx] = Load; 9520 LoadChains[Idx] = Load.getValue(1); 9521 9522 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, 9523 DAG.getConstant(Stride, dl, 9524 BasePtr.getValueType())); 9525 } 9526 9527 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); 9528 SDValue Value = DAG.getBuildVector(Op.getValueType(), dl, Vals); 9529 9530 if (LN->isIndexed()) { 9531 SDValue RetOps[] = { Value, Vals[0].getValue(1), TF }; 9532 return DAG.getMergeValues(RetOps, dl); 9533 } 9534 9535 SDValue RetOps[] = { Value, TF }; 9536 return DAG.getMergeValues(RetOps, dl); 9537 } 9538 9539 assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower"); 9540 assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported"); 9541 9542 // To lower v4i1 from a byte array, we load the byte elements of the 9543 // vector and then reuse the BUILD_VECTOR logic. 9544 9545 SDValue VectElmts[4], VectElmtChains[4]; 9546 for (unsigned i = 0; i < 4; ++i) { 9547 SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType()); 9548 Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx); 9549 9550 VectElmts[i] = DAG.getExtLoad( 9551 ISD::EXTLOAD, dl, MVT::i32, LoadChain, Idx, 9552 LN->getPointerInfo().getWithOffset(i), MVT::i8, 9553 /* Alignment = */ 1, LN->getMemOperand()->getFlags(), LN->getAAInfo()); 9554 VectElmtChains[i] = VectElmts[i].getValue(1); 9555 } 9556 9557 LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains); 9558 SDValue Value = DAG.getBuildVector(MVT::v4i1, dl, VectElmts); 9559 9560 SDValue RVals[] = { Value, LoadChain }; 9561 return DAG.getMergeValues(RVals, dl); 9562 } 9563 9564 /// Lowering for QPX v4i1 stores 9565 SDValue PPCTargetLowering::LowerVectorStore(SDValue Op, 9566 SelectionDAG &DAG) const { 9567 SDLoc dl(Op); 9568 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode()); 9569 SDValue StoreChain = SN->getChain(); 9570 SDValue BasePtr = SN->getBasePtr(); 9571 SDValue Value = SN->getValue(); 9572 9573 if (Value.getValueType() == MVT::v4f64 || 9574 Value.getValueType() == MVT::v4f32) { 9575 EVT MemVT = SN->getMemoryVT(); 9576 unsigned Alignment = SN->getAlignment(); 9577 9578 // If this store is properly aligned, then it is legal. 
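// Otherwise the store is split below: each element is pulled out with
// EXTRACT_VECTOR_ELT and stored (trunc-stored if the memory type is
// narrower) at its Stride-byte offset, and the chains are merged with a
// TokenFactor.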
9579 if (Alignment >= MemVT.getStoreSize()) 9580 return Op; 9581 9582 EVT ScalarVT = Value.getValueType().getScalarType(), 9583 ScalarMemVT = MemVT.getScalarType(); 9584 unsigned Stride = ScalarMemVT.getStoreSize(); 9585 9586 SDValue Stores[4]; 9587 for (unsigned Idx = 0; Idx < 4; ++Idx) { 9588 SDValue Ex = DAG.getNode( 9589 ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value, 9590 DAG.getConstant(Idx, dl, getVectorIdxTy(DAG.getDataLayout()))); 9591 SDValue Store; 9592 if (ScalarVT != ScalarMemVT) 9593 Store = 9594 DAG.getTruncStore(StoreChain, dl, Ex, BasePtr, 9595 SN->getPointerInfo().getWithOffset(Idx * Stride), 9596 ScalarMemVT, MinAlign(Alignment, Idx * Stride), 9597 SN->getMemOperand()->getFlags(), SN->getAAInfo()); 9598 else 9599 Store = DAG.getStore(StoreChain, dl, Ex, BasePtr, 9600 SN->getPointerInfo().getWithOffset(Idx * Stride), 9601 MinAlign(Alignment, Idx * Stride), 9602 SN->getMemOperand()->getFlags(), SN->getAAInfo()); 9603 9604 if (Idx == 0 && SN->isIndexed()) { 9605 assert(SN->getAddressingMode() == ISD::PRE_INC && 9606 "Unknown addressing mode on vector store"); 9607 Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(), 9608 SN->getAddressingMode()); 9609 } 9610 9611 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, 9612 DAG.getConstant(Stride, dl, 9613 BasePtr.getValueType())); 9614 Stores[Idx] = Store; 9615 } 9616 9617 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); 9618 9619 if (SN->isIndexed()) { 9620 SDValue RetOps[] = { TF, Stores[0].getValue(1) }; 9621 return DAG.getMergeValues(RetOps, dl); 9622 } 9623 9624 return TF; 9625 } 9626 9627 assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported"); 9628 assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower"); 9629 9630 // The values are now known to be -1 (false) or 1 (true). To convert this 9631 // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5). 9632 // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5 9633 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value); 9634 9635 // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to 9636 // understand how to form the extending load. 9637 SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64); 9638 9639 Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); 9640 9641 // Now convert to an integer and store. 9642 Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, 9643 DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32), 9644 Value); 9645 9646 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); 9647 int FrameIdx = MFI.CreateStackObject(16, 16, false); 9648 MachinePointerInfo PtrInfo = 9649 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); 9650 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 9651 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 9652 9653 SDValue Ops[] = {StoreChain, 9654 DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32), 9655 Value, FIdx}; 9656 SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other); 9657 9658 StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, 9659 dl, VTs, Ops, MVT::v4i32, PtrInfo); 9660 9661 // Move data into the byte array. 
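// The qvstfiw above wrote one 32-bit word per element into the 16-byte
// stack slot; the loops below reload those four words and truncating-store
// the low byte of each one to byte i of the original store address.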
9662 SDValue Loads[4], LoadChains[4]; 9663 for (unsigned i = 0; i < 4; ++i) { 9664 unsigned Offset = 4*i; 9665 SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType()); 9666 Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx); 9667 9668 Loads[i] = DAG.getLoad(MVT::i32, dl, StoreChain, Idx, 9669 PtrInfo.getWithOffset(Offset)); 9670 LoadChains[i] = Loads[i].getValue(1); 9671 } 9672 9673 StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); 9674 9675 SDValue Stores[4]; 9676 for (unsigned i = 0; i < 4; ++i) { 9677 SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType()); 9678 Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx); 9679 9680 Stores[i] = DAG.getTruncStore( 9681 StoreChain, dl, Loads[i], Idx, SN->getPointerInfo().getWithOffset(i), 9682 MVT::i8, /* Alignment = */ 1, SN->getMemOperand()->getFlags(), 9683 SN->getAAInfo()); 9684 } 9685 9686 StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); 9687 9688 return StoreChain; 9689 } 9690 9691 SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { 9692 SDLoc dl(Op); 9693 if (Op.getValueType() == MVT::v4i32) { 9694 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 9695 9696 SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl); 9697 SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt. 9698 9699 SDValue RHSSwap = // = vrlw RHS, 16 9700 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl); 9701 9702 // Shrinkify inputs to v8i16. 9703 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS); 9704 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS); 9705 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap); 9706 9707 // Low parts multiplied together, generating 32-bit results (we ignore the 9708 // top parts). 9709 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh, 9710 LHS, RHS, DAG, dl, MVT::v4i32); 9711 9712 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm, 9713 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32); 9714 // Shift the high parts up 16 bits. 9715 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, 9716 Neg16, DAG, dl); 9717 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd); 9718 } else if (Op.getValueType() == MVT::v8i16) { 9719 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 9720 9721 SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl); 9722 9723 return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm, 9724 LHS, RHS, Zero, DAG, dl); 9725 } else if (Op.getValueType() == MVT::v16i8) { 9726 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 9727 bool isLittleEndian = Subtarget.isLittleEndian(); 9728 9729 // Multiply the even 8-bit parts, producing 16-bit sums. 9730 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub, 9731 LHS, RHS, DAG, dl, MVT::v8i16); 9732 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts); 9733 9734 // Multiply the odd 8-bit parts, producing 16-bit sums. 9735 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub, 9736 LHS, RHS, DAG, dl, MVT::v8i16); 9737 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts); 9738 9739 // Merge the results together. Because vmuleub and vmuloub are 9740 // instructions with a big-endian bias, we must reverse the 9741 // element numbering and reverse the meaning of "odd" and "even" 9742 // when generating little endian code. 
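// Concretely, the mask built below is <1,17,3,19,...,15,31> on big-endian
// (the low byte of each even-product halfword interleaved with the low byte
// of the matching odd product), and <0,16,2,18,...,14,30> on little-endian,
// where the two shuffle inputs are also swapped.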
9743 int Ops[16];
9744 for (unsigned i = 0; i != 8; ++i) {
9745 if (isLittleEndian) {
9746 Ops[i*2 ] = 2*i;
9747 Ops[i*2+1] = 2*i+16;
9748 } else {
9749 Ops[i*2 ] = 2*i+1;
9750 Ops[i*2+1] = 2*i+1+16;
9751 }
9752 }
9753 if (isLittleEndian)
9754 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
9755 else
9756 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
9757 } else {
9758 llvm_unreachable("Unknown mul to lower!");
9759 }
9760 }
9761
9762 SDValue PPCTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
9763
9764 assert(Op.getOpcode() == ISD::ABS && "Should only be called for ISD::ABS");
9765
9766 EVT VT = Op.getValueType();
9767 assert(VT.isVector() &&
9768 "Only set vector abs as custom, scalar abs shouldn't reach here!");
9769 assert((VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
9770 VT == MVT::v16i8) &&
9771 "Unexpected vector element type!");
9772 assert((VT != MVT::v2i64 || Subtarget.hasP8Altivec()) &&
9773 "Current subtarget doesn't support smax v2i64!");
9774
9775 // Vector abs can be lowered to:
9776 // abs x
9777 // ==>
9778 // y = -x
9779 // smax(x, y)
9780
9781 SDLoc dl(Op);
9782 SDValue X = Op.getOperand(0);
9783 SDValue Zero = DAG.getConstant(0, dl, VT);
9784 SDValue Y = DAG.getNode(ISD::SUB, dl, VT, Zero, X);
9785
9786 // The SMAX patch (https://reviews.llvm.org/D47332) hasn't landed yet,
9787 // so use the intrinsics for now.
9788 // TODO: Use ISD::SMAX directly once the SMAX patch lands.
9789 Intrinsic::ID BifID = Intrinsic::ppc_altivec_vmaxsw;
9790 if (VT == MVT::v2i64)
9791 BifID = Intrinsic::ppc_altivec_vmaxsd;
9792 else if (VT == MVT::v8i16)
9793 BifID = Intrinsic::ppc_altivec_vmaxsh;
9794 else if (VT == MVT::v16i8)
9795 BifID = Intrinsic::ppc_altivec_vmaxsb;
9796
9797 return BuildIntrinsicOp(BifID, X, Y, DAG, dl, VT);
9798 }
9799
9800 // Custom lowering for fpext v2f32 to v2f64
9801 SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
9802
9803 assert(Op.getOpcode() == ISD::FP_EXTEND &&
9804 "Should only be called for ISD::FP_EXTEND");
9805
9806 // We only want to custom lower an extend from v2f32 to v2f64.
9807 if (Op.getValueType() != MVT::v2f64 ||
9808 Op.getOperand(0).getValueType() != MVT::v2f32)
9809 return SDValue();
9810
9811 SDLoc dl(Op);
9812 SDValue Op0 = Op.getOperand(0);
9813
9814 switch (Op0.getOpcode()) {
9815 default:
9816 return SDValue();
9817 case ISD::FADD:
9818 case ISD::FMUL:
9819 case ISD::FSUB: {
9820 SDValue NewLoad[2];
9821 for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
9822 // Ensure both inputs are loads.
9823 SDValue LdOp = Op0.getOperand(i);
9824 if (LdOp.getOpcode() != ISD::LOAD)
9825 return SDValue();
9826 // Generate new load node.
9827 LoadSDNode *LD = cast<LoadSDNode>(LdOp);
9828 SDValue LoadOps[] = { LD->getChain(), LD->getBasePtr() };
9829 NewLoad[i] =
9830 DAG.getMemIntrinsicNode(PPCISD::LD_VSX_LH, dl,
9831 DAG.getVTList(MVT::v4f32, MVT::Other),
9832 LoadOps, LD->getMemoryVT(),
9833 LD->getMemOperand());
9834 }
9835 SDValue NewOp = DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32,
9836 NewLoad[0], NewLoad[1],
9837 Op0.getNode()->getFlags());
9838 return DAG.getNode(PPCISD::FP_EXTEND_LH, dl, MVT::v2f64, NewOp);
9839 }
9840 case ISD::LOAD: {
9841 LoadSDNode *LD = cast<LoadSDNode>(Op0);
9842 SDValue LoadOps[] = { LD->getChain(), LD->getBasePtr() };
9843 SDValue NewLd =
9844 DAG.getMemIntrinsicNode(PPCISD::LD_VSX_LH, dl,
9845 DAG.getVTList(MVT::v4f32, MVT::Other),
9846 LoadOps, LD->getMemoryVT(), LD->getMemOperand());
9847 return DAG.getNode(PPCISD::FP_EXTEND_LH, dl, MVT::v2f64, NewLd);
9848 }
9849 }
9850 llvm_unreachable("ERROR: Should return for all cases within switch.");
9851 }
9852
9853 /// LowerOperation - Provide custom lowering hooks for some operations.
9854 ///
9855 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
9856 switch (Op.getOpcode()) {
9857 default: llvm_unreachable("Wasn't expecting to be able to lower this!");
9858 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
9859 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
9860 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
9861 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
9862 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
9863 case ISD::SETCC: return LowerSETCC(Op, DAG);
9864 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
9865 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
9866
9867 // Variable argument lowering.
9868 case ISD::VASTART: return LowerVASTART(Op, DAG);
9869 case ISD::VAARG: return LowerVAARG(Op, DAG);
9870 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
9871
9872 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
9873 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
9874 case ISD::GET_DYNAMIC_AREA_OFFSET:
9875 return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
9876
9877 // Exception handling lowering.
9878 case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
9879 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
9880 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
9881
9882 case ISD::LOAD: return LowerLOAD(Op, DAG);
9883 case ISD::STORE: return LowerSTORE(Op, DAG);
9884 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
9885 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
9886 case ISD::FP_TO_UINT:
9887 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
9888 case ISD::UINT_TO_FP:
9889 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
9890 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
9891
9892 // Lower 64-bit shifts.
9893 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
9894 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
9895 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
9896
9897 // Vector-related lowering.
9898 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 9899 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 9900 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 9901 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); 9902 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 9903 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); 9904 case ISD::MUL: return LowerMUL(Op, DAG); 9905 case ISD::ABS: return LowerABS(Op, DAG); 9906 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); 9907 9908 // For counter-based loop handling. 9909 case ISD::INTRINSIC_W_CHAIN: return SDValue(); 9910 9911 case ISD::BITCAST: return LowerBITCAST(Op, DAG); 9912 9913 // Frame & Return address. 9914 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 9915 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 9916 9917 case ISD::INTRINSIC_VOID: 9918 return LowerINTRINSIC_VOID(Op, DAG); 9919 case ISD::SREM: 9920 case ISD::UREM: 9921 return LowerREM(Op, DAG); 9922 case ISD::BSWAP: 9923 return LowerBSWAP(Op, DAG); 9924 case ISD::ATOMIC_CMP_SWAP: 9925 return LowerATOMIC_CMP_SWAP(Op, DAG); 9926 } 9927 } 9928 9929 void PPCTargetLowering::ReplaceNodeResults(SDNode *N, 9930 SmallVectorImpl<SDValue>&Results, 9931 SelectionDAG &DAG) const { 9932 SDLoc dl(N); 9933 switch (N->getOpcode()) { 9934 default: 9935 llvm_unreachable("Do not know how to custom type legalize this operation!"); 9936 case ISD::READCYCLECOUNTER: { 9937 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); 9938 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0)); 9939 9940 Results.push_back(RTB); 9941 Results.push_back(RTB.getValue(1)); 9942 Results.push_back(RTB.getValue(2)); 9943 break; 9944 } 9945 case ISD::INTRINSIC_W_CHAIN: { 9946 if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 9947 Intrinsic::loop_decrement) 9948 break; 9949 9950 assert(N->getValueType(0) == MVT::i1 && 9951 "Unexpected result type for CTR decrement intrinsic"); 9952 EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), 9953 N->getValueType(0)); 9954 SDVTList VTs = DAG.getVTList(SVT, MVT::Other); 9955 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0), 9956 N->getOperand(1)); 9957 9958 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt)); 9959 Results.push_back(NewInt.getValue(1)); 9960 break; 9961 } 9962 case ISD::VAARG: { 9963 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64()) 9964 return; 9965 9966 EVT VT = N->getValueType(0); 9967 9968 if (VT == MVT::i64) { 9969 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG); 9970 9971 Results.push_back(NewNode); 9972 Results.push_back(NewNode.getValue(1)); 9973 } 9974 return; 9975 } 9976 case ISD::FP_TO_SINT: 9977 case ISD::FP_TO_UINT: 9978 // LowerFP_TO_INT() can only handle f32 and f64. 9979 if (N->getOperand(0).getValueType() == MVT::ppcf128) 9980 return; 9981 Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl)); 9982 return; 9983 case ISD::TRUNCATE: { 9984 EVT TrgVT = N->getValueType(0); 9985 if (TrgVT.isVector() && 9986 isOperationCustom(N->getOpcode(), TrgVT) && 9987 N->getOperand(0).getValueType().getSizeInBits() <= 128) 9988 Results.push_back(LowerTRUNCATEVector(SDValue(N, 0), DAG)); 9989 return; 9990 } 9991 case ISD::BITCAST: 9992 // Don't handle bitcast here. 
9993 return; 9994 } 9995 } 9996 9997 //===----------------------------------------------------------------------===// 9998 // Other Lowering Code 9999 //===----------------------------------------------------------------------===// 10000 10001 static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) { 10002 Module *M = Builder.GetInsertBlock()->getParent()->getParent(); 10003 Function *Func = Intrinsic::getDeclaration(M, Id); 10004 return Builder.CreateCall(Func, {}); 10005 } 10006 10007 // The mappings for emitLeading/TrailingFence is taken from 10008 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html 10009 Instruction *PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder, 10010 Instruction *Inst, 10011 AtomicOrdering Ord) const { 10012 if (Ord == AtomicOrdering::SequentiallyConsistent) 10013 return callIntrinsic(Builder, Intrinsic::ppc_sync); 10014 if (isReleaseOrStronger(Ord)) 10015 return callIntrinsic(Builder, Intrinsic::ppc_lwsync); 10016 return nullptr; 10017 } 10018 10019 Instruction *PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder, 10020 Instruction *Inst, 10021 AtomicOrdering Ord) const { 10022 if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) { 10023 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and 10024 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html 10025 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification. 10026 if (isa<LoadInst>(Inst) && Subtarget.isPPC64()) 10027 return Builder.CreateCall( 10028 Intrinsic::getDeclaration( 10029 Builder.GetInsertBlock()->getParent()->getParent(), 10030 Intrinsic::ppc_cfence, {Inst->getType()}), 10031 {Inst}); 10032 // FIXME: Can use isync for rmw operation. 10033 return callIntrinsic(Builder, Intrinsic::ppc_lwsync); 10034 } 10035 return nullptr; 10036 } 10037 10038 MachineBasicBlock * 10039 PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB, 10040 unsigned AtomicSize, 10041 unsigned BinOpcode, 10042 unsigned CmpOpcode, 10043 unsigned CmpPred) const { 10044 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 10045 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 10046 10047 auto LoadMnemonic = PPC::LDARX; 10048 auto StoreMnemonic = PPC::STDCX; 10049 switch (AtomicSize) { 10050 default: 10051 llvm_unreachable("Unexpected size of atomic entity"); 10052 case 1: 10053 LoadMnemonic = PPC::LBARX; 10054 StoreMnemonic = PPC::STBCX; 10055 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4"); 10056 break; 10057 case 2: 10058 LoadMnemonic = PPC::LHARX; 10059 StoreMnemonic = PPC::STHCX; 10060 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4"); 10061 break; 10062 case 4: 10063 LoadMnemonic = PPC::LWARX; 10064 StoreMnemonic = PPC::STWCX; 10065 break; 10066 case 8: 10067 LoadMnemonic = PPC::LDARX; 10068 StoreMnemonic = PPC::STDCX; 10069 break; 10070 } 10071 10072 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 10073 MachineFunction *F = BB->getParent(); 10074 MachineFunction::iterator It = ++BB->getIterator(); 10075 10076 unsigned dest = MI.getOperand(0).getReg(); 10077 unsigned ptrA = MI.getOperand(1).getReg(); 10078 unsigned ptrB = MI.getOperand(2).getReg(); 10079 unsigned incr = MI.getOperand(3).getReg(); 10080 DebugLoc dl = MI.getDebugLoc(); 10081 10082 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); 10083 MachineBasicBlock *loop2MBB = 10084 CmpOpcode ? 
F->CreateMachineBasicBlock(LLVM_BB) : nullptr; 10085 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 10086 F->insert(It, loopMBB); 10087 if (CmpOpcode) 10088 F->insert(It, loop2MBB); 10089 F->insert(It, exitMBB); 10090 exitMBB->splice(exitMBB->begin(), BB, 10091 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 10092 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 10093 10094 MachineRegisterInfo &RegInfo = F->getRegInfo(); 10095 unsigned TmpReg = (!BinOpcode) ? incr : 10096 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass 10097 : &PPC::GPRCRegClass); 10098 10099 // thisMBB: 10100 // ... 10101 // fallthrough --> loopMBB 10102 BB->addSuccessor(loopMBB); 10103 10104 // loopMBB: 10105 // l[wd]arx dest, ptr 10106 // add r0, dest, incr 10107 // st[wd]cx. r0, ptr 10108 // bne- loopMBB 10109 // fallthrough --> exitMBB 10110 10111 // For max/min... 10112 // loopMBB: 10113 // l[wd]arx dest, ptr 10114 // cmpl?[wd] incr, dest 10115 // bgt exitMBB 10116 // loop2MBB: 10117 // st[wd]cx. dest, ptr 10118 // bne- loopMBB 10119 // fallthrough --> exitMBB 10120 10121 BB = loopMBB; 10122 BuildMI(BB, dl, TII->get(LoadMnemonic), dest) 10123 .addReg(ptrA).addReg(ptrB); 10124 if (BinOpcode) 10125 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest); 10126 if (CmpOpcode) { 10127 // Signed comparisons of byte or halfword values must be sign-extended. 10128 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) { 10129 unsigned ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); 10130 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH), 10131 ExtReg).addReg(dest); 10132 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0) 10133 .addReg(incr).addReg(ExtReg); 10134 } else 10135 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0) 10136 .addReg(incr).addReg(dest); 10137 10138 BuildMI(BB, dl, TII->get(PPC::BCC)) 10139 .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB); 10140 BB->addSuccessor(loop2MBB); 10141 BB->addSuccessor(exitMBB); 10142 BB = loop2MBB; 10143 } 10144 BuildMI(BB, dl, TII->get(StoreMnemonic)) 10145 .addReg(TmpReg).addReg(ptrA).addReg(ptrB); 10146 BuildMI(BB, dl, TII->get(PPC::BCC)) 10147 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); 10148 BB->addSuccessor(loopMBB); 10149 BB->addSuccessor(exitMBB); 10150 10151 // exitMBB: 10152 // ... 10153 BB = exitMBB; 10154 return BB; 10155 } 10156 10157 MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary( 10158 MachineInstr &MI, MachineBasicBlock *BB, 10159 bool is8bit, // operation 10160 unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const { 10161 // If we support part-word atomic mnemonics, just use them 10162 if (Subtarget.hasPartwordAtomics()) 10163 return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode, 10164 CmpPred); 10165 10166 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 10167 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 10168 // In 64 bit mode we have to use 64 bits for addresses, even though the 10169 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address 10170 // registers without caring whether they're 32 or 64, but here we're 10171 // doing actual arithmetic on the addresses. 10172 bool is64bit = Subtarget.isPPC64(); 10173 bool isLittleEndian = Subtarget.isLittleEndian(); 10174 unsigned ZeroReg = is64bit ? 
PPC::ZERO8 : PPC::ZERO; 10175 10176 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 10177 MachineFunction *F = BB->getParent(); 10178 MachineFunction::iterator It = ++BB->getIterator(); 10179 10180 unsigned dest = MI.getOperand(0).getReg(); 10181 unsigned ptrA = MI.getOperand(1).getReg(); 10182 unsigned ptrB = MI.getOperand(2).getReg(); 10183 unsigned incr = MI.getOperand(3).getReg(); 10184 DebugLoc dl = MI.getDebugLoc(); 10185 10186 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); 10187 MachineBasicBlock *loop2MBB = 10188 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr; 10189 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 10190 F->insert(It, loopMBB); 10191 if (CmpOpcode) 10192 F->insert(It, loop2MBB); 10193 F->insert(It, exitMBB); 10194 exitMBB->splice(exitMBB->begin(), BB, 10195 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 10196 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 10197 10198 MachineRegisterInfo &RegInfo = F->getRegInfo(); 10199 const TargetRegisterClass *RC = 10200 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; 10201 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; 10202 10203 unsigned PtrReg = RegInfo.createVirtualRegister(RC); 10204 unsigned Shift1Reg = RegInfo.createVirtualRegister(GPRC); 10205 unsigned ShiftReg = 10206 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC); 10207 unsigned Incr2Reg = RegInfo.createVirtualRegister(GPRC); 10208 unsigned MaskReg = RegInfo.createVirtualRegister(GPRC); 10209 unsigned Mask2Reg = RegInfo.createVirtualRegister(GPRC); 10210 unsigned Mask3Reg = RegInfo.createVirtualRegister(GPRC); 10211 unsigned Tmp2Reg = RegInfo.createVirtualRegister(GPRC); 10212 unsigned Tmp3Reg = RegInfo.createVirtualRegister(GPRC); 10213 unsigned Tmp4Reg = RegInfo.createVirtualRegister(GPRC); 10214 unsigned TmpDestReg = RegInfo.createVirtualRegister(GPRC); 10215 unsigned Ptr1Reg; 10216 unsigned TmpReg = 10217 (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC); 10218 10219 // thisMBB: 10220 // ... 10221 // fallthrough --> loopMBB 10222 BB->addSuccessor(loopMBB); 10223 10224 // The 4-byte load must be aligned, while a char or short may be 10225 // anywhere in the word. Hence all this nasty bookkeeping code. 10226 // add ptr1, ptrA, ptrB [copy if ptrA==0] 10227 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] 10228 // xori shift, shift1, 24 [16] 10229 // rlwinm ptr, ptr1, 0, 0, 29 10230 // slw incr2, incr, shift 10231 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] 10232 // slw mask, mask2, shift 10233 // loopMBB: 10234 // lwarx tmpDest, ptr 10235 // add tmp, tmpDest, incr2 10236 // andc tmp2, tmpDest, mask 10237 // and tmp3, tmp, mask 10238 // or tmp4, tmp3, tmp2 10239 // stwcx. tmp4, ptr 10240 // bne- loopMBB 10241 // fallthrough --> exitMBB 10242 // srw dest, tmpDest, shift 10243 if (ptrA != ZeroReg) { 10244 Ptr1Reg = RegInfo.createVirtualRegister(RC); 10245 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) 10246 .addReg(ptrA) 10247 .addReg(ptrB); 10248 } else { 10249 Ptr1Reg = ptrB; 10250 } 10251 // We need use 32-bit subregister to avoid mismatch register class in 64-bit 10252 // mode. 10253 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg) 10254 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0) 10255 .addImm(3) 10256 .addImm(27) 10257 .addImm(is8bit ? 28 : 27); 10258 if (!isLittleEndian) 10259 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg) 10260 .addReg(Shift1Reg) 10261 .addImm(is8bit ? 
24 : 16); 10262 if (is64bit) 10263 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) 10264 .addReg(Ptr1Reg) 10265 .addImm(0) 10266 .addImm(61); 10267 else 10268 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) 10269 .addReg(Ptr1Reg) 10270 .addImm(0) 10271 .addImm(0) 10272 .addImm(29); 10273 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg); 10274 if (is8bit) 10275 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); 10276 else { 10277 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); 10278 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg) 10279 .addReg(Mask3Reg) 10280 .addImm(65535); 10281 } 10282 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) 10283 .addReg(Mask2Reg) 10284 .addReg(ShiftReg); 10285 10286 BB = loopMBB; 10287 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) 10288 .addReg(ZeroReg) 10289 .addReg(PtrReg); 10290 if (BinOpcode) 10291 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg) 10292 .addReg(Incr2Reg) 10293 .addReg(TmpDestReg); 10294 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg) 10295 .addReg(TmpDestReg) 10296 .addReg(MaskReg); 10297 BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg); 10298 if (CmpOpcode) { 10299 // For unsigned comparisons, we can directly compare the shifted values. 10300 // For signed comparisons we shift and sign extend. 10301 unsigned SReg = RegInfo.createVirtualRegister(GPRC); 10302 BuildMI(BB, dl, TII->get(PPC::AND), SReg) 10303 .addReg(TmpDestReg) 10304 .addReg(MaskReg); 10305 unsigned ValueReg = SReg; 10306 unsigned CmpReg = Incr2Reg; 10307 if (CmpOpcode == PPC::CMPW) { 10308 ValueReg = RegInfo.createVirtualRegister(GPRC); 10309 BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg) 10310 .addReg(SReg) 10311 .addReg(ShiftReg); 10312 unsigned ValueSReg = RegInfo.createVirtualRegister(GPRC); 10313 BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg) 10314 .addReg(ValueReg); 10315 ValueReg = ValueSReg; 10316 CmpReg = incr; 10317 } 10318 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0) 10319 .addReg(CmpReg) 10320 .addReg(ValueReg); 10321 BuildMI(BB, dl, TII->get(PPC::BCC)) 10322 .addImm(CmpPred) 10323 .addReg(PPC::CR0) 10324 .addMBB(exitMBB); 10325 BB->addSuccessor(loop2MBB); 10326 BB->addSuccessor(exitMBB); 10327 BB = loop2MBB; 10328 } 10329 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg); 10330 BuildMI(BB, dl, TII->get(PPC::STWCX)) 10331 .addReg(Tmp4Reg) 10332 .addReg(ZeroReg) 10333 .addReg(PtrReg); 10334 BuildMI(BB, dl, TII->get(PPC::BCC)) 10335 .addImm(PPC::PRED_NE) 10336 .addReg(PPC::CR0) 10337 .addMBB(loopMBB); 10338 BB->addSuccessor(loopMBB); 10339 BB->addSuccessor(exitMBB); 10340 10341 // exitMBB: 10342 // ... 
10343 BB = exitMBB; 10344 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest) 10345 .addReg(TmpDestReg) 10346 .addReg(ShiftReg); 10347 return BB; 10348 } 10349 10350 llvm::MachineBasicBlock * 10351 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, 10352 MachineBasicBlock *MBB) const { 10353 DebugLoc DL = MI.getDebugLoc(); 10354 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 10355 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo(); 10356 10357 MachineFunction *MF = MBB->getParent(); 10358 MachineRegisterInfo &MRI = MF->getRegInfo(); 10359 10360 const BasicBlock *BB = MBB->getBasicBlock(); 10361 MachineFunction::iterator I = ++MBB->getIterator(); 10362 10363 unsigned DstReg = MI.getOperand(0).getReg(); 10364 const TargetRegisterClass *RC = MRI.getRegClass(DstReg); 10365 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!"); 10366 unsigned mainDstReg = MRI.createVirtualRegister(RC); 10367 unsigned restoreDstReg = MRI.createVirtualRegister(RC); 10368 10369 MVT PVT = getPointerTy(MF->getDataLayout()); 10370 assert((PVT == MVT::i64 || PVT == MVT::i32) && 10371 "Invalid Pointer Size!"); 10372 // For v = setjmp(buf), we generate 10373 // 10374 // thisMBB: 10375 // SjLjSetup mainMBB 10376 // bl mainMBB 10377 // v_restore = 1 10378 // b sinkMBB 10379 // 10380 // mainMBB: 10381 // buf[LabelOffset] = LR 10382 // v_main = 0 10383 // 10384 // sinkMBB: 10385 // v = phi(main, restore) 10386 // 10387 10388 MachineBasicBlock *thisMBB = MBB; 10389 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); 10390 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); 10391 MF->insert(I, mainMBB); 10392 MF->insert(I, sinkMBB); 10393 10394 MachineInstrBuilder MIB; 10395 10396 // Transfer the remainder of BB and its successor edges to sinkMBB. 10397 sinkMBB->splice(sinkMBB->begin(), MBB, 10398 std::next(MachineBasicBlock::iterator(MI)), MBB->end()); 10399 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); 10400 10401 // Note that the structure of the jmp_buf used here is not compatible 10402 // with that used by libc, and is not designed to be. Specifically, it 10403 // stores only those 'reserved' registers that LLVM does not otherwise 10404 // understand how to spill. Also, by convention, by the time this 10405 // intrinsic is called, Clang has already stored the frame address in the 10406 // first slot of the buffer and stack address in the third. Following the 10407 // X86 target code, we'll store the jump address in the second slot. We also 10408 // need to save the TOC pointer (R2) to handle jumps between shared 10409 // libraries, and that will be stored in the fourth slot. The thread 10410 // identifier (R13) is not affected. 10411 10412 // thisMBB: 10413 const int64_t LabelOffset = 1 * PVT.getStoreSize(); 10414 const int64_t TOCOffset = 3 * PVT.getStoreSize(); 10415 const int64_t BPOffset = 4 * PVT.getStoreSize(); 10416 10417 // Prepare IP either in reg. 10418 const TargetRegisterClass *PtrRC = getRegClassFor(PVT); 10419 unsigned LabelReg = MRI.createVirtualRegister(PtrRC); 10420 unsigned BufReg = MI.getOperand(1).getReg(); 10421 10422 if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) { 10423 setUsesTOCBasePtr(*MBB->getParent()); 10424 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD)) 10425 .addReg(PPC::X2) 10426 .addImm(TOCOffset) 10427 .addReg(BufReg) 10428 .cloneMemRefs(MI); 10429 } 10430 10431 // Naked functions never have a base pointer, and so we use r1. For all 10432 // other functions, this decision must be delayed until during PEI. 
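// Recap of the buffer layout used by this lowering (each slot is
// PVT.getStoreSize() bytes): slot 0 holds the frame address and slot 2 the
// stack address (both stored by the front end), slot 1 (LabelOffset) receives
// the jump address stored in mainMBB below, slot 3 (TOCOffset) receives r2,
// and slot 4 (BPOffset) receives the base pointer chosen next.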
10433 unsigned BaseReg; 10434 if (MF->getFunction().hasFnAttribute(Attribute::Naked)) 10435 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1; 10436 else 10437 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP; 10438 10439 MIB = BuildMI(*thisMBB, MI, DL, 10440 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW)) 10441 .addReg(BaseReg) 10442 .addImm(BPOffset) 10443 .addReg(BufReg) 10444 .cloneMemRefs(MI); 10445 10446 // Setup 10447 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB); 10448 MIB.addRegMask(TRI->getNoPreservedMask()); 10449 10450 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1); 10451 10452 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup)) 10453 .addMBB(mainMBB); 10454 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB); 10455 10456 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero()); 10457 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne()); 10458 10459 // mainMBB: 10460 // mainDstReg = 0 10461 MIB = 10462 BuildMI(mainMBB, DL, 10463 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg); 10464 10465 // Store IP 10466 if (Subtarget.isPPC64()) { 10467 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD)) 10468 .addReg(LabelReg) 10469 .addImm(LabelOffset) 10470 .addReg(BufReg); 10471 } else { 10472 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW)) 10473 .addReg(LabelReg) 10474 .addImm(LabelOffset) 10475 .addReg(BufReg); 10476 } 10477 MIB.cloneMemRefs(MI); 10478 10479 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0); 10480 mainMBB->addSuccessor(sinkMBB); 10481 10482 // sinkMBB: 10483 BuildMI(*sinkMBB, sinkMBB->begin(), DL, 10484 TII->get(PPC::PHI), DstReg) 10485 .addReg(mainDstReg).addMBB(mainMBB) 10486 .addReg(restoreDstReg).addMBB(thisMBB); 10487 10488 MI.eraseFromParent(); 10489 return sinkMBB; 10490 } 10491 10492 MachineBasicBlock * 10493 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI, 10494 MachineBasicBlock *MBB) const { 10495 DebugLoc DL = MI.getDebugLoc(); 10496 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 10497 10498 MachineFunction *MF = MBB->getParent(); 10499 MachineRegisterInfo &MRI = MF->getRegInfo(); 10500 10501 MVT PVT = getPointerTy(MF->getDataLayout()); 10502 assert((PVT == MVT::i64 || PVT == MVT::i32) && 10503 "Invalid Pointer Size!"); 10504 10505 const TargetRegisterClass *RC = 10506 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; 10507 unsigned Tmp = MRI.createVirtualRegister(RC); 10508 // Since FP is only updated here but NOT referenced, it's treated as GPR. 10509 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31; 10510 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1; 10511 unsigned BP = 10512 (PVT == MVT::i64) 10513 ? PPC::X30 10514 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29 10515 : PPC::R30); 10516 10517 MachineInstrBuilder MIB; 10518 10519 const int64_t LabelOffset = 1 * PVT.getStoreSize(); 10520 const int64_t SPOffset = 2 * PVT.getStoreSize(); 10521 const int64_t TOCOffset = 3 * PVT.getStoreSize(); 10522 const int64_t BPOffset = 4 * PVT.getStoreSize(); 10523 10524 unsigned BufReg = MI.getOperand(0).getReg(); 10525 10526 // Reload FP (the jumped-to function may not have had a 10527 // frame pointer, and if so, then its r31 will be restored 10528 // as necessary). 
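// The reloads below mirror the layout written by emitEHSjLjSetJmp: FP from
// slot 0, the jump address from slot 1 (LabelOffset), SP from slot 2
// (SPOffset), BP from slot 4 (BPOffset) and, for 64-bit SVR4, the TOC from
// slot 3 (TOCOffset), followed by an indirect branch to the reloaded jump
// address via mtctr/bctr.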
10529 if (PVT == MVT::i64) { 10530 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP) 10531 .addImm(0) 10532 .addReg(BufReg); 10533 } else { 10534 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP) 10535 .addImm(0) 10536 .addReg(BufReg); 10537 } 10538 MIB.cloneMemRefs(MI); 10539 10540 // Reload IP 10541 if (PVT == MVT::i64) { 10542 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp) 10543 .addImm(LabelOffset) 10544 .addReg(BufReg); 10545 } else { 10546 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp) 10547 .addImm(LabelOffset) 10548 .addReg(BufReg); 10549 } 10550 MIB.cloneMemRefs(MI); 10551 10552 // Reload SP 10553 if (PVT == MVT::i64) { 10554 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP) 10555 .addImm(SPOffset) 10556 .addReg(BufReg); 10557 } else { 10558 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP) 10559 .addImm(SPOffset) 10560 .addReg(BufReg); 10561 } 10562 MIB.cloneMemRefs(MI); 10563 10564 // Reload BP 10565 if (PVT == MVT::i64) { 10566 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP) 10567 .addImm(BPOffset) 10568 .addReg(BufReg); 10569 } else { 10570 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP) 10571 .addImm(BPOffset) 10572 .addReg(BufReg); 10573 } 10574 MIB.cloneMemRefs(MI); 10575 10576 // Reload TOC 10577 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) { 10578 setUsesTOCBasePtr(*MBB->getParent()); 10579 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2) 10580 .addImm(TOCOffset) 10581 .addReg(BufReg) 10582 .cloneMemRefs(MI); 10583 } 10584 10585 // Jump 10586 BuildMI(*MBB, MI, DL, 10587 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp); 10588 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR)); 10589 10590 MI.eraseFromParent(); 10591 return MBB; 10592 } 10593 10594 MachineBasicBlock * 10595 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, 10596 MachineBasicBlock *BB) const { 10597 if (MI.getOpcode() == TargetOpcode::STACKMAP || 10598 MI.getOpcode() == TargetOpcode::PATCHPOINT) { 10599 if (Subtarget.isPPC64() && Subtarget.isSVR4ABI() && 10600 MI.getOpcode() == TargetOpcode::PATCHPOINT) { 10601 // Call lowering should have added an r2 operand to indicate a dependence 10602 // on the TOC base pointer value. It can't however, because there is no 10603 // way to mark the dependence as implicit there, and so the stackmap code 10604 // will confuse it with a regular operand. Instead, add the dependence 10605 // here. 10606 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true)); 10607 } 10608 10609 return emitPatchPoint(MI, BB); 10610 } 10611 10612 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 || 10613 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) { 10614 return emitEHSjLjSetJmp(MI, BB); 10615 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 || 10616 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) { 10617 return emitEHSjLjLongJmp(MI, BB); 10618 } 10619 10620 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 10621 10622 // To "insert" these instructions we actually have to insert their 10623 // control-flow patterns. 
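// Each pseudo handled below follows the same expansion pattern: create new
// MachineBasicBlocks after the current one, splice everything after MI into
// the exit/sink block, let transferSuccessorsAndUpdatePHIs repair the CFG and
// any PHIs, and then emit the real instructions into the new blocks.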
10624 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 10625 MachineFunction::iterator It = ++BB->getIterator(); 10626 10627 MachineFunction *F = BB->getParent(); 10628 10629 if (MI.getOpcode() == PPC::SELECT_CC_I4 || 10630 MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 || 10631 MI.getOpcode() == PPC::SELECT_I8) { 10632 SmallVector<MachineOperand, 2> Cond; 10633 if (MI.getOpcode() == PPC::SELECT_CC_I4 || 10634 MI.getOpcode() == PPC::SELECT_CC_I8) 10635 Cond.push_back(MI.getOperand(4)); 10636 else 10637 Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET)); 10638 Cond.push_back(MI.getOperand(1)); 10639 10640 DebugLoc dl = MI.getDebugLoc(); 10641 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond, 10642 MI.getOperand(2).getReg(), MI.getOperand(3).getReg()); 10643 } else if (MI.getOpcode() == PPC::SELECT_CC_I4 || 10644 MI.getOpcode() == PPC::SELECT_CC_I8 || 10645 MI.getOpcode() == PPC::SELECT_CC_F4 || 10646 MI.getOpcode() == PPC::SELECT_CC_F8 || 10647 MI.getOpcode() == PPC::SELECT_CC_F16 || 10648 MI.getOpcode() == PPC::SELECT_CC_QFRC || 10649 MI.getOpcode() == PPC::SELECT_CC_QSRC || 10650 MI.getOpcode() == PPC::SELECT_CC_QBRC || 10651 MI.getOpcode() == PPC::SELECT_CC_VRRC || 10652 MI.getOpcode() == PPC::SELECT_CC_VSFRC || 10653 MI.getOpcode() == PPC::SELECT_CC_VSSRC || 10654 MI.getOpcode() == PPC::SELECT_CC_VSRC || 10655 MI.getOpcode() == PPC::SELECT_CC_SPE4 || 10656 MI.getOpcode() == PPC::SELECT_CC_SPE || 10657 MI.getOpcode() == PPC::SELECT_I4 || 10658 MI.getOpcode() == PPC::SELECT_I8 || 10659 MI.getOpcode() == PPC::SELECT_F4 || 10660 MI.getOpcode() == PPC::SELECT_F8 || 10661 MI.getOpcode() == PPC::SELECT_F16 || 10662 MI.getOpcode() == PPC::SELECT_QFRC || 10663 MI.getOpcode() == PPC::SELECT_QSRC || 10664 MI.getOpcode() == PPC::SELECT_QBRC || 10665 MI.getOpcode() == PPC::SELECT_SPE || 10666 MI.getOpcode() == PPC::SELECT_SPE4 || 10667 MI.getOpcode() == PPC::SELECT_VRRC || 10668 MI.getOpcode() == PPC::SELECT_VSFRC || 10669 MI.getOpcode() == PPC::SELECT_VSSRC || 10670 MI.getOpcode() == PPC::SELECT_VSRC) { 10671 // The incoming instruction knows the destination vreg to set, the 10672 // condition code register to branch on, the true/false values to 10673 // select between, and a branch opcode to use. 10674 10675 // thisMBB: 10676 // ... 10677 // TrueVal = ... 10678 // cmpTY ccX, r1, r2 10679 // bCC copy1MBB 10680 // fallthrough --> copy0MBB 10681 MachineBasicBlock *thisMBB = BB; 10682 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); 10683 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 10684 DebugLoc dl = MI.getDebugLoc(); 10685 F->insert(It, copy0MBB); 10686 F->insert(It, sinkMBB); 10687 10688 // Transfer the remainder of BB and its successor edges to sinkMBB. 10689 sinkMBB->splice(sinkMBB->begin(), BB, 10690 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 10691 sinkMBB->transferSuccessorsAndUpdatePHIs(BB); 10692 10693 // Next, add the true and fallthrough blocks as its successors. 
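// The resulting CFG is a triangle: thisMBB branches to sinkMBB when the
// condition selects the true value, and otherwise falls through to copy0MBB,
// which in turn falls through to sinkMBB where a PHI merges the two values.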
10694 BB->addSuccessor(copy0MBB); 10695 BB->addSuccessor(sinkMBB); 10696 10697 if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 || 10698 MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 || 10699 MI.getOpcode() == PPC::SELECT_F16 || 10700 MI.getOpcode() == PPC::SELECT_SPE4 || 10701 MI.getOpcode() == PPC::SELECT_SPE || 10702 MI.getOpcode() == PPC::SELECT_QFRC || 10703 MI.getOpcode() == PPC::SELECT_QSRC || 10704 MI.getOpcode() == PPC::SELECT_QBRC || 10705 MI.getOpcode() == PPC::SELECT_VRRC || 10706 MI.getOpcode() == PPC::SELECT_VSFRC || 10707 MI.getOpcode() == PPC::SELECT_VSSRC || 10708 MI.getOpcode() == PPC::SELECT_VSRC) { 10709 BuildMI(BB, dl, TII->get(PPC::BC)) 10710 .addReg(MI.getOperand(1).getReg()) 10711 .addMBB(sinkMBB); 10712 } else { 10713 unsigned SelectPred = MI.getOperand(4).getImm(); 10714 BuildMI(BB, dl, TII->get(PPC::BCC)) 10715 .addImm(SelectPred) 10716 .addReg(MI.getOperand(1).getReg()) 10717 .addMBB(sinkMBB); 10718 } 10719 10720 // copy0MBB: 10721 // %FalseValue = ... 10722 // # fallthrough to sinkMBB 10723 BB = copy0MBB; 10724 10725 // Update machine-CFG edges 10726 BB->addSuccessor(sinkMBB); 10727 10728 // sinkMBB: 10729 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 10730 // ... 10731 BB = sinkMBB; 10732 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg()) 10733 .addReg(MI.getOperand(3).getReg()) 10734 .addMBB(copy0MBB) 10735 .addReg(MI.getOperand(2).getReg()) 10736 .addMBB(thisMBB); 10737 } else if (MI.getOpcode() == PPC::ReadTB) { 10738 // To read the 64-bit time-base register on a 32-bit target, we read the 10739 // two halves. Should the counter have wrapped while it was being read, we 10740 // need to try again. 10741 // ... 10742 // readLoop: 10743 // mfspr Rx,TBU # load from TBU 10744 // mfspr Ry,TB # load from TB 10745 // mfspr Rz,TBU # load from TBU 10746 // cmpw crX,Rx,Rz # check if 'old'='new' 10747 // bne readLoop # branch if they're not equal 10748 // ... 10749 10750 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB); 10751 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 10752 DebugLoc dl = MI.getDebugLoc(); 10753 F->insert(It, readMBB); 10754 F->insert(It, sinkMBB); 10755 10756 // Transfer the remainder of BB and its successor edges to sinkMBB. 
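// (In readMBB, filled in below, SPR 269 is TBU, the upper half of the time
// base, and SPR 268 is TB, the lower half. Reading upper, lower, upper and
// comparing the two upper reads detects a carry out of TB between the reads,
// in which case we loop and try again.)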
10757 sinkMBB->splice(sinkMBB->begin(), BB, 10758 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 10759 sinkMBB->transferSuccessorsAndUpdatePHIs(BB); 10760 10761 BB->addSuccessor(readMBB); 10762 BB = readMBB; 10763 10764 MachineRegisterInfo &RegInfo = F->getRegInfo(); 10765 unsigned ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); 10766 unsigned LoReg = MI.getOperand(0).getReg(); 10767 unsigned HiReg = MI.getOperand(1).getReg(); 10768 10769 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269); 10770 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268); 10771 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269); 10772 10773 unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); 10774 10775 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg) 10776 .addReg(HiReg) 10777 .addReg(ReadAgainReg); 10778 BuildMI(BB, dl, TII->get(PPC::BCC)) 10779 .addImm(PPC::PRED_NE) 10780 .addReg(CmpReg) 10781 .addMBB(readMBB); 10782 10783 BB->addSuccessor(readMBB); 10784 BB->addSuccessor(sinkMBB); 10785 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8) 10786 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4); 10787 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16) 10788 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4); 10789 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32) 10790 BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4); 10791 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64) 10792 BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8); 10793 10794 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8) 10795 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND); 10796 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16) 10797 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND); 10798 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32) 10799 BB = EmitAtomicBinary(MI, BB, 4, PPC::AND); 10800 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64) 10801 BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8); 10802 10803 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8) 10804 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR); 10805 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16) 10806 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR); 10807 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32) 10808 BB = EmitAtomicBinary(MI, BB, 4, PPC::OR); 10809 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64) 10810 BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8); 10811 10812 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8) 10813 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR); 10814 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16) 10815 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR); 10816 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32) 10817 BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR); 10818 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64) 10819 BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8); 10820 10821 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8) 10822 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND); 10823 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16) 10824 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND); 10825 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32) 10826 BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND); 10827 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64) 10828 BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8); 10829 10830 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8) 10831 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF); 10832 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16) 10833 
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF); 10834 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32) 10835 BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF); 10836 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64) 10837 BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8); 10838 10839 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8) 10840 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE); 10841 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16) 10842 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE); 10843 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32) 10844 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE); 10845 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64) 10846 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE); 10847 10848 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8) 10849 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE); 10850 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16) 10851 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE); 10852 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32) 10853 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE); 10854 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64) 10855 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE); 10856 10857 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8) 10858 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE); 10859 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16) 10860 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE); 10861 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32) 10862 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE); 10863 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64) 10864 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE); 10865 10866 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8) 10867 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE); 10868 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16) 10869 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE); 10870 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32) 10871 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE); 10872 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64) 10873 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE); 10874 10875 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8) 10876 BB = EmitPartwordAtomicBinary(MI, BB, true, 0); 10877 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16) 10878 BB = EmitPartwordAtomicBinary(MI, BB, false, 0); 10879 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32) 10880 BB = EmitAtomicBinary(MI, BB, 4, 0); 10881 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64) 10882 BB = EmitAtomicBinary(MI, BB, 8, 0); 10883 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 || 10884 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 || 10885 (Subtarget.hasPartwordAtomics() && 10886 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) || 10887 (Subtarget.hasPartwordAtomics() && 10888 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) { 10889 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64; 10890 10891 auto LoadMnemonic = PPC::LDARX; 10892 auto StoreMnemonic = PPC::STDCX; 10893 switch (MI.getOpcode()) { 10894 default: 10895 llvm_unreachable("Compare and swap of unknown size"); 10896 case PPC::ATOMIC_CMP_SWAP_I8: 10897 LoadMnemonic = PPC::LBARX; 10898 StoreMnemonic = PPC::STBCX; 10899 
assert(Subtarget.hasPartwordAtomics() && "No support partword atomics."); 10900 break; 10901 case PPC::ATOMIC_CMP_SWAP_I16: 10902 LoadMnemonic = PPC::LHARX; 10903 StoreMnemonic = PPC::STHCX; 10904 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics."); 10905 break; 10906 case PPC::ATOMIC_CMP_SWAP_I32: 10907 LoadMnemonic = PPC::LWARX; 10908 StoreMnemonic = PPC::STWCX; 10909 break; 10910 case PPC::ATOMIC_CMP_SWAP_I64: 10911 LoadMnemonic = PPC::LDARX; 10912 StoreMnemonic = PPC::STDCX; 10913 break; 10914 } 10915 unsigned dest = MI.getOperand(0).getReg(); 10916 unsigned ptrA = MI.getOperand(1).getReg(); 10917 unsigned ptrB = MI.getOperand(2).getReg(); 10918 unsigned oldval = MI.getOperand(3).getReg(); 10919 unsigned newval = MI.getOperand(4).getReg(); 10920 DebugLoc dl = MI.getDebugLoc(); 10921 10922 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); 10923 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); 10924 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); 10925 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 10926 F->insert(It, loop1MBB); 10927 F->insert(It, loop2MBB); 10928 F->insert(It, midMBB); 10929 F->insert(It, exitMBB); 10930 exitMBB->splice(exitMBB->begin(), BB, 10931 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 10932 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 10933 10934 // thisMBB: 10935 // ... 10936 // fallthrough --> loopMBB 10937 BB->addSuccessor(loop1MBB); 10938 10939 // loop1MBB: 10940 // l[bhwd]arx dest, ptr 10941 // cmp[wd] dest, oldval 10942 // bne- midMBB 10943 // loop2MBB: 10944 // st[bhwd]cx. newval, ptr 10945 // bne- loopMBB 10946 // b exitBB 10947 // midMBB: 10948 // st[bhwd]cx. dest, ptr 10949 // exitBB: 10950 BB = loop1MBB; 10951 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB); 10952 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0) 10953 .addReg(oldval) 10954 .addReg(dest); 10955 BuildMI(BB, dl, TII->get(PPC::BCC)) 10956 .addImm(PPC::PRED_NE) 10957 .addReg(PPC::CR0) 10958 .addMBB(midMBB); 10959 BB->addSuccessor(loop2MBB); 10960 BB->addSuccessor(midMBB); 10961 10962 BB = loop2MBB; 10963 BuildMI(BB, dl, TII->get(StoreMnemonic)) 10964 .addReg(newval) 10965 .addReg(ptrA) 10966 .addReg(ptrB); 10967 BuildMI(BB, dl, TII->get(PPC::BCC)) 10968 .addImm(PPC::PRED_NE) 10969 .addReg(PPC::CR0) 10970 .addMBB(loop1MBB); 10971 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); 10972 BB->addSuccessor(loop1MBB); 10973 BB->addSuccessor(exitMBB); 10974 10975 BB = midMBB; 10976 BuildMI(BB, dl, TII->get(StoreMnemonic)) 10977 .addReg(dest) 10978 .addReg(ptrA) 10979 .addReg(ptrB); 10980 BB->addSuccessor(exitMBB); 10981 10982 // exitMBB: 10983 // ... 10984 BB = exitMBB; 10985 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 || 10986 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) { 10987 // We must use 64-bit registers for addresses when targeting 64-bit, 10988 // since we're actually doing arithmetic on them. Other registers 10989 // can be 32-bit. 
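// Without partword atomics there is no lbarx/lharx, so the compare-and-swap
// is performed on the aligned word that contains the byte or halfword: lwarx
// the word, compare only the masked portion, and stwcx. the word back with
// just that portion replaced. The shift and mask setup below selects the
// byte or halfword within the word.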
10990 bool is64bit = Subtarget.isPPC64(); 10991 bool isLittleEndian = Subtarget.isLittleEndian(); 10992 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8; 10993 10994 unsigned dest = MI.getOperand(0).getReg(); 10995 unsigned ptrA = MI.getOperand(1).getReg(); 10996 unsigned ptrB = MI.getOperand(2).getReg(); 10997 unsigned oldval = MI.getOperand(3).getReg(); 10998 unsigned newval = MI.getOperand(4).getReg(); 10999 DebugLoc dl = MI.getDebugLoc(); 11000 11001 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); 11002 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); 11003 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); 11004 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 11005 F->insert(It, loop1MBB); 11006 F->insert(It, loop2MBB); 11007 F->insert(It, midMBB); 11008 F->insert(It, exitMBB); 11009 exitMBB->splice(exitMBB->begin(), BB, 11010 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 11011 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 11012 11013 MachineRegisterInfo &RegInfo = F->getRegInfo(); 11014 const TargetRegisterClass *RC = 11015 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; 11016 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; 11017 11018 unsigned PtrReg = RegInfo.createVirtualRegister(RC); 11019 unsigned Shift1Reg = RegInfo.createVirtualRegister(GPRC); 11020 unsigned ShiftReg = 11021 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC); 11022 unsigned NewVal2Reg = RegInfo.createVirtualRegister(GPRC); 11023 unsigned NewVal3Reg = RegInfo.createVirtualRegister(GPRC); 11024 unsigned OldVal2Reg = RegInfo.createVirtualRegister(GPRC); 11025 unsigned OldVal3Reg = RegInfo.createVirtualRegister(GPRC); 11026 unsigned MaskReg = RegInfo.createVirtualRegister(GPRC); 11027 unsigned Mask2Reg = RegInfo.createVirtualRegister(GPRC); 11028 unsigned Mask3Reg = RegInfo.createVirtualRegister(GPRC); 11029 unsigned Tmp2Reg = RegInfo.createVirtualRegister(GPRC); 11030 unsigned Tmp4Reg = RegInfo.createVirtualRegister(GPRC); 11031 unsigned TmpDestReg = RegInfo.createVirtualRegister(GPRC); 11032 unsigned Ptr1Reg; 11033 unsigned TmpReg = RegInfo.createVirtualRegister(GPRC); 11034 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; 11035 // thisMBB: 11036 // ... 11037 // fallthrough --> loopMBB 11038 BB->addSuccessor(loop1MBB); 11039 11040 // The 4-byte load must be aligned, while a char or short may be 11041 // anywhere in the word. Hence all this nasty bookkeeping code. 11042 // add ptr1, ptrA, ptrB [copy if ptrA==0] 11043 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] 11044 // xori shift, shift1, 24 [16] 11045 // rlwinm ptr, ptr1, 0, 0, 29 11046 // slw newval2, newval, shift 11047 // slw oldval2, oldval,shift 11048 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] 11049 // slw mask, mask2, shift 11050 // and newval3, newval2, mask 11051 // and oldval3, oldval2, mask 11052 // loop1MBB: 11053 // lwarx tmpDest, ptr 11054 // and tmp, tmpDest, mask 11055 // cmpw tmp, oldval3 11056 // bne- midMBB 11057 // loop2MBB: 11058 // andc tmp2, tmpDest, mask 11059 // or tmp4, tmp2, newval3 11060 // stwcx. tmp4, ptr 11061 // bne- loop1MBB 11062 // b exitBB 11063 // midMBB: 11064 // stwcx. tmpDest, ptr 11065 // exitBB: 11066 // srw dest, tmpDest, shift 11067 if (ptrA != ZeroReg) { 11068 Ptr1Reg = RegInfo.createVirtualRegister(RC); 11069 BuildMI(BB, dl, TII->get(is64bit ? 
PPC::ADD8 : PPC::ADD4), Ptr1Reg) 11070 .addReg(ptrA) 11071 .addReg(ptrB); 11072 } else { 11073 Ptr1Reg = ptrB; 11074 } 11075 11076 // We need use 32-bit subregister to avoid mismatch register class in 64-bit 11077 // mode. 11078 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg) 11079 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0) 11080 .addImm(3) 11081 .addImm(27) 11082 .addImm(is8bit ? 28 : 27); 11083 if (!isLittleEndian) 11084 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg) 11085 .addReg(Shift1Reg) 11086 .addImm(is8bit ? 24 : 16); 11087 if (is64bit) 11088 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) 11089 .addReg(Ptr1Reg) 11090 .addImm(0) 11091 .addImm(61); 11092 else 11093 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) 11094 .addReg(Ptr1Reg) 11095 .addImm(0) 11096 .addImm(0) 11097 .addImm(29); 11098 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg) 11099 .addReg(newval) 11100 .addReg(ShiftReg); 11101 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg) 11102 .addReg(oldval) 11103 .addReg(ShiftReg); 11104 if (is8bit) 11105 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); 11106 else { 11107 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); 11108 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg) 11109 .addReg(Mask3Reg) 11110 .addImm(65535); 11111 } 11112 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) 11113 .addReg(Mask2Reg) 11114 .addReg(ShiftReg); 11115 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg) 11116 .addReg(NewVal2Reg) 11117 .addReg(MaskReg); 11118 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg) 11119 .addReg(OldVal2Reg) 11120 .addReg(MaskReg); 11121 11122 BB = loop1MBB; 11123 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) 11124 .addReg(ZeroReg) 11125 .addReg(PtrReg); 11126 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg) 11127 .addReg(TmpDestReg) 11128 .addReg(MaskReg); 11129 BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0) 11130 .addReg(TmpReg) 11131 .addReg(OldVal3Reg); 11132 BuildMI(BB, dl, TII->get(PPC::BCC)) 11133 .addImm(PPC::PRED_NE) 11134 .addReg(PPC::CR0) 11135 .addMBB(midMBB); 11136 BB->addSuccessor(loop2MBB); 11137 BB->addSuccessor(midMBB); 11138 11139 BB = loop2MBB; 11140 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg) 11141 .addReg(TmpDestReg) 11142 .addReg(MaskReg); 11143 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg) 11144 .addReg(Tmp2Reg) 11145 .addReg(NewVal3Reg); 11146 BuildMI(BB, dl, TII->get(PPC::STWCX)) 11147 .addReg(Tmp4Reg) 11148 .addReg(ZeroReg) 11149 .addReg(PtrReg); 11150 BuildMI(BB, dl, TII->get(PPC::BCC)) 11151 .addImm(PPC::PRED_NE) 11152 .addReg(PPC::CR0) 11153 .addMBB(loop1MBB); 11154 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); 11155 BB->addSuccessor(loop1MBB); 11156 BB->addSuccessor(exitMBB); 11157 11158 BB = midMBB; 11159 BuildMI(BB, dl, TII->get(PPC::STWCX)) 11160 .addReg(TmpDestReg) 11161 .addReg(ZeroReg) 11162 .addReg(PtrReg); 11163 BB->addSuccessor(exitMBB); 11164 11165 // exitMBB: 11166 // ... 11167 BB = exitMBB; 11168 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest) 11169 .addReg(TmpReg) 11170 .addReg(ShiftReg); 11171 } else if (MI.getOpcode() == PPC::FADDrtz) { 11172 // This pseudo performs an FADD with rounding mode temporarily forced 11173 // to round-to-zero. We emit this via custom inserter since the FPSCR 11174 // is not modeled at the SelectionDAG level. 
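// The emitted sequence is: mffs (save the FPSCR), mtfsb1 31 and mtfsb0 30
// (set the RN field to 0b01, round toward zero), fadd, and finally mtfsf to
// restore the rounding-mode bits from the copy saved by mffs.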
11175 unsigned Dest = MI.getOperand(0).getReg();
11176 unsigned Src1 = MI.getOperand(1).getReg();
11177 unsigned Src2 = MI.getOperand(2).getReg();
11178 DebugLoc dl = MI.getDebugLoc();
11179
11180 MachineRegisterInfo &RegInfo = F->getRegInfo();
11181 unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
11182
11183 // Save FPSCR value.
11184 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
11185
11186 // Set rounding mode to round-to-zero.
11187 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
11188 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);
11189
11190 // Perform addition.
11191 BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);
11192
11193 // Restore FPSCR value.
11194 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
11195 } else if (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
11196 MI.getOpcode() == PPC::ANDIo_1_GT_BIT ||
11197 MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
11198 MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) {
11199 unsigned Opcode = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
11200 MI.getOpcode() == PPC::ANDIo_1_GT_BIT8)
11201 ? PPC::ANDIo8
11202 : PPC::ANDIo;
11203 bool isEQ = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
11204 MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8);
11205
11206 MachineRegisterInfo &RegInfo = F->getRegInfo();
11207 unsigned Dest = RegInfo.createVirtualRegister(
11208 Opcode == PPC::ANDIo ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
11209
11210 DebugLoc dl = MI.getDebugLoc();
11211 BuildMI(*BB, MI, dl, TII->get(Opcode), Dest)
11212 .addReg(MI.getOperand(1).getReg())
11213 .addImm(1);
11214 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
11215 MI.getOperand(0).getReg())
11216 .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
11217 } else if (MI.getOpcode() == PPC::TCHECK_RET) {
11218 DebugLoc Dl = MI.getDebugLoc();
11219 MachineRegisterInfo &RegInfo = F->getRegInfo();
11220 unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
11221 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
11222 return BB;
11223 } else if (MI.getOpcode() == PPC::SETRNDi) {
11224 DebugLoc dl = MI.getDebugLoc();
11225 unsigned OldFPSCRReg = MI.getOperand(0).getReg();
11226
11227 // Save FPSCR value.
11228 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
11229
11230 // The floating point rounding mode is in bits 62:63 of the FPSCR, and has
11231 // the following settings:
11232 // 00 Round to nearest
11233 // 01 Round to 0
11234 // 10 Round to +inf
11235 // 11 Round to -inf
11236
11237 // When the operand is an immediate, we use its two least significant bits
11238 // to set bits 62:63 of the FPSCR.
11239 unsigned Mode = MI.getOperand(1).getImm();
11240 BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
11241 .addImm(31);
11242
11243 BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
11244 .addImm(30);
11245 } else if (MI.getOpcode() == PPC::SETRND) {
11246 DebugLoc dl = MI.getDebugLoc();
11247
11248 // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
11249 // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
11250 // If the target doesn't have DirectMove, we should use the stack to do the
11251 // conversion, because the target doesn't have the instructions like mtvsrd
11252 // or mfvsrd to do this conversion directly.
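// copyRegFromG8RCOrF8RC (below) moves a 64-bit value between an FPR and a
// GPR: with direct-move support it is a plain COPY, and without it the value
// takes a round trip through an 8-byte stack slot, stored with one register
// class's store (stfd or std) and reloaded with the other's load (ld or lfd).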
11253 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) { 11254 if (Subtarget.hasDirectMove()) { 11255 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg) 11256 .addReg(SrcReg); 11257 } else { 11258 // Use stack to do the register copy. 11259 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD; 11260 MachineRegisterInfo &RegInfo = F->getRegInfo(); 11261 const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg); 11262 if (RC == &PPC::F8RCRegClass) { 11263 // Copy register from F8RCRegClass to G8RCRegclass. 11264 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) && 11265 "Unsupported RegClass."); 11266 11267 StoreOp = PPC::STFD; 11268 LoadOp = PPC::LD; 11269 } else { 11270 // Copy register from G8RCRegClass to F8RCRegclass. 11271 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) && 11272 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) && 11273 "Unsupported RegClass."); 11274 } 11275 11276 MachineFrameInfo &MFI = F->getFrameInfo(); 11277 int FrameIdx = MFI.CreateStackObject(8, 8, false); 11278 11279 MachineMemOperand *MMOStore = F->getMachineMemOperand( 11280 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0), 11281 MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx), 11282 MFI.getObjectAlignment(FrameIdx)); 11283 11284 // Store the SrcReg into the stack. 11285 BuildMI(*BB, MI, dl, TII->get(StoreOp)) 11286 .addReg(SrcReg) 11287 .addImm(0) 11288 .addFrameIndex(FrameIdx) 11289 .addMemOperand(MMOStore); 11290 11291 MachineMemOperand *MMOLoad = F->getMachineMemOperand( 11292 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0), 11293 MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx), 11294 MFI.getObjectAlignment(FrameIdx)); 11295 11296 // Load from the stack where SrcReg is stored, and save to DestReg, 11297 // so we have done the RegClass conversion from RegClass::SrcReg to 11298 // RegClass::DestReg. 11299 BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg) 11300 .addImm(0) 11301 .addFrameIndex(FrameIdx) 11302 .addMemOperand(MMOLoad); 11303 } 11304 }; 11305 11306 unsigned OldFPSCRReg = MI.getOperand(0).getReg(); 11307 11308 // Save FPSCR value. 11309 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg); 11310 11311 // When the operand is gprc register, use two least significant bits of the 11312 // register and mtfsf instruction to set the bits 62:63 of FPSCR. 11313 // 11314 // copy OldFPSCRTmpReg, OldFPSCRReg 11315 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1) 11316 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62 11317 // copy NewFPSCRReg, NewFPSCRTmpReg 11318 // mtfsf 255, NewFPSCRReg 11319 MachineOperand SrcOp = MI.getOperand(1); 11320 MachineRegisterInfo &RegInfo = F->getRegInfo(); 11321 unsigned OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); 11322 11323 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg); 11324 11325 unsigned ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); 11326 unsigned ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); 11327 11328 // The first operand of INSERT_SUBREG should be a register which has 11329 // subregisters, we only care about its RegClass, so we should use an 11330 // IMPLICIT_DEF register. 
11331 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg); 11332 BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg) 11333 .addReg(ImDefReg) 11334 .add(SrcOp) 11335 .addImm(1); 11336 11337 unsigned NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); 11338 BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg) 11339 .addReg(OldFPSCRTmpReg) 11340 .addReg(ExtSrcReg) 11341 .addImm(0) 11342 .addImm(62); 11343 11344 unsigned NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); 11345 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg); 11346 11347 // The mask 255 means that put the 32:63 bits of NewFPSCRReg to the 32:63 11348 // bits of FPSCR. 11349 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)) 11350 .addImm(255) 11351 .addReg(NewFPSCRReg) 11352 .addImm(0) 11353 .addImm(0); 11354 } else { 11355 llvm_unreachable("Unexpected instr type to insert"); 11356 } 11357 11358 MI.eraseFromParent(); // The pseudo instruction is gone now. 11359 return BB; 11360 } 11361 11362 //===----------------------------------------------------------------------===// 11363 // Target Optimization Hooks 11364 //===----------------------------------------------------------------------===// 11365 11366 static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) { 11367 // For the estimates, convergence is quadratic, so we essentially double the 11368 // number of digits correct after every iteration. For both FRE and FRSQRTE, 11369 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(), 11370 // this is 2^-14. IEEE float has 23 digits and double has 52 digits. 11371 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3; 11372 if (VT.getScalarType() == MVT::f64) 11373 RefinementSteps++; 11374 return RefinementSteps; 11375 } 11376 11377 SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, 11378 int Enabled, int &RefinementSteps, 11379 bool &UseOneConstNR, 11380 bool Reciprocal) const { 11381 EVT VT = Operand.getValueType(); 11382 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) || 11383 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) || 11384 (VT == MVT::v4f32 && Subtarget.hasAltivec()) || 11385 (VT == MVT::v2f64 && Subtarget.hasVSX()) || 11386 (VT == MVT::v4f32 && Subtarget.hasQPX()) || 11387 (VT == MVT::v4f64 && Subtarget.hasQPX())) { 11388 if (RefinementSteps == ReciprocalEstimate::Unspecified) 11389 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget); 11390 11391 // The Newton-Raphson computation with a single constant does not provide 11392 // enough accuracy on some CPUs. 
11393 UseOneConstNR = !Subtarget.needsTwoConstNR(); 11394 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand); 11395 } 11396 return SDValue(); 11397 } 11398 11399 SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG, 11400 int Enabled, 11401 int &RefinementSteps) const { 11402 EVT VT = Operand.getValueType(); 11403 if ((VT == MVT::f32 && Subtarget.hasFRES()) || 11404 (VT == MVT::f64 && Subtarget.hasFRE()) || 11405 (VT == MVT::v4f32 && Subtarget.hasAltivec()) || 11406 (VT == MVT::v2f64 && Subtarget.hasVSX()) || 11407 (VT == MVT::v4f32 && Subtarget.hasQPX()) || 11408 (VT == MVT::v4f64 && Subtarget.hasQPX())) { 11409 if (RefinementSteps == ReciprocalEstimate::Unspecified) 11410 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget); 11411 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand); 11412 } 11413 return SDValue(); 11414 } 11415 11416 unsigned PPCTargetLowering::combineRepeatedFPDivisors() const { 11417 // Note: This functionality is used only when unsafe-fp-math is enabled, and 11418 // on cores with reciprocal estimates (which are used when unsafe-fp-math is 11419 // enabled for division), this functionality is redundant with the default 11420 // combiner logic (once the division -> reciprocal/multiply transformation 11421 // has taken place). As a result, this matters more for older cores than for 11422 // newer ones. 11423 11424 // Combine multiple FDIVs with the same divisor into multiple FMULs by the 11425 // reciprocal if there are two or more FDIVs (for embedded cores with only 11426 // one FP pipeline) for three or more FDIVs (for generic OOO cores). 11427 switch (Subtarget.getDarwinDirective()) { 11428 default: 11429 return 3; 11430 case PPC::DIR_440: 11431 case PPC::DIR_A2: 11432 case PPC::DIR_E500: 11433 case PPC::DIR_E500mc: 11434 case PPC::DIR_E5500: 11435 return 2; 11436 } 11437 } 11438 11439 // isConsecutiveLSLoc needs to work even if all adds have not yet been 11440 // collapsed, and so we need to look through chains of them. 11441 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base, 11442 int64_t& Offset, SelectionDAG &DAG) { 11443 if (DAG.isBaseWithConstantOffset(Loc)) { 11444 Base = Loc.getOperand(0); 11445 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue(); 11446 11447 // The base might itself be a base plus an offset, and if so, accumulate 11448 // that as well. 
11449 getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG); 11450 } 11451 } 11452 11453 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, 11454 unsigned Bytes, int Dist, 11455 SelectionDAG &DAG) { 11456 if (VT.getSizeInBits() / 8 != Bytes) 11457 return false; 11458 11459 SDValue BaseLoc = Base->getBasePtr(); 11460 if (Loc.getOpcode() == ISD::FrameIndex) { 11461 if (BaseLoc.getOpcode() != ISD::FrameIndex) 11462 return false; 11463 const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); 11464 int FI = cast<FrameIndexSDNode>(Loc)->getIndex(); 11465 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex(); 11466 int FS = MFI.getObjectSize(FI); 11467 int BFS = MFI.getObjectSize(BFI); 11468 if (FS != BFS || FS != (int)Bytes) return false; 11469 return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes); 11470 } 11471 11472 SDValue Base1 = Loc, Base2 = BaseLoc; 11473 int64_t Offset1 = 0, Offset2 = 0; 11474 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG); 11475 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG); 11476 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes)) 11477 return true; 11478 11479 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 11480 const GlobalValue *GV1 = nullptr; 11481 const GlobalValue *GV2 = nullptr; 11482 Offset1 = 0; 11483 Offset2 = 0; 11484 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1); 11485 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); 11486 if (isGA1 && isGA2 && GV1 == GV2) 11487 return Offset1 == (Offset2 + Dist*Bytes); 11488 return false; 11489 } 11490 11491 // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does 11492 // not enforce equality of the chain operands. 11493 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, 11494 unsigned Bytes, int Dist, 11495 SelectionDAG &DAG) { 11496 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) { 11497 EVT VT = LS->getMemoryVT(); 11498 SDValue Loc = LS->getBasePtr(); 11499 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG); 11500 } 11501 11502 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) { 11503 EVT VT; 11504 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { 11505 default: return false; 11506 case Intrinsic::ppc_qpx_qvlfd: 11507 case Intrinsic::ppc_qpx_qvlfda: 11508 VT = MVT::v4f64; 11509 break; 11510 case Intrinsic::ppc_qpx_qvlfs: 11511 case Intrinsic::ppc_qpx_qvlfsa: 11512 VT = MVT::v4f32; 11513 break; 11514 case Intrinsic::ppc_qpx_qvlfcd: 11515 case Intrinsic::ppc_qpx_qvlfcda: 11516 VT = MVT::v2f64; 11517 break; 11518 case Intrinsic::ppc_qpx_qvlfcs: 11519 case Intrinsic::ppc_qpx_qvlfcsa: 11520 VT = MVT::v2f32; 11521 break; 11522 case Intrinsic::ppc_qpx_qvlfiwa: 11523 case Intrinsic::ppc_qpx_qvlfiwz: 11524 case Intrinsic::ppc_altivec_lvx: 11525 case Intrinsic::ppc_altivec_lvxl: 11526 case Intrinsic::ppc_vsx_lxvw4x: 11527 case Intrinsic::ppc_vsx_lxvw4x_be: 11528 VT = MVT::v4i32; 11529 break; 11530 case Intrinsic::ppc_vsx_lxvd2x: 11531 case Intrinsic::ppc_vsx_lxvd2x_be: 11532 VT = MVT::v2f64; 11533 break; 11534 case Intrinsic::ppc_altivec_lvebx: 11535 VT = MVT::i8; 11536 break; 11537 case Intrinsic::ppc_altivec_lvehx: 11538 VT = MVT::i16; 11539 break; 11540 case Intrinsic::ppc_altivec_lvewx: 11541 VT = MVT::i32; 11542 break; 11543 } 11544 11545 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG); 11546 } 11547 11548 if (N->getOpcode() == ISD::INTRINSIC_VOID) { 11549 EVT VT; 11550 switch 
(cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
11551 default: return false;
11552 case Intrinsic::ppc_qpx_qvstfd:
11553 case Intrinsic::ppc_qpx_qvstfda:
11554 VT = MVT::v4f64;
11555 break;
11556 case Intrinsic::ppc_qpx_qvstfs:
11557 case Intrinsic::ppc_qpx_qvstfsa:
11558 VT = MVT::v4f32;
11559 break;
11560 case Intrinsic::ppc_qpx_qvstfcd:
11561 case Intrinsic::ppc_qpx_qvstfcda:
11562 VT = MVT::v2f64;
11563 break;
11564 case Intrinsic::ppc_qpx_qvstfcs:
11565 case Intrinsic::ppc_qpx_qvstfcsa:
11566 VT = MVT::v2f32;
11567 break;
11568 case Intrinsic::ppc_qpx_qvstfiw:
11569 case Intrinsic::ppc_qpx_qvstfiwa:
11570 case Intrinsic::ppc_altivec_stvx:
11571 case Intrinsic::ppc_altivec_stvxl:
11572 case Intrinsic::ppc_vsx_stxvw4x:
11573 VT = MVT::v4i32;
11574 break;
11575 case Intrinsic::ppc_vsx_stxvd2x:
11576 VT = MVT::v2f64;
11577 break;
11578 case Intrinsic::ppc_vsx_stxvw4x_be:
11579 VT = MVT::v4i32;
11580 break;
11581 case Intrinsic::ppc_vsx_stxvd2x_be:
11582 VT = MVT::v2f64;
11583 break;
11584 case Intrinsic::ppc_altivec_stvebx:
11585 VT = MVT::i8;
11586 break;
11587 case Intrinsic::ppc_altivec_stvehx:
11588 VT = MVT::i16;
11589 break;
11590 case Intrinsic::ppc_altivec_stvewx:
11591 VT = MVT::i32;
11592 break;
11593 }
11594
11595 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
11596 }
11597
11598 return false;
11599 }
11600
11601 // Return true if there is a nearby consecutive load to the one provided
11602 // (regardless of alignment). We search up and down the chain, looking through
11603 // token factors and other loads (but nothing else). As a result, a true result
11604 // indicates that it is safe to create a new consecutive load adjacent to the
11605 // load provided.
11606 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
11607 SDValue Chain = LD->getChain();
11608 EVT VT = LD->getMemoryVT();
11609
11610 SmallSet<SDNode *, 16> LoadRoots;
11611 SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
11612 SmallSet<SDNode *, 16> Visited;
11613
11614 // First, search up the chain, branching to follow all token-factor operands.
11615 // If we find a consecutive load, then we're done; otherwise, record all
11616 // nodes just above the top-level loads and token factors.
11617 while (!Queue.empty()) {
11618 SDNode *ChainNext = Queue.pop_back_val();
11619 if (!Visited.insert(ChainNext).second)
11620 continue;
11621
11622 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
11623 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
11624 return true;
11625
11626 if (!Visited.count(ChainLD->getChain().getNode()))
11627 Queue.push_back(ChainLD->getChain().getNode());
11628 } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
11629 for (const SDUse &O : ChainNext->ops())
11630 if (!Visited.count(O.getNode()))
11631 Queue.push_back(O.getNode());
11632 } else
11633 LoadRoots.insert(ChainNext);
11634 }
11635
11636 // Second, search down the chain, starting from the top-level nodes recorded
11637 // in the first phase. These top-level nodes are the nodes just above all
11638 // loads and token factors. Starting with their uses, recursively look through
11639 // all loads (just the chain uses) and token factors to find a consecutive
11640 // load.
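// In both phases, "consecutive" is tested by isConsecutiveLS with Dist == 1
// and Bytes == VT.getStoreSize(), i.e. we look for a load of the memory
// location immediately following the one loaded by LD.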
11641 Visited.clear(); 11642 Queue.clear(); 11643 11644 for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(), 11645 IE = LoadRoots.end(); I != IE; ++I) { 11646 Queue.push_back(*I); 11647 11648 while (!Queue.empty()) { 11649 SDNode *LoadRoot = Queue.pop_back_val(); 11650 if (!Visited.insert(LoadRoot).second) 11651 continue; 11652 11653 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot)) 11654 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG)) 11655 return true; 11656 11657 for (SDNode::use_iterator UI = LoadRoot->use_begin(), 11658 UE = LoadRoot->use_end(); UI != UE; ++UI) 11659 if (((isa<MemSDNode>(*UI) && 11660 cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) || 11661 UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI)) 11662 Queue.push_back(*UI); 11663 } 11664 } 11665 11666 return false; 11667 } 11668 11669 /// This function is called when we have proved that a SETCC node can be replaced 11670 /// by subtraction (and other supporting instructions) so that the result of 11671 /// comparison is kept in a GPR instead of CR. This function is purely for 11672 /// codegen purposes and has some flags to guide the codegen process. 11673 static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, 11674 bool Swap, SDLoc &DL, SelectionDAG &DAG) { 11675 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected."); 11676 11677 // Zero extend the operands to the largest legal integer. Originally, they 11678 // must be of a strictly smaller size. 11679 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0), 11680 DAG.getConstant(Size, DL, MVT::i32)); 11681 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1), 11682 DAG.getConstant(Size, DL, MVT::i32)); 11683 11684 // Swap if needed. Depends on the condition code. 11685 if (Swap) 11686 std::swap(Op0, Op1); 11687 11688 // Subtract extended integers. 11689 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1); 11690 11691 // Move the sign bit to the least significant position and zero out the rest. 11692 // Now the least significant bit carries the result of original comparison. 11693 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode, 11694 DAG.getConstant(Size - 1, DL, MVT::i32)); 11695 auto Final = Shifted; 11696 11697 // Complement the result if needed. Based on the condition code. 11698 if (Complement) 11699 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted, 11700 DAG.getConstant(1, DL, MVT::i64)); 11701 11702 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final); 11703 } 11704 11705 SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N, 11706 DAGCombinerInfo &DCI) const { 11707 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected."); 11708 11709 SelectionDAG &DAG = DCI.DAG; 11710 SDLoc DL(N); 11711 11712 // Size of integers being compared has a critical role in the following 11713 // analysis, so we prefer to do this when all types are legal. 
11714 if (!DCI.isAfterLegalizeDAG()) 11715 return SDValue(); 11716 11717 // If all users of SETCC extend its value to a legal integer type 11718 // then we replace SETCC with a subtraction 11719 for (SDNode::use_iterator UI = N->use_begin(), 11720 UE = N->use_end(); UI != UE; ++UI) { 11721 if (UI->getOpcode() != ISD::ZERO_EXTEND) 11722 return SDValue(); 11723 } 11724 11725 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); 11726 auto OpSize = N->getOperand(0).getValueSizeInBits(); 11727 11728 unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits(); 11729 11730 if (OpSize < Size) { 11731 switch (CC) { 11732 default: break; 11733 case ISD::SETULT: 11734 return generateEquivalentSub(N, Size, false, false, DL, DAG); 11735 case ISD::SETULE: 11736 return generateEquivalentSub(N, Size, true, true, DL, DAG); 11737 case ISD::SETUGT: 11738 return generateEquivalentSub(N, Size, false, true, DL, DAG); 11739 case ISD::SETUGE: 11740 return generateEquivalentSub(N, Size, true, false, DL, DAG); 11741 } 11742 } 11743 11744 return SDValue(); 11745 } 11746 11747 SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, 11748 DAGCombinerInfo &DCI) const { 11749 SelectionDAG &DAG = DCI.DAG; 11750 SDLoc dl(N); 11751 11752 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits"); 11753 // If we're tracking CR bits, we need to be careful that we don't have: 11754 // trunc(binary-ops(zext(x), zext(y))) 11755 // or 11756 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...) 11757 // such that we're unnecessarily moving things into GPRs when it would be 11758 // better to keep them in CR bits. 11759 11760 // Note that trunc here can be an actual i1 trunc, or can be the effective 11761 // truncation that comes from a setcc or select_cc. 11762 if (N->getOpcode() == ISD::TRUNCATE && 11763 N->getValueType(0) != MVT::i1) 11764 return SDValue(); 11765 11766 if (N->getOperand(0).getValueType() != MVT::i32 && 11767 N->getOperand(0).getValueType() != MVT::i64) 11768 return SDValue(); 11769 11770 if (N->getOpcode() == ISD::SETCC || 11771 N->getOpcode() == ISD::SELECT_CC) { 11772 // If we're looking at a comparison, then we need to make sure that the 11773 // high bits (all except for the first) don't matter the result. 11774 ISD::CondCode CC = 11775 cast<CondCodeSDNode>(N->getOperand( 11776 N->getOpcode() == ISD::SETCC ? 2 : 4))->get(); 11777 unsigned OpBits = N->getOperand(0).getValueSizeInBits(); 11778 11779 if (ISD::isSignedIntSetCC(CC)) { 11780 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits || 11781 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits) 11782 return SDValue(); 11783 } else if (ISD::isUnsignedIntSetCC(CC)) { 11784 if (!DAG.MaskedValueIsZero(N->getOperand(0), 11785 APInt::getHighBitsSet(OpBits, OpBits-1)) || 11786 !DAG.MaskedValueIsZero(N->getOperand(1), 11787 APInt::getHighBitsSet(OpBits, OpBits-1))) 11788 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI) 11789 : SDValue()); 11790 } else { 11791 // This is neither a signed nor an unsigned comparison, just make sure 11792 // that the high bits are equal. 11793 KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0)); 11794 KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1)); 11795 11796 // We don't really care about what is known about the first bit (if 11797 // anything), so clear it in all masks prior to comparing them. 
11798 Op1Known.Zero.clearBit(0); Op1Known.One.clearBit(0); 11799 Op2Known.Zero.clearBit(0); Op2Known.One.clearBit(0); 11800 11801 if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One) 11802 return SDValue(); 11803 } 11804 } 11805 11806 // We now know that the higher-order bits are irrelevant, we just need to 11807 // make sure that all of the intermediate operations are bit operations, and 11808 // all inputs are extensions. 11809 if (N->getOperand(0).getOpcode() != ISD::AND && 11810 N->getOperand(0).getOpcode() != ISD::OR && 11811 N->getOperand(0).getOpcode() != ISD::XOR && 11812 N->getOperand(0).getOpcode() != ISD::SELECT && 11813 N->getOperand(0).getOpcode() != ISD::SELECT_CC && 11814 N->getOperand(0).getOpcode() != ISD::TRUNCATE && 11815 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND && 11816 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND && 11817 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND) 11818 return SDValue(); 11819 11820 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) && 11821 N->getOperand(1).getOpcode() != ISD::AND && 11822 N->getOperand(1).getOpcode() != ISD::OR && 11823 N->getOperand(1).getOpcode() != ISD::XOR && 11824 N->getOperand(1).getOpcode() != ISD::SELECT && 11825 N->getOperand(1).getOpcode() != ISD::SELECT_CC && 11826 N->getOperand(1).getOpcode() != ISD::TRUNCATE && 11827 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND && 11828 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND && 11829 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND) 11830 return SDValue(); 11831 11832 SmallVector<SDValue, 4> Inputs; 11833 SmallVector<SDValue, 8> BinOps, PromOps; 11834 SmallPtrSet<SDNode *, 16> Visited; 11835 11836 for (unsigned i = 0; i < 2; ++i) { 11837 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND || 11838 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND || 11839 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) && 11840 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) || 11841 isa<ConstantSDNode>(N->getOperand(i))) 11842 Inputs.push_back(N->getOperand(i)); 11843 else 11844 BinOps.push_back(N->getOperand(i)); 11845 11846 if (N->getOpcode() == ISD::TRUNCATE) 11847 break; 11848 } 11849 11850 // Visit all inputs, collect all binary operations (and, or, xor and 11851 // select) that are all fed by extensions. 11852 while (!BinOps.empty()) { 11853 SDValue BinOp = BinOps.back(); 11854 BinOps.pop_back(); 11855 11856 if (!Visited.insert(BinOp.getNode()).second) 11857 continue; 11858 11859 PromOps.push_back(BinOp); 11860 11861 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) { 11862 // The condition of the select is not promoted. 
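        // (Operand 0 of a SELECT is the i1 condition, and operands 0/1 of a
        // SELECT_CC are the values being compared; only the value operands --
        // 1/2 for SELECT, 2/3 for SELECT_CC -- are candidates for promotion.)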
11863 if (BinOp.getOpcode() == ISD::SELECT && i == 0) 11864 continue; 11865 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3) 11866 continue; 11867 11868 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND || 11869 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND || 11870 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) && 11871 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) || 11872 isa<ConstantSDNode>(BinOp.getOperand(i))) { 11873 Inputs.push_back(BinOp.getOperand(i)); 11874 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || 11875 BinOp.getOperand(i).getOpcode() == ISD::OR || 11876 BinOp.getOperand(i).getOpcode() == ISD::XOR || 11877 BinOp.getOperand(i).getOpcode() == ISD::SELECT || 11878 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC || 11879 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || 11880 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND || 11881 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND || 11882 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) { 11883 BinOps.push_back(BinOp.getOperand(i)); 11884 } else { 11885 // We have an input that is not an extension or another binary 11886 // operation; we'll abort this transformation. 11887 return SDValue(); 11888 } 11889 } 11890 } 11891 11892 // Make sure that this is a self-contained cluster of operations (which 11893 // is not quite the same thing as saying that everything has only one 11894 // use). 11895 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 11896 if (isa<ConstantSDNode>(Inputs[i])) 11897 continue; 11898 11899 for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(), 11900 UE = Inputs[i].getNode()->use_end(); 11901 UI != UE; ++UI) { 11902 SDNode *User = *UI; 11903 if (User != N && !Visited.count(User)) 11904 return SDValue(); 11905 11906 // Make sure that we're not going to promote the non-output-value 11907 // operand(s) or SELECT or SELECT_CC. 11908 // FIXME: Although we could sometimes handle this, and it does occur in 11909 // practice that one of the condition inputs to the select is also one of 11910 // the outputs, we currently can't deal with this. 11911 if (User->getOpcode() == ISD::SELECT) { 11912 if (User->getOperand(0) == Inputs[i]) 11913 return SDValue(); 11914 } else if (User->getOpcode() == ISD::SELECT_CC) { 11915 if (User->getOperand(0) == Inputs[i] || 11916 User->getOperand(1) == Inputs[i]) 11917 return SDValue(); 11918 } 11919 } 11920 } 11921 11922 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) { 11923 for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(), 11924 UE = PromOps[i].getNode()->use_end(); 11925 UI != UE; ++UI) { 11926 SDNode *User = *UI; 11927 if (User != N && !Visited.count(User)) 11928 return SDValue(); 11929 11930 // Make sure that we're not going to promote the non-output-value 11931 // operand(s) or SELECT or SELECT_CC. 11932 // FIXME: Although we could sometimes handle this, and it does occur in 11933 // practice that one of the condition inputs to the select is also one of 11934 // the outputs, we currently can't deal with this. 11935 if (User->getOpcode() == ISD::SELECT) { 11936 if (User->getOperand(0) == PromOps[i]) 11937 return SDValue(); 11938 } else if (User->getOpcode() == ISD::SELECT_CC) { 11939 if (User->getOperand(0) == PromOps[i] || 11940 User->getOperand(1) == PromOps[i]) 11941 return SDValue(); 11942 } 11943 } 11944 } 11945 11946 // Replace all inputs with the extension operand. 
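  // For example (sketch): an input such as (zero_extend i1 %c to i32) is
  // replaced throughout the cluster by the bare i1 value %c. Constant inputs
  // are left alone here and are instead truncated to i1 at each promoted use
  // further below.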
11947 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 11948 // Constants may have users outside the cluster of to-be-promoted nodes, 11949 // and so we need to replace those as we do the promotions. 11950 if (isa<ConstantSDNode>(Inputs[i])) 11951 continue; 11952 else 11953 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0)); 11954 } 11955 11956 std::list<HandleSDNode> PromOpHandles; 11957 for (auto &PromOp : PromOps) 11958 PromOpHandles.emplace_back(PromOp); 11959 11960 // Replace all operations (these are all the same, but have a different 11961 // (i1) return type). DAG.getNode will validate that the types of 11962 // a binary operator match, so go through the list in reverse so that 11963 // we've likely promoted both operands first. Any intermediate truncations or 11964 // extensions disappear. 11965 while (!PromOpHandles.empty()) { 11966 SDValue PromOp = PromOpHandles.back().getValue(); 11967 PromOpHandles.pop_back(); 11968 11969 if (PromOp.getOpcode() == ISD::TRUNCATE || 11970 PromOp.getOpcode() == ISD::SIGN_EXTEND || 11971 PromOp.getOpcode() == ISD::ZERO_EXTEND || 11972 PromOp.getOpcode() == ISD::ANY_EXTEND) { 11973 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) && 11974 PromOp.getOperand(0).getValueType() != MVT::i1) { 11975 // The operand is not yet ready (see comment below). 11976 PromOpHandles.emplace_front(PromOp); 11977 continue; 11978 } 11979 11980 SDValue RepValue = PromOp.getOperand(0); 11981 if (isa<ConstantSDNode>(RepValue)) 11982 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue); 11983 11984 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue); 11985 continue; 11986 } 11987 11988 unsigned C; 11989 switch (PromOp.getOpcode()) { 11990 default: C = 0; break; 11991 case ISD::SELECT: C = 1; break; 11992 case ISD::SELECT_CC: C = 2; break; 11993 } 11994 11995 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) && 11996 PromOp.getOperand(C).getValueType() != MVT::i1) || 11997 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) && 11998 PromOp.getOperand(C+1).getValueType() != MVT::i1)) { 11999 // The to-be-promoted operands of this node have not yet been 12000 // promoted (this should be rare because we're going through the 12001 // list backward, but if one of the operands has several users in 12002 // this cluster of to-be-promoted nodes, it is possible). 12003 PromOpHandles.emplace_front(PromOp); 12004 continue; 12005 } 12006 12007 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(), 12008 PromOp.getNode()->op_end()); 12009 12010 // If there are any constant inputs, make sure they're replaced now. 12011 for (unsigned i = 0; i < 2; ++i) 12012 if (isa<ConstantSDNode>(Ops[C+i])) 12013 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]); 12014 12015 DAG.ReplaceAllUsesOfValueWith(PromOp, 12016 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops)); 12017 } 12018 12019 // Now we're left with the initial truncation itself. 12020 if (N->getOpcode() == ISD::TRUNCATE) 12021 return N->getOperand(0); 12022 12023 // Otherwise, this is a comparison. The operands to be compared have just 12024 // changed type (to i1), but everything else is the same. 12025 return SDValue(N, 0); 12026 } 12027 12028 SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, 12029 DAGCombinerInfo &DCI) const { 12030 SelectionDAG &DAG = DCI.DAG; 12031 SDLoc dl(N); 12032 12033 // If we're tracking CR bits, we need to be careful that we don't have: 12034 // zext(binary-ops(trunc(x), trunc(y))) 12035 // or 12036 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...) 
12037 // such that we're unnecessarily moving things into CR bits that can more 12038 // efficiently stay in GPRs. Note that if we're not certain that the high 12039 // bits are set as required by the final extension, we still may need to do 12040 // some masking to get the proper behavior. 12041 12042 // This same functionality is important on PPC64 when dealing with 12043 // 32-to-64-bit extensions; these occur often when 32-bit values are used as 12044 // the return values of functions. Because it is so similar, it is handled 12045 // here as well. 12046 12047 if (N->getValueType(0) != MVT::i32 && 12048 N->getValueType(0) != MVT::i64) 12049 return SDValue(); 12050 12051 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) || 12052 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64()))) 12053 return SDValue(); 12054 12055 if (N->getOperand(0).getOpcode() != ISD::AND && 12056 N->getOperand(0).getOpcode() != ISD::OR && 12057 N->getOperand(0).getOpcode() != ISD::XOR && 12058 N->getOperand(0).getOpcode() != ISD::SELECT && 12059 N->getOperand(0).getOpcode() != ISD::SELECT_CC) 12060 return SDValue(); 12061 12062 SmallVector<SDValue, 4> Inputs; 12063 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps; 12064 SmallPtrSet<SDNode *, 16> Visited; 12065 12066 // Visit all inputs, collect all binary operations (and, or, xor and 12067 // select) that are all fed by truncations. 12068 while (!BinOps.empty()) { 12069 SDValue BinOp = BinOps.back(); 12070 BinOps.pop_back(); 12071 12072 if (!Visited.insert(BinOp.getNode()).second) 12073 continue; 12074 12075 PromOps.push_back(BinOp); 12076 12077 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) { 12078 // The condition of the select is not promoted. 12079 if (BinOp.getOpcode() == ISD::SELECT && i == 0) 12080 continue; 12081 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3) 12082 continue; 12083 12084 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || 12085 isa<ConstantSDNode>(BinOp.getOperand(i))) { 12086 Inputs.push_back(BinOp.getOperand(i)); 12087 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || 12088 BinOp.getOperand(i).getOpcode() == ISD::OR || 12089 BinOp.getOperand(i).getOpcode() == ISD::XOR || 12090 BinOp.getOperand(i).getOpcode() == ISD::SELECT || 12091 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) { 12092 BinOps.push_back(BinOp.getOperand(i)); 12093 } else { 12094 // We have an input that is not a truncation or another binary 12095 // operation; we'll abort this transformation. 12096 return SDValue(); 12097 } 12098 } 12099 } 12100 12101 // The operands of a select that must be truncated when the select is 12102 // promoted because the operand is actually part of the to-be-promoted set. 12103 DenseMap<SDNode *, EVT> SelectTruncOp[2]; 12104 12105 // Make sure that this is a self-contained cluster of operations (which 12106 // is not quite the same thing as saying that everything has only one 12107 // use). 12108 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 12109 if (isa<ConstantSDNode>(Inputs[i])) 12110 continue; 12111 12112 for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(), 12113 UE = Inputs[i].getNode()->use_end(); 12114 UI != UE; ++UI) { 12115 SDNode *User = *UI; 12116 if (User != N && !Visited.count(User)) 12117 return SDValue(); 12118 12119 // If we're going to promote the non-output-value operand(s) or SELECT or 12120 // SELECT_CC, record them for truncation. 
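      // For example (sketch): if an input also feeds the comparison operand
      // of a SELECT_CC in the cluster, that operand will be widened along
      // with everything else, so its original type is remembered in
      // SelectTruncOp and an explicit truncate back to that type is
      // re-inserted for it later.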
12121 if (User->getOpcode() == ISD::SELECT) { 12122 if (User->getOperand(0) == Inputs[i]) 12123 SelectTruncOp[0].insert(std::make_pair(User, 12124 User->getOperand(0).getValueType())); 12125 } else if (User->getOpcode() == ISD::SELECT_CC) { 12126 if (User->getOperand(0) == Inputs[i]) 12127 SelectTruncOp[0].insert(std::make_pair(User, 12128 User->getOperand(0).getValueType())); 12129 if (User->getOperand(1) == Inputs[i]) 12130 SelectTruncOp[1].insert(std::make_pair(User, 12131 User->getOperand(1).getValueType())); 12132 } 12133 } 12134 } 12135 12136 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) { 12137 for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(), 12138 UE = PromOps[i].getNode()->use_end(); 12139 UI != UE; ++UI) { 12140 SDNode *User = *UI; 12141 if (User != N && !Visited.count(User)) 12142 return SDValue(); 12143 12144 // If we're going to promote the non-output-value operand(s) or SELECT or 12145 // SELECT_CC, record them for truncation. 12146 if (User->getOpcode() == ISD::SELECT) { 12147 if (User->getOperand(0) == PromOps[i]) 12148 SelectTruncOp[0].insert(std::make_pair(User, 12149 User->getOperand(0).getValueType())); 12150 } else if (User->getOpcode() == ISD::SELECT_CC) { 12151 if (User->getOperand(0) == PromOps[i]) 12152 SelectTruncOp[0].insert(std::make_pair(User, 12153 User->getOperand(0).getValueType())); 12154 if (User->getOperand(1) == PromOps[i]) 12155 SelectTruncOp[1].insert(std::make_pair(User, 12156 User->getOperand(1).getValueType())); 12157 } 12158 } 12159 } 12160 12161 unsigned PromBits = N->getOperand(0).getValueSizeInBits(); 12162 bool ReallyNeedsExt = false; 12163 if (N->getOpcode() != ISD::ANY_EXTEND) { 12164 // If all of the inputs are not already sign/zero extended, then 12165 // we'll still need to do that at the end. 12166 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 12167 if (isa<ConstantSDNode>(Inputs[i])) 12168 continue; 12169 12170 unsigned OpBits = 12171 Inputs[i].getOperand(0).getValueSizeInBits(); 12172 assert(PromBits < OpBits && "Truncation not to a smaller bit count?"); 12173 12174 if ((N->getOpcode() == ISD::ZERO_EXTEND && 12175 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0), 12176 APInt::getHighBitsSet(OpBits, 12177 OpBits-PromBits))) || 12178 (N->getOpcode() == ISD::SIGN_EXTEND && 12179 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) < 12180 (OpBits-(PromBits-1)))) { 12181 ReallyNeedsExt = true; 12182 break; 12183 } 12184 } 12185 } 12186 12187 // Replace all inputs, either with the truncation operand, or a 12188 // truncation or extension to the final output type. 12189 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 12190 // Constant inputs need to be replaced with the to-be-promoted nodes that 12191 // use them because they might have users outside of the cluster of 12192 // promoted nodes. 
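  // For example (sketch, for a 32-to-64-bit extension being combined): an
  // input (truncate i64 %x to i32) is replaced either by its source %x
  // directly when the types already line up, or by a sign/zero/any-extend or
  // truncate of %x to the final i64 result type, as selected below.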
12193 if (isa<ConstantSDNode>(Inputs[i])) 12194 continue; 12195 12196 SDValue InSrc = Inputs[i].getOperand(0); 12197 if (Inputs[i].getValueType() == N->getValueType(0)) 12198 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc); 12199 else if (N->getOpcode() == ISD::SIGN_EXTEND) 12200 DAG.ReplaceAllUsesOfValueWith(Inputs[i], 12201 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0))); 12202 else if (N->getOpcode() == ISD::ZERO_EXTEND) 12203 DAG.ReplaceAllUsesOfValueWith(Inputs[i], 12204 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0))); 12205 else 12206 DAG.ReplaceAllUsesOfValueWith(Inputs[i], 12207 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0))); 12208 } 12209 12210 std::list<HandleSDNode> PromOpHandles; 12211 for (auto &PromOp : PromOps) 12212 PromOpHandles.emplace_back(PromOp); 12213 12214 // Replace all operations (these are all the same, but have a different 12215 // (promoted) return type). DAG.getNode will validate that the types of 12216 // a binary operator match, so go through the list in reverse so that 12217 // we've likely promoted both operands first. 12218 while (!PromOpHandles.empty()) { 12219 SDValue PromOp = PromOpHandles.back().getValue(); 12220 PromOpHandles.pop_back(); 12221 12222 unsigned C; 12223 switch (PromOp.getOpcode()) { 12224 default: C = 0; break; 12225 case ISD::SELECT: C = 1; break; 12226 case ISD::SELECT_CC: C = 2; break; 12227 } 12228 12229 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) && 12230 PromOp.getOperand(C).getValueType() != N->getValueType(0)) || 12231 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) && 12232 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) { 12233 // The to-be-promoted operands of this node have not yet been 12234 // promoted (this should be rare because we're going through the 12235 // list backward, but if one of the operands has several users in 12236 // this cluster of to-be-promoted nodes, it is possible). 12237 PromOpHandles.emplace_front(PromOp); 12238 continue; 12239 } 12240 12241 // For SELECT and SELECT_CC nodes, we do a similar check for any 12242 // to-be-promoted comparison inputs. 12243 if (PromOp.getOpcode() == ISD::SELECT || 12244 PromOp.getOpcode() == ISD::SELECT_CC) { 12245 if ((SelectTruncOp[0].count(PromOp.getNode()) && 12246 PromOp.getOperand(0).getValueType() != N->getValueType(0)) || 12247 (SelectTruncOp[1].count(PromOp.getNode()) && 12248 PromOp.getOperand(1).getValueType() != N->getValueType(0))) { 12249 PromOpHandles.emplace_front(PromOp); 12250 continue; 12251 } 12252 } 12253 12254 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(), 12255 PromOp.getNode()->op_end()); 12256 12257 // If this node has constant inputs, then they'll need to be promoted here. 12258 for (unsigned i = 0; i < 2; ++i) { 12259 if (!isa<ConstantSDNode>(Ops[C+i])) 12260 continue; 12261 if (Ops[C+i].getValueType() == N->getValueType(0)) 12262 continue; 12263 12264 if (N->getOpcode() == ISD::SIGN_EXTEND) 12265 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); 12266 else if (N->getOpcode() == ISD::ZERO_EXTEND) 12267 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); 12268 else 12269 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); 12270 } 12271 12272 // If we've promoted the comparison inputs of a SELECT or SELECT_CC, 12273 // truncate them again to the original value type. 
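    // For example (sketch): a SELECT_CC whose i32 comparison operands were
    // swept into the promotion gets explicit truncates back to i32 inserted
    // for operands 0 and 1 just below, so the comparison itself is still
    // performed at its original width.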
12274 if (PromOp.getOpcode() == ISD::SELECT || 12275 PromOp.getOpcode() == ISD::SELECT_CC) { 12276 auto SI0 = SelectTruncOp[0].find(PromOp.getNode()); 12277 if (SI0 != SelectTruncOp[0].end()) 12278 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]); 12279 auto SI1 = SelectTruncOp[1].find(PromOp.getNode()); 12280 if (SI1 != SelectTruncOp[1].end()) 12281 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]); 12282 } 12283 12284 DAG.ReplaceAllUsesOfValueWith(PromOp, 12285 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops)); 12286 } 12287 12288 // Now we're left with the initial extension itself. 12289 if (!ReallyNeedsExt) 12290 return N->getOperand(0); 12291 12292 // To zero extend, just mask off everything except for the first bit (in the 12293 // i1 case). 12294 if (N->getOpcode() == ISD::ZERO_EXTEND) 12295 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0), 12296 DAG.getConstant(APInt::getLowBitsSet( 12297 N->getValueSizeInBits(0), PromBits), 12298 dl, N->getValueType(0))); 12299 12300 assert(N->getOpcode() == ISD::SIGN_EXTEND && 12301 "Invalid extension type"); 12302 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout()); 12303 SDValue ShiftCst = 12304 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy); 12305 return DAG.getNode( 12306 ISD::SRA, dl, N->getValueType(0), 12307 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst), 12308 ShiftCst); 12309 } 12310 12311 SDValue PPCTargetLowering::combineSetCC(SDNode *N, 12312 DAGCombinerInfo &DCI) const { 12313 assert(N->getOpcode() == ISD::SETCC && 12314 "Should be called with a SETCC node"); 12315 12316 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); 12317 if (CC == ISD::SETNE || CC == ISD::SETEQ) { 12318 SDValue LHS = N->getOperand(0); 12319 SDValue RHS = N->getOperand(1); 12320 12321 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS. 12322 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) && 12323 LHS.hasOneUse()) 12324 std::swap(LHS, RHS); 12325 12326 // x == 0-y --> x+y == 0 12327 // x != 0-y --> x+y != 0 12328 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) && 12329 RHS.hasOneUse()) { 12330 SDLoc DL(N); 12331 SelectionDAG &DAG = DCI.DAG; 12332 EVT VT = N->getValueType(0); 12333 EVT OpVT = LHS.getValueType(); 12334 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1)); 12335 return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC); 12336 } 12337 } 12338 12339 return DAGCombineTruncBoolExt(N, DCI); 12340 } 12341 12342 // Is this an extending load from an f32 to an f64? 12343 static bool isFPExtLoad(SDValue Op) { 12344 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode())) 12345 return LD->getExtensionType() == ISD::EXTLOAD && 12346 Op.getValueType() == MVT::f64; 12347 return false; 12348 } 12349 12350 /// Reduces the number of fp-to-int conversion when building a vector. 12351 /// 12352 /// If this vector is built out of floating to integer conversions, 12353 /// transform it to a vector built out of floating point values followed by a 12354 /// single floating to integer conversion of the vector. 12355 /// Namely (build_vector (fptosi $A), (fptosi $B), ...) 
12356 /// becomes (fptosi (build_vector ($A, $B, ...))) 12357 SDValue PPCTargetLowering:: 12358 combineElementTruncationToVectorTruncation(SDNode *N, 12359 DAGCombinerInfo &DCI) const { 12360 assert(N->getOpcode() == ISD::BUILD_VECTOR && 12361 "Should be called with a BUILD_VECTOR node"); 12362 12363 SelectionDAG &DAG = DCI.DAG; 12364 SDLoc dl(N); 12365 12366 SDValue FirstInput = N->getOperand(0); 12367 assert(FirstInput.getOpcode() == PPCISD::MFVSR && 12368 "The input operand must be an fp-to-int conversion."); 12369 12370 // This combine happens after legalization so the fp_to_[su]i nodes are 12371 // already converted to PPCSISD nodes. 12372 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode(); 12373 if (FirstConversion == PPCISD::FCTIDZ || 12374 FirstConversion == PPCISD::FCTIDUZ || 12375 FirstConversion == PPCISD::FCTIWZ || 12376 FirstConversion == PPCISD::FCTIWUZ) { 12377 bool IsSplat = true; 12378 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ || 12379 FirstConversion == PPCISD::FCTIWUZ; 12380 EVT SrcVT = FirstInput.getOperand(0).getValueType(); 12381 SmallVector<SDValue, 4> Ops; 12382 EVT TargetVT = N->getValueType(0); 12383 for (int i = 0, e = N->getNumOperands(); i < e; ++i) { 12384 SDValue NextOp = N->getOperand(i); 12385 if (NextOp.getOpcode() != PPCISD::MFVSR) 12386 return SDValue(); 12387 unsigned NextConversion = NextOp.getOperand(0).getOpcode(); 12388 if (NextConversion != FirstConversion) 12389 return SDValue(); 12390 // If we are converting to 32-bit integers, we need to add an FP_ROUND. 12391 // This is not valid if the input was originally double precision. It is 12392 // also not profitable to do unless this is an extending load in which 12393 // case doing this combine will allow us to combine consecutive loads. 12394 if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0))) 12395 return SDValue(); 12396 if (N->getOperand(i) != FirstInput) 12397 IsSplat = false; 12398 } 12399 12400 // If this is a splat, we leave it as-is since there will be only a single 12401 // fp-to-int conversion followed by a splat of the integer. This is better 12402 // for 32-bit and smaller ints and neutral for 64-bit ints. 12403 if (IsSplat) 12404 return SDValue(); 12405 12406 // Now that we know we have the right type of node, get its operands 12407 for (int i = 0, e = N->getNumOperands(); i < e; ++i) { 12408 SDValue In = N->getOperand(i).getOperand(0); 12409 if (Is32Bit) { 12410 // For 32-bit values, we need to add an FP_ROUND node (if we made it 12411 // here, we know that all inputs are extending loads so this is safe). 12412 if (In.isUndef()) 12413 Ops.push_back(DAG.getUNDEF(SrcVT)); 12414 else { 12415 SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl, 12416 MVT::f32, In.getOperand(0), 12417 DAG.getIntPtrConstant(1, dl)); 12418 Ops.push_back(Trunc); 12419 } 12420 } else 12421 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0)); 12422 } 12423 12424 unsigned Opcode; 12425 if (FirstConversion == PPCISD::FCTIDZ || 12426 FirstConversion == PPCISD::FCTIWZ) 12427 Opcode = ISD::FP_TO_SINT; 12428 else 12429 Opcode = ISD::FP_TO_UINT; 12430 12431 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32; 12432 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops); 12433 return DAG.getNode(Opcode, dl, TargetVT, BV); 12434 } 12435 return SDValue(); 12436 } 12437 12438 /// Reduce the number of loads when building a vector. 12439 /// 12440 /// Building a vector out of multiple loads can be converted to a load 12441 /// of the vector type if the loads are consecutive. 
If the loads are 12442 /// consecutive but in descending order, a shuffle is added at the end 12443 /// to reorder the vector. 12444 static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) { 12445 assert(N->getOpcode() == ISD::BUILD_VECTOR && 12446 "Should be called with a BUILD_VECTOR node"); 12447 12448 SDLoc dl(N); 12449 12450 // Return early for non byte-sized type, as they can't be consecutive. 12451 if (!N->getValueType(0).getVectorElementType().isByteSized()) 12452 return SDValue(); 12453 12454 bool InputsAreConsecutiveLoads = true; 12455 bool InputsAreReverseConsecutive = true; 12456 unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize(); 12457 SDValue FirstInput = N->getOperand(0); 12458 bool IsRoundOfExtLoad = false; 12459 12460 if (FirstInput.getOpcode() == ISD::FP_ROUND && 12461 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) { 12462 LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0)); 12463 IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD; 12464 } 12465 // Not a build vector of (possibly fp_rounded) loads. 12466 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) || 12467 N->getNumOperands() == 1) 12468 return SDValue(); 12469 12470 for (int i = 1, e = N->getNumOperands(); i < e; ++i) { 12471 // If any inputs are fp_round(extload), they all must be. 12472 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND) 12473 return SDValue(); 12474 12475 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) : 12476 N->getOperand(i); 12477 if (NextInput.getOpcode() != ISD::LOAD) 12478 return SDValue(); 12479 12480 SDValue PreviousInput = 12481 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1); 12482 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput); 12483 LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput); 12484 12485 // If any inputs are fp_round(extload), they all must be. 12486 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD) 12487 return SDValue(); 12488 12489 if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG)) 12490 InputsAreConsecutiveLoads = false; 12491 if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG)) 12492 InputsAreReverseConsecutive = false; 12493 12494 // Exit early if the loads are neither consecutive nor reverse consecutive. 12495 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive) 12496 return SDValue(); 12497 } 12498 12499 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) && 12500 "The loads cannot be both consecutive and reverse consecutive."); 12501 12502 SDValue FirstLoadOp = 12503 IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput; 12504 SDValue LastLoadOp = 12505 IsRoundOfExtLoad ? 
N->getOperand(N->getNumOperands()-1).getOperand(0) :
12506 N->getOperand(N->getNumOperands()-1);
12507 
12508 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
12509 LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);
12510 if (InputsAreConsecutiveLoads) {
12511 assert(LD1 && "Input needs to be a LoadSDNode.");
12512 return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
12513 LD1->getBasePtr(), LD1->getPointerInfo(),
12514 LD1->getAlignment());
12515 }
12516 if (InputsAreReverseConsecutive) {
12517 assert(LDL && "Input needs to be a LoadSDNode.");
12518 SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(),
12519 LDL->getBasePtr(), LDL->getPointerInfo(),
12520 LDL->getAlignment());
12521 SmallVector<int, 16> Ops;
12522 for (int i = N->getNumOperands() - 1; i >= 0; i--)
12523 Ops.push_back(i);
12524 
12525 return DAG.getVectorShuffle(N->getValueType(0), dl, Load,
12526 DAG.getUNDEF(N->getValueType(0)), Ops);
12527 }
12528 return SDValue();
12529 }
12530 
12531 // This function adds the required vector_shuffle needed to get
12532 // the elements of the vector extract in the correct position
12533 // as specified by the CorrectElems encoding.
12534 static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
12535 SDValue Input, uint64_t Elems,
12536 uint64_t CorrectElems) {
12537 SDLoc dl(N);
12538 
12539 unsigned NumElems = Input.getValueType().getVectorNumElements();
12540 SmallVector<int, 16> ShuffleMask(NumElems, -1);
12541 
12542 // Knowing the element indices being extracted from the original
12543 // vector and the order in which they're being inserted, just put
12544 // them at element indices required for the instruction.
12545 for (unsigned i = 0; i < N->getNumOperands(); i++) {
12546 if (DAG.getDataLayout().isLittleEndian())
12547 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
12548 else
12549 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
12550 CorrectElems = CorrectElems >> 8;
12551 Elems = Elems >> 8;
12552 }
12553 
12554 SDValue Shuffle =
12555 DAG.getVectorShuffle(Input.getValueType(), dl, Input,
12556 DAG.getUNDEF(Input.getValueType()), ShuffleMask);
12557 
12558 EVT Ty = N->getValueType(0);
12559 SDValue BV = DAG.getNode(PPCISD::SExtVElems, dl, Ty, Shuffle);
12560 return BV;
12561 }
12562 
12563 // Look for build vector patterns where input operands come from sign
12564 // extended vector_extract elements of specific indices. If the correct indices
12565 // aren't used, add a vector shuffle to fix up the indices and create a new
12566 // PPCISD::SExtVElems node which selects the vector sign extend instructions
12567 // during instruction selection.
12568 static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
12569 // This array encodes the indices that the vector sign extend instructions
12570 // extract from when extending from one type to another for both BE and LE.
12571 // The right nibble of each byte corresponds to the LE indices,
12572 // and the left nibble of each byte corresponds to the BE indices.
12573 // For example: 0x3074B8FC byte->word 12574 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC 12575 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF 12576 // For example: 0x000070F8 byte->double word 12577 // For LE: the allowed indices are: 0x0,0x8 12578 // For BE: the allowed indices are: 0x7,0xF 12579 uint64_t TargetElems[] = { 12580 0x3074B8FC, // b->w 12581 0x000070F8, // b->d 12582 0x10325476, // h->w 12583 0x00003074, // h->d 12584 0x00001032, // w->d 12585 }; 12586 12587 uint64_t Elems = 0; 12588 int Index; 12589 SDValue Input; 12590 12591 auto isSExtOfVecExtract = [&](SDValue Op) -> bool { 12592 if (!Op) 12593 return false; 12594 if (Op.getOpcode() != ISD::SIGN_EXTEND && 12595 Op.getOpcode() != ISD::SIGN_EXTEND_INREG) 12596 return false; 12597 12598 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value 12599 // of the right width. 12600 SDValue Extract = Op.getOperand(0); 12601 if (Extract.getOpcode() == ISD::ANY_EXTEND) 12602 Extract = Extract.getOperand(0); 12603 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT) 12604 return false; 12605 12606 ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1)); 12607 if (!ExtOp) 12608 return false; 12609 12610 Index = ExtOp->getZExtValue(); 12611 if (Input && Input != Extract.getOperand(0)) 12612 return false; 12613 12614 if (!Input) 12615 Input = Extract.getOperand(0); 12616 12617 Elems = Elems << 8; 12618 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4; 12619 Elems |= Index; 12620 12621 return true; 12622 }; 12623 12624 // If the build vector operands aren't sign extended vector extracts, 12625 // of the same input vector, then return. 12626 for (unsigned i = 0; i < N->getNumOperands(); i++) { 12627 if (!isSExtOfVecExtract(N->getOperand(i))) { 12628 return SDValue(); 12629 } 12630 } 12631 12632 // If the vector extract indicies are not correct, add the appropriate 12633 // vector_shuffle. 12634 int TgtElemArrayIdx; 12635 int InputSize = Input.getValueType().getScalarSizeInBits(); 12636 int OutputSize = N->getValueType(0).getScalarSizeInBits(); 12637 if (InputSize + OutputSize == 40) 12638 TgtElemArrayIdx = 0; 12639 else if (InputSize + OutputSize == 72) 12640 TgtElemArrayIdx = 1; 12641 else if (InputSize + OutputSize == 48) 12642 TgtElemArrayIdx = 2; 12643 else if (InputSize + OutputSize == 80) 12644 TgtElemArrayIdx = 3; 12645 else if (InputSize + OutputSize == 96) 12646 TgtElemArrayIdx = 4; 12647 else 12648 return SDValue(); 12649 12650 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx]; 12651 CorrectElems = DAG.getDataLayout().isLittleEndian() 12652 ? CorrectElems & 0x0F0F0F0F0F0F0F0F 12653 : CorrectElems & 0xF0F0F0F0F0F0F0F0; 12654 if (Elems != CorrectElems) { 12655 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems); 12656 } 12657 12658 // Regular lowering will catch cases where a shuffle is not needed. 12659 return SDValue(); 12660 } 12661 12662 SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N, 12663 DAGCombinerInfo &DCI) const { 12664 assert(N->getOpcode() == ISD::BUILD_VECTOR && 12665 "Should be called with a BUILD_VECTOR node"); 12666 12667 SelectionDAG &DAG = DCI.DAG; 12668 SDLoc dl(N); 12669 12670 if (!Subtarget.hasVSX()) 12671 return SDValue(); 12672 12673 // The target independent DAG combiner will leave a build_vector of 12674 // float-to-int conversions intact. We can generate MUCH better code for 12675 // a float-to-int conversion of a vector of floats. 
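  // For example (rough sketch):
  //   (v4i32 build_vector (fp_to_sint f32 %a), (fp_to_sint f32 %b),
  //                       (fp_to_sint f32 %c), (fp_to_sint f32 %d))
  // is better emitted as
  //   (fp_to_sint v4i32 (v4f32 build_vector %a, %b, %c, %d))
  // i.e. one vector conversion instead of four scalar ones; the helper called
  // below handles the post-legalization (PPCISD::MFVSR) form of this pattern.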
12676 SDValue FirstInput = N->getOperand(0); 12677 if (FirstInput.getOpcode() == PPCISD::MFVSR) { 12678 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI); 12679 if (Reduced) 12680 return Reduced; 12681 } 12682 12683 // If we're building a vector out of consecutive loads, just load that 12684 // vector type. 12685 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG); 12686 if (Reduced) 12687 return Reduced; 12688 12689 // If we're building a vector out of extended elements from another vector 12690 // we have P9 vector integer extend instructions. The code assumes legal 12691 // input types (i.e. it can't handle things like v4i16) so do not run before 12692 // legalization. 12693 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) { 12694 Reduced = combineBVOfVecSExt(N, DAG); 12695 if (Reduced) 12696 return Reduced; 12697 } 12698 12699 12700 if (N->getValueType(0) != MVT::v2f64) 12701 return SDValue(); 12702 12703 // Looking for: 12704 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1)) 12705 if (FirstInput.getOpcode() != ISD::SINT_TO_FP && 12706 FirstInput.getOpcode() != ISD::UINT_TO_FP) 12707 return SDValue(); 12708 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP && 12709 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP) 12710 return SDValue(); 12711 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode()) 12712 return SDValue(); 12713 12714 SDValue Ext1 = FirstInput.getOperand(0); 12715 SDValue Ext2 = N->getOperand(1).getOperand(0); 12716 if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT || 12717 Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT) 12718 return SDValue(); 12719 12720 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1)); 12721 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1)); 12722 if (!Ext1Op || !Ext2Op) 12723 return SDValue(); 12724 if (Ext1.getOperand(0).getValueType() != MVT::v4i32 || 12725 Ext1.getOperand(0) != Ext2.getOperand(0)) 12726 return SDValue(); 12727 12728 int FirstElem = Ext1Op->getZExtValue(); 12729 int SecondElem = Ext2Op->getZExtValue(); 12730 int SubvecIdx; 12731 if (FirstElem == 0 && SecondElem == 1) 12732 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0; 12733 else if (FirstElem == 2 && SecondElem == 3) 12734 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1; 12735 else 12736 return SDValue(); 12737 12738 SDValue SrcVec = Ext1.getOperand(0); 12739 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ? 12740 PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP; 12741 return DAG.getNode(NodeType, dl, MVT::v2f64, 12742 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl)); 12743 } 12744 12745 SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N, 12746 DAGCombinerInfo &DCI) const { 12747 assert((N->getOpcode() == ISD::SINT_TO_FP || 12748 N->getOpcode() == ISD::UINT_TO_FP) && 12749 "Need an int -> FP conversion node here"); 12750 12751 if (useSoftFloat() || !Subtarget.has64BitSupport()) 12752 return SDValue(); 12753 12754 SelectionDAG &DAG = DCI.DAG; 12755 SDLoc dl(N); 12756 SDValue Op(N, 0); 12757 12758 // Don't handle ppc_fp128 here or conversions that are out-of-range capable 12759 // from the hardware. 
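  // The profitable shapes handled below look roughly like this (sketches):
  //   (f64 sint_to_fp (i8/i16 load %p))        -- on Power9, an LXSIZX load
  //       into a VSR, a VEXTS to sign-extend in-register if needed, then
  //       FCFID[S]
  //   (f64 sint_to_fp (i64 fp_to_sint f64 %x)) -- FCTIDZ followed by FCFID
  //       entirely on the FP/VSX side, avoiding a store/load round trip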
12760 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) 12761 return SDValue(); 12762 if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) || 12763 Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64)) 12764 return SDValue(); 12765 12766 SDValue FirstOperand(Op.getOperand(0)); 12767 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD && 12768 (FirstOperand.getValueType() == MVT::i8 || 12769 FirstOperand.getValueType() == MVT::i16); 12770 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) { 12771 bool Signed = N->getOpcode() == ISD::SINT_TO_FP; 12772 bool DstDouble = Op.getValueType() == MVT::f64; 12773 unsigned ConvOp = Signed ? 12774 (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) : 12775 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS); 12776 SDValue WidthConst = 12777 DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2, 12778 dl, false); 12779 LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode()); 12780 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst }; 12781 SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl, 12782 DAG.getVTList(MVT::f64, MVT::Other), 12783 Ops, MVT::i8, LDN->getMemOperand()); 12784 12785 // For signed conversion, we need to sign-extend the value in the VSR 12786 if (Signed) { 12787 SDValue ExtOps[] = { Ld, WidthConst }; 12788 SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps); 12789 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext); 12790 } else 12791 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld); 12792 } 12793 12794 12795 // For i32 intermediate values, unfortunately, the conversion functions 12796 // leave the upper 32 bits of the value are undefined. Within the set of 12797 // scalar instructions, we have no method for zero- or sign-extending the 12798 // value. Thus, we cannot handle i32 intermediate values here. 12799 if (Op.getOperand(0).getValueType() == MVT::i32) 12800 return SDValue(); 12801 12802 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) && 12803 "UINT_TO_FP is supported only with FPCVT"); 12804 12805 // If we have FCFIDS, then use it when converting to single-precision. 12806 // Otherwise, convert to double-precision and then round. 12807 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) 12808 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS 12809 : PPCISD::FCFIDS) 12810 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU 12811 : PPCISD::FCFID); 12812 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) 12813 ? MVT::f32 12814 : MVT::f64; 12815 12816 // If we're converting from a float, to an int, and back to a float again, 12817 // then we don't need the store/load pair at all. 12818 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT && 12819 Subtarget.hasFPCVT()) || 12820 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) { 12821 SDValue Src = Op.getOperand(0).getOperand(0); 12822 if (Src.getValueType() == MVT::f32) { 12823 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); 12824 DCI.AddToWorklist(Src.getNode()); 12825 } else if (Src.getValueType() != MVT::f64) { 12826 // Make sure that we don't pick up a ppc_fp128 source value. 12827 return SDValue(); 12828 } 12829 12830 unsigned FCTOp = 12831 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? 
PPCISD::FCTIDZ : 12832 PPCISD::FCTIDUZ; 12833 12834 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src); 12835 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp); 12836 12837 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) { 12838 FP = DAG.getNode(ISD::FP_ROUND, dl, 12839 MVT::f32, FP, DAG.getIntPtrConstant(0, dl)); 12840 DCI.AddToWorklist(FP.getNode()); 12841 } 12842 12843 return FP; 12844 } 12845 12846 return SDValue(); 12847 } 12848 12849 // expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for 12850 // builtins) into loads with swaps. 12851 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N, 12852 DAGCombinerInfo &DCI) const { 12853 SelectionDAG &DAG = DCI.DAG; 12854 SDLoc dl(N); 12855 SDValue Chain; 12856 SDValue Base; 12857 MachineMemOperand *MMO; 12858 12859 switch (N->getOpcode()) { 12860 default: 12861 llvm_unreachable("Unexpected opcode for little endian VSX load"); 12862 case ISD::LOAD: { 12863 LoadSDNode *LD = cast<LoadSDNode>(N); 12864 Chain = LD->getChain(); 12865 Base = LD->getBasePtr(); 12866 MMO = LD->getMemOperand(); 12867 // If the MMO suggests this isn't a load of a full vector, leave 12868 // things alone. For a built-in, we have to make the change for 12869 // correctness, so if there is a size problem that will be a bug. 12870 if (MMO->getSize() < 16) 12871 return SDValue(); 12872 break; 12873 } 12874 case ISD::INTRINSIC_W_CHAIN: { 12875 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N); 12876 Chain = Intrin->getChain(); 12877 // Similarly to the store case below, Intrin->getBasePtr() doesn't get 12878 // us what we want. Get operand 2 instead. 12879 Base = Intrin->getOperand(2); 12880 MMO = Intrin->getMemOperand(); 12881 break; 12882 } 12883 } 12884 12885 MVT VecTy = N->getValueType(0).getSimpleVT(); 12886 12887 // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is 12888 // aligned and the type is a vector with elements up to 4 bytes 12889 if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment()%16) 12890 && VecTy.getScalarSizeInBits() <= 32 ) { 12891 return SDValue(); 12892 } 12893 12894 SDValue LoadOps[] = { Chain, Base }; 12895 SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl, 12896 DAG.getVTList(MVT::v2f64, MVT::Other), 12897 LoadOps, MVT::v2f64, MMO); 12898 12899 DCI.AddToWorklist(Load.getNode()); 12900 Chain = Load.getValue(1); 12901 SDValue Swap = DAG.getNode( 12902 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load); 12903 DCI.AddToWorklist(Swap.getNode()); 12904 12905 // Add a bitcast if the resulting load type doesn't match v2f64. 12906 if (VecTy != MVT::v2f64) { 12907 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap); 12908 DCI.AddToWorklist(N.getNode()); 12909 // Package {bitcast value, swap's chain} to match Load's shape. 12910 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other), 12911 N, Swap.getValue(1)); 12912 } 12913 12914 return Swap; 12915 } 12916 12917 // expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for 12918 // builtins) into stores with swaps. 
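// For example (sketch, little-endian pre-Power9): (store v4i32 %v, %p) is
// rewritten into roughly
//   (PPCISD::STXVD2X (PPCISD::XXSWAPD (bitcast v2f64 %v)), %p)
// i.e. the value is swapped first and then stored with the
// doubleword-permuting store, mirroring the load expansion above.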
12919 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N, 12920 DAGCombinerInfo &DCI) const { 12921 SelectionDAG &DAG = DCI.DAG; 12922 SDLoc dl(N); 12923 SDValue Chain; 12924 SDValue Base; 12925 unsigned SrcOpnd; 12926 MachineMemOperand *MMO; 12927 12928 switch (N->getOpcode()) { 12929 default: 12930 llvm_unreachable("Unexpected opcode for little endian VSX store"); 12931 case ISD::STORE: { 12932 StoreSDNode *ST = cast<StoreSDNode>(N); 12933 Chain = ST->getChain(); 12934 Base = ST->getBasePtr(); 12935 MMO = ST->getMemOperand(); 12936 SrcOpnd = 1; 12937 // If the MMO suggests this isn't a store of a full vector, leave 12938 // things alone. For a built-in, we have to make the change for 12939 // correctness, so if there is a size problem that will be a bug. 12940 if (MMO->getSize() < 16) 12941 return SDValue(); 12942 break; 12943 } 12944 case ISD::INTRINSIC_VOID: { 12945 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N); 12946 Chain = Intrin->getChain(); 12947 // Intrin->getBasePtr() oddly does not get what we want. 12948 Base = Intrin->getOperand(3); 12949 MMO = Intrin->getMemOperand(); 12950 SrcOpnd = 2; 12951 break; 12952 } 12953 } 12954 12955 SDValue Src = N->getOperand(SrcOpnd); 12956 MVT VecTy = Src.getValueType().getSimpleVT(); 12957 12958 // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the load is 12959 // aligned and the type is a vector with elements up to 4 bytes 12960 if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment()%16) 12961 && VecTy.getScalarSizeInBits() <= 32 ) { 12962 return SDValue(); 12963 } 12964 12965 // All stores are done as v2f64 and possible bit cast. 12966 if (VecTy != MVT::v2f64) { 12967 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src); 12968 DCI.AddToWorklist(Src.getNode()); 12969 } 12970 12971 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl, 12972 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src); 12973 DCI.AddToWorklist(Swap.getNode()); 12974 Chain = Swap.getValue(1); 12975 SDValue StoreOps[] = { Chain, Swap, Base }; 12976 SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl, 12977 DAG.getVTList(MVT::Other), 12978 StoreOps, VecTy, MMO); 12979 DCI.AddToWorklist(Store.getNode()); 12980 return Store; 12981 } 12982 12983 // Handle DAG combine for STORE (FP_TO_INT F). 12984 SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N, 12985 DAGCombinerInfo &DCI) const { 12986 12987 SelectionDAG &DAG = DCI.DAG; 12988 SDLoc dl(N); 12989 unsigned Opcode = N->getOperand(1).getOpcode(); 12990 12991 assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) 12992 && "Not a FP_TO_INT Instruction!"); 12993 12994 SDValue Val = N->getOperand(1).getOperand(0); 12995 EVT Op1VT = N->getOperand(1).getValueType(); 12996 EVT ResVT = Val.getValueType(); 12997 12998 // Floating point types smaller than 32 bits are not legal on Power. 12999 if (ResVT.getScalarSizeInBits() < 32) 13000 return SDValue(); 13001 13002 // Only perform combine for conversion to i64/i32 or power9 i16/i8. 
13003 bool ValidTypeForStoreFltAsInt = 13004 (Op1VT == MVT::i32 || Op1VT == MVT::i64 || 13005 (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8))); 13006 13007 if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Altivec() || 13008 cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt) 13009 return SDValue(); 13010 13011 // Extend f32 values to f64 13012 if (ResVT.getScalarSizeInBits() == 32) { 13013 Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val); 13014 DCI.AddToWorklist(Val.getNode()); 13015 } 13016 13017 // Set signed or unsigned conversion opcode. 13018 unsigned ConvOpcode = (Opcode == ISD::FP_TO_SINT) ? 13019 PPCISD::FP_TO_SINT_IN_VSR : 13020 PPCISD::FP_TO_UINT_IN_VSR; 13021 13022 Val = DAG.getNode(ConvOpcode, 13023 dl, ResVT == MVT::f128 ? MVT::f128 : MVT::f64, Val); 13024 DCI.AddToWorklist(Val.getNode()); 13025 13026 // Set number of bytes being converted. 13027 unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8; 13028 SDValue Ops[] = { N->getOperand(0), Val, N->getOperand(2), 13029 DAG.getIntPtrConstant(ByteSize, dl, false), 13030 DAG.getValueType(Op1VT) }; 13031 13032 Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl, 13033 DAG.getVTList(MVT::Other), Ops, 13034 cast<StoreSDNode>(N)->getMemoryVT(), 13035 cast<StoreSDNode>(N)->getMemOperand()); 13036 13037 DCI.AddToWorklist(Val.getNode()); 13038 return Val; 13039 } 13040 13041 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, 13042 DAGCombinerInfo &DCI) const { 13043 SelectionDAG &DAG = DCI.DAG; 13044 SDLoc dl(N); 13045 switch (N->getOpcode()) { 13046 default: break; 13047 case ISD::ADD: 13048 return combineADD(N, DCI); 13049 case ISD::SHL: 13050 return combineSHL(N, DCI); 13051 case ISD::SRA: 13052 return combineSRA(N, DCI); 13053 case ISD::SRL: 13054 return combineSRL(N, DCI); 13055 case ISD::MUL: 13056 return combineMUL(N, DCI); 13057 case PPCISD::SHL: 13058 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0. 13059 return N->getOperand(0); 13060 break; 13061 case PPCISD::SRL: 13062 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0. 13063 return N->getOperand(0); 13064 break; 13065 case PPCISD::SRA: 13066 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { 13067 if (C->isNullValue() || // 0 >>s V -> 0. 13068 C->isAllOnesValue()) // -1 >>s V -> -1. 13069 return N->getOperand(0); 13070 } 13071 break; 13072 case ISD::SIGN_EXTEND: 13073 case ISD::ZERO_EXTEND: 13074 case ISD::ANY_EXTEND: 13075 return DAGCombineExtBoolTrunc(N, DCI); 13076 case ISD::TRUNCATE: 13077 return combineTRUNCATE(N, DCI); 13078 case ISD::SETCC: 13079 if (SDValue CSCC = combineSetCC(N, DCI)) 13080 return CSCC; 13081 LLVM_FALLTHROUGH; 13082 case ISD::SELECT_CC: 13083 return DAGCombineTruncBoolExt(N, DCI); 13084 case ISD::SINT_TO_FP: 13085 case ISD::UINT_TO_FP: 13086 return combineFPToIntToFP(N, DCI); 13087 case ISD::STORE: { 13088 13089 EVT Op1VT = N->getOperand(1).getValueType(); 13090 unsigned Opcode = N->getOperand(1).getOpcode(); 13091 13092 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) { 13093 SDValue Val= combineStoreFPToInt(N, DCI); 13094 if (Val) 13095 return Val; 13096 } 13097 13098 // Turn STORE (BSWAP) -> sthbrx/stwbrx. 
13099 if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
13100 N->getOperand(1).getNode()->hasOneUse() &&
13101 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
13102 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
13103 
13104 // STBRX can only handle simple types and it makes no sense to store less
13105 // than two bytes in byte-reversed order.
13106 EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
13107 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
13108 break;
13109 
13110 SDValue BSwapOp = N->getOperand(1).getOperand(0);
13111 // Do an any-extend to 32-bits if this is a half-word input.
13112 if (BSwapOp.getValueType() == MVT::i16)
13113 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
13114 
13115 // If the type of the BSWAP operand is wider than the stored memory width,
13116 // it needs to be shifted to the right side before STBRX.
13117 if (Op1VT.bitsGT(mVT)) {
13118 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
13119 BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
13120 DAG.getConstant(Shift, dl, MVT::i32));
13121 // Need to truncate if this is a bswap of i64 stored as i32/i16.
13122 if (Op1VT == MVT::i64)
13123 BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
13124 }
13125 
13126 SDValue Ops[] = {
13127 N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
13128 };
13129 return
13130 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
13131 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
13132 cast<StoreSDNode>(N)->getMemOperand());
13133 }
13134 
13135 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
13136 // So it can increase the chance of CSE constant construction.
13137 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
13138 isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
13139 // Need to sign-extend to 64 bits to handle negative values.
13140 EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
13141 uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
13142 MemVT.getSizeInBits());
13143 SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
13144 
13145 // DAG.getTruncStore() can't be used here because it doesn't accept
13146 // the general (base + offset) addressing mode.
13147 // So we use UpdateNodeOperands and setTruncatingStore instead.
13148 DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
13149 N->getOperand(3));
13150 cast<StoreSDNode>(N)->setTruncatingStore(true);
13151 return SDValue(N, 0);
13152 }
13153 
13154 // For little endian, VSX stores require generating xxswapd/stxvd2x.
13155 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
13156 if (Op1VT.isSimple()) {
13157 MVT StoreVT = Op1VT.getSimpleVT();
13158 if (Subtarget.needsSwapsForVSXMemOps() &&
13159 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
13160 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
13161 return expandVSXStoreForLE(N, DCI);
13162 }
13163 break;
13164 }
13165 case ISD::LOAD: {
13166 LoadSDNode *LD = cast<LoadSDNode>(N);
13167 EVT VT = LD->getValueType(0);
13168 
13169 // For little endian, VSX loads require generating lxvd2x/xxswapd.
13170 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
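    // For example (sketch, little-endian pre-Power9): a (v4i32 load %p) is
    // rewritten by expandVSXLoadForLE into roughly
    //   (bitcast v4i32 (PPCISD::XXSWAPD (PPCISD::LXVD2X %p)))
    // i.e. a doubleword-permuting load followed by a swap that restores the
    // expected element order.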
13171 if (VT.isSimple()) { 13172 MVT LoadVT = VT.getSimpleVT(); 13173 if (Subtarget.needsSwapsForVSXMemOps() && 13174 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 || 13175 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32)) 13176 return expandVSXLoadForLE(N, DCI); 13177 } 13178 13179 // We sometimes end up with a 64-bit integer load, from which we extract 13180 // two single-precision floating-point numbers. This happens with 13181 // std::complex<float>, and other similar structures, because of the way we 13182 // canonicalize structure copies. However, if we lack direct moves, 13183 // then the final bitcasts from the extracted integer values to the 13184 // floating-point numbers turn into store/load pairs. Even with direct moves, 13185 // just loading the two floating-point numbers is likely better. 13186 auto ReplaceTwoFloatLoad = [&]() { 13187 if (VT != MVT::i64) 13188 return false; 13189 13190 if (LD->getExtensionType() != ISD::NON_EXTLOAD || 13191 LD->isVolatile()) 13192 return false; 13193 13194 // We're looking for a sequence like this: 13195 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64 13196 // t16: i64 = srl t13, Constant:i32<32> 13197 // t17: i32 = truncate t16 13198 // t18: f32 = bitcast t17 13199 // t19: i32 = truncate t13 13200 // t20: f32 = bitcast t19 13201 13202 if (!LD->hasNUsesOfValue(2, 0)) 13203 return false; 13204 13205 auto UI = LD->use_begin(); 13206 while (UI.getUse().getResNo() != 0) ++UI; 13207 SDNode *Trunc = *UI++; 13208 while (UI.getUse().getResNo() != 0) ++UI; 13209 SDNode *RightShift = *UI; 13210 if (Trunc->getOpcode() != ISD::TRUNCATE) 13211 std::swap(Trunc, RightShift); 13212 13213 if (Trunc->getOpcode() != ISD::TRUNCATE || 13214 Trunc->getValueType(0) != MVT::i32 || 13215 !Trunc->hasOneUse()) 13216 return false; 13217 if (RightShift->getOpcode() != ISD::SRL || 13218 !isa<ConstantSDNode>(RightShift->getOperand(1)) || 13219 RightShift->getConstantOperandVal(1) != 32 || 13220 !RightShift->hasOneUse()) 13221 return false; 13222 13223 SDNode *Trunc2 = *RightShift->use_begin(); 13224 if (Trunc2->getOpcode() != ISD::TRUNCATE || 13225 Trunc2->getValueType(0) != MVT::i32 || 13226 !Trunc2->hasOneUse()) 13227 return false; 13228 13229 SDNode *Bitcast = *Trunc->use_begin(); 13230 SDNode *Bitcast2 = *Trunc2->use_begin(); 13231 13232 if (Bitcast->getOpcode() != ISD::BITCAST || 13233 Bitcast->getValueType(0) != MVT::f32) 13234 return false; 13235 if (Bitcast2->getOpcode() != ISD::BITCAST || 13236 Bitcast2->getValueType(0) != MVT::f32) 13237 return false; 13238 13239 if (Subtarget.isLittleEndian()) 13240 std::swap(Bitcast, Bitcast2); 13241 13242 // Bitcast has the second float (in memory-layout order) and Bitcast2 13243 // has the first one. 
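      // After the rewrite performed below, the pattern above becomes roughly
      // (sketch):
      //   tA: f32,ch = load<(4 bytes from %ref.tmp)>     t0, t6
      //   tB: f32,ch = load<(4 bytes from %ref.tmp + 4)> tA:1, (add t6, 4)
      // with the two bitcast users replaced by these loads and the chain
      // users of the original i64 load rewired to the second load's chain.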
13244 13245 SDValue BasePtr = LD->getBasePtr(); 13246 if (LD->isIndexed()) { 13247 assert(LD->getAddressingMode() == ISD::PRE_INC && 13248 "Non-pre-inc AM on PPC?"); 13249 BasePtr = 13250 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, 13251 LD->getOffset()); 13252 } 13253 13254 auto MMOFlags = 13255 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile; 13256 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr, 13257 LD->getPointerInfo(), LD->getAlignment(), 13258 MMOFlags, LD->getAAInfo()); 13259 SDValue AddPtr = 13260 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), 13261 BasePtr, DAG.getIntPtrConstant(4, dl)); 13262 SDValue FloatLoad2 = DAG.getLoad( 13263 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr, 13264 LD->getPointerInfo().getWithOffset(4), 13265 MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo()); 13266 13267 if (LD->isIndexed()) { 13268 // Note that DAGCombine should re-form any pre-increment load(s) from 13269 // what is produced here if that makes sense. 13270 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr); 13271 } 13272 13273 DCI.CombineTo(Bitcast2, FloatLoad); 13274 DCI.CombineTo(Bitcast, FloatLoad2); 13275 13276 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1), 13277 SDValue(FloatLoad2.getNode(), 1)); 13278 return true; 13279 }; 13280 13281 if (ReplaceTwoFloatLoad()) 13282 return SDValue(N, 0); 13283 13284 EVT MemVT = LD->getMemoryVT(); 13285 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext()); 13286 unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty); 13287 Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext()); 13288 unsigned ScalarABIAlignment = DAG.getDataLayout().getABITypeAlignment(STy); 13289 if (LD->isUnindexed() && VT.isVector() && 13290 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) && 13291 // P8 and later hardware should just use LOAD. 13292 !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 || 13293 VT == MVT::v4i32 || VT == MVT::v4f32)) || 13294 (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) && 13295 LD->getAlignment() >= ScalarABIAlignment)) && 13296 LD->getAlignment() < ABIAlignment) { 13297 // This is a type-legal unaligned Altivec or QPX load. 13298 SDValue Chain = LD->getChain(); 13299 SDValue Ptr = LD->getBasePtr(); 13300 bool isLittleEndian = Subtarget.isLittleEndian(); 13301 13302 // This implements the loading of unaligned vectors as described in 13303 // the venerable Apple Velocity Engine overview. Specifically: 13304 // https://developer.apple.com/hardwaredrivers/ve/alignment.html 13305 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html 13306 // 13307 // The general idea is to expand a sequence of one or more unaligned 13308 // loads into an alignment-based permutation-control instruction (lvsl 13309 // or lvsr), a series of regular vector loads (which always truncate 13310 // their input address to an aligned address), and a series of 13311 // permutations. The results of these permutations are the requested 13312 // loaded values. The trick is that the last "extra" load is not taken 13313 // from the address you might suspect (sizeof(vector) bytes after the 13314 // last requested load), but rather sizeof(vector) - 1 bytes after the 13315 // last requested vector. The point of this is to avoid a page fault if 13316 // the base address happened to be aligned. 
This works because if the 13317 // base address is aligned, then adding less than a full vector length 13318 // will cause the last vector in the sequence to be (re)loaded. 13319 // Otherwise, the next vector will be fetched as you might suspect was 13320 // necessary. 13321 13322 // We might be able to reuse the permutation generation from 13323 // a different base address offset from this one by an aligned amount. 13324 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this 13325 // optimization later. 13326 Intrinsic::ID Intr, IntrLD, IntrPerm; 13327 MVT PermCntlTy, PermTy, LDTy; 13328 if (Subtarget.hasAltivec()) { 13329 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr : 13330 Intrinsic::ppc_altivec_lvsl; 13331 IntrLD = Intrinsic::ppc_altivec_lvx; 13332 IntrPerm = Intrinsic::ppc_altivec_vperm; 13333 PermCntlTy = MVT::v16i8; 13334 PermTy = MVT::v4i32; 13335 LDTy = MVT::v4i32; 13336 } else { 13337 Intr = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld : 13338 Intrinsic::ppc_qpx_qvlpcls; 13339 IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd : 13340 Intrinsic::ppc_qpx_qvlfs; 13341 IntrPerm = Intrinsic::ppc_qpx_qvfperm; 13342 PermCntlTy = MVT::v4f64; 13343 PermTy = MVT::v4f64; 13344 LDTy = MemVT.getSimpleVT(); 13345 } 13346 13347 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy); 13348 13349 // Create the new MMO for the new base load. It is like the original MMO, 13350 // but represents an area in memory almost twice the vector size centered 13351 // on the original address. If the address is unaligned, we might start 13352 // reading up to (sizeof(vector)-1) bytes below the address of the 13353 // original unaligned load. 13354 MachineFunction &MF = DAG.getMachineFunction(); 13355 MachineMemOperand *BaseMMO = 13356 MF.getMachineMemOperand(LD->getMemOperand(), 13357 -(long)MemVT.getStoreSize()+1, 13358 2*MemVT.getStoreSize()-1); 13359 13360 // Create the new base load. 13361 SDValue LDXIntID = 13362 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout())); 13363 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr }; 13364 SDValue BaseLoad = 13365 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, 13366 DAG.getVTList(PermTy, MVT::Other), 13367 BaseLoadOps, LDTy, BaseMMO); 13368 13369 // Note that the value of IncOffset (which is provided to the next 13370 // load's pointer info offset value, and thus used to calculate the 13371 // alignment), and the value of IncValue (which is actually used to 13372 // increment the pointer value) are different! This is because we 13373 // require the next load to appear to be aligned, even though it 13374 // is actually offset from the base pointer by a lesser amount. 13375 int IncOffset = VT.getSizeInBits() / 8; 13376 int IncValue = IncOffset; 13377 13378 // Walk (both up and down) the chain looking for another load at the real 13379 // (aligned) offset (the alignment of the other load does not matter in 13380 // this case). If found, then do not use the offset reduction trick, as 13381 // that will prevent the loads from being later combined (as they would 13382 // otherwise be duplicates). 
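// Illustrative example (16-byte Altivec vectors): IncOffset and IncValue
// both start at 16; when no other load at the true aligned offset is found,
// IncValue drops to 15, and because lvx truncates its address to a 16-byte
// boundary the extra load still reaches the next aligned vector without
// touching the following page.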
13383 if (!findConsecutiveLoad(LD, DAG)) 13384 --IncValue; 13385 13386 SDValue Increment = 13387 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout())); 13388 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); 13389 13390 MachineMemOperand *ExtraMMO = 13391 MF.getMachineMemOperand(LD->getMemOperand(), 13392 1, 2*MemVT.getStoreSize()-1); 13393 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr }; 13394 SDValue ExtraLoad = 13395 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, 13396 DAG.getVTList(PermTy, MVT::Other), 13397 ExtraLoadOps, LDTy, ExtraMMO); 13398 13399 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 13400 BaseLoad.getValue(1), ExtraLoad.getValue(1)); 13401 13402 // Because vperm has a big-endian bias, we must reverse the order 13403 // of the input vectors and complement the permute control vector 13404 // when generating little endian code. We have already handled the 13405 // latter by using lvsr instead of lvsl, so just reverse BaseLoad 13406 // and ExtraLoad here. 13407 SDValue Perm; 13408 if (isLittleEndian) 13409 Perm = BuildIntrinsicOp(IntrPerm, 13410 ExtraLoad, BaseLoad, PermCntl, DAG, dl); 13411 else 13412 Perm = BuildIntrinsicOp(IntrPerm, 13413 BaseLoad, ExtraLoad, PermCntl, DAG, dl); 13414 13415 if (VT != PermTy) 13416 Perm = Subtarget.hasAltivec() ? 13417 DAG.getNode(ISD::BITCAST, dl, VT, Perm) : 13418 DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX 13419 DAG.getTargetConstant(1, dl, MVT::i64)); 13420 // second argument is 1 because this rounding 13421 // is always exact. 13422 13423 // The output of the permutation is our loaded result, the TokenFactor is 13424 // our new chain. 13425 DCI.CombineTo(N, Perm, TF); 13426 return SDValue(N, 0); 13427 } 13428 } 13429 break; 13430 case ISD::INTRINSIC_WO_CHAIN: { 13431 bool isLittleEndian = Subtarget.isLittleEndian(); 13432 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 13433 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr 13434 : Intrinsic::ppc_altivec_lvsl); 13435 if ((IID == Intr || 13436 IID == Intrinsic::ppc_qpx_qvlpcld || 13437 IID == Intrinsic::ppc_qpx_qvlpcls) && 13438 N->getOperand(1)->getOpcode() == ISD::ADD) { 13439 SDValue Add = N->getOperand(1); 13440 13441 int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ? 13442 5 /* 32 byte alignment */ : 4 /* 16 byte alignment */; 13443 13444 if (DAG.MaskedValueIsZero(Add->getOperand(1), 13445 APInt::getAllOnesValue(Bits /* alignment */) 13446 .zext(Add.getScalarValueSizeInBits()))) { 13447 SDNode *BasePtr = Add->getOperand(0).getNode(); 13448 for (SDNode::use_iterator UI = BasePtr->use_begin(), 13449 UE = BasePtr->use_end(); 13450 UI != UE; ++UI) { 13451 if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN && 13452 cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == IID) { 13453 // We've found another LVSL/LVSR, and this address is an aligned 13454 // multiple of that one. The results will be the same, so use the 13455 // one we've just found instead. 
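// For instance (illustrative, Altivec case): lvsl(base) and lvsl(base + 32)
// produce the same permute control vector because the offset is a multiple
// of the 16-byte alignment, so the previously computed result can be reused.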
13456 13457 return SDValue(*UI, 0); 13458 } 13459 } 13460 } 13461 13462 if (isa<ConstantSDNode>(Add->getOperand(1))) { 13463 SDNode *BasePtr = Add->getOperand(0).getNode(); 13464 for (SDNode::use_iterator UI = BasePtr->use_begin(), 13465 UE = BasePtr->use_end(); UI != UE; ++UI) { 13466 if (UI->getOpcode() == ISD::ADD && 13467 isa<ConstantSDNode>(UI->getOperand(1)) && 13468 (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() - 13469 cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) % 13470 (1ULL << Bits) == 0) { 13471 SDNode *OtherAdd = *UI; 13472 for (SDNode::use_iterator VI = OtherAdd->use_begin(), 13473 VE = OtherAdd->use_end(); VI != VE; ++VI) { 13474 if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN && 13475 cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) { 13476 return SDValue(*VI, 0); 13477 } 13478 } 13479 } 13480 } 13481 } 13482 } 13483 13484 // Combine vmaxsw/h/b(a, a's negation) to abs(a) 13485 // Expose the vabsduw/h/b opportunity for down stream 13486 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() && 13487 (IID == Intrinsic::ppc_altivec_vmaxsw || 13488 IID == Intrinsic::ppc_altivec_vmaxsh || 13489 IID == Intrinsic::ppc_altivec_vmaxsb)) { 13490 SDValue V1 = N->getOperand(1); 13491 SDValue V2 = N->getOperand(2); 13492 if ((V1.getSimpleValueType() == MVT::v4i32 || 13493 V1.getSimpleValueType() == MVT::v8i16 || 13494 V1.getSimpleValueType() == MVT::v16i8) && 13495 V1.getSimpleValueType() == V2.getSimpleValueType()) { 13496 // (0-a, a) 13497 if (V1.getOpcode() == ISD::SUB && 13498 ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) && 13499 V1.getOperand(1) == V2) { 13500 return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2); 13501 } 13502 // (a, 0-a) 13503 if (V2.getOpcode() == ISD::SUB && 13504 ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) && 13505 V2.getOperand(1) == V1) { 13506 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1); 13507 } 13508 // (x-y, y-x) 13509 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB && 13510 V1.getOperand(0) == V2.getOperand(1) && 13511 V1.getOperand(1) == V2.getOperand(0)) { 13512 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1); 13513 } 13514 } 13515 } 13516 } 13517 13518 break; 13519 case ISD::INTRINSIC_W_CHAIN: 13520 // For little endian, VSX loads require generating lxvd2x/xxswapd. 13521 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load. 13522 if (Subtarget.needsSwapsForVSXMemOps()) { 13523 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { 13524 default: 13525 break; 13526 case Intrinsic::ppc_vsx_lxvw4x: 13527 case Intrinsic::ppc_vsx_lxvd2x: 13528 return expandVSXLoadForLE(N, DCI); 13529 } 13530 } 13531 break; 13532 case ISD::INTRINSIC_VOID: 13533 // For little endian, VSX stores require generating xxswapd/stxvd2x. 13534 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store. 13535 if (Subtarget.needsSwapsForVSXMemOps()) { 13536 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { 13537 default: 13538 break; 13539 case Intrinsic::ppc_vsx_stxvw4x: 13540 case Intrinsic::ppc_vsx_stxvd2x: 13541 return expandVSXStoreForLE(N, DCI); 13542 } 13543 } 13544 break; 13545 case ISD::BSWAP: 13546 // Turn BSWAP (LOAD) -> lhbrx/lwbrx. 
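// For example (illustrative):
//   (i32 (bswap (i32 (load ptr)))) -> PPCISD::LBRX(chain, ptr, i32)
// which selects to a single lwbrx; an i16 bswap additionally truncates the
// LBRX result, and i64 is only handled when the subtarget has ldbrx.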
13547 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && 13548 N->getOperand(0).hasOneUse() && 13549 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 || 13550 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && 13551 N->getValueType(0) == MVT::i64))) { 13552 SDValue Load = N->getOperand(0); 13553 LoadSDNode *LD = cast<LoadSDNode>(Load); 13554 // Create the byte-swapping load. 13555 SDValue Ops[] = { 13556 LD->getChain(), // Chain 13557 LD->getBasePtr(), // Ptr 13558 DAG.getValueType(N->getValueType(0)) // VT 13559 }; 13560 SDValue BSLoad = 13561 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl, 13562 DAG.getVTList(N->getValueType(0) == MVT::i64 ? 13563 MVT::i64 : MVT::i32, MVT::Other), 13564 Ops, LD->getMemoryVT(), LD->getMemOperand()); 13565 13566 // If this is an i16 load, insert the truncate. 13567 SDValue ResVal = BSLoad; 13568 if (N->getValueType(0) == MVT::i16) 13569 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad); 13570 13571 // First, combine the bswap away. This makes the value produced by the 13572 // load dead. 13573 DCI.CombineTo(N, ResVal); 13574 13575 // Next, combine the load away, we give it a bogus result value but a real 13576 // chain result. The result value is dead because the bswap is dead. 13577 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1)); 13578 13579 // Return N so it doesn't get rechecked! 13580 return SDValue(N, 0); 13581 } 13582 break; 13583 case PPCISD::VCMP: 13584 // If a VCMPo node already exists with exactly the same operands as this 13585 // node, use its result instead of this node (VCMPo computes both a CR6 and 13586 // a normal output). 13587 // 13588 if (!N->getOperand(0).hasOneUse() && 13589 !N->getOperand(1).hasOneUse() && 13590 !N->getOperand(2).hasOneUse()) { 13591 13592 // Scan all of the users of the LHS, looking for VCMPo's that match. 13593 SDNode *VCMPoNode = nullptr; 13594 13595 SDNode *LHSN = N->getOperand(0).getNode(); 13596 for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end(); 13597 UI != E; ++UI) 13598 if (UI->getOpcode() == PPCISD::VCMPo && 13599 UI->getOperand(1) == N->getOperand(1) && 13600 UI->getOperand(2) == N->getOperand(2) && 13601 UI->getOperand(0) == N->getOperand(0)) { 13602 VCMPoNode = *UI; 13603 break; 13604 } 13605 13606 // If there is no VCMPo node, or if the flag value has a single use, don't 13607 // transform this. 13608 if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1)) 13609 break; 13610 13611 // Look at the (necessarily single) use of the flag value. If it has a 13612 // chain, this transformation is more complex. Note that multiple things 13613 // could use the value result, which we should ignore. 13614 SDNode *FlagUser = nullptr; 13615 for (SDNode::use_iterator UI = VCMPoNode->use_begin(); 13616 FlagUser == nullptr; ++UI) { 13617 assert(UI != VCMPoNode->use_end() && "Didn't find user!"); 13618 SDNode *User = *UI; 13619 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { 13620 if (User->getOperand(i) == SDValue(VCMPoNode, 1)) { 13621 FlagUser = User; 13622 break; 13623 } 13624 } 13625 } 13626 13627 // If the user is a MFOCRF instruction, we know this is safe. 13628 // Otherwise we give up for right now. 
13629 if (FlagUser->getOpcode() == PPCISD::MFOCRF) 13630 return SDValue(VCMPoNode, 0); 13631 } 13632 break; 13633 case ISD::BRCOND: { 13634 SDValue Cond = N->getOperand(1); 13635 SDValue Target = N->getOperand(2); 13636 13637 if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN && 13638 cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() == 13639 Intrinsic::loop_decrement) { 13640 13641 // We now need to make the intrinsic dead (it cannot be instruction 13642 // selected). 13643 DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0)); 13644 assert(Cond.getNode()->hasOneUse() && 13645 "Counter decrement has more than one use"); 13646 13647 return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other, 13648 N->getOperand(0), Target); 13649 } 13650 } 13651 break; 13652 case ISD::BR_CC: { 13653 // If this is a branch on an altivec predicate comparison, lower this so 13654 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This 13655 // lowering is done pre-legalize, because the legalizer lowers the predicate 13656 // compare down to code that is difficult to reassemble. 13657 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get(); 13658 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3); 13659 13660 // Sometimes the promoted value of the intrinsic is ANDed by some non-zero 13661 // value. If so, pass-through the AND to get to the intrinsic. 13662 if (LHS.getOpcode() == ISD::AND && 13663 LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN && 13664 cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() == 13665 Intrinsic::loop_decrement && 13666 isa<ConstantSDNode>(LHS.getOperand(1)) && 13667 !isNullConstant(LHS.getOperand(1))) 13668 LHS = LHS.getOperand(0); 13669 13670 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN && 13671 cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() == 13672 Intrinsic::loop_decrement && 13673 isa<ConstantSDNode>(RHS)) { 13674 assert((CC == ISD::SETEQ || CC == ISD::SETNE) && 13675 "Counter decrement comparison is not EQ or NE"); 13676 13677 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue(); 13678 bool isBDNZ = (CC == ISD::SETEQ && Val) || 13679 (CC == ISD::SETNE && !Val); 13680 13681 // We now need to make the intrinsic dead (it cannot be instruction 13682 // selected). 13683 DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0)); 13684 assert(LHS.getNode()->hasOneUse() && 13685 "Counter decrement has more than one use"); 13686 13687 return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other, 13688 N->getOperand(0), N->getOperand(4)); 13689 } 13690 13691 int CompareOpc; 13692 bool isDot; 13693 13694 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN && 13695 isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) && 13696 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) { 13697 assert(isDot && "Can't compare against a vector result!"); 13698 13699 // If this is a comparison against something other than 0/1, then we know 13700 // that the condition is never/always true. 13701 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue(); 13702 if (Val != 0 && Val != 1) { 13703 if (CC == ISD::SETEQ) // Cond never true, remove branch. 13704 return N->getOperand(0); 13705 // Always !=, turn it into an unconditional branch. 13706 return DAG.getNode(ISD::BR, dl, MVT::Other, 13707 N->getOperand(0), N->getOperand(4)); 13708 } 13709 13710 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0); 13711 13712 // Create the PPCISD altivec 'dot' comparison node. 
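// Illustrative result: the record (dot) form of the vector compare (e.g.
// vcmpequw.) sets CR6, and the COND_BRANCH built below branches on the
// selected CR6 bit directly instead of copying CR6 out with mfocrf.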
13713 SDValue Ops[] = { 13714 LHS.getOperand(2), // LHS of compare 13715 LHS.getOperand(3), // RHS of compare 13716 DAG.getConstant(CompareOpc, dl, MVT::i32) 13717 }; 13718 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue }; 13719 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops); 13720 13721 // Unpack the result based on how the target uses it. 13722 PPC::Predicate CompOpc; 13723 switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) { 13724 default: // Can't happen, don't crash on invalid number though. 13725 case 0: // Branch on the value of the EQ bit of CR6. 13726 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE; 13727 break; 13728 case 1: // Branch on the inverted value of the EQ bit of CR6. 13729 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ; 13730 break; 13731 case 2: // Branch on the value of the LT bit of CR6. 13732 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE; 13733 break; 13734 case 3: // Branch on the inverted value of the LT bit of CR6. 13735 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT; 13736 break; 13737 } 13738 13739 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0), 13740 DAG.getConstant(CompOpc, dl, MVT::i32), 13741 DAG.getRegister(PPC::CR6, MVT::i32), 13742 N->getOperand(4), CompNode.getValue(1)); 13743 } 13744 break; 13745 } 13746 case ISD::BUILD_VECTOR: 13747 return DAGCombineBuildVector(N, DCI); 13748 case ISD::ABS: 13749 return combineABS(N, DCI); 13750 case ISD::VSELECT: 13751 return combineVSelect(N, DCI); 13752 } 13753 13754 return SDValue(); 13755 } 13756 13757 SDValue 13758 PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, 13759 SelectionDAG &DAG, 13760 SmallVectorImpl<SDNode *> &Created) const { 13761 // fold (sdiv X, pow2) 13762 EVT VT = N->getValueType(0); 13763 if (VT == MVT::i64 && !Subtarget.isPPC64()) 13764 return SDValue(); 13765 if ((VT != MVT::i32 && VT != MVT::i64) || 13766 !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2())) 13767 return SDValue(); 13768 13769 SDLoc DL(N); 13770 SDValue N0 = N->getOperand(0); 13771 13772 bool IsNegPow2 = (-Divisor).isPowerOf2(); 13773 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros(); 13774 SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT); 13775 13776 SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt); 13777 Created.push_back(Op.getNode()); 13778 13779 if (IsNegPow2) { 13780 Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op); 13781 Created.push_back(Op.getNode()); 13782 } 13783 13784 return Op; 13785 } 13786 13787 //===----------------------------------------------------------------------===// 13788 // Inline Assembly Support 13789 //===----------------------------------------------------------------------===// 13790 13791 void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, 13792 KnownBits &Known, 13793 const APInt &DemandedElts, 13794 const SelectionDAG &DAG, 13795 unsigned Depth) const { 13796 Known.resetAll(); 13797 switch (Op.getOpcode()) { 13798 default: break; 13799 case PPCISD::LBRX: { 13800 // lhbrx is known to have the top bits cleared out. 
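// For example, a 16-bit byte-reversed load produces an i32 whose bits 16-31
// are zero, which is what the 0xFFFF0000 Known.Zero mask below encodes.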
13801 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16) 13802 Known.Zero = 0xFFFF0000; 13803 break; 13804 } 13805 case ISD::INTRINSIC_WO_CHAIN: { 13806 switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) { 13807 default: break; 13808 case Intrinsic::ppc_altivec_vcmpbfp_p: 13809 case Intrinsic::ppc_altivec_vcmpeqfp_p: 13810 case Intrinsic::ppc_altivec_vcmpequb_p: 13811 case Intrinsic::ppc_altivec_vcmpequh_p: 13812 case Intrinsic::ppc_altivec_vcmpequw_p: 13813 case Intrinsic::ppc_altivec_vcmpequd_p: 13814 case Intrinsic::ppc_altivec_vcmpgefp_p: 13815 case Intrinsic::ppc_altivec_vcmpgtfp_p: 13816 case Intrinsic::ppc_altivec_vcmpgtsb_p: 13817 case Intrinsic::ppc_altivec_vcmpgtsh_p: 13818 case Intrinsic::ppc_altivec_vcmpgtsw_p: 13819 case Intrinsic::ppc_altivec_vcmpgtsd_p: 13820 case Intrinsic::ppc_altivec_vcmpgtub_p: 13821 case Intrinsic::ppc_altivec_vcmpgtuh_p: 13822 case Intrinsic::ppc_altivec_vcmpgtuw_p: 13823 case Intrinsic::ppc_altivec_vcmpgtud_p: 13824 Known.Zero = ~1U; // All bits but the low one are known to be zero. 13825 break; 13826 } 13827 } 13828 } 13829 } 13830 13831 unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { 13832 switch (Subtarget.getDarwinDirective()) { 13833 default: break; 13834 case PPC::DIR_970: 13835 case PPC::DIR_PWR4: 13836 case PPC::DIR_PWR5: 13837 case PPC::DIR_PWR5X: 13838 case PPC::DIR_PWR6: 13839 case PPC::DIR_PWR6X: 13840 case PPC::DIR_PWR7: 13841 case PPC::DIR_PWR8: 13842 case PPC::DIR_PWR9: { 13843 if (!ML) 13844 break; 13845 13846 const PPCInstrInfo *TII = Subtarget.getInstrInfo(); 13847 13848 // For small loops (between 5 and 8 instructions), align to a 32-byte 13849 // boundary so that the entire loop fits in one instruction-cache line. 13850 uint64_t LoopSize = 0; 13851 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I) 13852 for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) { 13853 LoopSize += TII->getInstSizeInBytes(*J); 13854 if (LoopSize > 32) 13855 break; 13856 } 13857 13858 if (LoopSize > 16 && LoopSize <= 32) 13859 return 5; 13860 13861 break; 13862 } 13863 } 13864 13865 return TargetLowering::getPrefLoopAlignment(ML); 13866 } 13867 13868 /// getConstraintType - Given a constraint, return the type of 13869 /// constraint it is for this target. 13870 PPCTargetLowering::ConstraintType 13871 PPCTargetLowering::getConstraintType(StringRef Constraint) const { 13872 if (Constraint.size() == 1) { 13873 switch (Constraint[0]) { 13874 default: break; 13875 case 'b': 13876 case 'r': 13877 case 'f': 13878 case 'd': 13879 case 'v': 13880 case 'y': 13881 return C_RegisterClass; 13882 case 'Z': 13883 // FIXME: While Z does indicate a memory constraint, it specifically 13884 // indicates an r+r address (used in conjunction with the 'y' modifier 13885 // in the replacement string). Currently, we're forcing the base 13886 // register to be r0 in the asm printer (which is interpreted as zero) 13887 // and forming the complete address in the second register. This is 13888 // suboptimal. 13889 return C_Memory; 13890 } 13891 } else if (Constraint == "wc") { // individual CR bits. 13892 return C_RegisterClass; 13893 } else if (Constraint == "wa" || Constraint == "wd" || 13894 Constraint == "wf" || Constraint == "ws" || 13895 Constraint == "wi") { 13896 return C_RegisterClass; // VSX registers. 13897 } 13898 return TargetLowering::getConstraintType(Constraint); 13899 } 13900 13901 /// Examine constraint type and operand type and determine a weight value. 
13902 /// This object must already have been set up with the operand type 13903 /// and the current alternative constraint selected. 13904 TargetLowering::ConstraintWeight 13905 PPCTargetLowering::getSingleConstraintMatchWeight( 13906 AsmOperandInfo &info, const char *constraint) const { 13907 ConstraintWeight weight = CW_Invalid; 13908 Value *CallOperandVal = info.CallOperandVal; 13909 // If we don't have a value, we can't do a match, 13910 // but allow it at the lowest weight. 13911 if (!CallOperandVal) 13912 return CW_Default; 13913 Type *type = CallOperandVal->getType(); 13914 13915 // Look at the constraint type. 13916 if (StringRef(constraint) == "wc" && type->isIntegerTy(1)) 13917 return CW_Register; // an individual CR bit. 13918 else if ((StringRef(constraint) == "wa" || 13919 StringRef(constraint) == "wd" || 13920 StringRef(constraint) == "wf") && 13921 type->isVectorTy()) 13922 return CW_Register; 13923 else if (StringRef(constraint) == "ws" && type->isDoubleTy()) 13924 return CW_Register; 13925 else if (StringRef(constraint) == "wi" && type->isIntegerTy(64)) 13926 return CW_Register; // just hold 64-bit integers data. 13927 13928 switch (*constraint) { 13929 default: 13930 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); 13931 break; 13932 case 'b': 13933 if (type->isIntegerTy()) 13934 weight = CW_Register; 13935 break; 13936 case 'f': 13937 if (type->isFloatTy()) 13938 weight = CW_Register; 13939 break; 13940 case 'd': 13941 if (type->isDoubleTy()) 13942 weight = CW_Register; 13943 break; 13944 case 'v': 13945 if (type->isVectorTy()) 13946 weight = CW_Register; 13947 break; 13948 case 'y': 13949 weight = CW_Register; 13950 break; 13951 case 'Z': 13952 weight = CW_Memory; 13953 break; 13954 } 13955 return weight; 13956 } 13957 13958 std::pair<unsigned, const TargetRegisterClass *> 13959 PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, 13960 StringRef Constraint, 13961 MVT VT) const { 13962 if (Constraint.size() == 1) { 13963 // GCC RS6000 Constraint Letters 13964 switch (Constraint[0]) { 13965 case 'b': // R1-R31 13966 if (VT == MVT::i64 && Subtarget.isPPC64()) 13967 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass); 13968 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass); 13969 case 'r': // R0-R31 13970 if (VT == MVT::i64 && Subtarget.isPPC64()) 13971 return std::make_pair(0U, &PPC::G8RCRegClass); 13972 return std::make_pair(0U, &PPC::GPRCRegClass); 13973 // 'd' and 'f' constraints are both defined to be "the floating point 13974 // registers", where one is for 32-bit and the other for 64-bit. We don't 13975 // really care overly much here so just give them all the same reg classes. 
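// Rough illustrative example: asm("fadd %0, %1, %2" : "=f"(d) : "f"(a), "f"(b))
// with double operands ends up in F8RC below on non-SPE subtargets.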
13976 case 'd': 13977 case 'f': 13978 if (Subtarget.hasSPE()) { 13979 if (VT == MVT::f32 || VT == MVT::i32) 13980 return std::make_pair(0U, &PPC::SPE4RCRegClass); 13981 if (VT == MVT::f64 || VT == MVT::i64) 13982 return std::make_pair(0U, &PPC::SPERCRegClass); 13983 } else { 13984 if (VT == MVT::f32 || VT == MVT::i32) 13985 return std::make_pair(0U, &PPC::F4RCRegClass); 13986 if (VT == MVT::f64 || VT == MVT::i64) 13987 return std::make_pair(0U, &PPC::F8RCRegClass); 13988 if (VT == MVT::v4f64 && Subtarget.hasQPX()) 13989 return std::make_pair(0U, &PPC::QFRCRegClass); 13990 if (VT == MVT::v4f32 && Subtarget.hasQPX()) 13991 return std::make_pair(0U, &PPC::QSRCRegClass); 13992 } 13993 break; 13994 case 'v': 13995 if (VT == MVT::v4f64 && Subtarget.hasQPX()) 13996 return std::make_pair(0U, &PPC::QFRCRegClass); 13997 if (VT == MVT::v4f32 && Subtarget.hasQPX()) 13998 return std::make_pair(0U, &PPC::QSRCRegClass); 13999 if (Subtarget.hasAltivec()) 14000 return std::make_pair(0U, &PPC::VRRCRegClass); 14001 break; 14002 case 'y': // crrc 14003 return std::make_pair(0U, &PPC::CRRCRegClass); 14004 } 14005 } else if (Constraint == "wc" && Subtarget.useCRBits()) { 14006 // An individual CR bit. 14007 return std::make_pair(0U, &PPC::CRBITRCRegClass); 14008 } else if ((Constraint == "wa" || Constraint == "wd" || 14009 Constraint == "wf" || Constraint == "wi") && 14010 Subtarget.hasVSX()) { 14011 return std::make_pair(0U, &PPC::VSRCRegClass); 14012 } else if (Constraint == "ws" && Subtarget.hasVSX()) { 14013 if (VT == MVT::f32 && Subtarget.hasP8Vector()) 14014 return std::make_pair(0U, &PPC::VSSRCRegClass); 14015 else 14016 return std::make_pair(0U, &PPC::VSFRCRegClass); 14017 } 14018 14019 std::pair<unsigned, const TargetRegisterClass *> R = 14020 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); 14021 14022 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers 14023 // (which we call X[0-9]+). If a 64-bit value has been requested, and a 14024 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent 14025 // register. 14026 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use 14027 // the AsmName field from *RegisterInfo.td, then this would not be necessary. 14028 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() && 14029 PPC::GPRCRegClass.contains(R.first)) 14030 return std::make_pair(TRI->getMatchingSuperReg(R.first, 14031 PPC::sub_32, &PPC::G8RCRegClass), 14032 &PPC::G8RCRegClass); 14033 14034 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same. 14035 if (!R.second && StringRef("{cc}").equals_lower(Constraint)) { 14036 R.first = PPC::CR0; 14037 R.second = &PPC::CRRCRegClass; 14038 } 14039 14040 return R; 14041 } 14042 14043 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 14044 /// vector. If it is invalid, don't add anything to Ops. 14045 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, 14046 std::string &Constraint, 14047 std::vector<SDValue>&Ops, 14048 SelectionDAG &DAG) const { 14049 SDValue Result; 14050 14051 // Only support length 1 constraints. 14052 if (Constraint.length() > 1) return; 14053 14054 char Letter = Constraint[0]; 14055 switch (Letter) { 14056 default: break; 14057 case 'I': 14058 case 'J': 14059 case 'K': 14060 case 'L': 14061 case 'M': 14062 case 'N': 14063 case 'O': 14064 case 'P': { 14065 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op); 14066 if (!CST) return; // Must be an immediate to match. 
14067 SDLoc dl(Op); 14068 int64_t Value = CST->getSExtValue(); 14069 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative 14070 // numbers are printed as such. 14071 switch (Letter) { 14072 default: llvm_unreachable("Unknown constraint letter!"); 14073 case 'I': // "I" is a signed 16-bit constant. 14074 if (isInt<16>(Value)) 14075 Result = DAG.getTargetConstant(Value, dl, TCVT); 14076 break; 14077 case 'J': // "J" is a constant with only the high-order 16 bits nonzero. 14078 if (isShiftedUInt<16, 16>(Value)) 14079 Result = DAG.getTargetConstant(Value, dl, TCVT); 14080 break; 14081 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits. 14082 if (isShiftedInt<16, 16>(Value)) 14083 Result = DAG.getTargetConstant(Value, dl, TCVT); 14084 break; 14085 case 'K': // "K" is a constant with only the low-order 16 bits nonzero. 14086 if (isUInt<16>(Value)) 14087 Result = DAG.getTargetConstant(Value, dl, TCVT); 14088 break; 14089 case 'M': // "M" is a constant that is greater than 31. 14090 if (Value > 31) 14091 Result = DAG.getTargetConstant(Value, dl, TCVT); 14092 break; 14093 case 'N': // "N" is a positive constant that is an exact power of two. 14094 if (Value > 0 && isPowerOf2_64(Value)) 14095 Result = DAG.getTargetConstant(Value, dl, TCVT); 14096 break; 14097 case 'O': // "O" is the constant zero. 14098 if (Value == 0) 14099 Result = DAG.getTargetConstant(Value, dl, TCVT); 14100 break; 14101 case 'P': // "P" is a constant whose negation is a signed 16-bit constant. 14102 if (isInt<16>(-Value)) 14103 Result = DAG.getTargetConstant(Value, dl, TCVT); 14104 break; 14105 } 14106 break; 14107 } 14108 } 14109 14110 if (Result.getNode()) { 14111 Ops.push_back(Result); 14112 return; 14113 } 14114 14115 // Handle standard constraint letters. 14116 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 14117 } 14118 14119 // isLegalAddressingMode - Return true if the addressing mode represented 14120 // by AM is legal for this target, for a load/store of the specified type. 14121 bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL, 14122 const AddrMode &AM, Type *Ty, 14123 unsigned AS, Instruction *I) const { 14124 // PPC does not allow r+i addressing modes for vectors! 14125 if (Ty->isVectorTy() && AM.BaseOffs != 0) 14126 return false; 14127 14128 // PPC allows a sign-extended 16-bit immediate field. 14129 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1) 14130 return false; 14131 14132 // No global is ever allowed as a base. 14133 if (AM.BaseGV) 14134 return false; 14135 14136 // PPC only support r+r, 14137 switch (AM.Scale) { 14138 case 0: // "r+i" or just "i", depending on HasBaseReg. 14139 break; 14140 case 1: 14141 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed. 14142 return false; 14143 // Otherwise we have r+r or r+i. 14144 break; 14145 case 2: 14146 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed. 14147 return false; 14148 // Allow 2*r as r+r. 14149 break; 14150 default: 14151 // No other scales are supported. 
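// e.g. (illustrative) a scaled form such as base + 4*index falls through to
// here and is rejected; only r+i, r+r, and plain 2*r (treated as r+r) are
// accepted above.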
14152 return false; 14153 } 14154 14155 return true; 14156 } 14157 14158 SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, 14159 SelectionDAG &DAG) const { 14160 MachineFunction &MF = DAG.getMachineFunction(); 14161 MachineFrameInfo &MFI = MF.getFrameInfo(); 14162 MFI.setReturnAddressIsTaken(true); 14163 14164 if (verifyReturnAddressArgumentIsConstant(Op, DAG)) 14165 return SDValue(); 14166 14167 SDLoc dl(Op); 14168 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 14169 14170 // Make sure the function does not optimize away the store of the RA to 14171 // the stack. 14172 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 14173 FuncInfo->setLRStoreRequired(); 14174 bool isPPC64 = Subtarget.isPPC64(); 14175 auto PtrVT = getPointerTy(MF.getDataLayout()); 14176 14177 if (Depth > 0) { 14178 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); 14179 SDValue Offset = 14180 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl, 14181 isPPC64 ? MVT::i64 : MVT::i32); 14182 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), 14183 DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset), 14184 MachinePointerInfo()); 14185 } 14186 14187 // Just load the return address off the stack. 14188 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG); 14189 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI, 14190 MachinePointerInfo()); 14191 } 14192 14193 SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, 14194 SelectionDAG &DAG) const { 14195 SDLoc dl(Op); 14196 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 14197 14198 MachineFunction &MF = DAG.getMachineFunction(); 14199 MachineFrameInfo &MFI = MF.getFrameInfo(); 14200 MFI.setFrameAddressIsTaken(true); 14201 14202 EVT PtrVT = getPointerTy(MF.getDataLayout()); 14203 bool isPPC64 = PtrVT == MVT::i64; 14204 14205 // Naked functions never have a frame pointer, and so we use r1. For all 14206 // other functions, this decision must be delayed until during PEI. 14207 unsigned FrameReg; 14208 if (MF.getFunction().hasFnAttribute(Attribute::Naked)) 14209 FrameReg = isPPC64 ? PPC::X1 : PPC::R1; 14210 else 14211 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP; 14212 14213 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, 14214 PtrVT); 14215 while (Depth--) 14216 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(), 14217 FrameAddr, MachinePointerInfo()); 14218 return FrameAddr; 14219 } 14220 14221 // FIXME? Maybe this could be a TableGen attribute on some registers and 14222 // this table could be generated automatically from RegInfo. 14223 unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT, 14224 SelectionDAG &DAG) const { 14225 bool isPPC64 = Subtarget.isPPC64(); 14226 bool isDarwinABI = Subtarget.isDarwinABI(); 14227 14228 if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) || 14229 (!isPPC64 && VT != MVT::i32)) 14230 report_fatal_error("Invalid register global variable type"); 14231 14232 bool is64Bit = isPPC64 && VT == MVT::i64; 14233 unsigned Reg = StringSwitch<unsigned>(RegName) 14234 .Case("r1", is64Bit ? PPC::X1 : PPC::R1) 14235 .Case("r2", (isDarwinABI || isPPC64) ? 0 : PPC::R2) 14236 .Case("r13", (!isPPC64 && isDarwinABI) ? 0 : 14237 (is64Bit ? PPC::X13 : PPC::R13)) 14238 .Default(0); 14239 14240 if (Reg) 14241 return Reg; 14242 report_fatal_error("Invalid register name global variable"); 14243 } 14244 14245 bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const { 14246 // 32-bit SVR4 ABI access everything as got-indirect. 
14247 if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64()) 14248 return true; 14249 14250 CodeModel::Model CModel = getTargetMachine().getCodeModel(); 14251 // If it is small or large code model, module locals are accessed 14252 // indirectly by loading their address from .toc/.got. The difference 14253 // is that for large code model we have ADDISTocHa + LDtocL and for 14254 // small code model we simply have LDtoc. 14255 if (CModel == CodeModel::Small || CModel == CodeModel::Large) 14256 return true; 14257 14258 // JumpTable and BlockAddress are accessed as got-indirect. 14259 if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA)) 14260 return true; 14261 14262 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) { 14263 const GlobalValue *GV = G->getGlobal(); 14264 unsigned char GVFlags = Subtarget.classifyGlobalReference(GV); 14265 // The NLP flag indicates that a global access has to use an 14266 // extra indirection. 14267 if (GVFlags & PPCII::MO_NLP_FLAG) 14268 return true; 14269 } 14270 14271 return false; 14272 } 14273 14274 bool 14275 PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { 14276 // The PowerPC target isn't yet aware of offsets. 14277 return false; 14278 } 14279 14280 bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, 14281 const CallInst &I, 14282 MachineFunction &MF, 14283 unsigned Intrinsic) const { 14284 switch (Intrinsic) { 14285 case Intrinsic::ppc_qpx_qvlfd: 14286 case Intrinsic::ppc_qpx_qvlfs: 14287 case Intrinsic::ppc_qpx_qvlfcd: 14288 case Intrinsic::ppc_qpx_qvlfcs: 14289 case Intrinsic::ppc_qpx_qvlfiwa: 14290 case Intrinsic::ppc_qpx_qvlfiwz: 14291 case Intrinsic::ppc_altivec_lvx: 14292 case Intrinsic::ppc_altivec_lvxl: 14293 case Intrinsic::ppc_altivec_lvebx: 14294 case Intrinsic::ppc_altivec_lvehx: 14295 case Intrinsic::ppc_altivec_lvewx: 14296 case Intrinsic::ppc_vsx_lxvd2x: 14297 case Intrinsic::ppc_vsx_lxvw4x: { 14298 EVT VT; 14299 switch (Intrinsic) { 14300 case Intrinsic::ppc_altivec_lvebx: 14301 VT = MVT::i8; 14302 break; 14303 case Intrinsic::ppc_altivec_lvehx: 14304 VT = MVT::i16; 14305 break; 14306 case Intrinsic::ppc_altivec_lvewx: 14307 VT = MVT::i32; 14308 break; 14309 case Intrinsic::ppc_vsx_lxvd2x: 14310 VT = MVT::v2f64; 14311 break; 14312 case Intrinsic::ppc_qpx_qvlfd: 14313 VT = MVT::v4f64; 14314 break; 14315 case Intrinsic::ppc_qpx_qvlfs: 14316 VT = MVT::v4f32; 14317 break; 14318 case Intrinsic::ppc_qpx_qvlfcd: 14319 VT = MVT::v2f64; 14320 break; 14321 case Intrinsic::ppc_qpx_qvlfcs: 14322 VT = MVT::v2f32; 14323 break; 14324 default: 14325 VT = MVT::v4i32; 14326 break; 14327 } 14328 14329 Info.opc = ISD::INTRINSIC_W_CHAIN; 14330 Info.memVT = VT; 14331 Info.ptrVal = I.getArgOperand(0); 14332 Info.offset = -VT.getStoreSize()+1; 14333 Info.size = 2*VT.getStoreSize()-1; 14334 Info.align = 1; 14335 Info.flags = MachineMemOperand::MOLoad; 14336 return true; 14337 } 14338 case Intrinsic::ppc_qpx_qvlfda: 14339 case Intrinsic::ppc_qpx_qvlfsa: 14340 case Intrinsic::ppc_qpx_qvlfcda: 14341 case Intrinsic::ppc_qpx_qvlfcsa: 14342 case Intrinsic::ppc_qpx_qvlfiwaa: 14343 case Intrinsic::ppc_qpx_qvlfiwza: { 14344 EVT VT; 14345 switch (Intrinsic) { 14346 case Intrinsic::ppc_qpx_qvlfda: 14347 VT = MVT::v4f64; 14348 break; 14349 case Intrinsic::ppc_qpx_qvlfsa: 14350 VT = MVT::v4f32; 14351 break; 14352 case Intrinsic::ppc_qpx_qvlfcda: 14353 VT = MVT::v2f64; 14354 break; 14355 case Intrinsic::ppc_qpx_qvlfcsa: 14356 VT = MVT::v2f32; 14357 break; 14358 default: 14359 VT = MVT::v4i32; 14360 break; 14361 } 
14362 14363 Info.opc = ISD::INTRINSIC_W_CHAIN; 14364 Info.memVT = VT; 14365 Info.ptrVal = I.getArgOperand(0); 14366 Info.offset = 0; 14367 Info.size = VT.getStoreSize(); 14368 Info.align = 1; 14369 Info.flags = MachineMemOperand::MOLoad; 14370 return true; 14371 } 14372 case Intrinsic::ppc_qpx_qvstfd: 14373 case Intrinsic::ppc_qpx_qvstfs: 14374 case Intrinsic::ppc_qpx_qvstfcd: 14375 case Intrinsic::ppc_qpx_qvstfcs: 14376 case Intrinsic::ppc_qpx_qvstfiw: 14377 case Intrinsic::ppc_altivec_stvx: 14378 case Intrinsic::ppc_altivec_stvxl: 14379 case Intrinsic::ppc_altivec_stvebx: 14380 case Intrinsic::ppc_altivec_stvehx: 14381 case Intrinsic::ppc_altivec_stvewx: 14382 case Intrinsic::ppc_vsx_stxvd2x: 14383 case Intrinsic::ppc_vsx_stxvw4x: { 14384 EVT VT; 14385 switch (Intrinsic) { 14386 case Intrinsic::ppc_altivec_stvebx: 14387 VT = MVT::i8; 14388 break; 14389 case Intrinsic::ppc_altivec_stvehx: 14390 VT = MVT::i16; 14391 break; 14392 case Intrinsic::ppc_altivec_stvewx: 14393 VT = MVT::i32; 14394 break; 14395 case Intrinsic::ppc_vsx_stxvd2x: 14396 VT = MVT::v2f64; 14397 break; 14398 case Intrinsic::ppc_qpx_qvstfd: 14399 VT = MVT::v4f64; 14400 break; 14401 case Intrinsic::ppc_qpx_qvstfs: 14402 VT = MVT::v4f32; 14403 break; 14404 case Intrinsic::ppc_qpx_qvstfcd: 14405 VT = MVT::v2f64; 14406 break; 14407 case Intrinsic::ppc_qpx_qvstfcs: 14408 VT = MVT::v2f32; 14409 break; 14410 default: 14411 VT = MVT::v4i32; 14412 break; 14413 } 14414 14415 Info.opc = ISD::INTRINSIC_VOID; 14416 Info.memVT = VT; 14417 Info.ptrVal = I.getArgOperand(1); 14418 Info.offset = -VT.getStoreSize()+1; 14419 Info.size = 2*VT.getStoreSize()-1; 14420 Info.align = 1; 14421 Info.flags = MachineMemOperand::MOStore; 14422 return true; 14423 } 14424 case Intrinsic::ppc_qpx_qvstfda: 14425 case Intrinsic::ppc_qpx_qvstfsa: 14426 case Intrinsic::ppc_qpx_qvstfcda: 14427 case Intrinsic::ppc_qpx_qvstfcsa: 14428 case Intrinsic::ppc_qpx_qvstfiwa: { 14429 EVT VT; 14430 switch (Intrinsic) { 14431 case Intrinsic::ppc_qpx_qvstfda: 14432 VT = MVT::v4f64; 14433 break; 14434 case Intrinsic::ppc_qpx_qvstfsa: 14435 VT = MVT::v4f32; 14436 break; 14437 case Intrinsic::ppc_qpx_qvstfcda: 14438 VT = MVT::v2f64; 14439 break; 14440 case Intrinsic::ppc_qpx_qvstfcsa: 14441 VT = MVT::v2f32; 14442 break; 14443 default: 14444 VT = MVT::v4i32; 14445 break; 14446 } 14447 14448 Info.opc = ISD::INTRINSIC_VOID; 14449 Info.memVT = VT; 14450 Info.ptrVal = I.getArgOperand(1); 14451 Info.offset = 0; 14452 Info.size = VT.getStoreSize(); 14453 Info.align = 1; 14454 Info.flags = MachineMemOperand::MOStore; 14455 return true; 14456 } 14457 default: 14458 break; 14459 } 14460 14461 return false; 14462 } 14463 14464 /// getOptimalMemOpType - Returns the target specific optimal type for load 14465 /// and store operations as a result of memset, memcpy, and memmove 14466 /// lowering. If DstAlign is zero that means it's safe to destination 14467 /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it 14468 /// means there isn't a need to check it against alignment requirement, 14469 /// probably because the source does not need to be loaded. If 'IsMemset' is 14470 /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that 14471 /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy 14472 /// source is constant so it does not need to be loaded. 14473 /// It returns EVT::Other if the type should be determined using generic 14474 /// target-independent logic. 
14475 EVT PPCTargetLowering::getOptimalMemOpType( 14476 uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, 14477 bool ZeroMemset, bool MemcpyStrSrc, 14478 const AttributeList &FuncAttributes) const { 14479 if (getTargetMachine().getOptLevel() != CodeGenOpt::None) { 14480 // When expanding a memset, require at least two QPX instructions to cover 14481 // the cost of loading the value to be stored from the constant pool. 14482 if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) && 14483 (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) && 14484 !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) { 14485 return MVT::v4f64; 14486 } 14487 14488 // We should use Altivec/VSX loads and stores when available. For unaligned 14489 // addresses, unaligned VSX loads are only fast starting with the P8. 14490 if (Subtarget.hasAltivec() && Size >= 16 && 14491 (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) || 14492 ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector()))) 14493 return MVT::v4i32; 14494 } 14495 14496 if (Subtarget.isPPC64()) { 14497 return MVT::i64; 14498 } 14499 14500 return MVT::i32; 14501 } 14502 14503 /// Returns true if it is beneficial to convert a load of a constant 14504 /// to just the constant itself. 14505 bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, 14506 Type *Ty) const { 14507 assert(Ty->isIntegerTy()); 14508 14509 unsigned BitSize = Ty->getPrimitiveSizeInBits(); 14510 return !(BitSize == 0 || BitSize > 64); 14511 } 14512 14513 bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { 14514 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) 14515 return false; 14516 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); 14517 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); 14518 return NumBits1 == 64 && NumBits2 == 32; 14519 } 14520 14521 bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { 14522 if (!VT1.isInteger() || !VT2.isInteger()) 14523 return false; 14524 unsigned NumBits1 = VT1.getSizeInBits(); 14525 unsigned NumBits2 = VT2.getSizeInBits(); 14526 return NumBits1 == 64 && NumBits2 == 32; 14527 } 14528 14529 bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { 14530 // Generally speaking, zexts are not free, but they are free when they can be 14531 // folded with other operations. 14532 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) { 14533 EVT MemVT = LD->getMemoryVT(); 14534 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 || 14535 (Subtarget.isPPC64() && MemVT == MVT::i32)) && 14536 (LD->getExtensionType() == ISD::NON_EXTLOAD || 14537 LD->getExtensionType() == ISD::ZEXTLOAD)) 14538 return true; 14539 } 14540 14541 // FIXME: Add other cases... 14542 // - 32-bit shifts with a zext to i64 14543 // - zext after ctlz, bswap, etc. 14544 // - zext after and by a constant mask 14545 14546 return TargetLowering::isZExtFree(Val, VT2); 14547 } 14548 14549 bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const { 14550 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() && 14551 "invalid fpext types"); 14552 // Extending to float128 is not free. 
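// e.g. f32 -> f64 is free because f32 values already live in double format
// in the FP registers, whereas f64 -> f128 needs a real conversion
// (xscvdpqp on Power9), so it is reported as not free below.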
14553 if (DestVT == MVT::f128) 14554 return false; 14555 return true; 14556 } 14557 14558 bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 14559 return isInt<16>(Imm) || isUInt<16>(Imm); 14560 } 14561 14562 bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const { 14563 return isInt<16>(Imm) || isUInt<16>(Imm); 14564 } 14565 14566 bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, 14567 unsigned, 14568 unsigned, 14569 MachineMemOperand::Flags, 14570 bool *Fast) const { 14571 if (DisablePPCUnaligned) 14572 return false; 14573 14574 // PowerPC supports unaligned memory access for simple non-vector types. 14575 // Although accessing unaligned addresses is not as efficient as accessing 14576 // aligned addresses, it is generally more efficient than manual expansion, 14577 // and generally only traps for software emulation when crossing page 14578 // boundaries. 14579 14580 if (!VT.isSimple()) 14581 return false; 14582 14583 if (VT.getSimpleVT().isVector()) { 14584 if (Subtarget.hasVSX()) { 14585 if (VT != MVT::v2f64 && VT != MVT::v2i64 && 14586 VT != MVT::v4f32 && VT != MVT::v4i32) 14587 return false; 14588 } else { 14589 return false; 14590 } 14591 } 14592 14593 if (VT == MVT::ppcf128) 14594 return false; 14595 14596 if (Fast) 14597 *Fast = true; 14598 14599 return true; 14600 } 14601 14602 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { 14603 VT = VT.getScalarType(); 14604 14605 if (!VT.isSimple()) 14606 return false; 14607 14608 switch (VT.getSimpleVT().SimpleTy) { 14609 case MVT::f32: 14610 case MVT::f64: 14611 return true; 14612 case MVT::f128: 14613 return (EnableQuadPrecision && Subtarget.hasP9Vector()); 14614 default: 14615 break; 14616 } 14617 14618 return false; 14619 } 14620 14621 const MCPhysReg * 14622 PPCTargetLowering::getScratchRegisters(CallingConv::ID) const { 14623 // LR is a callee-save register, but we must treat it as clobbered by any call 14624 // site. Hence we include LR in the scratch registers, which are in turn added 14625 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies 14626 // to CTR, which is used by any indirect call. 14627 static const MCPhysReg ScratchRegs[] = { 14628 PPC::X12, PPC::LR8, PPC::CTR8, 0 14629 }; 14630 14631 return ScratchRegs; 14632 } 14633 14634 unsigned PPCTargetLowering::getExceptionPointerRegister( 14635 const Constant *PersonalityFn) const { 14636 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3; 14637 } 14638 14639 unsigned PPCTargetLowering::getExceptionSelectorRegister( 14640 const Constant *PersonalityFn) const { 14641 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4; 14642 } 14643 14644 bool 14645 PPCTargetLowering::shouldExpandBuildVectorWithShuffles( 14646 EVT VT , unsigned DefinedValues) const { 14647 if (VT == MVT::v2i64) 14648 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves 14649 14650 if (Subtarget.hasVSX() || Subtarget.hasQPX()) 14651 return true; 14652 14653 return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues); 14654 } 14655 14656 Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const { 14657 if (DisableILPPref || Subtarget.enableMachineScheduler()) 14658 return TargetLowering::getSchedulingPreference(N); 14659 14660 return Sched::ILP; 14661 } 14662 14663 // Create a fast isel object. 
14664 FastISel * 14665 PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo, 14666 const TargetLibraryInfo *LibInfo) const { 14667 return PPC::createFastISel(FuncInfo, LibInfo); 14668 } 14669 14670 void PPCTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const { 14671 if (Subtarget.isDarwinABI()) return; 14672 if (!Subtarget.isPPC64()) return; 14673 14674 // Update IsSplitCSR in PPCFunctionInfo 14675 PPCFunctionInfo *PFI = Entry->getParent()->getInfo<PPCFunctionInfo>(); 14676 PFI->setIsSplitCSR(true); 14677 } 14678 14679 void PPCTargetLowering::insertCopiesSplitCSR( 14680 MachineBasicBlock *Entry, 14681 const SmallVectorImpl<MachineBasicBlock *> &Exits) const { 14682 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo(); 14683 const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent()); 14684 if (!IStart) 14685 return; 14686 14687 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 14688 MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo(); 14689 MachineBasicBlock::iterator MBBI = Entry->begin(); 14690 for (const MCPhysReg *I = IStart; *I; ++I) { 14691 const TargetRegisterClass *RC = nullptr; 14692 if (PPC::G8RCRegClass.contains(*I)) 14693 RC = &PPC::G8RCRegClass; 14694 else if (PPC::F8RCRegClass.contains(*I)) 14695 RC = &PPC::F8RCRegClass; 14696 else if (PPC::CRRCRegClass.contains(*I)) 14697 RC = &PPC::CRRCRegClass; 14698 else if (PPC::VRRCRegClass.contains(*I)) 14699 RC = &PPC::VRRCRegClass; 14700 else 14701 llvm_unreachable("Unexpected register class in CSRsViaCopy!"); 14702 14703 unsigned NewVR = MRI->createVirtualRegister(RC); 14704 // Create copy from CSR to a virtual register. 14705 // FIXME: this currently does not emit CFI pseudo-instructions, it works 14706 // fine for CXX_FAST_TLS since the C++-style TLS access functions should be 14707 // nounwind. If we want to generalize this later, we may need to emit 14708 // CFI pseudo-instructions. 14709 assert(Entry->getParent()->getFunction().hasFnAttribute( 14710 Attribute::NoUnwind) && 14711 "Function should be nounwind in insertCopiesSplitCSR!"); 14712 Entry->addLiveIn(*I); 14713 BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR) 14714 .addReg(*I); 14715 14716 // Insert the copy-back instructions right before the terminator. 14717 for (auto *Exit : Exits) 14718 BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(), 14719 TII->get(TargetOpcode::COPY), *I) 14720 .addReg(NewVR); 14721 } 14722 } 14723 14724 // Override to enable LOAD_STACK_GUARD lowering on Linux. 14725 bool PPCTargetLowering::useLoadStackGuardNode() const { 14726 if (!Subtarget.isTargetLinux()) 14727 return TargetLowering::useLoadStackGuardNode(); 14728 return true; 14729 } 14730 14731 // Override to disable global variable loading on Linux. 14732 void PPCTargetLowering::insertSSPDeclarations(Module &M) const { 14733 if (!Subtarget.isTargetLinux()) 14734 return TargetLowering::insertSSPDeclarations(M); 14735 } 14736 14737 bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, 14738 bool ForCodeSize) const { 14739 if (!VT.isSimple() || !Subtarget.hasVSX()) 14740 return false; 14741 14742 switch(VT.getSimpleVT().SimpleTy) { 14743 default: 14744 // For FP types that are currently not supported by PPC backend, return 14745 // false. Examples: f16, f80. 
14746 return false; 14747 case MVT::f32: 14748 case MVT::f64: 14749 case MVT::ppcf128: 14750 return Imm.isPosZero(); 14751 } 14752 } 14753 14754 // For vector shift operation op, fold 14755 // (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y) 14756 static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, 14757 SelectionDAG &DAG) { 14758 SDValue N0 = N->getOperand(0); 14759 SDValue N1 = N->getOperand(1); 14760 EVT VT = N0.getValueType(); 14761 unsigned OpSizeInBits = VT.getScalarSizeInBits(); 14762 unsigned Opcode = N->getOpcode(); 14763 unsigned TargetOpcode; 14764 14765 switch (Opcode) { 14766 default: 14767 llvm_unreachable("Unexpected shift operation"); 14768 case ISD::SHL: 14769 TargetOpcode = PPCISD::SHL; 14770 break; 14771 case ISD::SRL: 14772 TargetOpcode = PPCISD::SRL; 14773 break; 14774 case ISD::SRA: 14775 TargetOpcode = PPCISD::SRA; 14776 break; 14777 } 14778 14779 if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) && 14780 N1->getOpcode() == ISD::AND) 14781 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1))) 14782 if (Mask->getZExtValue() == OpSizeInBits - 1) 14783 return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0)); 14784 14785 return SDValue(); 14786 } 14787 14788 SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const { 14789 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG)) 14790 return Value; 14791 14792 SDValue N0 = N->getOperand(0); 14793 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1)); 14794 if (!Subtarget.isISA3_0() || 14795 N0.getOpcode() != ISD::SIGN_EXTEND || 14796 N0.getOperand(0).getValueType() != MVT::i32 || 14797 CN1 == nullptr || N->getValueType(0) != MVT::i64) 14798 return SDValue(); 14799 14800 // We can't save an operation here if the value is already extended, and 14801 // the existing shift is easier to combine. 14802 SDValue ExtsSrc = N0.getOperand(0); 14803 if (ExtsSrc.getOpcode() == ISD::TRUNCATE && 14804 ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext) 14805 return SDValue(); 14806 14807 SDLoc DL(N0); 14808 SDValue ShiftBy = SDValue(CN1, 0); 14809 // We want the shift amount to be i32 on the extswli, but the shift could 14810 // have an i64. 
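// Overall (illustrative), this fold turns
//   (i64 (shl (sign_extend i32 %x), %c)) into (PPCISD::EXTSWSLI %x, %c)
// i.e. a single extswsli on ISA 3.0, folding the word extension into the
// shift.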
  if (ShiftBy.getValueType() == MVT::i64)
    ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);

  return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
                         ShiftBy);
}

SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
    return Value;

  return SDValue();
}

SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
    return Value;

  return SDValue();
}

// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
// When C is zero, the expression (addi Z, -C) simplifies to Z.
// Requirement: -C is in [-32768, 32767]; X and Z are MVT::i64 types.
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
                                 const PPCSubtarget &Subtarget) {
  if (!Subtarget.isPPC64())
    return SDValue();

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  auto isZextOfCompareWithConstant = [](SDValue Op) {
    if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
        Op.getValueType() != MVT::i64)
      return false;

    SDValue Cmp = Op.getOperand(0);
    if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
        Cmp.getOperand(0).getValueType() != MVT::i64)
      return false;

    if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
      int64_t NegConstant = 0 - Constant->getSExtValue();
      // Due to the limitations of the addi instruction,
      // -C is required to be in [-32768, 32767].
      return isInt<16>(NegConstant);
    }

    return false;
  };

  bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
  bool RHSHasPattern = isZextOfCompareWithConstant(RHS);

  // If there is a pattern, canonicalize a zext operand to the RHS.
  if (LHSHasPattern && !RHSHasPattern)
    std::swap(LHS, RHS);
  else if (!LHSHasPattern && !RHSHasPattern)
    return SDValue();

  SDLoc DL(N);
  SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
  SDValue Cmp = RHS.getOperand(0);
  SDValue Z = Cmp.getOperand(0);
  auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1));

  assert(Constant && "Constant should not be a null pointer.");
  int64_t NegConstant = 0 - Constant->getSExtValue();

  switch (cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
  default: break;
  case ISD::SETNE: {
    //                                 when C == 0
    //                             --> addze X, (addic Z, -1).carry
    //                            /
    // add X, (zext(setne Z, C))--
    //                            \    when -32768 <= -C <= 32767 && C != 0
    //                             --> addze X, (addic (addi Z, -C), -1).carry
    SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
                              DAG.getConstant(NegConstant, DL, MVT::i64));
    SDValue AddOrZ = NegConstant != 0 ? Add : Z;
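    // Note: the ADDC with -1 below produces a carry of 1 exactly when AddOrZ
    // is non-zero (i.e. Z != C), and the ADDE then adds that carry into X,
    // which is the addze form sketched above.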
    SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
                               AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
    return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
                       SDValue(Addc.getNode(), 1));
  }
  case ISD::SETEQ: {
    //                                 when C == 0
    //                             --> addze X, (subfic Z, 0).carry
    //                            /
    // add X, (zext(sete Z, C))--
    //                            \    when -32768 <= -C <= 32767 && C != 0
    //                             --> addze X, (subfic (addi Z, -C), 0).carry
    SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
                              DAG.getConstant(NegConstant, DL, MVT::i64));
    SDValue AddOrZ = NegConstant != 0 ? Add : Z;
    SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
                               DAG.getConstant(0, DL, MVT::i64), AddOrZ);
    return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
                       SDValue(Subc.getNode(), 1));
  }
  }

  return SDValue();
}

SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
  if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
    return Value;

  return SDValue();
}

// Detect TRUNCATE operations on bitcasts of float128 values.
// What we are looking for here is the situation where we extract a subset
// of bits from a 128-bit float.
// This can take one of two forms:
// 1) BITCAST of f128 feeding TRUNCATE
// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
// This is needed because we do not have a legal i128 type, so we want to
// prevent having to store the f128 and then reload part of it.
SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
                                           DAGCombinerInfo &DCI) const {
  // If we are using CRBits then try that first.
  if (Subtarget.useCRBits()) {
    // Check if CRBits did anything and return that if it did.
    if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
      return CRTruncValue;
  }

  SDLoc dl(N);
  SDValue Op0 = N->getOperand(0);

  // Looking for a truncate of i128 to i64.
  if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
    return SDValue();

  int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;

  // SRL feeding TRUNCATE.
  if (Op0.getOpcode() == ISD::SRL) {
    ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
    // The right shift has to be by 64 bits.
    if (!ConstNode || ConstNode->getZExtValue() != 64)
      return SDValue();

    // Switch the element number to extract.
    EltToExtract = EltToExtract ? 0 : 1;
    // Update Op0 past the SRL.
    Op0 = Op0.getOperand(0);
  }

  // BITCAST feeding a TRUNCATE possibly via SRL.
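  // For example (illustrative only), on a little-endian target
  //   (i64 (trunc (i128 (bitcast f128:%x))))
  // becomes an extract of element 0 of (v2i64 (bitcast %x)), and the
  // SRL-by-64 form extracts the other element instead.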
  if (Op0.getOpcode() == ISD::BITCAST &&
      Op0.getValueType() == MVT::i128 &&
      Op0.getOperand(0).getValueType() == MVT::f128) {
    SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
    return DCI.DAG.getNode(
        ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
        DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
  }
  return SDValue();
}

SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
  if (!ConstOpOrElement)
    return SDValue();

  // An imul is usually smaller than the alternative sequence for legal types.
  if (DAG.getMachineFunction().getFunction().hasMinSize() &&
      isOperationLegal(ISD::MUL, N->getValueType(0)))
    return SDValue();

  auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
    switch (this->Subtarget.getDarwinDirective()) {
    default:
      // TODO: enhance the condition for subtargets before pwr8.
      return false;
    case PPC::DIR_PWR8:
      //  type       mul     add    shl
      // scalar       4       1      1
      // vector       7       2      2
      return true;
    case PPC::DIR_PWR9:
      //  type       mul     add    shl
      // scalar       5       2      2
      // vector       7       2      2

      // The cycle ratios of the relevant operations are shown in the table
      // above: mul costs 5 (scalar) / 7 (vector), while add/sub/shl cost 2 for
      // both scalar and vector types. The two-instruction patterns
      // (add/sub + shl) cost 4, so they are always profitable; but the
      // three-instruction pattern (mul x, -(2^N + 1)) => -(add (shl x, N), x)
      // costs 6 (sub + add + shl), so it is only worthwhile for vector types.
      return IsAddOne && IsNeg ? VT.isVector() : true;
    }
  };

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
  bool IsNeg = MulAmt.isNegative();
  APInt MulAmtAbs = MulAmt.abs();

  if ((MulAmtAbs - 1).isPowerOf2()) {
    // (mul x, 2^N + 1) => (add (shl x, N), x)
    // (mul x, -(2^N + 1)) => -(add (shl x, N), x)

    if (!IsProfitable(IsNeg, true, VT))
      return SDValue();

    SDValue Op0 = N->getOperand(0);
    SDValue Op1 =
        DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
                    DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
    SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);

    if (!IsNeg)
      return Res;

    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
  } else if ((MulAmtAbs + 1).isPowerOf2()) {
    // (mul x, 2^N - 1) => (sub (shl x, N), x)
    // (mul x, -(2^N - 1)) => (sub x, (shl x, N))

    if (!IsProfitable(IsNeg, false, VT))
      return SDValue();

    SDValue Op0 = N->getOperand(0);
    SDValue Op1 =
        DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
                    DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));

    if (!IsNeg)
      return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
    else
      return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);

  } else {
    return SDValue();
  }
}

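// Return true if this call may be emitted as a tail call. This is queried
// (e.g. by CodeGenPrepare) when deciding whether duplicating a return
// instruction to enable tail-call optimization is worthwhile.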
bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  // Only duplicate to increase tail-calls for the 64-bit SysV ABIs.
  if (!Subtarget.isSVR4ABI() || !Subtarget.isPPC64())
    return false;

  // If not a tail call then no need to proceed.
  if (!CI->isTailCall())
    return false;

  // If tail calls are disabled for the caller then we are done.
  const Function *Caller = CI->getParent()->getParent();
  auto Attr = Caller->getFnAttribute("disable-tail-calls");
  if (Attr.getValueAsString() == "true")
    return false;

  // If sibling calls have been disabled and tail calls aren't guaranteed,
  // there is no reason to duplicate.
  auto &TM = getTargetMachine();
  if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
    return false;

  // Can't tail call a function called indirectly, or if it has variadic args.
  const Function *Callee = CI->getCalledFunction();
  if (!Callee || Callee->isVarArg())
    return false;

  // Make sure the callee and caller calling conventions are eligible for TCO.
  if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
                                           CI->getCallingConv()))
    return false;

  // If the function is local then we have a good chance at tail-calling it.
  return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
}

bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
  if (!Subtarget.hasVSX())
    return false;
  if (Subtarget.hasP9Vector() && VT == MVT::f128)
    return true;
  return VT == MVT::f32 || VT == MVT::f64 ||
         VT == MVT::v4f32 || VT == MVT::v2f64;
}

bool PPCTargetLowering::
isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
  const Value *Mask = AndI.getOperand(1);
  // If the mask is suitable for andi. or andis., we should sink the and.
  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
    // Can't handle constants wider than 64 bits.
    if (CI->getBitWidth() > 64)
      return false;
    int64_t ConstVal = CI->getZExtValue();
    return isUInt<16>(ConstVal) ||
           (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
  }

  // For non-constant masks, we can always use the record-form and.
  return true;
}

// Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0)
// Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0)
// Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0)
// Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0)
// Transform (abs (sub a, b)) to (vabsd a b 1) if a and b are of type v4i32
SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::ABS) && "Need ABS node here");
  assert(Subtarget.hasP9Altivec() &&
         "Only combine this when P9 Altivec is supported!");
  EVT VT = N->getValueType(0);
  if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  if (N->getOperand(0).getOpcode() == ISD::SUB) {
    // Even for a signed ABS, if both inputs of the subtraction are
    // zero-extended the difference cannot overflow, so the unsigned
    // absolute-difference node can be used directly.
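    // For example (illustrative only):
    //   (abs (sub (zext_invec %a), (zext_invec %b)))
    //     --> (PPCISD::VABSD (zext_invec %a), (zext_invec %b), 0)
    // which can then be selected to a single vabsdu[bhw] instruction.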
    unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode();
    unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode();
    if ((SubOpcd0 == ISD::ZERO_EXTEND ||
         SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) &&
        (SubOpcd1 == ISD::ZERO_EXTEND ||
         SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) {
      return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
                         N->getOperand(0)->getOperand(0),
                         N->getOperand(0)->getOperand(1),
                         DAG.getTargetConstant(0, dl, MVT::i32));
    }

    // For type v4i32, it can be optimized with xvnegsp + vabsduw.
    if (N->getOperand(0).getValueType() == MVT::v4i32 &&
        N->getOperand(0).hasOneUse()) {
      return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
                         N->getOperand(0)->getOperand(0),
                         N->getOperand(0)->getOperand(1),
                         DAG.getTargetConstant(1, dl, MVT::i32));
    }
  }

  return SDValue();
}

// For type v4i32/v8i16/v16i8, transform
// from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b)
// from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b)
// from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b)
// from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b)
SDValue PPCTargetLowering::combineVSelect(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here");
  assert(Subtarget.hasP9Altivec() &&
         "Only combine this when P9 Altivec is supported!");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Cond = N->getOperand(0);
  SDValue TrueOpnd = N->getOperand(1);
  SDValue FalseOpnd = N->getOperand(2);
  EVT VT = N->getOperand(1).getValueType();

  if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||
      FalseOpnd.getOpcode() != ISD::SUB)
    return SDValue();

  // ABSD is only available for type v4i32/v8i16/v16i8.
  if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
    return SDValue();

  // Require at least one single-use operand so the combine saves at least one
  // dependent computation.
  if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))
    return SDValue();

  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();

  // Only unsigned comparisons can be handled here.
  switch (CC) {
  default:
    return SDValue();
  case ISD::SETUGT:
  case ISD::SETUGE:
    break;
  case ISD::SETULT:
  case ISD::SETULE:
    std::swap(TrueOpnd, FalseOpnd);
    break;
  }

  SDValue CmpOpnd1 = Cond.getOperand(0);
  SDValue CmpOpnd2 = Cond.getOperand(1);

  // SETCC CmpOpnd1 CmpOpnd2 cond
  // TrueOpnd = CmpOpnd1 - CmpOpnd2
  // FalseOpnd = CmpOpnd2 - CmpOpnd1
  if (TrueOpnd.getOperand(0) == CmpOpnd1 &&
      TrueOpnd.getOperand(1) == CmpOpnd2 &&
      FalseOpnd.getOperand(0) == CmpOpnd2 &&
      FalseOpnd.getOperand(1) == CmpOpnd1) {
    return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(1).getValueType(),
                       CmpOpnd1, CmpOpnd2,
                       DAG.getTargetConstant(0, dl, MVT::i32));
  }

  return SDValue();
}