1 //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the PPCISelLowering class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "PPCISelLowering.h" 14 #include "MCTargetDesc/PPCPredicates.h" 15 #include "PPC.h" 16 #include "PPCCCState.h" 17 #include "PPCCallingConv.h" 18 #include "PPCFrameLowering.h" 19 #include "PPCInstrInfo.h" 20 #include "PPCMachineFunctionInfo.h" 21 #include "PPCPerfectShuffle.h" 22 #include "PPCRegisterInfo.h" 23 #include "PPCSubtarget.h" 24 #include "PPCTargetMachine.h" 25 #include "llvm/ADT/APFloat.h" 26 #include "llvm/ADT/APInt.h" 27 #include "llvm/ADT/ArrayRef.h" 28 #include "llvm/ADT/DenseMap.h" 29 #include "llvm/ADT/None.h" 30 #include "llvm/ADT/STLExtras.h" 31 #include "llvm/ADT/SmallPtrSet.h" 32 #include "llvm/ADT/SmallSet.h" 33 #include "llvm/ADT/SmallVector.h" 34 #include "llvm/ADT/Statistic.h" 35 #include "llvm/ADT/StringRef.h" 36 #include "llvm/ADT/StringSwitch.h" 37 #include "llvm/CodeGen/CallingConvLower.h" 38 #include "llvm/CodeGen/ISDOpcodes.h" 39 #include "llvm/CodeGen/MachineBasicBlock.h" 40 #include "llvm/CodeGen/MachineFrameInfo.h" 41 #include "llvm/CodeGen/MachineFunction.h" 42 #include "llvm/CodeGen/MachineInstr.h" 43 #include "llvm/CodeGen/MachineInstrBuilder.h" 44 #include "llvm/CodeGen/MachineJumpTableInfo.h" 45 #include "llvm/CodeGen/MachineLoopInfo.h" 46 #include "llvm/CodeGen/MachineMemOperand.h" 47 #include "llvm/CodeGen/MachineOperand.h" 48 #include "llvm/CodeGen/MachineRegisterInfo.h" 49 #include "llvm/CodeGen/RuntimeLibcalls.h" 50 #include "llvm/CodeGen/SelectionDAG.h" 51 #include 
"llvm/CodeGen/SelectionDAGNodes.h" 52 #include "llvm/CodeGen/TargetInstrInfo.h" 53 #include "llvm/CodeGen/TargetLowering.h" 54 #include "llvm/CodeGen/TargetRegisterInfo.h" 55 #include "llvm/CodeGen/ValueTypes.h" 56 #include "llvm/IR/CallSite.h" 57 #include "llvm/IR/CallingConv.h" 58 #include "llvm/IR/Constant.h" 59 #include "llvm/IR/Constants.h" 60 #include "llvm/IR/DataLayout.h" 61 #include "llvm/IR/DebugLoc.h" 62 #include "llvm/IR/DerivedTypes.h" 63 #include "llvm/IR/Function.h" 64 #include "llvm/IR/GlobalValue.h" 65 #include "llvm/IR/IRBuilder.h" 66 #include "llvm/IR/Instructions.h" 67 #include "llvm/IR/Intrinsics.h" 68 #include "llvm/IR/Module.h" 69 #include "llvm/IR/Type.h" 70 #include "llvm/IR/Use.h" 71 #include "llvm/IR/Value.h" 72 #include "llvm/MC/MCExpr.h" 73 #include "llvm/MC/MCRegisterInfo.h" 74 #include "llvm/Support/AtomicOrdering.h" 75 #include "llvm/Support/BranchProbability.h" 76 #include "llvm/Support/Casting.h" 77 #include "llvm/Support/CodeGen.h" 78 #include "llvm/Support/CommandLine.h" 79 #include "llvm/Support/Compiler.h" 80 #include "llvm/Support/Debug.h" 81 #include "llvm/Support/ErrorHandling.h" 82 #include "llvm/Support/Format.h" 83 #include "llvm/Support/KnownBits.h" 84 #include "llvm/Support/MachineValueType.h" 85 #include "llvm/Support/MathExtras.h" 86 #include "llvm/Support/raw_ostream.h" 87 #include "llvm/Target/TargetMachine.h" 88 #include "llvm/Target/TargetOptions.h" 89 #include <algorithm> 90 #include <cassert> 91 #include <cstdint> 92 #include <iterator> 93 #include <list> 94 #include <utility> 95 #include <vector> 96 97 using namespace llvm; 98 99 #define DEBUG_TYPE "ppc-lowering" 100 101 static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc", 102 cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden); 103 104 static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref", 105 cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden); 106 107 static cl::opt<bool> 
DisablePPCUnaligned("disable-ppc-unaligned", 108 cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden); 109 110 static cl::opt<bool> DisableSCO("disable-ppc-sco", 111 cl::desc("disable sibling call optimization on ppc"), cl::Hidden); 112 113 static cl::opt<bool> EnableQuadPrecision("enable-ppc-quad-precision", 114 cl::desc("enable quad precision float support on ppc"), cl::Hidden); 115 116 STATISTIC(NumTailCalls, "Number of tail calls"); 117 STATISTIC(NumSiblingCalls, "Number of sibling calls"); 118 119 static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int); 120 121 static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl); 122 123 // FIXME: Remove this once the bug has been fixed! 124 extern cl::opt<bool> ANDIGlueBug; 125 126 PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, 127 const PPCSubtarget &STI) 128 : TargetLowering(TM), Subtarget(STI) { 129 // Use _setjmp/_longjmp instead of setjmp/longjmp. 130 setUseUnderscoreSetJmp(true); 131 setUseUnderscoreLongJmp(true); 132 133 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all 134 // arguments are at least 4/8 bytes aligned. 135 bool isPPC64 = Subtarget.isPPC64(); 136 setMinStackArgumentAlignment(isPPC64 ? 8:4); 137 138 // Set up the register classes. 139 addRegisterClass(MVT::i32, &PPC::GPRCRegClass); 140 if (!useSoftFloat()) { 141 if (hasSPE()) { 142 addRegisterClass(MVT::f32, &PPC::SPE4RCRegClass); 143 addRegisterClass(MVT::f64, &PPC::SPERCRegClass); 144 } else { 145 addRegisterClass(MVT::f32, &PPC::F4RCRegClass); 146 addRegisterClass(MVT::f64, &PPC::F8RCRegClass); 147 } 148 } 149 150 // Match BITREVERSE to customized fast code sequence in the td file. 151 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); 152 setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); 153 154 // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended. 
155 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom); 156 157 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD. 158 for (MVT VT : MVT::integer_valuetypes()) { 159 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); 160 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand); 161 } 162 163 setTruncStoreAction(MVT::f64, MVT::f32, Expand); 164 165 // PowerPC has pre-inc load and store's. 166 setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal); 167 setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal); 168 setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal); 169 setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal); 170 setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal); 171 setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal); 172 setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal); 173 setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal); 174 setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal); 175 setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal); 176 if (!Subtarget.hasSPE()) { 177 setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal); 178 setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal); 179 setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal); 180 setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal); 181 } 182 183 // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry. 184 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 }; 185 for (MVT VT : ScalarIntVTs) { 186 setOperationAction(ISD::ADDC, VT, Legal); 187 setOperationAction(ISD::ADDE, VT, Legal); 188 setOperationAction(ISD::SUBC, VT, Legal); 189 setOperationAction(ISD::SUBE, VT, Legal); 190 } 191 192 if (Subtarget.useCRBits()) { 193 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 194 195 if (isPPC64 || Subtarget.hasFPCVT()) { 196 setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote); 197 AddPromotedToType (ISD::SINT_TO_FP, MVT::i1, 198 isPPC64 ? MVT::i64 : MVT::i32); 199 setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote); 200 AddPromotedToType(ISD::UINT_TO_FP, MVT::i1, 201 isPPC64 ? 
MVT::i64 : MVT::i32); 202 } else { 203 setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom); 204 setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom); 205 } 206 207 // PowerPC does not support direct load/store of condition registers. 208 setOperationAction(ISD::LOAD, MVT::i1, Custom); 209 setOperationAction(ISD::STORE, MVT::i1, Custom); 210 211 // FIXME: Remove this once the ANDI glue bug is fixed: 212 if (ANDIGlueBug) 213 setOperationAction(ISD::TRUNCATE, MVT::i1, Custom); 214 215 for (MVT VT : MVT::integer_valuetypes()) { 216 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); 217 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); 218 setTruncStoreAction(VT, MVT::i1, Expand); 219 } 220 221 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass); 222 } 223 224 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on 225 // PPC (the libcall is not available). 226 setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom); 227 setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom); 228 229 // We do not currently implement these libm ops for PowerPC. 230 setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand); 231 setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand); 232 setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand); 233 setOperationAction(ISD::FRINT, MVT::ppcf128, Expand); 234 setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand); 235 setOperationAction(ISD::FREM, MVT::ppcf128, Expand); 236 237 // PowerPC has no SREM/UREM instructions unless we are on P9 238 // On P9 we may use a hardware instruction to compute the remainder. 239 // The instructions are not legalized directly because in the cases where the 240 // result of both the remainder and the division is required it is more 241 // efficient to compute the remainder from the result of the division rather 242 // than use the remainder instruction. 
243 if (Subtarget.isISA3_0()) { 244 setOperationAction(ISD::SREM, MVT::i32, Custom); 245 setOperationAction(ISD::UREM, MVT::i32, Custom); 246 setOperationAction(ISD::SREM, MVT::i64, Custom); 247 setOperationAction(ISD::UREM, MVT::i64, Custom); 248 } else { 249 setOperationAction(ISD::SREM, MVT::i32, Expand); 250 setOperationAction(ISD::UREM, MVT::i32, Expand); 251 setOperationAction(ISD::SREM, MVT::i64, Expand); 252 setOperationAction(ISD::UREM, MVT::i64, Expand); 253 } 254 255 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM. 256 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); 257 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); 258 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); 259 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); 260 setOperationAction(ISD::UDIVREM, MVT::i32, Expand); 261 setOperationAction(ISD::SDIVREM, MVT::i32, Expand); 262 setOperationAction(ISD::UDIVREM, MVT::i64, Expand); 263 setOperationAction(ISD::SDIVREM, MVT::i64, Expand); 264 265 // We don't support sin/cos/sqrt/fmod/pow 266 setOperationAction(ISD::FSIN , MVT::f64, Expand); 267 setOperationAction(ISD::FCOS , MVT::f64, Expand); 268 setOperationAction(ISD::FSINCOS, MVT::f64, Expand); 269 setOperationAction(ISD::FREM , MVT::f64, Expand); 270 setOperationAction(ISD::FPOW , MVT::f64, Expand); 271 setOperationAction(ISD::FSIN , MVT::f32, Expand); 272 setOperationAction(ISD::FCOS , MVT::f32, Expand); 273 setOperationAction(ISD::FSINCOS, MVT::f32, Expand); 274 setOperationAction(ISD::FREM , MVT::f32, Expand); 275 setOperationAction(ISD::FPOW , MVT::f32, Expand); 276 if (Subtarget.hasSPE()) { 277 setOperationAction(ISD::FMA , MVT::f64, Expand); 278 setOperationAction(ISD::FMA , MVT::f32, Expand); 279 } else { 280 setOperationAction(ISD::FMA , MVT::f64, Legal); 281 setOperationAction(ISD::FMA , MVT::f32, Legal); 282 } 283 284 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); 285 286 // If we're enabling GP optimizations, use hardware square root 
287 if (!Subtarget.hasFSQRT() && 288 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() && 289 Subtarget.hasFRE())) 290 setOperationAction(ISD::FSQRT, MVT::f64, Expand); 291 292 if (!Subtarget.hasFSQRT() && 293 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() && 294 Subtarget.hasFRES())) 295 setOperationAction(ISD::FSQRT, MVT::f32, Expand); 296 297 if (Subtarget.hasFCPSGN()) { 298 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal); 299 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal); 300 } else { 301 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); 302 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); 303 } 304 305 if (Subtarget.hasFPRND()) { 306 setOperationAction(ISD::FFLOOR, MVT::f64, Legal); 307 setOperationAction(ISD::FCEIL, MVT::f64, Legal); 308 setOperationAction(ISD::FTRUNC, MVT::f64, Legal); 309 setOperationAction(ISD::FROUND, MVT::f64, Legal); 310 311 setOperationAction(ISD::FFLOOR, MVT::f32, Legal); 312 setOperationAction(ISD::FCEIL, MVT::f32, Legal); 313 setOperationAction(ISD::FTRUNC, MVT::f32, Legal); 314 setOperationAction(ISD::FROUND, MVT::f32, Legal); 315 } 316 317 // PowerPC does not have BSWAP, but we can use vector BSWAP instruction xxbrd 318 // to speed up scalar BSWAP64. 
319 // CTPOP or CTTZ were introduced in P8/P9 respectively 320 setOperationAction(ISD::BSWAP, MVT::i32 , Expand); 321 if (Subtarget.hasP9Vector()) 322 setOperationAction(ISD::BSWAP, MVT::i64 , Custom); 323 else 324 setOperationAction(ISD::BSWAP, MVT::i64 , Expand); 325 if (Subtarget.isISA3_0()) { 326 setOperationAction(ISD::CTTZ , MVT::i32 , Legal); 327 setOperationAction(ISD::CTTZ , MVT::i64 , Legal); 328 } else { 329 setOperationAction(ISD::CTTZ , MVT::i32 , Expand); 330 setOperationAction(ISD::CTTZ , MVT::i64 , Expand); 331 } 332 333 if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) { 334 setOperationAction(ISD::CTPOP, MVT::i32 , Legal); 335 setOperationAction(ISD::CTPOP, MVT::i64 , Legal); 336 } else { 337 setOperationAction(ISD::CTPOP, MVT::i32 , Expand); 338 setOperationAction(ISD::CTPOP, MVT::i64 , Expand); 339 } 340 341 // PowerPC does not have ROTR 342 setOperationAction(ISD::ROTR, MVT::i32 , Expand); 343 setOperationAction(ISD::ROTR, MVT::i64 , Expand); 344 345 if (!Subtarget.useCRBits()) { 346 // PowerPC does not have Select 347 setOperationAction(ISD::SELECT, MVT::i32, Expand); 348 setOperationAction(ISD::SELECT, MVT::i64, Expand); 349 setOperationAction(ISD::SELECT, MVT::f32, Expand); 350 setOperationAction(ISD::SELECT, MVT::f64, Expand); 351 } 352 353 // PowerPC wants to turn select_cc of FP into fsel when possible. 
354 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); 355 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); 356 357 // PowerPC wants to optimize integer setcc a bit 358 if (!Subtarget.useCRBits()) 359 setOperationAction(ISD::SETCC, MVT::i32, Custom); 360 361 // PowerPC does not have BRCOND which requires SetCC 362 if (!Subtarget.useCRBits()) 363 setOperationAction(ISD::BRCOND, MVT::Other, Expand); 364 365 setOperationAction(ISD::BR_JT, MVT::Other, Expand); 366 367 if (Subtarget.hasSPE()) { 368 // SPE has built-in conversions 369 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal); 370 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal); 371 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal); 372 } else { 373 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores. 374 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); 375 376 // PowerPC does not have [U|S]INT_TO_FP 377 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand); 378 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); 379 } 380 381 if (Subtarget.hasDirectMove() && isPPC64) { 382 setOperationAction(ISD::BITCAST, MVT::f32, Legal); 383 setOperationAction(ISD::BITCAST, MVT::i32, Legal); 384 setOperationAction(ISD::BITCAST, MVT::i64, Legal); 385 setOperationAction(ISD::BITCAST, MVT::f64, Legal); 386 } else { 387 setOperationAction(ISD::BITCAST, MVT::f32, Expand); 388 setOperationAction(ISD::BITCAST, MVT::i32, Expand); 389 setOperationAction(ISD::BITCAST, MVT::i64, Expand); 390 setOperationAction(ISD::BITCAST, MVT::f64, Expand); 391 } 392 393 // We cannot sextinreg(i1). Expand to shifts. 394 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 395 396 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support 397 // SjLj exception handling but a light-weight setjmp/longjmp replacement to 398 // support continuation, user-level threading, and etc.. 
As a result, no 399 // other SjLj exception interfaces are implemented and please don't build 400 // your own exception handling based on them. 401 // LLVM/Clang supports zero-cost DWARF exception handling. 402 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); 403 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); 404 405 // We want to legalize GlobalAddress and ConstantPool nodes into the 406 // appropriate instructions to materialize the address. 407 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); 408 setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); 409 setOperationAction(ISD::BlockAddress, MVT::i32, Custom); 410 setOperationAction(ISD::ConstantPool, MVT::i32, Custom); 411 setOperationAction(ISD::JumpTable, MVT::i32, Custom); 412 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); 413 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); 414 setOperationAction(ISD::BlockAddress, MVT::i64, Custom); 415 setOperationAction(ISD::ConstantPool, MVT::i64, Custom); 416 setOperationAction(ISD::JumpTable, MVT::i64, Custom); 417 418 // TRAP is legal. 419 setOperationAction(ISD::TRAP, MVT::Other, Legal); 420 421 // TRAMPOLINE is custom lowered. 422 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom); 423 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom); 424 425 // VASTART needs to be custom lowered to use the VarArgsFrameIndex 426 setOperationAction(ISD::VASTART , MVT::Other, Custom); 427 428 if (Subtarget.isSVR4ABI()) { 429 if (isPPC64) { 430 // VAARG always uses double-word chunks, so promote anything smaller. 
431 setOperationAction(ISD::VAARG, MVT::i1, Promote); 432 AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64); 433 setOperationAction(ISD::VAARG, MVT::i8, Promote); 434 AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64); 435 setOperationAction(ISD::VAARG, MVT::i16, Promote); 436 AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64); 437 setOperationAction(ISD::VAARG, MVT::i32, Promote); 438 AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64); 439 setOperationAction(ISD::VAARG, MVT::Other, Expand); 440 } else { 441 // VAARG is custom lowered with the 32-bit SVR4 ABI. 442 setOperationAction(ISD::VAARG, MVT::Other, Custom); 443 setOperationAction(ISD::VAARG, MVT::i64, Custom); 444 } 445 } else 446 setOperationAction(ISD::VAARG, MVT::Other, Expand); 447 448 if (Subtarget.isSVR4ABI() && !isPPC64) 449 // VACOPY is custom lowered with the 32-bit SVR4 ABI. 450 setOperationAction(ISD::VACOPY , MVT::Other, Custom); 451 else 452 setOperationAction(ISD::VACOPY , MVT::Other, Expand); 453 454 // Use the default implementation. 455 setOperationAction(ISD::VAEND , MVT::Other, Expand); 456 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); 457 setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom); 458 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom); 459 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom); 460 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom); 461 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom); 462 setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom); 463 setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom); 464 465 // We want to custom lower some of our intrinsics. 466 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 467 468 // To handle counter-based loop conditions. 
469 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom); 470 471 setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom); 472 setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom); 473 setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom); 474 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); 475 476 // Comparisons that require checking two conditions. 477 if (Subtarget.hasSPE()) { 478 setCondCodeAction(ISD::SETO, MVT::f32, Expand); 479 setCondCodeAction(ISD::SETO, MVT::f64, Expand); 480 setCondCodeAction(ISD::SETUO, MVT::f32, Expand); 481 setCondCodeAction(ISD::SETUO, MVT::f64, Expand); 482 } 483 setCondCodeAction(ISD::SETULT, MVT::f32, Expand); 484 setCondCodeAction(ISD::SETULT, MVT::f64, Expand); 485 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand); 486 setCondCodeAction(ISD::SETUGT, MVT::f64, Expand); 487 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand); 488 setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand); 489 setCondCodeAction(ISD::SETOGE, MVT::f32, Expand); 490 setCondCodeAction(ISD::SETOGE, MVT::f64, Expand); 491 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand); 492 setCondCodeAction(ISD::SETOLE, MVT::f64, Expand); 493 setCondCodeAction(ISD::SETONE, MVT::f32, Expand); 494 setCondCodeAction(ISD::SETONE, MVT::f64, Expand); 495 496 if (Subtarget.has64BitSupport()) { 497 // They also have instructions for converting between i64 and fp. 498 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); 499 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); 500 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); 501 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); 502 // This is just the low 32 bits of a (signed) fp->i64 conversion. 503 // We cannot do this with Promote because i64 is not a legal type. 
504 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); 505 506 if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) 507 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); 508 } else { 509 // PowerPC does not have FP_TO_UINT on 32-bit implementations. 510 if (Subtarget.hasSPE()) 511 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal); 512 else 513 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); 514 } 515 516 // With the instructions enabled under FPCVT, we can do everything. 517 if (Subtarget.hasFPCVT()) { 518 if (Subtarget.has64BitSupport()) { 519 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); 520 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); 521 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); 522 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); 523 } 524 525 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); 526 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); 527 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); 528 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); 529 } 530 531 if (Subtarget.use64BitRegs()) { 532 // 64-bit PowerPC implementations can support i64 types directly 533 addRegisterClass(MVT::i64, &PPC::G8RCRegClass); 534 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or 535 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); 536 // 64-bit PowerPC wants to expand i128 shifts itself. 537 setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom); 538 setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom); 539 setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom); 540 } else { 541 // 32-bit PowerPC wants to expand i64 shifts itself. 542 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); 543 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); 544 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); 545 } 546 547 if (Subtarget.hasAltivec()) { 548 // First set operation action for all vector types to expand. 
Then we 549 // will selectively turn on ones that can be effectively codegen'd. 550 for (MVT VT : MVT::vector_valuetypes()) { 551 // add/sub are legal for all supported vector VT's. 552 setOperationAction(ISD::ADD, VT, Legal); 553 setOperationAction(ISD::SUB, VT, Legal); 554 555 // Vector instructions introduced in P8 556 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) { 557 setOperationAction(ISD::CTPOP, VT, Legal); 558 setOperationAction(ISD::CTLZ, VT, Legal); 559 } 560 else { 561 setOperationAction(ISD::CTPOP, VT, Expand); 562 setOperationAction(ISD::CTLZ, VT, Expand); 563 } 564 565 // Vector instructions introduced in P9 566 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128)) 567 setOperationAction(ISD::CTTZ, VT, Legal); 568 else 569 setOperationAction(ISD::CTTZ, VT, Expand); 570 571 // We promote all shuffles to v16i8. 572 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote); 573 AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8); 574 575 // We promote all non-typed operations to v4i32. 576 setOperationAction(ISD::AND , VT, Promote); 577 AddPromotedToType (ISD::AND , VT, MVT::v4i32); 578 setOperationAction(ISD::OR , VT, Promote); 579 AddPromotedToType (ISD::OR , VT, MVT::v4i32); 580 setOperationAction(ISD::XOR , VT, Promote); 581 AddPromotedToType (ISD::XOR , VT, MVT::v4i32); 582 setOperationAction(ISD::LOAD , VT, Promote); 583 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32); 584 setOperationAction(ISD::SELECT, VT, Promote); 585 AddPromotedToType (ISD::SELECT, VT, MVT::v4i32); 586 setOperationAction(ISD::VSELECT, VT, Legal); 587 setOperationAction(ISD::SELECT_CC, VT, Promote); 588 AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32); 589 setOperationAction(ISD::STORE, VT, Promote); 590 AddPromotedToType (ISD::STORE, VT, MVT::v4i32); 591 592 // No other operations are legal. 
593 setOperationAction(ISD::MUL , VT, Expand); 594 setOperationAction(ISD::SDIV, VT, Expand); 595 setOperationAction(ISD::SREM, VT, Expand); 596 setOperationAction(ISD::UDIV, VT, Expand); 597 setOperationAction(ISD::UREM, VT, Expand); 598 setOperationAction(ISD::FDIV, VT, Expand); 599 setOperationAction(ISD::FREM, VT, Expand); 600 setOperationAction(ISD::FNEG, VT, Expand); 601 setOperationAction(ISD::FSQRT, VT, Expand); 602 setOperationAction(ISD::FLOG, VT, Expand); 603 setOperationAction(ISD::FLOG10, VT, Expand); 604 setOperationAction(ISD::FLOG2, VT, Expand); 605 setOperationAction(ISD::FEXP, VT, Expand); 606 setOperationAction(ISD::FEXP2, VT, Expand); 607 setOperationAction(ISD::FSIN, VT, Expand); 608 setOperationAction(ISD::FCOS, VT, Expand); 609 setOperationAction(ISD::FABS, VT, Expand); 610 setOperationAction(ISD::FFLOOR, VT, Expand); 611 setOperationAction(ISD::FCEIL, VT, Expand); 612 setOperationAction(ISD::FTRUNC, VT, Expand); 613 setOperationAction(ISD::FRINT, VT, Expand); 614 setOperationAction(ISD::FNEARBYINT, VT, Expand); 615 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand); 616 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand); 617 setOperationAction(ISD::BUILD_VECTOR, VT, Expand); 618 setOperationAction(ISD::MULHU, VT, Expand); 619 setOperationAction(ISD::MULHS, VT, Expand); 620 setOperationAction(ISD::UMUL_LOHI, VT, Expand); 621 setOperationAction(ISD::SMUL_LOHI, VT, Expand); 622 setOperationAction(ISD::UDIVREM, VT, Expand); 623 setOperationAction(ISD::SDIVREM, VT, Expand); 624 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand); 625 setOperationAction(ISD::FPOW, VT, Expand); 626 setOperationAction(ISD::BSWAP, VT, Expand); 627 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); 628 setOperationAction(ISD::ROTL, VT, Expand); 629 setOperationAction(ISD::ROTR, VT, Expand); 630 631 for (MVT InnerVT : MVT::vector_valuetypes()) { 632 setTruncStoreAction(VT, InnerVT, Expand); 633 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); 634 
setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); 635 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); 636 } 637 } 638 639 for (auto VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8}) 640 setOperationAction(ISD::ABS, VT, Custom); 641 642 // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle 643 // with merges, splats, etc. 644 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom); 645 646 // Vector truncates to sub-word integer that fit in an Altivec/VSX register 647 // are cheap, so handle them before they get expanded to scalar. 648 setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom); 649 setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom); 650 setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom); 651 setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom); 652 setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom); 653 654 setOperationAction(ISD::AND , MVT::v4i32, Legal); 655 setOperationAction(ISD::OR , MVT::v4i32, Legal); 656 setOperationAction(ISD::XOR , MVT::v4i32, Legal); 657 setOperationAction(ISD::LOAD , MVT::v4i32, Legal); 658 setOperationAction(ISD::SELECT, MVT::v4i32, 659 Subtarget.useCRBits() ? Legal : Expand); 660 setOperationAction(ISD::STORE , MVT::v4i32, Legal); 661 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); 662 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); 663 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); 664 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); 665 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); 666 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); 667 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); 668 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal); 669 670 // Without hasP8Altivec set, v2i64 SMAX isn't available. 671 // But ABS custom lowering requires SMAX support. 
672 if (!Subtarget.hasP8Altivec()) 673 setOperationAction(ISD::ABS, MVT::v2i64, Expand); 674 675 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass); 676 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass); 677 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass); 678 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass); 679 680 setOperationAction(ISD::MUL, MVT::v4f32, Legal); 681 setOperationAction(ISD::FMA, MVT::v4f32, Legal); 682 683 if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) { 684 setOperationAction(ISD::FDIV, MVT::v4f32, Legal); 685 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); 686 } 687 688 if (Subtarget.hasP8Altivec()) 689 setOperationAction(ISD::MUL, MVT::v4i32, Legal); 690 else 691 setOperationAction(ISD::MUL, MVT::v4i32, Custom); 692 693 setOperationAction(ISD::MUL, MVT::v8i16, Custom); 694 setOperationAction(ISD::MUL, MVT::v16i8, Custom); 695 696 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom); 697 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom); 698 699 setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom); 700 setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom); 701 setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom); 702 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); 703 704 // Altivec does not contain unordered floating-point compare instructions 705 setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand); 706 setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand); 707 setCondCodeAction(ISD::SETO, MVT::v4f32, Expand); 708 setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand); 709 710 if (Subtarget.hasVSX()) { 711 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal); 712 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal); 713 if (Subtarget.hasP8Vector()) { 714 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal); 715 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal); 716 } 717 if (Subtarget.hasDirectMove() && isPPC64) { 718 setOperationAction(ISD::SCALAR_TO_VECTOR, 
MVT::v16i8, Legal); 719 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal); 720 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal); 721 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal); 722 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal); 723 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal); 724 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal); 725 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal); 726 } 727 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal); 728 729 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); 730 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); 731 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); 732 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal); 733 setOperationAction(ISD::FROUND, MVT::v2f64, Legal); 734 735 setOperationAction(ISD::FROUND, MVT::v4f32, Legal); 736 737 setOperationAction(ISD::MUL, MVT::v2f64, Legal); 738 setOperationAction(ISD::FMA, MVT::v2f64, Legal); 739 740 setOperationAction(ISD::FDIV, MVT::v2f64, Legal); 741 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); 742 743 // Share the Altivec comparison restrictions. 
744 setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand); 745 setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand); 746 setCondCodeAction(ISD::SETO, MVT::v2f64, Expand); 747 setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand); 748 749 setOperationAction(ISD::LOAD, MVT::v2f64, Legal); 750 setOperationAction(ISD::STORE, MVT::v2f64, Legal); 751 752 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal); 753 754 if (Subtarget.hasP8Vector()) 755 addRegisterClass(MVT::f32, &PPC::VSSRCRegClass); 756 757 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass); 758 759 addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass); 760 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass); 761 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass); 762 763 if (Subtarget.hasP8Altivec()) { 764 setOperationAction(ISD::SHL, MVT::v2i64, Legal); 765 setOperationAction(ISD::SRA, MVT::v2i64, Legal); 766 setOperationAction(ISD::SRL, MVT::v2i64, Legal); 767 768 // 128 bit shifts can be accomplished via 3 instructions for SHL and 769 // SRL, but not for SRA because of the instructions available: 770 // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth 771 // doing 772 setOperationAction(ISD::SHL, MVT::v1i128, Expand); 773 setOperationAction(ISD::SRL, MVT::v1i128, Expand); 774 setOperationAction(ISD::SRA, MVT::v1i128, Expand); 775 776 setOperationAction(ISD::SETCC, MVT::v2i64, Legal); 777 } 778 else { 779 setOperationAction(ISD::SHL, MVT::v2i64, Expand); 780 setOperationAction(ISD::SRA, MVT::v2i64, Expand); 781 setOperationAction(ISD::SRL, MVT::v2i64, Expand); 782 783 setOperationAction(ISD::SETCC, MVT::v2i64, Custom); 784 785 // VSX v2i64 only supports non-arithmetic operations. 
786 setOperationAction(ISD::ADD, MVT::v2i64, Expand); 787 setOperationAction(ISD::SUB, MVT::v2i64, Expand); 788 } 789 790 setOperationAction(ISD::LOAD, MVT::v2i64, Promote); 791 AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64); 792 setOperationAction(ISD::STORE, MVT::v2i64, Promote); 793 AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64); 794 795 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal); 796 797 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal); 798 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal); 799 setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal); 800 setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal); 801 802 // Custom handling for partial vectors of integers converted to 803 // floating point. We already have optimal handling for v2i32 through 804 // the DAG combine, so those aren't necessary. 805 setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom); 806 setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom); 807 setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom); 808 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom); 809 setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom); 810 setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom); 811 setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom); 812 setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom); 813 814 setOperationAction(ISD::FNEG, MVT::v4f32, Legal); 815 setOperationAction(ISD::FNEG, MVT::v2f64, Legal); 816 setOperationAction(ISD::FABS, MVT::v4f32, Legal); 817 setOperationAction(ISD::FABS, MVT::v2f64, Legal); 818 819 if (Subtarget.hasDirectMove()) 820 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom); 821 setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); 822 823 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass); 824 } 825 826 if (Subtarget.hasP8Altivec()) { 827 addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass); 828 addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass); 829 } 830 831 if (Subtarget.hasP9Vector()) { 832 
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); 833 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); 834 835 // 128 bit shifts can be accomplished via 3 instructions for SHL and 836 // SRL, but not for SRA because of the instructions available: 837 // VS{RL} and VS{RL}O. 838 setOperationAction(ISD::SHL, MVT::v1i128, Legal); 839 setOperationAction(ISD::SRL, MVT::v1i128, Legal); 840 setOperationAction(ISD::SRA, MVT::v1i128, Expand); 841 842 if (EnableQuadPrecision) { 843 addRegisterClass(MVT::f128, &PPC::VRRCRegClass); 844 setOperationAction(ISD::FADD, MVT::f128, Legal); 845 setOperationAction(ISD::FSUB, MVT::f128, Legal); 846 setOperationAction(ISD::FDIV, MVT::f128, Legal); 847 setOperationAction(ISD::FMUL, MVT::f128, Legal); 848 setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal); 849 // No extending loads to f128 on PPC. 850 for (MVT FPT : MVT::fp_valuetypes()) 851 setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand); 852 setOperationAction(ISD::FMA, MVT::f128, Legal); 853 setCondCodeAction(ISD::SETULT, MVT::f128, Expand); 854 setCondCodeAction(ISD::SETUGT, MVT::f128, Expand); 855 setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand); 856 setCondCodeAction(ISD::SETOGE, MVT::f128, Expand); 857 setCondCodeAction(ISD::SETOLE, MVT::f128, Expand); 858 setCondCodeAction(ISD::SETONE, MVT::f128, Expand); 859 860 setOperationAction(ISD::FTRUNC, MVT::f128, Legal); 861 setOperationAction(ISD::FRINT, MVT::f128, Legal); 862 setOperationAction(ISD::FFLOOR, MVT::f128, Legal); 863 setOperationAction(ISD::FCEIL, MVT::f128, Legal); 864 setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal); 865 setOperationAction(ISD::FROUND, MVT::f128, Legal); 866 867 setOperationAction(ISD::SELECT, MVT::f128, Expand); 868 setOperationAction(ISD::FP_ROUND, MVT::f64, Legal); 869 setOperationAction(ISD::FP_ROUND, MVT::f32, Legal); 870 setTruncStoreAction(MVT::f128, MVT::f64, Expand); 871 setTruncStoreAction(MVT::f128, MVT::f32, Expand); 872 
setOperationAction(ISD::BITCAST, MVT::i128, Custom); 873 // No implementation for these ops for PowerPC. 874 setOperationAction(ISD::FSIN , MVT::f128, Expand); 875 setOperationAction(ISD::FCOS , MVT::f128, Expand); 876 setOperationAction(ISD::FPOW, MVT::f128, Expand); 877 setOperationAction(ISD::FPOWI, MVT::f128, Expand); 878 setOperationAction(ISD::FREM, MVT::f128, Expand); 879 } 880 881 } 882 883 if (Subtarget.hasP9Altivec()) { 884 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); 885 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom); 886 } 887 } 888 889 if (Subtarget.hasQPX()) { 890 setOperationAction(ISD::FADD, MVT::v4f64, Legal); 891 setOperationAction(ISD::FSUB, MVT::v4f64, Legal); 892 setOperationAction(ISD::FMUL, MVT::v4f64, Legal); 893 setOperationAction(ISD::FREM, MVT::v4f64, Expand); 894 895 setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal); 896 setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand); 897 898 setOperationAction(ISD::LOAD , MVT::v4f64, Custom); 899 setOperationAction(ISD::STORE , MVT::v4f64, Custom); 900 901 setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom); 902 setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom); 903 904 if (!Subtarget.useCRBits()) 905 setOperationAction(ISD::SELECT, MVT::v4f64, Expand); 906 setOperationAction(ISD::VSELECT, MVT::v4f64, Legal); 907 908 setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal); 909 setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand); 910 setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand); 911 setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand); 912 setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom); 913 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal); 914 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom); 915 916 setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal); 917 setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand); 918 919 setOperationAction(ISD::FP_ROUND , 
MVT::v4f32, Legal); 920 setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand); 921 setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal); 922 923 setOperationAction(ISD::FNEG , MVT::v4f64, Legal); 924 setOperationAction(ISD::FABS , MVT::v4f64, Legal); 925 setOperationAction(ISD::FSIN , MVT::v4f64, Expand); 926 setOperationAction(ISD::FCOS , MVT::v4f64, Expand); 927 setOperationAction(ISD::FPOW , MVT::v4f64, Expand); 928 setOperationAction(ISD::FLOG , MVT::v4f64, Expand); 929 setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand); 930 setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand); 931 setOperationAction(ISD::FEXP , MVT::v4f64, Expand); 932 setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand); 933 934 setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal); 935 setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal); 936 937 setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal); 938 setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal); 939 940 addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass); 941 942 setOperationAction(ISD::FADD, MVT::v4f32, Legal); 943 setOperationAction(ISD::FSUB, MVT::v4f32, Legal); 944 setOperationAction(ISD::FMUL, MVT::v4f32, Legal); 945 setOperationAction(ISD::FREM, MVT::v4f32, Expand); 946 947 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal); 948 setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand); 949 950 setOperationAction(ISD::LOAD , MVT::v4f32, Custom); 951 setOperationAction(ISD::STORE , MVT::v4f32, Custom); 952 953 if (!Subtarget.useCRBits()) 954 setOperationAction(ISD::SELECT, MVT::v4f32, Expand); 955 setOperationAction(ISD::VSELECT, MVT::v4f32, Legal); 956 957 setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal); 958 setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand); 959 setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand); 960 setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand); 961 setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom); 962 
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal); 963 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); 964 965 setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal); 966 setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand); 967 968 setOperationAction(ISD::FNEG , MVT::v4f32, Legal); 969 setOperationAction(ISD::FABS , MVT::v4f32, Legal); 970 setOperationAction(ISD::FSIN , MVT::v4f32, Expand); 971 setOperationAction(ISD::FCOS , MVT::v4f32, Expand); 972 setOperationAction(ISD::FPOW , MVT::v4f32, Expand); 973 setOperationAction(ISD::FLOG , MVT::v4f32, Expand); 974 setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand); 975 setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand); 976 setOperationAction(ISD::FEXP , MVT::v4f32, Expand); 977 setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand); 978 979 setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal); 980 setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal); 981 982 setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal); 983 setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal); 984 985 addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass); 986 987 setOperationAction(ISD::AND , MVT::v4i1, Legal); 988 setOperationAction(ISD::OR , MVT::v4i1, Legal); 989 setOperationAction(ISD::XOR , MVT::v4i1, Legal); 990 991 if (!Subtarget.useCRBits()) 992 setOperationAction(ISD::SELECT, MVT::v4i1, Expand); 993 setOperationAction(ISD::VSELECT, MVT::v4i1, Legal); 994 995 setOperationAction(ISD::LOAD , MVT::v4i1, Custom); 996 setOperationAction(ISD::STORE , MVT::v4i1, Custom); 997 998 setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom); 999 setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand); 1000 setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand); 1001 setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand); 1002 setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom); 1003 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand); 1004 setOperationAction(ISD::BUILD_VECTOR, 
MVT::v4i1, Custom); 1005 1006 setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom); 1007 setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom); 1008 1009 addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass); 1010 1011 setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal); 1012 setOperationAction(ISD::FCEIL, MVT::v4f64, Legal); 1013 setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal); 1014 setOperationAction(ISD::FROUND, MVT::v4f64, Legal); 1015 1016 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); 1017 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); 1018 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); 1019 setOperationAction(ISD::FROUND, MVT::v4f32, Legal); 1020 1021 setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand); 1022 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand); 1023 1024 // These need to set FE_INEXACT, and so cannot be vectorized here. 1025 setOperationAction(ISD::FRINT, MVT::v4f64, Expand); 1026 setOperationAction(ISD::FRINT, MVT::v4f32, Expand); 1027 1028 if (TM.Options.UnsafeFPMath) { 1029 setOperationAction(ISD::FDIV, MVT::v4f64, Legal); 1030 setOperationAction(ISD::FSQRT, MVT::v4f64, Legal); 1031 1032 setOperationAction(ISD::FDIV, MVT::v4f32, Legal); 1033 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); 1034 } else { 1035 setOperationAction(ISD::FDIV, MVT::v4f64, Expand); 1036 setOperationAction(ISD::FSQRT, MVT::v4f64, Expand); 1037 1038 setOperationAction(ISD::FDIV, MVT::v4f32, Expand); 1039 setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); 1040 } 1041 } 1042 1043 if (Subtarget.has64BitSupport()) 1044 setOperationAction(ISD::PREFETCH, MVT::Other, Legal); 1045 1046 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? 
Legal : Custom); 1047 1048 if (!isPPC64) { 1049 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand); 1050 setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); 1051 } 1052 1053 setBooleanContents(ZeroOrOneBooleanContent); 1054 1055 if (Subtarget.hasAltivec()) { 1056 // Altivec instructions set fields to all zeros or all ones. 1057 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); 1058 } 1059 1060 if (!isPPC64) { 1061 // These libcalls are not available in 32-bit. 1062 setLibcallName(RTLIB::SHL_I128, nullptr); 1063 setLibcallName(RTLIB::SRL_I128, nullptr); 1064 setLibcallName(RTLIB::SRA_I128, nullptr); 1065 } 1066 1067 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1); 1068 1069 // We have target-specific dag combine patterns for the following nodes: 1070 setTargetDAGCombine(ISD::ADD); 1071 setTargetDAGCombine(ISD::SHL); 1072 setTargetDAGCombine(ISD::SRA); 1073 setTargetDAGCombine(ISD::SRL); 1074 setTargetDAGCombine(ISD::SINT_TO_FP); 1075 setTargetDAGCombine(ISD::BUILD_VECTOR); 1076 if (Subtarget.hasFPCVT()) 1077 setTargetDAGCombine(ISD::UINT_TO_FP); 1078 setTargetDAGCombine(ISD::LOAD); 1079 setTargetDAGCombine(ISD::STORE); 1080 setTargetDAGCombine(ISD::BR_CC); 1081 if (Subtarget.useCRBits()) 1082 setTargetDAGCombine(ISD::BRCOND); 1083 setTargetDAGCombine(ISD::BSWAP); 1084 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); 1085 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); 1086 setTargetDAGCombine(ISD::INTRINSIC_VOID); 1087 1088 setTargetDAGCombine(ISD::SIGN_EXTEND); 1089 setTargetDAGCombine(ISD::ZERO_EXTEND); 1090 setTargetDAGCombine(ISD::ANY_EXTEND); 1091 1092 setTargetDAGCombine(ISD::TRUNCATE); 1093 1094 if (Subtarget.useCRBits()) { 1095 setTargetDAGCombine(ISD::TRUNCATE); 1096 setTargetDAGCombine(ISD::SETCC); 1097 setTargetDAGCombine(ISD::SELECT_CC); 1098 } 1099 1100 // Use reciprocal estimates. 
1101 if (TM.Options.UnsafeFPMath) { 1102 setTargetDAGCombine(ISD::FDIV); 1103 setTargetDAGCombine(ISD::FSQRT); 1104 } 1105 1106 if (Subtarget.hasP9Altivec()) { 1107 setTargetDAGCombine(ISD::ABS); 1108 setTargetDAGCombine(ISD::VSELECT); 1109 } 1110 1111 // Darwin long double math library functions have $LDBL128 appended. 1112 if (Subtarget.isDarwin()) { 1113 setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128"); 1114 setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128"); 1115 setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128"); 1116 setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128"); 1117 setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128"); 1118 setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128"); 1119 setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128"); 1120 setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128"); 1121 setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128"); 1122 setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128"); 1123 } 1124 1125 if (EnableQuadPrecision) { 1126 setLibcallName(RTLIB::LOG_F128, "logf128"); 1127 setLibcallName(RTLIB::LOG2_F128, "log2f128"); 1128 setLibcallName(RTLIB::LOG10_F128, "log10f128"); 1129 setLibcallName(RTLIB::EXP_F128, "expf128"); 1130 setLibcallName(RTLIB::EXP2_F128, "exp2f128"); 1131 setLibcallName(RTLIB::SIN_F128, "sinf128"); 1132 setLibcallName(RTLIB::COS_F128, "cosf128"); 1133 setLibcallName(RTLIB::POW_F128, "powf128"); 1134 setLibcallName(RTLIB::FMIN_F128, "fminf128"); 1135 setLibcallName(RTLIB::FMAX_F128, "fmaxf128"); 1136 setLibcallName(RTLIB::POWI_F128, "__powikf2"); 1137 setLibcallName(RTLIB::REM_F128, "fmodf128"); 1138 } 1139 1140 // With 32 condition bits, we don't need to sink (and duplicate) compares 1141 // aggressively in CodeGenPrep. 
1142 if (Subtarget.useCRBits()) { 1143 setHasMultipleConditionRegisters(); 1144 setJumpIsExpensive(); 1145 } 1146 1147 setMinFunctionAlignment(2); 1148 if (Subtarget.isDarwin()) 1149 setPrefFunctionAlignment(4); 1150 1151 switch (Subtarget.getDarwinDirective()) { 1152 default: break; 1153 case PPC::DIR_970: 1154 case PPC::DIR_A2: 1155 case PPC::DIR_E500: 1156 case PPC::DIR_E500mc: 1157 case PPC::DIR_E5500: 1158 case PPC::DIR_PWR4: 1159 case PPC::DIR_PWR5: 1160 case PPC::DIR_PWR5X: 1161 case PPC::DIR_PWR6: 1162 case PPC::DIR_PWR6X: 1163 case PPC::DIR_PWR7: 1164 case PPC::DIR_PWR8: 1165 case PPC::DIR_PWR9: 1166 setPrefFunctionAlignment(4); 1167 setPrefLoopAlignment(4); 1168 break; 1169 } 1170 1171 if (Subtarget.enableMachineScheduler()) 1172 setSchedulingPreference(Sched::Source); 1173 else 1174 setSchedulingPreference(Sched::Hybrid); 1175 1176 computeRegisterProperties(STI.getRegisterInfo()); 1177 1178 // The Freescale cores do better with aggressive inlining of memcpy and 1179 // friends. GCC uses same threshold of 128 bytes (= 32 word stores). 1180 if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc || 1181 Subtarget.getDarwinDirective() == PPC::DIR_E5500) { 1182 MaxStoresPerMemset = 32; 1183 MaxStoresPerMemsetOptSize = 16; 1184 MaxStoresPerMemcpy = 32; 1185 MaxStoresPerMemcpyOptSize = 8; 1186 MaxStoresPerMemmove = 32; 1187 MaxStoresPerMemmoveOptSize = 8; 1188 } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) { 1189 // The A2 also benefits from (very) aggressive inlining of memcpy and 1190 // friends. The overhead of a the function call, even when warm, can be 1191 // over one hundred cycles. 1192 MaxStoresPerMemset = 128; 1193 MaxStoresPerMemcpy = 128; 1194 MaxStoresPerMemmove = 128; 1195 MaxLoadsPerMemcmp = 128; 1196 } else { 1197 MaxLoadsPerMemcmp = 8; 1198 MaxLoadsPerMemcmpOptSize = 4; 1199 } 1200 } 1201 1202 /// getMaxByValAlign - Helper for getByValTypeAlignment to determine 1203 /// the desired ByVal argument alignment. 
1204 static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign, 1205 unsigned MaxMaxAlign) { 1206 if (MaxAlign == MaxMaxAlign) 1207 return; 1208 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) { 1209 if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256) 1210 MaxAlign = 32; 1211 else if (VTy->getBitWidth() >= 128 && MaxAlign < 16) 1212 MaxAlign = 16; 1213 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { 1214 unsigned EltAlign = 0; 1215 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign); 1216 if (EltAlign > MaxAlign) 1217 MaxAlign = EltAlign; 1218 } else if (StructType *STy = dyn_cast<StructType>(Ty)) { 1219 for (auto *EltTy : STy->elements()) { 1220 unsigned EltAlign = 0; 1221 getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign); 1222 if (EltAlign > MaxAlign) 1223 MaxAlign = EltAlign; 1224 if (MaxAlign == MaxMaxAlign) 1225 break; 1226 } 1227 } 1228 } 1229 1230 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate 1231 /// function arguments in the caller parameter area. 1232 unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty, 1233 const DataLayout &DL) const { 1234 // Darwin passes everything on 4 byte boundary. 1235 if (Subtarget.isDarwin()) 1236 return 4; 1237 1238 // 16byte and wider vectors are passed on 16byte boundary. 1239 // The rest is 8 on PPC64 and 4 on PPC32 boundary. 1240 unsigned Align = Subtarget.isPPC64() ? 8 : 4; 1241 if (Subtarget.hasAltivec() || Subtarget.hasQPX()) 1242 getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 
32 : 16); 1243 return Align; 1244 } 1245 1246 unsigned PPCTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, 1247 CallingConv:: ID CC, 1248 EVT VT) const { 1249 if (Subtarget.hasSPE() && VT == MVT::f64) 1250 return 2; 1251 return PPCTargetLowering::getNumRegisters(Context, VT); 1252 } 1253 1254 MVT PPCTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, 1255 CallingConv:: ID CC, 1256 EVT VT) const { 1257 if (Subtarget.hasSPE() && VT == MVT::f64) 1258 return MVT::i32; 1259 return PPCTargetLowering::getRegisterType(Context, VT); 1260 } 1261 1262 bool PPCTargetLowering::useSoftFloat() const { 1263 return Subtarget.useSoftFloat(); 1264 } 1265 1266 bool PPCTargetLowering::hasSPE() const { 1267 return Subtarget.hasSPE(); 1268 } 1269 1270 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { 1271 switch ((PPCISD::NodeType)Opcode) { 1272 case PPCISD::FIRST_NUMBER: break; 1273 case PPCISD::FSEL: return "PPCISD::FSEL"; 1274 case PPCISD::FCFID: return "PPCISD::FCFID"; 1275 case PPCISD::FCFIDU: return "PPCISD::FCFIDU"; 1276 case PPCISD::FCFIDS: return "PPCISD::FCFIDS"; 1277 case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS"; 1278 case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ"; 1279 case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ"; 1280 case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ"; 1281 case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ"; 1282 case PPCISD::FP_TO_UINT_IN_VSR: 1283 return "PPCISD::FP_TO_UINT_IN_VSR,"; 1284 case PPCISD::FP_TO_SINT_IN_VSR: 1285 return "PPCISD::FP_TO_SINT_IN_VSR"; 1286 case PPCISD::FRE: return "PPCISD::FRE"; 1287 case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE"; 1288 case PPCISD::STFIWX: return "PPCISD::STFIWX"; 1289 case PPCISD::VMADDFP: return "PPCISD::VMADDFP"; 1290 case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP"; 1291 case PPCISD::VPERM: return "PPCISD::VPERM"; 1292 case PPCISD::XXSPLT: return "PPCISD::XXSPLT"; 1293 case PPCISD::VECINSERT: return "PPCISD::VECINSERT"; 1294 case PPCISD::XXREVERSE: return 
"PPCISD::XXREVERSE"; 1295 case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI"; 1296 case PPCISD::VECSHL: return "PPCISD::VECSHL"; 1297 case PPCISD::CMPB: return "PPCISD::CMPB"; 1298 case PPCISD::Hi: return "PPCISD::Hi"; 1299 case PPCISD::Lo: return "PPCISD::Lo"; 1300 case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY"; 1301 case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8"; 1302 case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16"; 1303 case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC"; 1304 case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET"; 1305 case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg"; 1306 case PPCISD::SRL: return "PPCISD::SRL"; 1307 case PPCISD::SRA: return "PPCISD::SRA"; 1308 case PPCISD::SHL: return "PPCISD::SHL"; 1309 case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE"; 1310 case PPCISD::CALL: return "PPCISD::CALL"; 1311 case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP"; 1312 case PPCISD::MTCTR: return "PPCISD::MTCTR"; 1313 case PPCISD::BCTRL: return "PPCISD::BCTRL"; 1314 case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC"; 1315 case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG"; 1316 case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE"; 1317 case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP"; 1318 case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP"; 1319 case PPCISD::MFOCRF: return "PPCISD::MFOCRF"; 1320 case PPCISD::MFVSR: return "PPCISD::MFVSR"; 1321 case PPCISD::MTVSRA: return "PPCISD::MTVSRA"; 1322 case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ"; 1323 case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP"; 1324 case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP"; 1325 case PPCISD::ANDIo_1_EQ_BIT: return "PPCISD::ANDIo_1_EQ_BIT"; 1326 case PPCISD::ANDIo_1_GT_BIT: return "PPCISD::ANDIo_1_GT_BIT"; 1327 case PPCISD::VCMP: return "PPCISD::VCMP"; 1328 case PPCISD::VCMPo: return "PPCISD::VCMPo"; 1329 case PPCISD::LBRX: return "PPCISD::LBRX"; 1330 case 
PPCISD::STBRX: return "PPCISD::STBRX"; 1331 case PPCISD::LFIWAX: return "PPCISD::LFIWAX"; 1332 case PPCISD::LFIWZX: return "PPCISD::LFIWZX"; 1333 case PPCISD::LXSIZX: return "PPCISD::LXSIZX"; 1334 case PPCISD::STXSIX: return "PPCISD::STXSIX"; 1335 case PPCISD::VEXTS: return "PPCISD::VEXTS"; 1336 case PPCISD::SExtVElems: return "PPCISD::SExtVElems"; 1337 case PPCISD::LXVD2X: return "PPCISD::LXVD2X"; 1338 case PPCISD::STXVD2X: return "PPCISD::STXVD2X"; 1339 case PPCISD::ST_VSR_SCAL_INT: 1340 return "PPCISD::ST_VSR_SCAL_INT"; 1341 case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH"; 1342 case PPCISD::BDNZ: return "PPCISD::BDNZ"; 1343 case PPCISD::BDZ: return "PPCISD::BDZ"; 1344 case PPCISD::MFFS: return "PPCISD::MFFS"; 1345 case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ"; 1346 case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN"; 1347 case PPCISD::CR6SET: return "PPCISD::CR6SET"; 1348 case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET"; 1349 case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT"; 1350 case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT"; 1351 case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA"; 1352 case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L"; 1353 case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS"; 1354 case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA"; 1355 case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L"; 1356 case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR"; 1357 case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR"; 1358 case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA"; 1359 case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L"; 1360 case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR"; 1361 case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR"; 1362 case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA"; 1363 case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L"; 1364 case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT"; 1365 case 
PPCISD::SC: return "PPCISD::SC"; 1366 case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB"; 1367 case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE"; 1368 case PPCISD::RFEBB: return "PPCISD::RFEBB"; 1369 case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD"; 1370 case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN"; 1371 case PPCISD::VABSD: return "PPCISD::VABSD"; 1372 case PPCISD::QVFPERM: return "PPCISD::QVFPERM"; 1373 case PPCISD::QVGPCI: return "PPCISD::QVGPCI"; 1374 case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI"; 1375 case PPCISD::QVESPLATI: return "PPCISD::QVESPLATI"; 1376 case PPCISD::QBFLT: return "PPCISD::QBFLT"; 1377 case PPCISD::QVLFSb: return "PPCISD::QVLFSb"; 1378 case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128"; 1379 case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI"; 1380 } 1381 return nullptr; 1382 } 1383 1384 EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C, 1385 EVT VT) const { 1386 if (!VT.isVector()) 1387 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32; 1388 1389 if (Subtarget.hasQPX()) 1390 return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements()); 1391 1392 return VT.changeVectorElementTypeToInteger(); 1393 } 1394 1395 bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const { 1396 assert(VT.isFloatingPoint() && "Non-floating-point FMA?"); 1397 return true; 1398 } 1399 1400 //===----------------------------------------------------------------------===// 1401 // Node matching predicates, for use by the tblgen matching code. 1402 //===----------------------------------------------------------------------===// 1403 1404 /// isFloatingPointZero - Return true if this is 0.0 or -0.0. 1405 static bool isFloatingPointZero(SDValue Op) { 1406 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) 1407 return CFP->getValueAPF().isZero(); 1408 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { 1409 // Maybe this has already been legalized into the constant pool? 
1410 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1))) 1411 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) 1412 return CFP->getValueAPF().isZero(); 1413 } 1414 return false; 1415 } 1416 1417 /// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return 1418 /// true if Op is undef or if it matches the specified value. 1419 static bool isConstantOrUndef(int Op, int Val) { 1420 return Op < 0 || Op == Val; 1421 } 1422 1423 /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a 1424 /// VPKUHUM instruction. 1425 /// The ShuffleKind distinguishes between big-endian operations with 1426 /// two different inputs (0), either-endian operations with two identical 1427 /// inputs (1), and little-endian operations with two different inputs (2). 1428 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). 1429 bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, 1430 SelectionDAG &DAG) { 1431 bool IsLE = DAG.getDataLayout().isLittleEndian(); 1432 if (ShuffleKind == 0) { 1433 if (IsLE) 1434 return false; 1435 for (unsigned i = 0; i != 16; ++i) 1436 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1)) 1437 return false; 1438 } else if (ShuffleKind == 2) { 1439 if (!IsLE) 1440 return false; 1441 for (unsigned i = 0; i != 16; ++i) 1442 if (!isConstantOrUndef(N->getMaskElt(i), i*2)) 1443 return false; 1444 } else if (ShuffleKind == 1) { 1445 unsigned j = IsLE ? 0 : 1; 1446 for (unsigned i = 0; i != 8; ++i) 1447 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) || 1448 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)) 1449 return false; 1450 } 1451 return true; 1452 } 1453 1454 /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a 1455 /// VPKUWUM instruction. 
1456 /// The ShuffleKind distinguishes between big-endian operations with 1457 /// two different inputs (0), either-endian operations with two identical 1458 /// inputs (1), and little-endian operations with two different inputs (2). 1459 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). 1460 bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, 1461 SelectionDAG &DAG) { 1462 bool IsLE = DAG.getDataLayout().isLittleEndian(); 1463 if (ShuffleKind == 0) { 1464 if (IsLE) 1465 return false; 1466 for (unsigned i = 0; i != 16; i += 2) 1467 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) || 1468 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3)) 1469 return false; 1470 } else if (ShuffleKind == 2) { 1471 if (!IsLE) 1472 return false; 1473 for (unsigned i = 0; i != 16; i += 2) 1474 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) || 1475 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1)) 1476 return false; 1477 } else if (ShuffleKind == 1) { 1478 unsigned j = IsLE ? 0 : 2; 1479 for (unsigned i = 0; i != 8; i += 2) 1480 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) || 1481 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) || 1482 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) || 1483 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1)) 1484 return false; 1485 } 1486 return true; 1487 } 1488 1489 /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a 1490 /// VPKUDUM instruction, AND the VPKUDUM instruction exists for the 1491 /// current subtarget. 1492 /// 1493 /// The ShuffleKind distinguishes between big-endian operations with 1494 /// two different inputs (0), either-endian operations with two identical 1495 /// inputs (1), and little-endian operations with two different inputs (2). 1496 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). 
1497 bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, 1498 SelectionDAG &DAG) { 1499 const PPCSubtarget& Subtarget = 1500 static_cast<const PPCSubtarget&>(DAG.getSubtarget()); 1501 if (!Subtarget.hasP8Vector()) 1502 return false; 1503 1504 bool IsLE = DAG.getDataLayout().isLittleEndian(); 1505 if (ShuffleKind == 0) { 1506 if (IsLE) 1507 return false; 1508 for (unsigned i = 0; i != 16; i += 4) 1509 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) || 1510 !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) || 1511 !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) || 1512 !isConstantOrUndef(N->getMaskElt(i+3), i*2+7)) 1513 return false; 1514 } else if (ShuffleKind == 2) { 1515 if (!IsLE) 1516 return false; 1517 for (unsigned i = 0; i != 16; i += 4) 1518 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) || 1519 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) || 1520 !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) || 1521 !isConstantOrUndef(N->getMaskElt(i+3), i*2+3)) 1522 return false; 1523 } else if (ShuffleKind == 1) { 1524 unsigned j = IsLE ? 0 : 4; 1525 for (unsigned i = 0; i != 8; i += 4) 1526 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) || 1527 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) || 1528 !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) || 1529 !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) || 1530 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) || 1531 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) || 1532 !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) || 1533 !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3)) 1534 return false; 1535 } 1536 return true; 1537 } 1538 1539 /// isVMerge - Common function, used to match vmrg* shuffles. 
///
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  // A vmrg* result interleaves units from the two inputs: unit i of the
  // result alternates between LHSStart+i*UnitSize and RHSStart+i*UnitSize.
  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    // Under little-endian numbering the "low" half of the register is at
    // the start, so the merge start offsets are mirrored vs. big-endian.
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  }
}

/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    // Under little-endian numbering the "high" half of the register is at
    // bytes 8-15, so the merge start offsets are mirrored vs. big-endian.
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  }
}

/**
 * Common function used to match vmrgew and vmrgow shuffles
 *
 * The indexOffset determines whether to look for even or odd words in
 * the shuffle mask. This is based on the endianness of the target
 * machine.
 *   - Little Endian:
 *     - Use offset of 0 to check for odd elements
 *     - Use offset of 4 to check for even elements
 *   - Big Endian:
 *     - Use offset of 0 to check for even elements
 *     - Use offset of 4 to check for odd elements
 * A detailed description of the vector element ordering for little endian and
 * big endian can be found at
 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
 * Targeting your applications - what little endian and big endian IBM XL C/C++
 * compiler differences mean to you
 *
 * The mask to the shuffle vector instruction specifies the indices of the
 * elements from the two input vectors to place in the result. The elements are
 * numbered in array-access order, starting with the first vector. These vectors
 * are always of type v16i8, thus each vector will contain 16 elements of size
 * 8 bits. More info on the shuffle vector can be found in the
 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
 * Language Reference.
 *
 * The RHSStartValue indicates whether the same input vectors are used (unary)
 * or two different input vectors are used, based on the following:
 *   - If the instruction uses the same vector for both inputs, the range of the
 *     indices will be 0 to 15. In this case, the RHSStart value passed should
 *     be 0.
 *   - If the instruction has two different vectors then the range of the
 *     indices will be 0 to 31. In this case, the RHSStart value passed should
 *     be 16 (indices 0-15 specify elements in the first vector while indices 16
 *     to 31 specify elements in the second vector).
 *
 * \param[in] N The shuffle vector SD Node to analyze
 * \param[in] IndexOffset Specifies whether to look for even or odd elements
 * \param[in] RHSStartValue Specifies the starting index for the righthand input
 * vector to the shuffle_vector instruction
 * \return true iff this shuffle vector represents an even or odd word merge
 */
static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
                     unsigned RHSStartValue) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;

  // Each result half (bytes 0-7 and 8-15) interleaves one word from each
  // input: i selects the input vector, j steps over the bytes of the word.
  for (unsigned i = 0; i < 2; ++i)
    for (unsigned j = 0; j < 4; ++j)
      if (!isConstantOrUndef(N->getMaskElt(i*4+j),
                             i*RHSStartValue+j+IndexOffset) ||
          !isConstantOrUndef(N->getMaskElt(i*4+j+8),
                             i*RHSStartValue+j+IndexOffset+8))
        return false;
  return true;
}

/**
 * Determine if the specified shuffle mask is suitable for the vmrgew or
 * vmrgow instructions.
 *
 * \param[in] N The shuffle vector SD Node to analyze
 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
 * \param[in] ShuffleKind Identify the type of merge:
 *   - 0 = big-endian merge with two different inputs;
 *   - 1 = either-endian merge with two identical inputs;
 *   - 2 = little-endian merge with two different inputs (inputs are swapped for
 *     little-endian merges).
 * \param[in] DAG The current SelectionDAG
 * \return true iff this shuffle mask represents an even or odd word merge
 */
bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
                              unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    // Little-endian: even elements live at offset 4 within each doubleword
    // (see the even/odd offset table in the comment on isVMerge above).
    unsigned indexOffset = CheckEven ? 4 : 0;
    if (ShuffleKind == 1) // Unary
      return isVMerge(N, indexOffset, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, indexOffset, 16);
    else
      return false;
  }
  else {
    // Big-endian: even elements live at offset 0.
    unsigned indexOffset = CheckEven ? 0 : 4;
    if (ShuffleKind == 1) // Unary
      return isVMerge(N, indexOffset, 0);
    else if (ShuffleKind == 0) // Normal
      return isVMerge(N, indexOffset, 16);
    else
      return false;
  }
  return false;
}

/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
/// The ShuffleKind distinguishes between big-endian operations with two
/// different inputs (0), either-endian operations with two identical inputs
/// (1), and little-endian operations with two different inputs (2). For the
/// latter, the input operands are swapped (see PPCInstrAltivec.td).
int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
                             SelectionDAG &DAG) {
  if (N->getValueType(0) != MVT::v16i8)
    return -1;

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;

  ShiftAmt -= i;
  bool isLE = DAG.getDataLayout().isLittleEndian();

  if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
    // Two distinct inputs: the remaining elements must continue consecutively
    // into the second vector (indices may run up to 31), no wraparound.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
        return -1;
  } else if (ShuffleKind == 1) {
    // Unary: both inputs are the same vector, so indices wrap modulo 16.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
        return -1;
  } else
    return -1;

  // VSLDOI always shifts in big-endian element order; convert the amount
  // for little-endian targets (see PPCInstrAltivec.td).
  if (isLE)
    ShiftAmt = 16 - ShiftAmt;

  return ShiftAmt;
}

/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// VSPLTB/VSPLTH/VSPLTW.
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
  assert(N->getValueType(0) == MVT::v16i8 &&
         (EltSize == 1 || EltSize == 2 || EltSize == 4));

  // The consecutive indices need to specify an element, not part of two
  // different elements.  So abandon ship early if this isn't the case.
  if (N->getMaskElt(0) % EltSize != 0)
    return false;

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = N->getMaskElt(0);

  // FIXME: Handle UNDEF elements too!
  if (ElementBase >= 16)
    return false;

  // Check that the indices are consecutive, in the case of a multi-byte element
  // splatted with a v16i8 mask.
  if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
1794 static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width, 1795 int StepLen) { 1796 assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) && 1797 "Unexpected element width."); 1798 assert((StepLen == 1 || StepLen == -1) && "Unexpected element width."); 1799 1800 unsigned NumOfElem = 16 / Width; 1801 unsigned MaskVal[16]; // Width is never greater than 16 1802 for (unsigned i = 0; i < NumOfElem; ++i) { 1803 MaskVal[0] = N->getMaskElt(i * Width); 1804 if ((StepLen == 1) && (MaskVal[0] % Width)) { 1805 return false; 1806 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) { 1807 return false; 1808 } 1809 1810 for (unsigned int j = 1; j < Width; ++j) { 1811 MaskVal[j] = N->getMaskElt(i * Width + j); 1812 if (MaskVal[j] != MaskVal[j-1] + StepLen) { 1813 return false; 1814 } 1815 } 1816 } 1817 1818 return true; 1819 } 1820 1821 bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, 1822 unsigned &InsertAtByte, bool &Swap, bool IsLE) { 1823 if (!isNByteElemShuffleMask(N, 4, 1)) 1824 return false; 1825 1826 // Now we look at mask elements 0,4,8,12 1827 unsigned M0 = N->getMaskElt(0) / 4; 1828 unsigned M1 = N->getMaskElt(4) / 4; 1829 unsigned M2 = N->getMaskElt(8) / 4; 1830 unsigned M3 = N->getMaskElt(12) / 4; 1831 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 }; 1832 unsigned BigEndianShifts[] = { 3, 0, 1, 2 }; 1833 1834 // Below, let H and L be arbitrary elements of the shuffle mask 1835 // where H is in the range [4,7] and L is in the range [0,3]. 1836 // H, 1, 2, 3 or L, 5, 6, 7 1837 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) || 1838 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) { 1839 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3]; 1840 InsertAtByte = IsLE ? 12 : 0; 1841 Swap = M0 < 4; 1842 return true; 1843 } 1844 // 0, H, 2, 3 or 4, L, 6, 7 1845 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) || 1846 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) { 1847 ShiftElts = IsLE ? 
LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3]; 1848 InsertAtByte = IsLE ? 8 : 4; 1849 Swap = M1 < 4; 1850 return true; 1851 } 1852 // 0, 1, H, 3 or 4, 5, L, 7 1853 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) || 1854 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) { 1855 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3]; 1856 InsertAtByte = IsLE ? 4 : 8; 1857 Swap = M2 < 4; 1858 return true; 1859 } 1860 // 0, 1, 2, H or 4, 5, 6, L 1861 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) || 1862 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) { 1863 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3]; 1864 InsertAtByte = IsLE ? 0 : 12; 1865 Swap = M3 < 4; 1866 return true; 1867 } 1868 1869 // If both vector operands for the shuffle are the same vector, the mask will 1870 // contain only elements from the first one and the second one will be undef. 1871 if (N->getOperand(1).isUndef()) { 1872 ShiftElts = 0; 1873 Swap = true; 1874 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1; 1875 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) { 1876 InsertAtByte = IsLE ? 12 : 0; 1877 return true; 1878 } 1879 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) { 1880 InsertAtByte = IsLE ? 8 : 4; 1881 return true; 1882 } 1883 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) { 1884 InsertAtByte = IsLE ? 4 : 8; 1885 return true; 1886 } 1887 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) { 1888 InsertAtByte = IsLE ? 0 : 12; 1889 return true; 1890 } 1891 } 1892 1893 return false; 1894 } 1895 1896 bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, 1897 bool &Swap, bool IsLE) { 1898 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"); 1899 // Ensure each byte index of the word is consecutive. 1900 if (!isNByteElemShuffleMask(N, 4, 1)) 1901 return false; 1902 1903 // Now we look at mask elements 0,4,8,12, which are the beginning of words. 
1904 unsigned M0 = N->getMaskElt(0) / 4; 1905 unsigned M1 = N->getMaskElt(4) / 4; 1906 unsigned M2 = N->getMaskElt(8) / 4; 1907 unsigned M3 = N->getMaskElt(12) / 4; 1908 1909 // If both vector operands for the shuffle are the same vector, the mask will 1910 // contain only elements from the first one and the second one will be undef. 1911 if (N->getOperand(1).isUndef()) { 1912 assert(M0 < 4 && "Indexing into an undef vector?"); 1913 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4) 1914 return false; 1915 1916 ShiftElts = IsLE ? (4 - M0) % 4 : M0; 1917 Swap = false; 1918 return true; 1919 } 1920 1921 // Ensure each word index of the ShuffleVector Mask is consecutive. 1922 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8) 1923 return false; 1924 1925 if (IsLE) { 1926 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) { 1927 // Input vectors don't need to be swapped if the leading element 1928 // of the result is one of the 3 left elements of the second vector 1929 // (or if there is no shift to be done at all). 1930 Swap = false; 1931 ShiftElts = (8 - M0) % 8; 1932 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) { 1933 // Input vectors need to be swapped if the leading element 1934 // of the result is one of the 3 left elements of the first vector 1935 // (or if we're shifting by 4 - thereby simply swapping the vectors). 1936 Swap = true; 1937 ShiftElts = (4 - M0) % 4; 1938 } 1939 1940 return true; 1941 } else { // BE 1942 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) { 1943 // Input vectors don't need to be swapped if the leading element 1944 // of the result is one of the 4 elements of the first vector. 1945 Swap = false; 1946 ShiftElts = M0; 1947 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) { 1948 // Input vectors need to be swapped if the leading element 1949 // of the result is one of the 4 elements of the right vector. 
1950 Swap = true; 1951 ShiftElts = M0 - 4; 1952 } 1953 1954 return true; 1955 } 1956 } 1957 1958 bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) { 1959 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"); 1960 1961 if (!isNByteElemShuffleMask(N, Width, -1)) 1962 return false; 1963 1964 for (int i = 0; i < 16; i += Width) 1965 if (N->getMaskElt(i) != i + Width - 1) 1966 return false; 1967 1968 return true; 1969 } 1970 1971 bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) { 1972 return isXXBRShuffleMaskHelper(N, 2); 1973 } 1974 1975 bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) { 1976 return isXXBRShuffleMaskHelper(N, 4); 1977 } 1978 1979 bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) { 1980 return isXXBRShuffleMaskHelper(N, 8); 1981 } 1982 1983 bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) { 1984 return isXXBRShuffleMaskHelper(N, 16); 1985 } 1986 1987 /// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap 1988 /// if the inputs to the instruction should be swapped and set \p DM to the 1989 /// value for the immediate. 1990 /// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI 1991 /// AND element 0 of the result comes from the first input (LE) or second input 1992 /// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered. 1993 /// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle 1994 /// mask. 1995 bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM, 1996 bool &Swap, bool IsLE) { 1997 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"); 1998 1999 // Ensure each byte index of the double word is consecutive. 
2000 if (!isNByteElemShuffleMask(N, 8, 1)) 2001 return false; 2002 2003 unsigned M0 = N->getMaskElt(0) / 8; 2004 unsigned M1 = N->getMaskElt(8) / 8; 2005 assert(((M0 | M1) < 4) && "A mask element out of bounds?"); 2006 2007 // If both vector operands for the shuffle are the same vector, the mask will 2008 // contain only elements from the first one and the second one will be undef. 2009 if (N->getOperand(1).isUndef()) { 2010 if ((M0 | M1) < 2) { 2011 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1); 2012 Swap = false; 2013 return true; 2014 } else 2015 return false; 2016 } 2017 2018 if (IsLE) { 2019 if (M0 > 1 && M1 < 2) { 2020 Swap = false; 2021 } else if (M0 < 2 && M1 > 1) { 2022 M0 = (M0 + 2) % 4; 2023 M1 = (M1 + 2) % 4; 2024 Swap = true; 2025 } else 2026 return false; 2027 2028 // Note: if control flow comes here that means Swap is already set above 2029 DM = (((~M1) & 1) << 1) + ((~M0) & 1); 2030 return true; 2031 } else { // BE 2032 if (M0 < 2 && M1 > 1) { 2033 Swap = false; 2034 } else if (M0 > 1 && M1 < 2) { 2035 M0 = (M0 + 2) % 4; 2036 M1 = (M1 + 2) % 4; 2037 Swap = true; 2038 } else 2039 return false; 2040 2041 // Note: if control flow comes here that means Swap is already set above 2042 DM = (M0 << 1) + (M1 & 1); 2043 return true; 2044 } 2045 } 2046 2047 2048 /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the 2049 /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. 
2050 unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize, 2051 SelectionDAG &DAG) { 2052 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); 2053 assert(isSplatShuffleMask(SVOp, EltSize)); 2054 if (DAG.getDataLayout().isLittleEndian()) 2055 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize); 2056 else 2057 return SVOp->getMaskElt(0) / EltSize; 2058 } 2059 2060 /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed 2061 /// by using a vspltis[bhw] instruction of the specified element size, return 2062 /// the constant being splatted. The ByteSize field indicates the number of 2063 /// bytes of each element [124] -> [bhw]. 2064 SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { 2065 SDValue OpVal(nullptr, 0); 2066 2067 // If ByteSize of the splat is bigger than the element size of the 2068 // build_vector, then we have a case where we are checking for a splat where 2069 // multiple elements of the buildvector are folded together into a single 2070 // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8). 2071 unsigned EltSize = 16/N->getNumOperands(); 2072 if (EltSize < ByteSize) { 2073 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval. 2074 SDValue UniquedVals[4]; 2075 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?"); 2076 2077 // See if all of the elements in the buildvector agree across. 2078 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 2079 if (N->getOperand(i).isUndef()) continue; 2080 // If the element isn't a constant, bail fully out. 2081 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue(); 2082 2083 if (!UniquedVals[i&(Multiple-1)].getNode()) 2084 UniquedVals[i&(Multiple-1)] = N->getOperand(i); 2085 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i)) 2086 return SDValue(); // no match. 
2087 } 2088 2089 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains 2090 // either constant or undef values that are identical for each chunk. See 2091 // if these chunks can form into a larger vspltis*. 2092 2093 // Check to see if all of the leading entries are either 0 or -1. If 2094 // neither, then this won't fit into the immediate field. 2095 bool LeadingZero = true; 2096 bool LeadingOnes = true; 2097 for (unsigned i = 0; i != Multiple-1; ++i) { 2098 if (!UniquedVals[i].getNode()) continue; // Must have been undefs. 2099 2100 LeadingZero &= isNullConstant(UniquedVals[i]); 2101 LeadingOnes &= isAllOnesConstant(UniquedVals[i]); 2102 } 2103 // Finally, check the least significant entry. 2104 if (LeadingZero) { 2105 if (!UniquedVals[Multiple-1].getNode()) 2106 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef 2107 int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue(); 2108 if (Val < 16) // 0,0,0,4 -> vspltisw(4) 2109 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32); 2110 } 2111 if (LeadingOnes) { 2112 if (!UniquedVals[Multiple-1].getNode()) 2113 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef 2114 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue(); 2115 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2) 2116 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32); 2117 } 2118 2119 return SDValue(); 2120 } 2121 2122 // Check to see if this buildvec has a single non-undef value in its elements. 2123 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 2124 if (N->getOperand(i).isUndef()) continue; 2125 if (!OpVal.getNode()) 2126 OpVal = N->getOperand(i); 2127 else if (OpVal != N->getOperand(i)) 2128 return SDValue(); 2129 } 2130 2131 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def. 
2132 2133 unsigned ValSizeInBytes = EltSize; 2134 uint64_t Value = 0; 2135 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) { 2136 Value = CN->getZExtValue(); 2137 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) { 2138 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!"); 2139 Value = FloatToBits(CN->getValueAPF().convertToFloat()); 2140 } 2141 2142 // If the splat value is larger than the element value, then we can never do 2143 // this splat. The only case that we could fit the replicated bits into our 2144 // immediate field for would be zero, and we prefer to use vxor for it. 2145 if (ValSizeInBytes < ByteSize) return SDValue(); 2146 2147 // If the element value is larger than the splat value, check if it consists 2148 // of a repeated bit pattern of size ByteSize. 2149 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8)) 2150 return SDValue(); 2151 2152 // Properly sign extend the value. 2153 int MaskVal = SignExtend32(Value, ByteSize * 8); 2154 2155 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros. 2156 if (MaskVal == 0) return SDValue(); 2157 2158 // Finally, if this value fits in a 5 bit sext field, return it 2159 if (SignExtend32<5>(MaskVal) == MaskVal) 2160 return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32); 2161 return SDValue(); 2162 } 2163 2164 /// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift 2165 /// amount, otherwise return -1. 2166 int PPC::isQVALIGNIShuffleMask(SDNode *N) { 2167 EVT VT = N->getValueType(0); 2168 if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1) 2169 return -1; 2170 2171 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); 2172 2173 // Find the first non-undef value in the shuffle mask. 2174 unsigned i; 2175 for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i) 2176 /*search*/; 2177 2178 if (i == 4) return -1; // all undef. 
2179 2180 // Otherwise, check to see if the rest of the elements are consecutively 2181 // numbered from this value. 2182 unsigned ShiftAmt = SVOp->getMaskElt(i); 2183 if (ShiftAmt < i) return -1; 2184 ShiftAmt -= i; 2185 2186 // Check the rest of the elements to see if they are consecutive. 2187 for (++i; i != 4; ++i) 2188 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i)) 2189 return -1; 2190 2191 return ShiftAmt; 2192 } 2193 2194 //===----------------------------------------------------------------------===// 2195 // Addressing Mode Selection 2196 //===----------------------------------------------------------------------===// 2197 2198 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit 2199 /// or 64-bit immediate, and if the value can be accurately represented as a 2200 /// sign extension from a 16-bit value. If so, this returns true and the 2201 /// immediate. 2202 bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) { 2203 if (!isa<ConstantSDNode>(N)) 2204 return false; 2205 2206 Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue(); 2207 if (N->getValueType(0) == MVT::i32) 2208 return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue(); 2209 else 2210 return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue(); 2211 } 2212 bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) { 2213 return isIntS16Immediate(Op.getNode(), Imm); 2214 } 2215 2216 /// SelectAddressRegReg - Given the specified addressed, check to see if it 2217 /// can be represented as an indexed [r+r] operation. Returns false if it 2218 /// can be more efficiently represented with [r+imm]. 
2219 bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base, 2220 SDValue &Index, 2221 SelectionDAG &DAG) const { 2222 int16_t imm = 0; 2223 if (N.getOpcode() == ISD::ADD) { 2224 if (isIntS16Immediate(N.getOperand(1), imm)) 2225 return false; // r+i 2226 if (N.getOperand(1).getOpcode() == PPCISD::Lo) 2227 return false; // r+i 2228 2229 Base = N.getOperand(0); 2230 Index = N.getOperand(1); 2231 return true; 2232 } else if (N.getOpcode() == ISD::OR) { 2233 if (isIntS16Immediate(N.getOperand(1), imm)) 2234 return false; // r+i can fold it if we can. 2235 2236 // If this is an or of disjoint bitfields, we can codegen this as an add 2237 // (for better address arithmetic) if the LHS and RHS of the OR are provably 2238 // disjoint. 2239 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0)); 2240 2241 if (LHSKnown.Zero.getBoolValue()) { 2242 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1)); 2243 // If all of the bits are known zero on the LHS or RHS, the add won't 2244 // carry. 2245 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) { 2246 Base = N.getOperand(0); 2247 Index = N.getOperand(1); 2248 return true; 2249 } 2250 } 2251 } 2252 2253 return false; 2254 } 2255 2256 // If we happen to be doing an i64 load or store into a stack slot that has 2257 // less than a 4-byte alignment, then the frame-index elimination may need to 2258 // use an indexed load or store instruction (because the offset may not be a 2259 // multiple of 4). The extra register needed to hold the offset comes from the 2260 // register scavenger, and it is possible that the scavenger will need to use 2261 // an emergency spill slot. As a result, we need to make sure that a spill slot 2262 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned 2263 // stack slot. 2264 static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) { 2265 // FIXME: This does not handle the LWA case. 
  // This fixup only applies to i64 memory accesses.
  if (VT != MVT::i64)
    return;

  // NOTE: We'll exclude negative FIs here, which come from argument
  // lowering, because there are no known test cases triggering this problem
  // using packed structures (or similar). We can remove this exclusion if
  // we find such a test case. The reason why this is so test-case driven is
  // because this entire 'fixup' is only to prevent crashes (from the
  // register scavenger) on not-really-valid inputs. For example, if we have:
  //   %a = alloca i1
  //   %b = bitcast i1* %a to i64*
  //   store i64* a, i64 b
  // then the store should really be marked as 'align 1', but is not. If it
  // were marked as 'align 1' then the indexed form would have been
  // instruction-selected initially, and the problem this 'fixup' is preventing
  // won't happen regardless.
  if (FrameIdx < 0)
    return;

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // Objects aligned to at least 4 bytes never need this fixup.
  unsigned Align = MFI.getObjectAlignment(FrameIdx);
  if (Align >= 4)
    return;

  // Record on the function info that an under-aligned i64 frame access
  // exists, so a non-r+i spill form may be required later.
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setHasNonRISpills();
}

/// Returns true if the address N can be represented by a base register plus
/// a signed 16-bit displacement [r+imm], and if it is not better
/// represented as reg+reg. If \p Alignment is non-zero, only accept
/// displacements that are multiples of that value. On success \p Disp and
/// \p Base receive the displacement and base operands to use.
bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
                                            SDValue &Base,
                                            SelectionDAG &DAG,
                                            unsigned Alignment) const {
  // FIXME dl should come from parent load or store, not from address
  SDLoc dl(N);
  // If this can be more profitably realized as r+r, fail.
  if (SelectAddressRegReg(N, Disp, Base, DAG))
    return false;

  if (N.getOpcode() == ISD::ADD) {
    int16_t imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!Alignment || (imm % Alignment) == 0)) {
      // base + signed 16-bit immediate. Prefer a target frame index for the
      // base when one is available (and run the alignment fixup on it).
      Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
        fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
      } else {
        Base = N.getOperand(0);
      }
      return true; // [r+i]
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      // Match LOAD (ADD (X, Lo(G))).
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
             && "Cannot handle constant offsets yet!");
      Disp = N.getOperand(1).getOperand(0);  // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(0);
      return true; // [&g+r]
    }
  } else if (N.getOpcode() == ISD::OR) {
    int16_t imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!Alignment || (imm % Alignment) == 0)) {
      // If this is an or of disjoint bitfields, we can codegen this as an add
      // (for better address arithmetic) if the LHS and RHS of the OR are
      // provably disjoint.
      KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));

      if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
        // If all of the bits are known zero on the LHS or RHS, the add won't
        // carry.
        if (FrameIndexSDNode *FI =
              dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
          Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
          fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
        } else {
          Base = N.getOperand(0);
        }
        Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
        return true;
      }
    }
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    // Loading from a constant address.

    // If this address fits entirely in a 16-bit sext immediate field, codegen
    // this as "d, 0" (the zero register supplies the base).
    int16_t Imm;
    if (isIntS16Immediate(CN, Imm) && (!Alignment || (Imm % Alignment) == 0)) {
      Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
      Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                             CN->getValueType(0));
      return true;
    }

    // Handle 32-bit sext immediates with LIS + addr mode.
    if ((CN->getValueType(0) == MVT::i32 ||
         (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
        (!Alignment || (CN->getZExtValue() % Alignment) == 0)) {
      int Addr = (int)CN->getZExtValue();

      // Otherwise, break this down into an LIS + disp. The low 16 bits go
      // in the displacement; the sign-adjusted high bits are materialized
      // by LIS/LIS8 into the base register.
      Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);

      Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
                                   MVT::i32);
      unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
      Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
      return true;
    }
  }

  // Fall back to [r+0]: the whole address is the base, displacement zero.
  Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
    fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
  } else
    Base = N;
  return true; // [r+0]
}

/// SelectAddressRegRegOnly - Given the specified addressed, force it to be
/// represented as an indexed [r+r] operation. Always succeeds; \p Base and
/// \p Index receive the two register operands.
bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
                                                SDValue &Index,
                                                SelectionDAG &DAG) const {
  // Check to see if we can easily represent this as an [r+r] address. This
  // will fail if it thinks that the address is more profitably represented as
  // reg+imm, e.g. where imm = 0.
  if (SelectAddressRegReg(N, Base, Index, DAG))
    return true;

  // If the address is the result of an add, we will utilize the fact that the
  // address calculation includes an implicit add. However, we can reduce
  // register pressure if we do not materialize a constant just for use as the
  // index register. We only get rid of the add if it is not an add of a
  // value and a 16-bit signed constant and both have a single use.
  int16_t imm = 0;
  if (N.getOpcode() == ISD::ADD &&
      (!isIntS16Immediate(N.getOperand(1), imm) ||
       !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  }

  // Otherwise, do it the hard way, using R0 as the base register.
  Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                         N.getValueType());
  Index = N;
  return true;
}

/// Returns true if we should use a direct load into vector instruction
/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
static bool usePartialVectorLoads(SDNode *N) {
  // Only a load with a single user can be folded this way.
  if (!N->hasOneUse())
    return false;

  // If there are any other uses other than scalar to vector, then we should
  // keep it as a scalar load -> direct move pattern to prevent multiple
  // loads. Currently, only check for i64 since we have lxsd/lfd to do this
  // efficiently, but no update equivalent.
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    EVT MemVT = LD->getMemoryVT();
    if (MemVT.isSimple() && MemVT.getSimpleVT().SimpleTy == MVT::i64) {
      SDNode *User = *(LD->use_begin());
      if (User->getOpcode() == ISD::SCALAR_TO_VECTOR)
        return true;
    }
  }

  return false;
}

/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                                  SDValue &Offset,
                                                  ISD::MemIndexedMode &AM,
                                                  SelectionDAG &DAG) const {
  if (DisablePPCPreinc) return false;

  // Extract the pointer, memory type and alignment from the load or store;
  // any other node kind cannot be pre-incremented.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  unsigned Alignment;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    Ptr = LD->getBasePtr();
    VT = LD->getMemoryVT();
    Alignment = LD->getAlignment();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    Ptr = ST->getBasePtr();
    VT = ST->getMemoryVT();
    Alignment = ST->getAlignment();
    isLoad = false;
  } else
    return false;

  // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
  // instructions because we can fold these into a more efficient instruction
  // instead, (such as LXSD).
  if (isLoad && usePartialVectorLoads(N)) {
    return false;
  }

  // PowerPC doesn't have preinc load/store instructions for vectors (except
  // for QPX, which does have preinc r+r forms).
  if (VT.isVector()) {
    if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) {
      return false;
    } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) {
      AM = ISD::PRE_INC;
      return true;
    }
  }

  if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
    // Common code will reject creating a pre-inc form if the base pointer
    // is a frame index, or if N is a store and the base pointer is either
    // the same as or a predecessor of the value being stored. Check for
    // those situations here, and try with swapped Base/Offset instead.
    bool Swap = false;

    if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
      Swap = true;
    else if (!isLoad) {
      SDValue Val = cast<StoreSDNode>(N)->getValue();
      if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
        Swap = true;
    }

    if (Swap)
      std::swap(Base, Offset);

    AM = ISD::PRE_INC;
    return true;
  }

  // LDU/STU can only handle immediates that are a multiple of 4.
  if (VT != MVT::i64) {
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 0))
      return false;
  } else {
    // LDU/STU need an address with at least 4-byte alignment.
    if (Alignment < 4)
      return false;

    // For i64, only accept displacements that are multiples of 4.
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 4))
      return false;
  }

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
    // sext i32 to i64 when addr mode is r+i.
    if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
        LD->getExtensionType() == ISD::SEXTLOAD &&
        isa<ConstantSDNode>(Offset))
      return false;
  }

  AM = ISD::PRE_INC;
  return true;
}

//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//

/// Return true if we should reference labels using a PICBase, set the HiOpFlags
/// and LoOpFlags to the target MO flags.
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
                               unsigned &HiOpFlags, unsigned &LoOpFlags,
                               const GlobalValue *GV = nullptr) {
  HiOpFlags = PPCII::MO_HA;
  LoOpFlags = PPCII::MO_LO;

  // Don't use the pic base if not in PIC relocation model.
  if (IsPIC) {
    HiOpFlags |= PPCII::MO_PIC_FLAG;
    LoOpFlags |= PPCII::MO_PIC_FLAG;
  }

  // If this is a reference to a global value that requires a non-lazy-ptr, make
  // sure that instruction lowering adds it.
  if (GV && Subtarget.hasLazyResolverStub(GV)) {
    HiOpFlags |= PPCII::MO_NLP_FLAG;
    LoOpFlags |= PPCII::MO_NLP_FLAG;

    if (GV->hasHiddenVisibility()) {
      HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
      LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
    }
  }
}

/// Combine a high-part and low-part reference into a full address:
/// (hi(sym) + lo(sym)), adding the PIC base register first when in PIC mode.
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
                             SelectionDAG &DAG) {
  SDLoc DL(HiPart);
  EVT PtrVT = HiPart.getValueType();
  SDValue Zero = DAG.getConstant(0, DL, PtrVT);

  SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
  SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);

  // With PIC, the first instruction is actually "GR+hi(&G)".
  if (isPIC)
    Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
                     DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);

  // Generate non-pic code that has direct accesses to the constant pool.
  // The address of the global is just (hi(&g)+lo(&g)).
  return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
}

/// Mark the function as using the TOC base pointer register.
static void setUsesTOCBasePtr(MachineFunction &MF) {
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setUsesTOCBasePtr();
}

/// Convenience overload operating on the DAG's machine function.
static void setUsesTOCBasePtr(SelectionDAG &DAG) {
  setUsesTOCBasePtr(DAG.getMachineFunction());
}

/// Build a TOC_ENTRY node that loads the address for \p GA out of the TOC,
/// using X2 as the TOC pointer on 64-bit targets and the global base
/// register on 32-bit targets.
static SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, bool Is64Bit,
                           SDValue GA) {
  EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
  SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) :
                DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);

  SDValue Ops[] = { GA, Reg };
  return DAG.getMemIntrinsicNode(
      PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
      MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0,
      MachineMemOperand::MOLoad);
}

/// Lower a constant-pool reference to either a TOC load (64-bit/PIC SVR4)
/// or a hi/lo pair assembled by LowerLabelRef.
SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
                                             SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  const Constant *C = CP->getConstVal();

  // 64-bit SVR4 ABI code is always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
    return getTOCEntry(DAG, SDLoc(CP), true, GA);
  }

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);

  if (IsPIC && Subtarget.isSVR4ABI()) {
    SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
                                           PPCII::MO_PIC_FLAG);
    return getTOCEntry(DAG, SDLoc(CP), false, GA);
  }

  SDValue CPIHi =
    DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
  SDValue CPILo =
    DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
  return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
}

// For 64-bit PowerPC, prefer the more compact relative encodings.
// This trades 32 bits per jump table entry for one or two instructions
// on the jump site.
2647 unsigned PPCTargetLowering::getJumpTableEncoding() const { 2648 if (isJumpTableRelative()) 2649 return MachineJumpTableInfo::EK_LabelDifference32; 2650 2651 return TargetLowering::getJumpTableEncoding(); 2652 } 2653 2654 bool PPCTargetLowering::isJumpTableRelative() const { 2655 if (Subtarget.isPPC64()) 2656 return true; 2657 return TargetLowering::isJumpTableRelative(); 2658 } 2659 2660 SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table, 2661 SelectionDAG &DAG) const { 2662 if (!Subtarget.isPPC64()) 2663 return TargetLowering::getPICJumpTableRelocBase(Table, DAG); 2664 2665 switch (getTargetMachine().getCodeModel()) { 2666 case CodeModel::Small: 2667 case CodeModel::Medium: 2668 return TargetLowering::getPICJumpTableRelocBase(Table, DAG); 2669 default: 2670 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(), 2671 getPointerTy(DAG.getDataLayout())); 2672 } 2673 } 2674 2675 const MCExpr * 2676 PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF, 2677 unsigned JTI, 2678 MCContext &Ctx) const { 2679 if (!Subtarget.isPPC64()) 2680 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx); 2681 2682 switch (getTargetMachine().getCodeModel()) { 2683 case CodeModel::Small: 2684 case CodeModel::Medium: 2685 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx); 2686 default: 2687 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx); 2688 } 2689 } 2690 2691 SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { 2692 EVT PtrVT = Op.getValueType(); 2693 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); 2694 2695 // 64-bit SVR4 ABI code is always position-independent. 2696 // The actual address of the GlobalValue is stored in the TOC. 
2697 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { 2698 setUsesTOCBasePtr(DAG); 2699 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); 2700 return getTOCEntry(DAG, SDLoc(JT), true, GA); 2701 } 2702 2703 unsigned MOHiFlag, MOLoFlag; 2704 bool IsPIC = isPositionIndependent(); 2705 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag); 2706 2707 if (IsPIC && Subtarget.isSVR4ABI()) { 2708 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, 2709 PPCII::MO_PIC_FLAG); 2710 return getTOCEntry(DAG, SDLoc(GA), false, GA); 2711 } 2712 2713 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag); 2714 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag); 2715 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG); 2716 } 2717 2718 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, 2719 SelectionDAG &DAG) const { 2720 EVT PtrVT = Op.getValueType(); 2721 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op); 2722 const BlockAddress *BA = BASDN->getBlockAddress(); 2723 2724 // 64-bit SVR4 ABI code is always position-independent. 2725 // The actual BlockAddress is stored in the TOC. 
2726 if (Subtarget.isSVR4ABI() && 2727 (Subtarget.isPPC64() || isPositionIndependent())) { 2728 if (Subtarget.isPPC64()) 2729 setUsesTOCBasePtr(DAG); 2730 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()); 2731 return getTOCEntry(DAG, SDLoc(BASDN), Subtarget.isPPC64(), GA); 2732 } 2733 2734 unsigned MOHiFlag, MOLoFlag; 2735 bool IsPIC = isPositionIndependent(); 2736 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag); 2737 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag); 2738 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag); 2739 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG); 2740 } 2741 2742 SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, 2743 SelectionDAG &DAG) const { 2744 // FIXME: TLS addresses currently use medium model code sequences, 2745 // which is the most useful form. Eventually support for small and 2746 // large models could be added if users need it, at the cost of 2747 // additional complexity. 2748 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); 2749 if (DAG.getTarget().useEmulatedTLS()) 2750 return LowerToTLSEmulatedModel(GA, DAG); 2751 2752 SDLoc dl(GA); 2753 const GlobalValue *GV = GA->getGlobal(); 2754 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2755 bool is64bit = Subtarget.isPPC64(); 2756 const Module *M = DAG.getMachineFunction().getFunction().getParent(); 2757 PICLevel::Level picLevel = M->getPICLevel(); 2758 2759 TLSModel::Model Model = getTargetMachine().getTLSModel(GV); 2760 2761 if (Model == TLSModel::LocalExec) { 2762 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 2763 PPCII::MO_TPREL_HA); 2764 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 2765 PPCII::MO_TPREL_LO); 2766 SDValue TLSReg = is64bit ? 
DAG.getRegister(PPC::X13, MVT::i64) 2767 : DAG.getRegister(PPC::R2, MVT::i32); 2768 2769 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg); 2770 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi); 2771 } 2772 2773 if (Model == TLSModel::InitialExec) { 2774 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); 2775 SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 2776 PPCII::MO_TLS); 2777 SDValue GOTPtr; 2778 if (is64bit) { 2779 setUsesTOCBasePtr(DAG); 2780 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); 2781 GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, 2782 PtrVT, GOTReg, TGA); 2783 } else 2784 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT); 2785 SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, 2786 PtrVT, TGA, GOTPtr); 2787 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS); 2788 } 2789 2790 if (Model == TLSModel::GeneralDynamic) { 2791 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); 2792 SDValue GOTPtr; 2793 if (is64bit) { 2794 setUsesTOCBasePtr(DAG); 2795 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); 2796 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT, 2797 GOTReg, TGA); 2798 } else { 2799 if (picLevel == PICLevel::SmallPIC) 2800 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT); 2801 else 2802 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); 2803 } 2804 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT, 2805 GOTPtr, TGA, TGA); 2806 } 2807 2808 if (Model == TLSModel::LocalDynamic) { 2809 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); 2810 SDValue GOTPtr; 2811 if (is64bit) { 2812 setUsesTOCBasePtr(DAG); 2813 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); 2814 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT, 2815 GOTReg, TGA); 2816 } else { 2817 if (picLevel == PICLevel::SmallPIC) 2818 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT); 2819 else 2820 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); 
2821 } 2822 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl, 2823 PtrVT, GOTPtr, TGA, TGA); 2824 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, 2825 PtrVT, TLSAddr, TGA); 2826 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA); 2827 } 2828 2829 llvm_unreachable("Unknown TLS model!"); 2830 } 2831 2832 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, 2833 SelectionDAG &DAG) const { 2834 EVT PtrVT = Op.getValueType(); 2835 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); 2836 SDLoc DL(GSDN); 2837 const GlobalValue *GV = GSDN->getGlobal(); 2838 2839 // 64-bit SVR4 ABI code is always position-independent. 2840 // The actual address of the GlobalValue is stored in the TOC. 2841 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { 2842 setUsesTOCBasePtr(DAG); 2843 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset()); 2844 return getTOCEntry(DAG, DL, true, GA); 2845 } 2846 2847 unsigned MOHiFlag, MOLoFlag; 2848 bool IsPIC = isPositionIndependent(); 2849 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV); 2850 2851 if (IsPIC && Subtarget.isSVR4ABI()) { 2852 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 2853 GSDN->getOffset(), 2854 PPCII::MO_PIC_FLAG); 2855 return getTOCEntry(DAG, DL, false, GA); 2856 } 2857 2858 SDValue GAHi = 2859 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag); 2860 SDValue GALo = 2861 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag); 2862 2863 SDValue Ptr = LowerLabelRef(GAHi, GALo, IsPIC, DAG); 2864 2865 // If the global reference is actually to a non-lazy-pointer, we have to do an 2866 // extra load to get the address of the global. 
2867 if (MOHiFlag & PPCII::MO_NLP_FLAG) 2868 Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo()); 2869 return Ptr; 2870 } 2871 2872 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { 2873 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); 2874 SDLoc dl(Op); 2875 2876 if (Op.getValueType() == MVT::v2i64) { 2877 // When the operands themselves are v2i64 values, we need to do something 2878 // special because VSX has no underlying comparison operations for these. 2879 if (Op.getOperand(0).getValueType() == MVT::v2i64) { 2880 // Equality can be handled by casting to the legal type for Altivec 2881 // comparisons, everything else needs to be expanded. 2882 if (CC == ISD::SETEQ || CC == ISD::SETNE) { 2883 return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, 2884 DAG.getSetCC(dl, MVT::v4i32, 2885 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)), 2886 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)), 2887 CC)); 2888 } 2889 2890 return SDValue(); 2891 } 2892 2893 // We handle most of these in the usual way. 2894 return Op; 2895 } 2896 2897 // If we're comparing for equality to zero, expose the fact that this is 2898 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can 2899 // fold the new nodes. 2900 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG)) 2901 return V; 2902 2903 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 2904 // Leave comparisons against 0 and -1 alone for now, since they're usually 2905 // optimized. FIXME: revisit this when we can custom lower all setcc 2906 // optimizations. 2907 if (C->isAllOnesValue() || C->isNullValue()) 2908 return SDValue(); 2909 } 2910 2911 // If we have an integer seteq/setne, turn it into a compare against zero 2912 // by xor'ing the rhs with the lhs, which is faster than setting a 2913 // condition register, reading it back out, and masking the correct bit. 
The 2914 // normal approach here uses sub to do this instead of xor. Using xor exposes 2915 // the result to other bit-twiddling opportunities. 2916 EVT LHSVT = Op.getOperand(0).getValueType(); 2917 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { 2918 EVT VT = Op.getValueType(); 2919 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0), 2920 Op.getOperand(1)); 2921 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC); 2922 } 2923 return SDValue(); 2924 } 2925 2926 SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { 2927 SDNode *Node = Op.getNode(); 2928 EVT VT = Node->getValueType(0); 2929 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2930 SDValue InChain = Node->getOperand(0); 2931 SDValue VAListPtr = Node->getOperand(1); 2932 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); 2933 SDLoc dl(Node); 2934 2935 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only"); 2936 2937 // gpr_index 2938 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, 2939 VAListPtr, MachinePointerInfo(SV), MVT::i8); 2940 InChain = GprIndex.getValue(1); 2941 2942 if (VT == MVT::i64) { 2943 // Check if GprIndex is even 2944 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex, 2945 DAG.getConstant(1, dl, MVT::i32)); 2946 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd, 2947 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE); 2948 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex, 2949 DAG.getConstant(1, dl, MVT::i32)); 2950 // Align GprIndex to be even if it isn't 2951 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne, 2952 GprIndex); 2953 } 2954 2955 // fpr index is 1 byte after gpr 2956 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, 2957 DAG.getConstant(1, dl, MVT::i32)); 2958 2959 // fpr 2960 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, 2961 FprPtr, MachinePointerInfo(SV), MVT::i8); 2962 
InChain = FprIndex.getValue(1); 2963 2964 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, 2965 DAG.getConstant(8, dl, MVT::i32)); 2966 2967 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, 2968 DAG.getConstant(4, dl, MVT::i32)); 2969 2970 // areas 2971 SDValue OverflowArea = 2972 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo()); 2973 InChain = OverflowArea.getValue(1); 2974 2975 SDValue RegSaveArea = 2976 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo()); 2977 InChain = RegSaveArea.getValue(1); 2978 2979 // select overflow_area if index > 8 2980 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex, 2981 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT); 2982 2983 // adjustment constant gpr_index * 4/8 2984 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32, 2985 VT.isInteger() ? GprIndex : FprIndex, 2986 DAG.getConstant(VT.isInteger() ? 4 : 8, dl, 2987 MVT::i32)); 2988 2989 // OurReg = RegSaveArea + RegConstant 2990 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea, 2991 RegConstant); 2992 2993 // Floating types are 32 bytes into RegSaveArea 2994 if (VT.isFloatingPoint()) 2995 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg, 2996 DAG.getConstant(32, dl, MVT::i32)); 2997 2998 // increase {f,g}pr_index by 1 (or 2 if VT is i64) 2999 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32, 3000 VT.isInteger() ? GprIndex : FprIndex, 3001 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl, 3002 MVT::i32)); 3003 3004 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1, 3005 VT.isInteger() ? 
VAListPtr : FprPtr, 3006 MachinePointerInfo(SV), MVT::i8); 3007 3008 // determine if we should load from reg_save_area or overflow_area 3009 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea); 3010 3011 // increase overflow_area by 4/8 if gpr/fpr > 8 3012 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea, 3013 DAG.getConstant(VT.isInteger() ? 4 : 8, 3014 dl, MVT::i32)); 3015 3016 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea, 3017 OverflowAreaPlusN); 3018 3019 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr, 3020 MachinePointerInfo(), MVT::i32); 3021 3022 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo()); 3023 } 3024 3025 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { 3026 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only"); 3027 3028 // We have to copy the entire va_list struct: 3029 // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte 3030 return DAG.getMemcpy(Op.getOperand(0), Op, 3031 Op.getOperand(1), Op.getOperand(2), 3032 DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, false, true, 3033 false, MachinePointerInfo(), MachinePointerInfo()); 3034 } 3035 3036 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, 3037 SelectionDAG &DAG) const { 3038 return Op.getOperand(0); 3039 } 3040 3041 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, 3042 SelectionDAG &DAG) const { 3043 SDValue Chain = Op.getOperand(0); 3044 SDValue Trmp = Op.getOperand(1); // trampoline 3045 SDValue FPtr = Op.getOperand(2); // nested function 3046 SDValue Nest = Op.getOperand(3); // 'nest' parameter value 3047 SDLoc dl(Op); 3048 3049 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3050 bool isPPC64 = (PtrVT == MVT::i64); 3051 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); 3052 3053 TargetLowering::ArgListTy Args; 3054 TargetLowering::ArgListEntry Entry; 3055 3056 Entry.Ty = IntPtrTy; 3057 
Entry.Node = Trmp; Args.push_back(Entry); 3058 3059 // TrampSize == (isPPC64 ? 48 : 40); 3060 Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl, 3061 isPPC64 ? MVT::i64 : MVT::i32); 3062 Args.push_back(Entry); 3063 3064 Entry.Node = FPtr; Args.push_back(Entry); 3065 Entry.Node = Nest; Args.push_back(Entry); 3066 3067 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) 3068 TargetLowering::CallLoweringInfo CLI(DAG); 3069 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee( 3070 CallingConv::C, Type::getVoidTy(*DAG.getContext()), 3071 DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args)); 3072 3073 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); 3074 return CallResult.second; 3075 } 3076 3077 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { 3078 MachineFunction &MF = DAG.getMachineFunction(); 3079 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 3080 EVT PtrVT = getPointerTy(MF.getDataLayout()); 3081 3082 SDLoc dl(Op); 3083 3084 if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) { 3085 // vastart just stores the address of the VarArgsFrameIndex slot into the 3086 // memory location argument. 3087 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 3088 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 3089 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), 3090 MachinePointerInfo(SV)); 3091 } 3092 3093 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct. 3094 // We suppose the given va_list is already allocated. 3095 // 3096 // typedef struct { 3097 // char gpr; /* index into the array of 8 GPRs 3098 // * stored in the register save area 3099 // * gpr=0 corresponds to r3, 3100 // * gpr=1 to r4, etc. 3101 // */ 3102 // char fpr; /* index into the array of 8 FPRs 3103 // * stored in the register save area 3104 // * fpr=0 corresponds to f1, 3105 // * fpr=1 to f2, etc. 
3106 // */ 3107 // char *overflow_arg_area; 3108 // /* location on stack that holds 3109 // * the next overflow argument 3110 // */ 3111 // char *reg_save_area; 3112 // /* where r3:r10 and f1:f8 (if saved) 3113 // * are stored 3114 // */ 3115 // } va_list[1]; 3116 3117 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32); 3118 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32); 3119 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(), 3120 PtrVT); 3121 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), 3122 PtrVT); 3123 3124 uint64_t FrameOffset = PtrVT.getSizeInBits()/8; 3125 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT); 3126 3127 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1; 3128 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT); 3129 3130 uint64_t FPROffset = 1; 3131 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT); 3132 3133 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 3134 3135 // Store first byte : number of int regs 3136 SDValue firstStore = 3137 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1), 3138 MachinePointerInfo(SV), MVT::i8); 3139 uint64_t nextOffset = FPROffset; 3140 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1), 3141 ConstFPROffset); 3142 3143 // Store second byte : number of float regs 3144 SDValue secondStore = 3145 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, 3146 MachinePointerInfo(SV, nextOffset), MVT::i8); 3147 nextOffset += StackOffset; 3148 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset); 3149 3150 // Store second word : arguments given on stack 3151 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, 3152 MachinePointerInfo(SV, nextOffset)); 3153 nextOffset += FrameOffset; 3154 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset); 3155 3156 // Store third word : 
// arguments given in registers
  return DAG.getStore(thirdStore, dl, FR, nextPtr,
                      MachinePointerInfo(SV, nextOffset));
}

/// FPR - The set of FP registers that should be allocated for arguments,
/// on Darwin.
static const MCPhysReg FPR[] = {PPC::F1,  PPC::F2,  PPC::F3, PPC::F4, PPC::F5,
                                PPC::F6,  PPC::F7,  PPC::F8, PPC::F9, PPC::F10,
                                PPC::F11, PPC::F12, PPC::F13};

/// QFPR - The set of QPX registers that should be allocated for arguments.
static const MCPhysReg QFPR[] = {
    PPC::QF1, PPC::QF2, PPC::QF3,  PPC::QF4,  PPC::QF5,  PPC::QF6, PPC::QF7,
    PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13};

/// CalculateStackSlotSize - Calculates the size reserved for this argument on
/// the stack.  For byval arguments this is the declared aggregate size;
/// otherwise it is the store size of the argument's type.
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
                                       unsigned PtrByteSize) {
  unsigned ArgSize = ArgVT.getStoreSize();
  if (Flags.isByVal())
    ArgSize = Flags.getByValSize();

  // Round up to multiples of the pointer size, except for array members,
  // which are always packed.
  if (!Flags.isInConsecutiveRegs())
    ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;

  return ArgSize;
}

/// CalculateStackSlotAlignment - Calculates the alignment of this argument
/// on the stack.  The result is at least PtrByteSize and may be raised for
/// vector types, byval alignment requests, and split array members.
static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
                                            ISD::ArgFlagsTy Flags,
                                            unsigned PtrByteSize) {
  unsigned Align = PtrByteSize;

  // Altivec parameters are padded to a 16 byte boundary.
  if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
      ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
      ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
      ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
    Align = 16;
  // QPX vector types stored in double-precision are padded to a 32 byte
  // boundary.
  else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1)
    Align = 32;

  // ByVal parameters are aligned as requested.
  if (Flags.isByVal()) {
    unsigned BVAlign = Flags.getByValAlign();
    if (BVAlign > PtrByteSize) {
      // A byval alignment larger than the pointer size is assumed to also
      // be a multiple of it; anything else is treated as unreachable.
      if (BVAlign % PtrByteSize != 0)
        llvm_unreachable(
            "ByVal alignment is not a multiple of the pointer size");

      Align = BVAlign;
    }
  }

  // Array members are always packed to their original alignment.
  if (Flags.isInConsecutiveRegs()) {
    // If the array member was split into multiple registers, the first
    // needs to be aligned to the size of the full type.  (Except for
    // ppcf128, which is only aligned as its f64 components.)
    if (Flags.isSplit() && OrigVT != MVT::ppcf128)
      Align = OrigVT.getStoreSize();
    else
      Align = ArgVT.getStoreSize();
  }

  return Align;
}

/// CalculateStackSlotUsed - Return whether this argument will use its
/// stack slot (instead of being passed in registers).  ArgOffset,
/// AvailableFPRs, and AvailableVRs must hold the current argument
/// position, and will be updated to account for this argument.
/// Returns true when the argument occupies (part of) a stack slot.
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
                                   ISD::ArgFlagsTy Flags,
                                   unsigned PtrByteSize,
                                   unsigned LinkageSize,
                                   unsigned ParamAreaSize,
                                   unsigned &ArgOffset,
                                   unsigned &AvailableFPRs,
                                   unsigned &AvailableVRs, bool HasQPX) {
  bool UseMemory = false;

  // Respect alignment of argument on the stack.
  unsigned Align =
    CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
  ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
  // If there's no space left in the argument save area, we must
  // use memory (this check also catches zero-sized arguments).
  if (ArgOffset >= LinkageSize + ParamAreaSize)
    UseMemory = true;

  // Allocate argument on the stack.
  ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
  if (Flags.isInConsecutiveRegsLast())
    ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
  // If we overran the argument save area, we must use memory
  // (this check catches arguments passed partially in memory)
  if (ArgOffset > LinkageSize + ParamAreaSize)
    UseMemory = true;

  // However, if the argument is actually passed in an FPR or a VR,
  // we don't use memory after all.
  if (!Flags.isByVal()) {
    if (ArgVT == MVT::f32 || ArgVT == MVT::f64 ||
        // QPX registers overlap with the scalar FP registers.
        (HasQPX && (ArgVT == MVT::v4f32 ||
                    ArgVT == MVT::v4f64 ||
                    ArgVT == MVT::v4i1)))
      if (AvailableFPRs > 0) {
        --AvailableFPRs;
        return false;
      }
    if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
        ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
        ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
        ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
      if (AvailableVRs > 0) {
        --AvailableVRs;
        return false;
      }
  }

  return UseMemory;
}

/// EnsureStackAlignment - Round stack frame size up from NumBytes to
/// ensure minimum alignment required for target.
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
                                     unsigned NumBytes) {
  unsigned TargetAlign = Lowering->getStackAlignment();
  unsigned AlignMask = TargetAlign - 1;
  // Round up to the next multiple of the target stack alignment
  // (assumed to be a power of two, hence the mask arithmetic).
  NumBytes = (NumBytes + AlignMask) & ~AlignMask;
  return NumBytes;
}

// Dispatch formal-argument lowering on the target ABI:
// 64-bit SVR4 (ELFv1/ELFv2), 32-bit SVR4, or Darwin.
SDValue PPCTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  if (Subtarget.isSVR4ABI()) {
    if (Subtarget.isPPC64())
      return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
                                         dl, DAG, InVals);
    else
      return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
                                         dl, DAG, InVals);
  } else {
    return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
                                       dl, DAG, InVals);
  }
}

SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  // 32-bit SVR4 ABI Stack Frame Layout:
  //              +-----------------------------------+
  //        +-->  |            Back chain             |
  //        |     +-----------------------------------+
  //        |     | Floating-point register save area |
  //        |     +-----------------------------------+
  //        |     |    General register save area     |
  //        |     +-----------------------------------+
  //        |     |          CR save word             |
  //        |     +-----------------------------------+
  //        |     |         VRSAVE save word          |
  //        |     +-----------------------------------+
  //        |     |         Alignment padding         |
  //        |     +-----------------------------------+
  //        |     |     Vector register save area     |
  //        |     +-----------------------------------+
  //        |     |       Local variable space        |
  //        |     +-----------------------------------+
  //        |     |        Parameter list area        |
  //        |     +-----------------------------------+
  //        |     |           LR save word            |
  //        |     +-----------------------------------+
  // SP-->  +---  |            Back chain             |
  //              +-----------------------------------+
  //
  // Specifications:
  //   System V Application Binary Interface PowerPC Processor Supplement
  //   AltiVec Technology Programming Interface Manual

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = 4;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                    *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  CCInfo.AllocateStack(LinkageSize, PtrByteSize);
  if (useSoftFloat() || hasSPE())
    CCInfo.PreAnalyzeFormalArguments(Ins);

  CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
  CCInfo.clearWasPPCF128();

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];

    // Arguments stored in registers.
    if (VA.isRegLoc()) {
      const TargetRegisterClass *RC;
      EVT ValVT = VA.getValVT();

      // Select the register class matching the value type and the
      // floating-point/vector features of the subtarget.
      switch (ValVT.getSimpleVT().SimpleTy) {
        default:
          llvm_unreachable("ValVT not supported by formal arguments Lowering");
        case MVT::i1:
        case MVT::i32:
          RC = &PPC::GPRCRegClass;
          break;
        case MVT::f32:
          if (Subtarget.hasP8Vector())
            RC = &PPC::VSSRCRegClass;
          else if (Subtarget.hasSPE())
            RC = &PPC::SPE4RCRegClass;
          else
            RC = &PPC::F4RCRegClass;
          break;
        case MVT::f64:
          if (Subtarget.hasVSX())
            RC = &PPC::VSFRCRegClass;
          else if (Subtarget.hasSPE())
            RC = &PPC::SPERCRegClass;
          else
            RC = &PPC::F8RCRegClass;
          break;
        case MVT::v16i8:
        case MVT::v8i16:
        case MVT::v4i32:
          RC = &PPC::VRRCRegClass;
          break;
        case MVT::v4f32:
          RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass;
          break;
        case MVT::v2f64:
        case MVT::v2i64:
          RC = &PPC::VRRCRegClass;
          break;
        case MVT::v4f64:
          RC = &PPC::QFRCRegClass;
          break;
        case MVT::v4i1:
          RC = &PPC::QBRCRegClass;
          break;
      }

      // Transform the arguments stored in physical registers into virtual
      // ones.  i1 values are copied in as i32 and truncated below.
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
      SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
                                            ValVT == MVT::i1 ? MVT::i32 : ValVT);

      if (ValVT == MVT::i1)
        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);

      InVals.push_back(ArgValue);
    } else {
      // Argument stored in memory.
      assert(VA.isMemLoc());

      // Get the extended size of the argument type in stack
      unsigned ArgSize = VA.getLocVT().getStoreSize();
      // Get the actual size of the argument type
      unsigned ObjSize = VA.getValVT().getStoreSize();
      unsigned ArgOffset = VA.getLocMemOffset();
      // Stack objects in PPC32 are right justified.
      ArgOffset += ArgSize - ObjSize;
      int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);

      // Create load nodes to retrieve arguments from the stack.
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(
          DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
    }
  }

  // Assign locations to all of the incoming aggregate by value arguments.
  // Aggregates passed by value are stored in the local variable space of the
  // caller's stack frame, right above the parameter list area.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                      ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);

  CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);

  // Area that is at least reserved in the caller of this function.
  unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
  MinReservedArea = std::max(MinReservedArea, LinkageSize);

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized function's reserved stack space needs to be aligned so
  // that taking the difference between two stack areas will result in an
  // aligned stack.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  SmallVector<SDValue, 8> MemOps;

  // If the function takes variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of
  // llvm.va_start.
  if (isVarArg) {
    static const MCPhysReg GPArgRegs[] = {
      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
      PPC::R7, PPC::R8, PPC::R9, PPC::R10,
    };
    const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);

    static const MCPhysReg FPArgRegs[] = {
      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
      PPC::F8
    };
    unsigned NumFPArgRegs = array_lengthof(FPArgRegs);

    // No FP argument registers are saved for soft-float or SPE.
    if (useSoftFloat() || hasSPE())
      NumFPArgRegs = 0;

    FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
    FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));

    // Make room for NumGPArgRegs and NumFPArgRegs.
    int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
                NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;

    FuncInfo->setVarArgsStackOffset(
        MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
                              CCInfo.getNextStackOffset(), true));

    FuncInfo->setVarArgsFrameIndex(MFI.CreateStackObject(Depth, 8, false));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // The fixed integer arguments of a variadic function are stored to the
    // VarArgsFrameIndex on the stack so that they may be loaded by
    // dereferencing the result of va_next.
    for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
      // Get an existing live-in vreg, or add a new one.
      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
      if (!VReg)
        VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }

    // FIXME 32-bit SVR4: We only need to save FP argument registers if CR
    // bit 6 is set.
    // The double arguments are stored to the VarArgsFrameIndex
    // on the stack.
    for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
      // Get an existing live-in vreg, or add a new one.
      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
      if (!VReg)
        VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by eight for the next argument to store
      SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
                                       PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}

// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
// value to MVT::i64 and then truncate to the correct register size.
SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
                                             EVT ObjectVT, SelectionDAG &DAG,
                                             SDValue ArgVal,
                                             const SDLoc &dl) const {
  // Assert the known extension (sext/zext per the argument flags) so later
  // combines may exploit it, then truncate from i64 down to ObjectVT.
  if (Flags.isSExt())
    ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
                         DAG.getValueType(ObjectVT));
  else if (Flags.isZExt())
    ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
                         DAG.getValueType(ObjectVT));

  return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
}

SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  bool isELFv2ABI = Subtarget.isELFv2ABI();
  bool isLittleEndian = Subtarget.isLittleEndian();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  assert(!(CallConv == CallingConv::Fast && isVarArg) &&
         "fastcc not supported on varargs functions");

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = 8;
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned Num_GPR_Regs = array_lengthof(GPR);
  const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
  const unsigned Num_VR_Regs = array_lengthof(VR);
  const unsigned Num_QFPR_Regs = Num_FPR_Regs;

  // Do a first pass over the arguments to determine whether the ABI
  // guarantees that our caller has allocated the parameter save area
  // on its stack frame. In the ELFv1 ABI, this is always the case;
  // in the ELFv2 ABI, it is true if this is a vararg function or if
  // any parameter is located in a stack slot.

  bool HasParameterArea = !isELFv2ABI || isVarArg;
  unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
  unsigned NumBytes = LinkageSize;
  unsigned AvailableFPRs = Num_FPR_Regs;
  unsigned AvailableVRs = Num_VR_Regs;
  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
    // The 'nest' parameter is passed in a dedicated register (R11) and
    // never consumes a stack slot or argument register.
    if (Ins[i].Flags.isNest())
      continue;

    if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
                               PtrByteSize, LinkageSize, ParamAreaSize,
                               NumBytes, AvailableFPRs, AvailableVRs,
                               Subtarget.hasQPX()))
      HasParameterArea = true;
  }

  // Add DAG nodes to load the arguments or copy them out of registers.  On
  // entry to a function on PPC, the arguments start after the linkage area,
  // although the first ones are often in registers.

  unsigned ArgOffset = LinkageSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
  // QPX registers overlap the scalar FPRs, so QFPR_idx aliases FPR_idx.
  unsigned &QFPR_idx = FPR_idx;
  SmallVector<SDValue, 8> MemOps;
  Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
  unsigned CurArgIdx = 0;
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    SDValue ArgVal;
    bool needsLoad = false;
    EVT ObjectVT = Ins[ArgNo].VT;
    EVT OrigVT = Ins[ArgNo].ArgVT;
    unsigned ObjSize = ObjectVT.getStoreSize();
    unsigned ArgSize = ObjSize;
    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
    if (Ins[ArgNo].isOrigArg()) {
      std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[ArgNo].getOrigArgIndex();
    }
    // We re-align the argument offset for each argument, except when using the
    // fast calling convention, when we need to make sure we do that only when
    // we'll actually use a stack slot.
    unsigned CurArgOffset, Align;
    auto ComputeArgOffset = [&]() {
      /* Respect alignment of argument on the stack.  */
      Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
      ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
      CurArgOffset = ArgOffset;
    };

    if (CallConv != CallingConv::Fast) {
      ComputeArgOffset();

      /* Compute GPR index associated with argument offset.  */
      GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
      GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
    }

    // FIXME the codegen can be much improved in some cases.
    // We do not have to keep everything in memory.
    if (Flags.isByVal()) {
      assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");

      if (CallConv == CallingConv::Fast)
        ComputeArgOffset();

      // ObjSize is the true size, ArgSize rounded up to multiple of registers.
      ObjSize = Flags.getByValSize();
      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      // Empty aggregate parameters do not take up registers.  Examples:
      //   struct { } a;
      //   union  { } b;
      //   int c[0];
      // etc.  However, we have to provide a place-holder in InVals, so
      // pretend we have an 8-byte item at the current address for that
      // purpose.
      if (!ObjSize) {
        int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
        SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
        InVals.push_back(FIN);
        continue;
      }

      // Create a stack object covering all stack doublewords occupied
      // by the argument.  If the argument is (fully or partially) on
      // the stack, or if the argument is fully in registers but the
      // caller has allocated the parameter save anyway, we can refer
      // directly to the caller's stack frame.  Otherwise, create a
      // local copy in our own frame.
      int FI;
      if (HasParameterArea ||
          ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
        FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
      else
        FI = MFI.CreateStackObject(ArgSize, Align, false);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);

      // Handle aggregates smaller than 8 bytes.
      if (ObjSize < PtrByteSize) {
        // The value of the object is its address, which differs from the
        // address of the enclosing doubleword on big-endian systems.
        SDValue Arg = FIN;
        if (!isLittleEndian) {
          SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
          Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
        }
        InVals.push_back(Arg);

        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
          FuncInfo->addLiveInAttr(VReg, Flags);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          SDValue Store;

          if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
            EVT ObjType = (ObjSize == 1 ? MVT::i8 :
                           (ObjSize == 2 ? MVT::i16 : MVT::i32));
            Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
                                      MachinePointerInfo(&*FuncArg), ObjType);
          } else {
            // For sizes that don't fit a truncating store (3, 5, 6, 7),
            // store the whole register as-is to the parameter save area
            // slot.
            Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                 MachinePointerInfo(&*FuncArg));
          }

          MemOps.push_back(Store);
        }
        // Whether we copied from a register or not, advance the offset
        // into the parameter save area by a full doubleword.
        ArgOffset += PtrByteSize;
        continue;
      }

      // The value of the object is its address, which is the address of
      // its first stack doubleword.
      InVals.push_back(FIN);

      // Store whatever pieces of the object are in registers to memory.
      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        if (GPR_idx == Num_GPR_Regs)
          break;

        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
        FuncInfo->addLiveInAttr(VReg, Flags);
        SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
        SDValue Addr = FIN;
        if (j) {
          SDValue Off = DAG.getConstant(j, dl, PtrVT);
          Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
        }
        SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
                                     MachinePointerInfo(&*FuncArg, j));
        MemOps.push_back(Store);
        ++GPR_idx;
      }
      ArgOffset += ArgSize;
      continue;
    }

    switch (ObjectVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unhandled argument type!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      if (Flags.isNest()) {
        // The 'nest' parameter, if any, is passed in R11.
        unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

        break;
      }

      // These can be scalar arguments or elements of an integer array type
      // passed directly.  Clang may use those instead of "byval" aggregate
      // types to avoid forcing arguments to memory unnecessarily.
      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
        FuncInfo->addLiveInAttr(VReg, Flags);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
          // value to MVT::i64 and then truncate to the correct register size.
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();

        needsLoad = true;
        ArgSize = PtrByteSize;
      }
      if (CallConv != CallingConv::Fast || needsLoad)
        ArgOffset += 8;
      break;

    case MVT::f32:
    case MVT::f64:
      // These can be scalar arguments or elements of a float array type
      // passed directly.  The latter are used to implement ELFv2 homogenous
      // float aggregates.
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;

        if (ObjectVT == MVT::f32)
          VReg = MF.addLiveIn(FPR[FPR_idx],
                              Subtarget.hasP8Vector()
                                  ? &PPC::VSSRCRegClass
                                  : &PPC::F4RCRegClass);
        else
          VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
                                                ? &PPC::VSFRCRegClass
                                                : &PPC::F8RCRegClass);

        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++FPR_idx;
      } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
        // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
        // once we support fp <-> gpr moves.

        // This can only ever happen in the presence of f32 array types,
        // since otherwise we never run out of FPRs before running out
        // of GPRs.
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
        FuncInfo->addLiveInAttr(VReg, Flags);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::f32) {
          // Pick the half of the doubleword that actually holds the f32:
          // the high half on big-endian for even slots, and vice versa.
          if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
            ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
                                 DAG.getConstant(32, dl, MVT::i32));
          ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
        }

        ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();

        needsLoad = true;
      }

      // When passing an array of floats, the array occupies consecutive
      // space in the argument area; only round up to the next doubleword
      // at the end of the array.  Otherwise, each float takes 8 bytes.
      if (CallConv != CallingConv::Fast || needsLoad) {
        ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
        ArgOffset += ArgSize;
        if (Flags.isInConsecutiveRegsLast())
          ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
    case MVT::v2f64:
    case MVT::v2i64:
    case MVT::v1i128:
    case MVT::f128:
      if (!Subtarget.hasQPX()) {
        // These can be scalar arguments or elements of a vector array type
        // passed directly.  The latter are used to implement ELFv2 homogenous
        // vector aggregates.
        if (VR_idx != Num_VR_Regs) {
          unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
          ++VR_idx;
        } else {
          if (CallConv == CallingConv::Fast)
            ComputeArgOffset();
          needsLoad = true;
        }
        if (CallConv != CallingConv::Fast || needsLoad)
          ArgOffset += 16;
        break;
      } // not QPX

      assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
             "Invalid QPX parameter type");
      LLVM_FALLTHROUGH;

    case MVT::v4f64:
    case MVT::v4i1:
      // QPX vectors are treated like their scalar floating-point subregisters
      // (except that they're larger).
      unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32;
      if (QFPR_idx != Num_QFPR_Regs) {
        const TargetRegisterClass *RC;
        switch (ObjectVT.getSimpleVT().SimpleTy) {
        case MVT::v4f64: RC = &PPC::QFRCRegClass; break;
        case MVT::v4f32: RC = &PPC::QSRCRegClass; break;
        default:         RC = &PPC::QBRCRegClass; break;
        }

        unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++QFPR_idx;
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();
        needsLoad = true;
      }
      if (CallConv != CallingConv::Fast || needsLoad)
        ArgOffset += Sz;
      break;
    }

    // We need to load the argument to a virtual register if we determined
    // above that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      if (ObjSize < ArgSize && !isLittleEndian)
        CurArgOffset += ArgSize - ObjSize;
      int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
    }

    InVals.push_back(ArgVal);
  }

  // Area that is at least reserved in the caller of this function.
  unsigned MinReservedArea;
  if (HasParameterArea)
    MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
  else
    MinReservedArea = LinkageSize;

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized functions' reserved stack space needs to be aligned so
  // that taking the difference between two stack areas will result in an
  // aligned stack.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // If the function takes variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of
  // llvm.va_start.
  if (isVarArg) {
    int Depth = ArgOffset;

    FuncInfo->setVarArgsFrameIndex(
        MFI.CreateFixedObject(PtrByteSize, Depth, true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
         GPR_idx < Num_GPR_Regs; ++GPR_idx) {
      unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}

SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  bool isPPC64 = PtrVT == MVT::i64;
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = isPPC64 ?
8 : 4; 4001 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); 4002 unsigned ArgOffset = LinkageSize; 4003 // Area that is at least reserved in caller of this function. 4004 unsigned MinReservedArea = ArgOffset; 4005 4006 static const MCPhysReg GPR_32[] = { // 32-bit registers. 4007 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 4008 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 4009 }; 4010 static const MCPhysReg GPR_64[] = { // 64-bit registers. 4011 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 4012 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 4013 }; 4014 static const MCPhysReg VR[] = { 4015 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 4016 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 4017 }; 4018 4019 const unsigned Num_GPR_Regs = array_lengthof(GPR_32); 4020 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13; 4021 const unsigned Num_VR_Regs = array_lengthof( VR); 4022 4023 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 4024 4025 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32; 4026 4027 // In 32-bit non-varargs functions, the stack space for vectors is after the 4028 // stack space for non-vectors. We do not use this space unless we have 4029 // too many vectors to fit in registers, something that only occurs in 4030 // constructed examples:), but we have to walk the arglist to figure 4031 // that out...for the pathological case, compute VecArgOffset as the 4032 // start of the vector parameter area. Computing VecArgOffset is the 4033 // entire point of the following loop. 4034 unsigned VecArgOffset = ArgOffset; 4035 if (!isVarArg && !isPPC64) { 4036 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; 4037 ++ArgNo) { 4038 EVT ObjectVT = Ins[ArgNo].VT; 4039 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; 4040 4041 if (Flags.isByVal()) { 4042 // ObjSize is the true size, ArgSize rounded up to multiple of regs. 
4043 unsigned ObjSize = Flags.getByValSize(); 4044 unsigned ArgSize = 4045 ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 4046 VecArgOffset += ArgSize; 4047 continue; 4048 } 4049 4050 switch(ObjectVT.getSimpleVT().SimpleTy) { 4051 default: llvm_unreachable("Unhandled argument type!"); 4052 case MVT::i1: 4053 case MVT::i32: 4054 case MVT::f32: 4055 VecArgOffset += 4; 4056 break; 4057 case MVT::i64: // PPC64 4058 case MVT::f64: 4059 // FIXME: We are guaranteed to be !isPPC64 at this point. 4060 // Does MVT::i64 apply? 4061 VecArgOffset += 8; 4062 break; 4063 case MVT::v4f32: 4064 case MVT::v4i32: 4065 case MVT::v8i16: 4066 case MVT::v16i8: 4067 // Nothing to do, we're only looking at Nonvector args here. 4068 break; 4069 } 4070 } 4071 } 4072 // We've found where the vector parameter area in memory is. Skip the 4073 // first 12 parameters; these don't use that memory. 4074 VecArgOffset = ((VecArgOffset+15)/16)*16; 4075 VecArgOffset += 12*16; 4076 4077 // Add DAG nodes to load the arguments or copy them out of registers. On 4078 // entry to a function on PPC, the arguments start after the linkage area, 4079 // although the first ones are often in registers. 4080 4081 SmallVector<SDValue, 8> MemOps; 4082 unsigned nAltivecParamsAtEnd = 0; 4083 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin(); 4084 unsigned CurArgIdx = 0; 4085 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { 4086 SDValue ArgVal; 4087 bool needsLoad = false; 4088 EVT ObjectVT = Ins[ArgNo].VT; 4089 unsigned ObjSize = ObjectVT.getSizeInBits()/8; 4090 unsigned ArgSize = ObjSize; 4091 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; 4092 if (Ins[ArgNo].isOrigArg()) { 4093 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx); 4094 CurArgIdx = Ins[ArgNo].getOrigArgIndex(); 4095 } 4096 unsigned CurArgOffset = ArgOffset; 4097 4098 // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary. 
4099 if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 || 4100 ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) { 4101 if (isVarArg || isPPC64) { 4102 MinReservedArea = ((MinReservedArea+15)/16)*16; 4103 MinReservedArea += CalculateStackSlotSize(ObjectVT, 4104 Flags, 4105 PtrByteSize); 4106 } else nAltivecParamsAtEnd++; 4107 } else 4108 // Calculate min reserved area. 4109 MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT, 4110 Flags, 4111 PtrByteSize); 4112 4113 // FIXME the codegen can be much improved in some cases. 4114 // We do not have to keep everything in memory. 4115 if (Flags.isByVal()) { 4116 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit"); 4117 4118 // ObjSize is the true size, ArgSize rounded up to multiple of registers. 4119 ObjSize = Flags.getByValSize(); 4120 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 4121 // Objects of size 1 and 2 are right justified, everything else is 4122 // left justified. This means the memory address is adjusted forwards. 4123 if (ObjSize==1 || ObjSize==2) { 4124 CurArgOffset = CurArgOffset + (4 - ObjSize); 4125 } 4126 // The value of the object is its address. 4127 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true); 4128 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 4129 InVals.push_back(FIN); 4130 if (ObjSize==1 || ObjSize==2) { 4131 if (GPR_idx != Num_GPR_Regs) { 4132 unsigned VReg; 4133 if (isPPC64) 4134 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 4135 else 4136 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 4137 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 4138 EVT ObjType = ObjSize == 1 ? 
MVT::i8 : MVT::i16; 4139 SDValue Store = 4140 DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, 4141 MachinePointerInfo(&*FuncArg), ObjType); 4142 MemOps.push_back(Store); 4143 ++GPR_idx; 4144 } 4145 4146 ArgOffset += PtrByteSize; 4147 4148 continue; 4149 } 4150 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { 4151 // Store whatever pieces of the object are in registers 4152 // to memory. ArgOffset will be the address of the beginning 4153 // of the object. 4154 if (GPR_idx != Num_GPR_Regs) { 4155 unsigned VReg; 4156 if (isPPC64) 4157 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 4158 else 4159 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 4160 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true); 4161 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 4162 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 4163 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 4164 MachinePointerInfo(&*FuncArg, j)); 4165 MemOps.push_back(Store); 4166 ++GPR_idx; 4167 ArgOffset += PtrByteSize; 4168 } else { 4169 ArgOffset += ArgSize - (ArgOffset-CurArgOffset); 4170 break; 4171 } 4172 } 4173 continue; 4174 } 4175 4176 switch (ObjectVT.getSimpleVT().SimpleTy) { 4177 default: llvm_unreachable("Unhandled argument type!"); 4178 case MVT::i1: 4179 case MVT::i32: 4180 if (!isPPC64) { 4181 if (GPR_idx != Num_GPR_Regs) { 4182 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 4183 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); 4184 4185 if (ObjectVT == MVT::i1) 4186 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal); 4187 4188 ++GPR_idx; 4189 } else { 4190 needsLoad = true; 4191 ArgSize = PtrByteSize; 4192 } 4193 // All int arguments reserve stack space in the Darwin ABI. 
4194 ArgOffset += PtrByteSize; 4195 break; 4196 } 4197 LLVM_FALLTHROUGH; 4198 case MVT::i64: // PPC64 4199 if (GPR_idx != Num_GPR_Regs) { 4200 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 4201 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); 4202 4203 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) 4204 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote 4205 // value to MVT::i64 and then truncate to the correct register size. 4206 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); 4207 4208 ++GPR_idx; 4209 } else { 4210 needsLoad = true; 4211 ArgSize = PtrByteSize; 4212 } 4213 // All int arguments reserve stack space in the Darwin ABI. 4214 ArgOffset += 8; 4215 break; 4216 4217 case MVT::f32: 4218 case MVT::f64: 4219 // Every 4 bytes of argument space consumes one of the GPRs available for 4220 // argument passing. 4221 if (GPR_idx != Num_GPR_Regs) { 4222 ++GPR_idx; 4223 if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64) 4224 ++GPR_idx; 4225 } 4226 if (FPR_idx != Num_FPR_Regs) { 4227 unsigned VReg; 4228 4229 if (ObjectVT == MVT::f32) 4230 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass); 4231 else 4232 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass); 4233 4234 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 4235 ++FPR_idx; 4236 } else { 4237 needsLoad = true; 4238 } 4239 4240 // All FP arguments reserve stack space in the Darwin ABI. 4241 ArgOffset += isPPC64 ? 8 : ObjSize; 4242 break; 4243 case MVT::v4f32: 4244 case MVT::v4i32: 4245 case MVT::v8i16: 4246 case MVT::v16i8: 4247 // Note that vector arguments in registers don't reserve stack space, 4248 // except in varargs functions. 
4249 if (VR_idx != Num_VR_Regs) { 4250 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); 4251 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 4252 if (isVarArg) { 4253 while ((ArgOffset % 16) != 0) { 4254 ArgOffset += PtrByteSize; 4255 if (GPR_idx != Num_GPR_Regs) 4256 GPR_idx++; 4257 } 4258 ArgOffset += 16; 4259 GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64? 4260 } 4261 ++VR_idx; 4262 } else { 4263 if (!isVarArg && !isPPC64) { 4264 // Vectors go after all the nonvectors. 4265 CurArgOffset = VecArgOffset; 4266 VecArgOffset += 16; 4267 } else { 4268 // Vectors are aligned. 4269 ArgOffset = ((ArgOffset+15)/16)*16; 4270 CurArgOffset = ArgOffset; 4271 ArgOffset += 16; 4272 } 4273 needsLoad = true; 4274 } 4275 break; 4276 } 4277 4278 // We need to load the argument to a virtual register if we determined above 4279 // that we ran out of physical registers of the appropriate type. 4280 if (needsLoad) { 4281 int FI = MFI.CreateFixedObject(ObjSize, 4282 CurArgOffset + (ArgSize - ObjSize), 4283 isImmutable); 4284 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 4285 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo()); 4286 } 4287 4288 InVals.push_back(ArgVal); 4289 } 4290 4291 // Allow for Altivec parameters at the end, if needed. 4292 if (nAltivecParamsAtEnd) { 4293 MinReservedArea = ((MinReservedArea+15)/16)*16; 4294 MinReservedArea += 16*nAltivecParamsAtEnd; 4295 } 4296 4297 // Area that is at least reserved in the caller of this function. 4298 MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize); 4299 4300 // Set the size that is at least reserved in caller of this function. Tail 4301 // call optimized functions' reserved stack space needs to be aligned so that 4302 // taking the difference between two stack areas will result in an aligned 4303 // stack. 
4304 MinReservedArea = 4305 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea); 4306 FuncInfo->setMinReservedArea(MinReservedArea); 4307 4308 // If the function takes variable number of arguments, make a frame index for 4309 // the start of the first vararg value... for expansion of llvm.va_start. 4310 if (isVarArg) { 4311 int Depth = ArgOffset; 4312 4313 FuncInfo->setVarArgsFrameIndex( 4314 MFI.CreateFixedObject(PtrVT.getSizeInBits()/8, 4315 Depth, true)); 4316 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 4317 4318 // If this function is vararg, store any remaining integer argument regs 4319 // to their spots on the stack so that they may be loaded by dereferencing 4320 // the result of va_next. 4321 for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) { 4322 unsigned VReg; 4323 4324 if (isPPC64) 4325 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 4326 else 4327 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 4328 4329 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 4330 SDValue Store = 4331 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo()); 4332 MemOps.push_back(Store); 4333 // Increment the address by four for the next argument to store 4334 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT); 4335 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 4336 } 4337 } 4338 4339 if (!MemOps.empty()) 4340 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); 4341 4342 return Chain; 4343 } 4344 4345 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be 4346 /// adjusted to accommodate the arguments for the tailcall. 
static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
                                   unsigned ParamSize) {

  if (!isTailCall) return 0;

  PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
  unsigned CallerMinReservedArea = FI->getMinReservedArea();
  // Negative when the callee needs more argument space than the caller
  // reserved; the prologue/epilogue must then grow the frame.
  int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
  // Remember only if the new adjustment is bigger.
  if (SPDiff < FI->getTailCallSPDelta())
    FI->setTailCallSPDelta(SPDiff);

  return SPDiff;
}

static bool isFunctionGlobalAddress(SDValue Callee);

/// callsShareTOCBase - Return true if we can safely assume that the callee
/// uses the same TOC base pointer as Caller, so no TOC save/restore is
/// needed around the call.  Conservatively returns false when the callee is
/// not a global address (e.g. an external symbol).
static bool
callsShareTOCBase(const Function *Caller, SDValue Callee,
                  const TargetMachine &TM) {
  // If !G, Callee can be an external symbol.
  GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
  if (!G)
    return false;

  // The medium and large code models are expected to provide a sufficiently
  // large TOC to provide all data addressing needs of a module with a
  // single TOC. Since each module will be addressed with a single TOC then we
  // only need to check that caller and callee don't cross dso boundaries.
  if (CodeModel::Medium == TM.getCodeModel() ||
      CodeModel::Large == TM.getCodeModel())
    return TM.shouldAssumeDSOLocal(*Caller->getParent(), G->getGlobal());

  // Otherwise we need to ensure callee and caller are in the same section,
  // since the linker may allocate multiple TOCs, and we don't know which
  // sections will belong to the same TOC base.

  const GlobalValue *GV = G->getGlobal();
  if (!GV->isStrongDefinitionForLinker())
    return false;

  // Any explicitly-specified sections and section prefixes must also match.
  // Also, if we're using -ffunction-sections, then each function is always in
  // a different section (the same is true for COMDAT functions).
  if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
      GV->getSection() != Caller->getSection())
    return false;
  if (const auto *F = dyn_cast<Function>(GV)) {
    if (F->getSectionPrefix() != Caller->getSectionPrefix())
      return false;
  }

  // If the callee might be interposed, then we can't assume the ultimate call
  // target will be in the same section. Even in cases where we can assume that
  // interposition won't happen, in any case where the linker might insert a
  // stub to allow for interposition, we must generate code as though
  // interposition might occur. To understand why this matters, consider a
  // situation where: a -> b -> c where the arrows indicate calls. b and c are
  // in the same section, but a is in a different module (i.e. has a different
  // TOC base pointer). If the linker allows for interposition between b and c,
  // then it will generate a stub for the call edge between b and c which will
  // save the TOC pointer into the designated stack slot allocated by b. If we
  // return true here, and therefore allow a tail call between b and c, that
  // stack slot won't exist and the b -> c stub will end up saving b's TOC base
  // pointer into the stack slot allocated by a (where the a -> b stub saved
  // a's TOC base pointer). If we're not considering a tail call, but rather,
  // whether a nop is needed after the call instruction in b, because the linker
  // will insert a stub, it might complain about a missing nop if we omit it
  // (although many don't complain in this case).
  if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
    return false;

  return true;
}

/// needStackSlotPassParameters - Return true if any of the outgoing arguments
/// in Outs would have to be passed in a stack slot (rather than purely in
/// registers) under the 64-bit SVR4 ABI.  Used to veto tail calls that would
/// need to write to the caller's parameter save area.
static bool
needStackSlotPassParameters(const PPCSubtarget &Subtarget,
                            const SmallVectorImpl<ISD::OutputArg> &Outs) {
  assert(Subtarget.isSVR4ABI() && Subtarget.isPPC64());

  const unsigned PtrByteSize = 8;
  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned NumGPRs = array_lengthof(GPR);
  const unsigned NumFPRs = 13;
  const unsigned NumVRs = array_lengthof(VR);
  const unsigned ParamAreaSize = NumGPRs * PtrByteSize;

  unsigned NumBytes = LinkageSize;
  unsigned AvailableFPRs = NumFPRs;
  unsigned AvailableVRs = NumVRs;

  for (const ISD::OutputArg& Param : Outs) {
    // A 'nest' parameter travels in a dedicated register, not the
    // parameter area.
    if (Param.Flags.isNest()) continue;

    if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags,
                               PtrByteSize, LinkageSize, ParamAreaSize,
                               NumBytes, AvailableFPRs, AvailableVRs,
                               Subtarget.hasQPX()))
      return true;
  }
  return false;
}

/// hasSameArgumentList - Return true if the call site CS forwards exactly the
/// caller's own argument list (each callee operand is either the identical
/// caller argument Value, or an undef of the matching type).
static bool
hasSameArgumentList(const Function *CallerFn, ImmutableCallSite CS) {
  if (CS.arg_size() != CallerFn->arg_size())
    return false;

  ImmutableCallSite::arg_iterator CalleeArgIter = CS.arg_begin();
  ImmutableCallSite::arg_iterator CalleeArgEnd = CS.arg_end();
  Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();

  for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
    const Value* CalleeArg = *CalleeArgIter;
    const Value* CallerArg = &(*CallerArgIter);
    if (CalleeArg == CallerArg)
      continue;

    // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
    //        tail call @callee([4 x i64] undef, [4 x i64] %b)
    //      }
    // 1st argument of callee is undef and has the same type as caller.
    if (CalleeArg->getType() == CallerArg->getType() &&
        isa<UndefValue>(CalleeArg))
      continue;

    return false;
  }

  return true;
}

// Returns true if TCO is possible between the callers and callees
// calling conventions.
static bool
areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
                                    CallingConv::ID CalleeCC) {
  // Tail calls are possible with fastcc and ccc.
  auto isTailCallableCC  = [] (CallingConv::ID CC){
      return  CC == CallingConv::C || CC == CallingConv::Fast;
  };
  if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
    return false;

  // We can safely tail call both fastcc and ccc callees from a c calling
  // convention caller. If the caller is fastcc, we may have less stack space
  // than a non-fastcc caller with the same signature so disable tail-calls in
  // that case.
  return CallerCC == CallingConv::C || CallerCC == CalleeCC;
}

/// IsEligibleForTailCallOptimization_64SVR4 - Decide whether this call may be
/// lowered as a tail call (TCO/SCO) under the 64-bit SVR4 ABI.  Rejects
/// varargs, byval parameters, incompatible calling conventions, indirect
/// calls, and calls whose callee may use a different TOC base.
bool
PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
                                    SDValue Callee,
                                    CallingConv::ID CalleeCC,
                                    ImmutableCallSite CS,
                                    bool isVarArg,
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
                                    SelectionDAG& DAG) const {
  bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;

  if (DisableSCO && !TailCallOpt) return false;

  // Variadic argument functions are not supported.
  if (isVarArg) return false;

  auto &Caller = DAG.getMachineFunction().getFunction();
  // Check that the calling conventions are compatible for tco.
  if (!areCallingConvEligibleForTCO_64SVR4(Caller.getCallingConv(), CalleeCC))
    return false;

  // Caller contains any byval parameter is not supported.
  if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
    return false;

  // Callee contains any byval parameter is not supported, too.
  // Note: This is a quick work around, because in some cases, e.g.
  // caller's stack size > callee's stack size, we are still able to apply
  // sibling call optimization. For example, gcc is able to do SCO for caller1
  // in the following example, but not for caller2.
  //   struct test {
  //     long int a;
  //     char ary[56];
  //   } gTest;
  //   __attribute__((noinline)) int callee(struct test v, struct test *b) {
  //     b->a = v.a;
  //     return 0;
  //   }
  //   void caller1(struct test a, struct test c, struct test *b) {
  //     callee(gTest, b); }
  //   void caller2(struct test *b) { callee(gTest, b); }
  if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
    return false;

  // If callee and caller use different calling conventions, we cannot pass
  // parameters on stack since offsets for the parameter area may be different.
  if (Caller.getCallingConv() != CalleeCC &&
      needStackSlotPassParameters(Subtarget, Outs))
    return false;

  // No TCO/SCO on indirect call because Caller have to restore its TOC
  if (!isFunctionGlobalAddress(Callee) &&
      !isa<ExternalSymbolSDNode>(Callee))
    return false;

  // If the caller and callee potentially have different TOC bases then we
  // cannot tail call since we need to restore the TOC pointer after the call.
  // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
  if (!callsShareTOCBase(&Caller, Callee, getTargetMachine()))
    return false;

  // TCO allows altering callee ABI, so we don't have to check further.
  if (CalleeCC == CallingConv::Fast && TailCallOpt)
    return true;

  if (DisableSCO) return false;

  // If callee use the same argument list that caller is using, then we can
  // apply SCO on this case. If it is not, then we need to check if callee needs
  // stack for passing arguments.
  if (!hasSameArgumentList(&Caller, CS) &&
      needStackSlotPassParameters(Subtarget, Outs)) {
    return false;
  }

  return true;
}

/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
/// Only used when GuaranteedTailCallOpt is enabled; requires matching fastcc
/// conventions, no varargs and no byval parameters.
bool
PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
                                                     CallingConv::ID CalleeCC,
                                                     bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                                     SelectionDAG& DAG) const {
  if (!getTargetMachine().Options.GuaranteedTailCallOpt)
    return false;

  // Variable argument functions are not supported.
  if (isVarArg)
    return false;

  MachineFunction &MF = DAG.getMachineFunction();
  CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
  if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
    // Functions containing by val parameters are not supported.
    for (unsigned i = 0; i != Ins.size(); i++) {
       ISD::ArgFlagsTy Flags = Ins[i].Flags;

       if (Flags.isByVal()) return false;
    }

    // Non-PIC/GOT tail calls are supported.
    if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
      return true;

    // At the moment we can only do local tail calls (in same module, hidden
    // or protected) if we are generating PIC.
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
      return G->getGlobal()->hasHiddenVisibility()
          || G->getGlobal()->hasProtectedVisibility();
  }

  return false;
}

/// isCallCompatibleAddress - Return the immediate to use if the specified
/// 32-bit value is representable in the immediate field of a BxA instruction.
static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return nullptr;

  // NOTE(review): getZExtValue() is truncated to a 32-bit int here; callers
  // presumably never pass a constant wider than 32 bits — confirm for 64-bit
  // absolute addresses.
  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
      SignExtend32<26>(Addr) != Addr)
    return nullptr;  // Top 6 bits have to be sext of immediate.

  // Encode as a word offset (address >> 2), as the instruction expects.
  return DAG
      .getConstant(
          (int)C->getZExtValue() >> 2, SDLoc(Op),
          DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
      .getNode();
}

namespace {

// Bundles an outgoing tail-call argument with the fixed stack slot it must
// eventually be stored to.
struct TailCallArgumentInfo {
  SDValue Arg;
  SDValue FrameIdxOp;
  int FrameIdx = 0;

  TailCallArgumentInfo() = default;
};

} // end anonymous namespace

/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static void StoreTailCallArgumentsToStackSlot(
    SelectionDAG &DAG, SDValue Chain,
    const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
    SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
  for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
    SDValue Arg = TailCallArgs[i].Arg;
    SDValue FIN = TailCallArgs[i].FrameIdxOp;
    int FI = TailCallArgs[i].FrameIdx;
    // Store relative to framepointer.
    MemOpChains.push_back(DAG.getStore(
        Chain, dl, Arg, FIN,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
  }
}

/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
/// the appropriate stack slot for the tail call optimized function call.
/// No-op when SPDiff is zero (frame size unchanged by the tail call).
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
                                             SDValue OldRetAddr, SDValue OldFP,
                                             int SPDiff, const SDLoc &dl) {
  if (SPDiff) {
    // Calculate the new stack slot for the return address.
    MachineFunction &MF = DAG.getMachineFunction();
    const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
    const PPCFrameLowering *FL = Subtarget.getFrameLowering();
    bool isPPC64 = Subtarget.isPPC64();
    int SlotSize = isPPC64 ? 8 : 4;
    int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
    int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
                                                         NewRetAddrLoc, true);
    EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
    SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
    Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
                         MachinePointerInfo::getFixedStack(MF, NewRetAddr));

    // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
    // slot as the FP is never overwritten.
    if (Subtarget.isDarwinABI()) {
      int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset();
      int NewFPIdx = MF.getFrameInfo().CreateFixedObject(SlotSize, NewFPLoc,
                                                         true);
      SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
      Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
                           MachinePointerInfo::getFixedStack(
                               DAG.getMachineFunction(), NewFPIdx));
    }
  }
  return Chain;
}

/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
/// the position of the argument.
static void
CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
                         SDValue Arg, int SPDiff, unsigned ArgOffset,
                         SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
  // The argument lands at its normal offset, shifted by the tail-call
  // stack-pointer adjustment.
  int Offset = ArgOffset + SPDiff;
  // Round the bit width up to whole bytes for the fixed object.
  uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
  int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
  EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
  SDValue FIN = DAG.getFrameIndex(FI, VT);
  TailCallArgumentInfo Info;
  Info.Arg = Arg;
  Info.FrameIdxOp = FIN;
  Info.FrameIdx = FI;
  TailCallArguments.push_back(Info);
}

/// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
/// stack slot. Returns the chain as result and the loaded frame pointers in
/// LROpOut/FPOpout. Used when tail calling.  No-op when SPDiff is zero.
SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
    SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
    SDValue &FPOpOut, const SDLoc &dl) const {
  if (SPDiff) {
    // Load the LR and FP stack slot for later adjusting.
    EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
    LROpOut = getReturnAddrFrameIndex(DAG);
    LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
    Chain = SDValue(LROpOut.getNode(), 1);

    // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
    // slot as the FP is never overwritten.
    if (Subtarget.isDarwinABI()) {
      FPOpOut = getFramePointerFrameIndex(DAG);
      FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo());
      Chain = SDValue(FPOpOut.getNode(), 1);
    }
  }
  return Chain;
}

/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" of size "Size".  Alignment information is
/// specified by the specific parameter attribute. The copy will be passed as
/// a byval function parameter.
/// Sometimes what we are copying is the end of a larger object, the part that
/// does not fit in registers.
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
                                         SDValue Chain, ISD::ArgFlagsTy Flags,
                                         SelectionDAG &DAG, const SDLoc &dl) {
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
                       false, false, false, MachinePointerInfo(),
                       MachinePointerInfo());
}

/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
/// tail calls.  For non-tail calls the store is emitted into MemOpChains
/// (vector arguments compute their address from the stack pointer plus
/// ArgOffset first); for tail calls the argument's destination slot is only
/// recorded in TailCallArguments for later storing.
static void LowerMemOpCallTo(
    SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
    SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
    bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
    SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
  if (!isTailCall) {
    if (isVector) {
      SDValue StackPtr;
      if (isPPC64)
        StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
      else
        StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
                           DAG.getConstant(ArgOffset, dl, PtrVT));
    }
    MemOpChains.push_back(
        DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
    // Calculate and remember argument location.
  } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
                                  TailCallArguments);
}

/// PrepareTailCall - Finish setting up a tail call: flush the recorded
/// tail-call arguments to their stack slots, save the return address (and,
/// on Darwin, the frame pointer) into the adjusted frame, and emit the
/// terminating callseq_end.  Clears and then re-sets InFlag so the preceding
/// copy-to-reg glue does not chain into this sequence.
static void
PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
                const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
                SDValue FPOp,
                SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
  // Emit a sequence of copyto/copyfrom virtual registers for arguments that
  // might overwrite each other in case of tail call optimization.
  SmallVector<SDValue, 8> MemOpChains2;
  // Do not flag preceding copytoreg stuff together with the following stuff.
  InFlag = SDValue();
  StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
                                    MemOpChains2, dl);
  if (!MemOpChains2.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);

  // Store the return address to the appropriate stack slot.
  Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);

  // Emit callseq_end just before tailcall node.
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
                             DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
  InFlag = Chain.getValue(1);
}

// Is this global address that of a function that can be called by name? (as
// opposed to something that must hold a descriptor for an indirect call).
static bool isFunctionGlobalAddress(SDValue Callee) {
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // TLS addresses are also GlobalAddressSDNodes, but they are never
    // directly callable function symbols.
    if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
        Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
      return false;

    return G->getGlobal()->getValueType()->isFunctionTy();
  }

  return false;
}

/// PrepareCall - Set up the callee operand, call opcode and operand list for
/// a PPC call node: resolves direct vs. indirect calls, applies PLT flags,
/// and (for indirect 64-bit SVR4 calls) loads the target entry point, TOC
/// and environment pointer from the function descriptor.
static unsigned
PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
            SDValue CallSeqStart, const SDLoc &dl, int SPDiff, bool isTailCall,
            bool isPatchPoint, bool hasNest,
            SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
            SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
            ImmutableCallSite CS, const PPCSubtarget &Subtarget) {
  bool isPPC64 = Subtarget.isPPC64();
  bool isSVR4ABI = Subtarget.isSVR4ABI();
  bool isELFv2ABI = Subtarget.isELFv2ABI();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Glue);    // Returns a flag for retval copy to use.

  // Default opcode; presumably refined below for other call forms — the
  // remainder of this function continues past this excerpt.
  unsigned CallOpc = PPCISD::CALL;

  bool needIndirectCall = true;
  if (!isSVR4ABI || !isPPC64)
    if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
      // If this is an absolute destination address, use the munged value.
      Callee = SDValue(Dest, 0);
      needIndirectCall = false;
    }

  // PC-relative references to external symbols should go through $stub, unless
  // we're building with the leopard linker or later, which automatically
  // synthesizes these stubs.
4854 const TargetMachine &TM = DAG.getTarget(); 4855 const Module *Mod = DAG.getMachineFunction().getFunction().getParent(); 4856 const GlobalValue *GV = nullptr; 4857 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) 4858 GV = G->getGlobal(); 4859 bool Local = TM.shouldAssumeDSOLocal(*Mod, GV); 4860 bool UsePlt = !Local && Subtarget.isTargetELF() && !isPPC64; 4861 4862 if (isFunctionGlobalAddress(Callee)) { 4863 GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee); 4864 // A call to a TLS address is actually an indirect call to a 4865 // thread-specific pointer. 4866 unsigned OpFlags = 0; 4867 if (UsePlt) 4868 OpFlags = PPCII::MO_PLT; 4869 4870 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, 4871 // every direct call is) turn it into a TargetGlobalAddress / 4872 // TargetExternalSymbol node so that legalize doesn't hack it. 4873 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, 4874 Callee.getValueType(), 0, OpFlags); 4875 needIndirectCall = false; 4876 } 4877 4878 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 4879 unsigned char OpFlags = 0; 4880 4881 if (UsePlt) 4882 OpFlags = PPCII::MO_PLT; 4883 4884 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(), 4885 OpFlags); 4886 needIndirectCall = false; 4887 } 4888 4889 if (isPatchPoint) { 4890 // We'll form an invalid direct call when lowering a patchpoint; the full 4891 // sequence for an indirect call is complicated, and many of the 4892 // instructions introduced might have side effects (and, thus, can't be 4893 // removed later). The call itself will be removed as soon as the 4894 // argument/return lowering is complete, so the fact that it has the wrong 4895 // kind of operands should not really matter. 4896 needIndirectCall = false; 4897 } 4898 4899 if (needIndirectCall) { 4900 // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair 4901 // to do the call, we can't use PPCISD::CALL. 
4902 SDValue MTCTROps[] = {Chain, Callee, InFlag}; 4903 4904 if (isSVR4ABI && isPPC64 && !isELFv2ABI) { 4905 // Function pointers in the 64-bit SVR4 ABI do not point to the function 4906 // entry point, but to the function descriptor (the function entry point 4907 // address is part of the function descriptor though). 4908 // The function descriptor is a three doubleword structure with the 4909 // following fields: function entry point, TOC base address and 4910 // environment pointer. 4911 // Thus for a call through a function pointer, the following actions need 4912 // to be performed: 4913 // 1. Save the TOC of the caller in the TOC save area of its stack 4914 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()). 4915 // 2. Load the address of the function entry point from the function 4916 // descriptor. 4917 // 3. Load the TOC of the callee from the function descriptor into r2. 4918 // 4. Load the environment pointer from the function descriptor into 4919 // r11. 4920 // 5. Branch to the function entry point address. 4921 // 6. On return of the callee, the TOC of the caller needs to be 4922 // restored (this is done in FinishCall()). 4923 // 4924 // The loads are scheduled at the beginning of the call sequence, and the 4925 // register copies are flagged together to ensure that no other 4926 // operations can be scheduled in between. E.g. without flagging the 4927 // copies together, a TOC access in the caller could be scheduled between 4928 // the assignment of the callee TOC and the branch to the callee, which 4929 // results in the TOC access going through the TOC of the callee instead 4930 // of going through the TOC of the caller, which leads to incorrect code. 4931 4932 // Load the address of the function entry point from the function 4933 // descriptor. 
4934 SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1); 4935 if (LDChain.getValueType() == MVT::Glue) 4936 LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2); 4937 4938 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors() 4939 ? (MachineMemOperand::MODereferenceable | 4940 MachineMemOperand::MOInvariant) 4941 : MachineMemOperand::MONone; 4942 4943 MachinePointerInfo MPI(CS ? CS.getCalledValue() : nullptr); 4944 SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI, 4945 /* Alignment = */ 8, MMOFlags); 4946 4947 // Load environment pointer into r11. 4948 SDValue PtrOff = DAG.getIntPtrConstant(16, dl); 4949 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff); 4950 SDValue LoadEnvPtr = 4951 DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, MPI.getWithOffset(16), 4952 /* Alignment = */ 8, MMOFlags); 4953 4954 SDValue TOCOff = DAG.getIntPtrConstant(8, dl); 4955 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff); 4956 SDValue TOCPtr = 4957 DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, MPI.getWithOffset(8), 4958 /* Alignment = */ 8, MMOFlags); 4959 4960 setUsesTOCBasePtr(DAG); 4961 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr, 4962 InFlag); 4963 Chain = TOCVal.getValue(0); 4964 InFlag = TOCVal.getValue(1); 4965 4966 // If the function call has an explicit 'nest' parameter, it takes the 4967 // place of the environment pointer. 4968 if (!hasNest) { 4969 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr, 4970 InFlag); 4971 4972 Chain = EnvVal.getValue(0); 4973 InFlag = EnvVal.getValue(1); 4974 } 4975 4976 MTCTROps[0] = Chain; 4977 MTCTROps[1] = LoadFuncPtr; 4978 MTCTROps[2] = InFlag; 4979 } 4980 4981 Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, 4982 makeArrayRef(MTCTROps, InFlag.getNode() ? 
3 : 2)); 4983 InFlag = Chain.getValue(1); 4984 4985 NodeTys.clear(); 4986 NodeTys.push_back(MVT::Other); 4987 NodeTys.push_back(MVT::Glue); 4988 Ops.push_back(Chain); 4989 CallOpc = PPCISD::BCTRL; 4990 Callee.setNode(nullptr); 4991 // Add use of X11 (holding environment pointer) 4992 if (isSVR4ABI && isPPC64 && !isELFv2ABI && !hasNest) 4993 Ops.push_back(DAG.getRegister(PPC::X11, PtrVT)); 4994 // Add CTR register as callee so a bctr can be emitted later. 4995 if (isTailCall) 4996 Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT)); 4997 } 4998 4999 // If this is a direct call, pass the chain and the callee. 5000 if (Callee.getNode()) { 5001 Ops.push_back(Chain); 5002 Ops.push_back(Callee); 5003 } 5004 // If this is a tail call add stack pointer delta. 5005 if (isTailCall) 5006 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32)); 5007 5008 // Add argument registers to the end of the list so that they are known live 5009 // into the call. 5010 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 5011 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 5012 RegsToPass[i].second.getValueType())); 5013 5014 // All calls, in both the ELF V1 and V2 ABIs, need the TOC register live 5015 // into the call. 5016 // We do need to reserve X2 to appease the verifier for the PATCHPOINT. 5017 if (isSVR4ABI && isPPC64) { 5018 setUsesTOCBasePtr(DAG); 5019 5020 // We cannot add X2 as an operand here for PATCHPOINT, because there is no 5021 // way to mark dependencies as implicit here. We will add the X2 dependency 5022 // in EmitInstrWithCustomInserter. 
5023 if (!isPatchPoint) 5024 Ops.push_back(DAG.getRegister(PPC::X2, PtrVT)); 5025 } 5026 5027 return CallOpc; 5028 } 5029 5030 SDValue PPCTargetLowering::LowerCallResult( 5031 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, 5032 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 5033 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 5034 SmallVector<CCValAssign, 16> RVLocs; 5035 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, 5036 *DAG.getContext()); 5037 5038 CCRetInfo.AnalyzeCallResult( 5039 Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold) 5040 ? RetCC_PPC_Cold 5041 : RetCC_PPC); 5042 5043 // Copy all of the result registers out of their specified physreg. 5044 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { 5045 CCValAssign &VA = RVLocs[i]; 5046 assert(VA.isRegLoc() && "Can only return in registers!"); 5047 5048 SDValue Val = DAG.getCopyFromReg(Chain, dl, 5049 VA.getLocReg(), VA.getLocVT(), InFlag); 5050 Chain = Val.getValue(1); 5051 InFlag = Val.getValue(2); 5052 5053 switch (VA.getLocInfo()) { 5054 default: llvm_unreachable("Unknown loc info!"); 5055 case CCValAssign::Full: break; 5056 case CCValAssign::AExt: 5057 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 5058 break; 5059 case CCValAssign::ZExt: 5060 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val, 5061 DAG.getValueType(VA.getValVT())); 5062 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 5063 break; 5064 case CCValAssign::SExt: 5065 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val, 5066 DAG.getValueType(VA.getValVT())); 5067 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 5068 break; 5069 } 5070 5071 InVals.push_back(Val); 5072 } 5073 5074 return Chain; 5075 } 5076 5077 SDValue PPCTargetLowering::FinishCall( 5078 CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg, 5079 bool isPatchPoint, bool hasNest, SelectionDAG &DAG, 5080 
SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue InFlag, 5081 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff, 5082 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins, 5083 SmallVectorImpl<SDValue> &InVals, ImmutableCallSite CS) const { 5084 std::vector<EVT> NodeTys; 5085 SmallVector<SDValue, 8> Ops; 5086 unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl, 5087 SPDiff, isTailCall, isPatchPoint, hasNest, 5088 RegsToPass, Ops, NodeTys, CS, Subtarget); 5089 5090 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls 5091 if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64()) 5092 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32)); 5093 5094 // When performing tail call optimization the callee pops its arguments off 5095 // the stack. Account for this here so these bytes can be pushed back on in 5096 // PPCFrameLowering::eliminateCallFramePseudoInstr. 5097 int BytesCalleePops = 5098 (CallConv == CallingConv::Fast && 5099 getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0; 5100 5101 // Add a register mask operand representing the call-preserved registers. 5102 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 5103 const uint32_t *Mask = 5104 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv); 5105 assert(Mask && "Missing call preserved mask for calling convention"); 5106 Ops.push_back(DAG.getRegisterMask(Mask)); 5107 5108 if (InFlag.getNode()) 5109 Ops.push_back(InFlag); 5110 5111 // Emit tail call. 
5112 if (isTailCall) { 5113 assert(((Callee.getOpcode() == ISD::Register && 5114 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) || 5115 Callee.getOpcode() == ISD::TargetExternalSymbol || 5116 Callee.getOpcode() == ISD::TargetGlobalAddress || 5117 isa<ConstantSDNode>(Callee)) && 5118 "Expecting an global address, external symbol, absolute value or register"); 5119 5120 DAG.getMachineFunction().getFrameInfo().setHasTailCall(); 5121 return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops); 5122 } 5123 5124 // Add a NOP immediately after the branch instruction when using the 64-bit 5125 // SVR4 ABI. At link time, if caller and callee are in a different module and 5126 // thus have a different TOC, the call will be replaced with a call to a stub 5127 // function which saves the current TOC, loads the TOC of the callee and 5128 // branches to the callee. The NOP will be replaced with a load instruction 5129 // which restores the TOC of the caller from the TOC save slot of the current 5130 // stack frame. If caller and callee belong to the same module (and have the 5131 // same TOC), the NOP will remain unchanged. 5132 5133 MachineFunction &MF = DAG.getMachineFunction(); 5134 if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64() && 5135 !isPatchPoint) { 5136 if (CallOpc == PPCISD::BCTRL) { 5137 // This is a call through a function pointer. 5138 // Restore the caller TOC from the save area into R2. 5139 // See PrepareCall() for more information about calls through function 5140 // pointers in the 64-bit SVR4 ABI. 5141 // We are using a target-specific load with r2 hard coded, because the 5142 // result of a target-independent load would never go directly into r2, 5143 // since r2 is a reserved register (which prevents the register allocator 5144 // from allocating it), resulting in an additional register being 5145 // allocated and an unnecessary move instruction being generated. 
5146 CallOpc = PPCISD::BCTRL_LOAD_TOC; 5147 5148 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 5149 SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT); 5150 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); 5151 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl); 5152 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff); 5153 5154 // The address needs to go after the chain input but before the flag (or 5155 // any other variadic arguments). 5156 Ops.insert(std::next(Ops.begin()), AddTOC); 5157 } else if (CallOpc == PPCISD::CALL && 5158 !callsShareTOCBase(&MF.getFunction(), Callee, DAG.getTarget())) { 5159 // Otherwise insert NOP for non-local calls. 5160 CallOpc = PPCISD::CALL_NOP; 5161 } 5162 } 5163 5164 Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); 5165 InFlag = Chain.getValue(1); 5166 5167 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), 5168 DAG.getIntPtrConstant(BytesCalleePops, dl, true), 5169 InFlag, dl); 5170 if (!Ins.empty()) 5171 InFlag = Chain.getValue(1); 5172 5173 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, 5174 Ins, dl, DAG, InVals); 5175 } 5176 5177 SDValue 5178 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, 5179 SmallVectorImpl<SDValue> &InVals) const { 5180 SelectionDAG &DAG = CLI.DAG; 5181 SDLoc &dl = CLI.DL; 5182 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 5183 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 5184 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 5185 SDValue Chain = CLI.Chain; 5186 SDValue Callee = CLI.Callee; 5187 bool &isTailCall = CLI.IsTailCall; 5188 CallingConv::ID CallConv = CLI.CallConv; 5189 bool isVarArg = CLI.IsVarArg; 5190 bool isPatchPoint = CLI.IsPatchPoint; 5191 ImmutableCallSite CS = CLI.CS; 5192 5193 if (isTailCall) { 5194 if (Subtarget.useLongCalls() && !(CS && CS.isMustTailCall())) 5195 isTailCall = false; 5196 else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) 5197 isTailCall = 5198 
IsEligibleForTailCallOptimization_64SVR4(Callee, CallConv, CS, 5199 isVarArg, Outs, Ins, DAG); 5200 else 5201 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, 5202 Ins, DAG); 5203 if (isTailCall) { 5204 ++NumTailCalls; 5205 if (!getTargetMachine().Options.GuaranteedTailCallOpt) 5206 ++NumSiblingCalls; 5207 5208 assert(isa<GlobalAddressSDNode>(Callee) && 5209 "Callee should be an llvm::Function object."); 5210 LLVM_DEBUG( 5211 const GlobalValue *GV = 5212 cast<GlobalAddressSDNode>(Callee)->getGlobal(); 5213 const unsigned Width = 5214 80 - strlen("TCO caller: ") - strlen(", callee linkage: 0, 0"); 5215 dbgs() << "TCO caller: " 5216 << left_justify(DAG.getMachineFunction().getName(), Width) 5217 << ", callee linkage: " << GV->getVisibility() << ", " 5218 << GV->getLinkage() << "\n"); 5219 } 5220 } 5221 5222 if (!isTailCall && CS && CS.isMustTailCall()) 5223 report_fatal_error("failed to perform tail call elimination on a call " 5224 "site marked musttail"); 5225 5226 // When long calls (i.e. indirect calls) are always used, calls are always 5227 // made via function pointer. If we have a function name, first translate it 5228 // into a pointer. 
  if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
      !isTailCall)
    Callee = LowerGlobalAddress(Callee, DAG);

  // Dispatch to the ABI-specific implementation.
  if (Subtarget.isSVR4ABI()) {
    if (Subtarget.isPPC64())
      return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
                              isTailCall, isPatchPoint, Outs, OutVals, Ins,
                              dl, DAG, InVals, CS);
    else
      return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
                              isTailCall, isPatchPoint, Outs, OutVals, Ins,
                              dl, DAG, InVals, CS);
  }

  return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
                          isTailCall, isPatchPoint, Outs, OutVals, Ins,
                          dl, DAG, InVals, CS);
}

// Lower an outgoing call for the 32-bit SVR4 ABI: assign argument locations,
// copy by-value aggregates, store stack-passed arguments, and hand off to
// FinishCall to emit the call node.
SDValue PPCTargetLowering::LowerCall_32SVR4(
    SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
    bool isTailCall, bool isPatchPoint,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    ImmutableCallSite CS) const {
  // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
  // of the 32-bit SVR4 ABI stack frame layout.

  assert((CallConv == CallingConv::C ||
          CallConv == CallingConv::Cold ||
          CallConv == CallingConv::Fast) && "Unknown calling convention!");

  unsigned PtrByteSize = 4;

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamic
  // alloca and restoring the callers stack pointer in this functions epilog.
  // This is done because by tail calling the called function might overwrite
  // the value in this function's (MF) stack pointer stack slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, parameter list area and the part of the local variable space which
  // contains copies of aggregates which are passed by value.

  // Assign locations to all of the outgoing arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  PPCCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
                       PtrByteSize);
  if (useSoftFloat())
    CCInfo.PreAnalyzeCallOperands(Outs);

  if (isVarArg) {
    // Handle fixed and variable vector arguments differently.
    // Fixed vector arguments go into registers as long as registers are
    // available. Variable vector arguments always go into memory.
    unsigned NumArgs = Outs.size();

    for (unsigned i = 0; i != NumArgs; ++i) {
      MVT ArgVT = Outs[i].VT;
      ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
      bool Result;

      if (Outs[i].IsFixed) {
        Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
                               CCInfo);
      } else {
        Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
                                      ArgFlags, CCInfo);
      }

      if (Result) {
#ifndef NDEBUG
        errs() << "Call operand #" << i << " has unhandled type "
               << EVT(ArgVT).getEVTString() << "\n";
#endif
        llvm_unreachable(nullptr);
      }
    }
  } else {
    // All arguments are treated the same.
    CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
  }
  CCInfo.clearWasPPCF128();

  // Assign locations to all of the outgoing aggregate by value arguments.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, isVarArg, MF, ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);

  CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);

  // Size of the linkage area, parameter list area and the part of the local
  // space variable where copies of aggregates which are passed by value are
  // stored.
  unsigned NumBytes = CCByValInfo.getNextStackOffset();

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so it can be moved somewhere else
  // later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
  SmallVector<SDValue, 8> MemOpChains;

  bool seenFloatArg = false;
  // Walk the register/memloc assignments, inserting copies/loads.
  // j indexes ByValArgLocs in step with the by-value arguments in ArgLocs.
  for (unsigned i = 0, j = 0, e = ArgLocs.size();
       i != e;
       ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    if (Flags.isByVal()) {
      // Argument is an aggregate which is passed by value, thus we need to
      // create a copy of it in the local variable space of the current stack
      // frame (which is the stack frame of the caller) and pass the address of
      // this copy to the callee.
      assert((j < ByValArgLocs.size()) && "Index out of bounds!");
      CCValAssign &ByValVA = ByValArgLocs[j++];
      assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");

      // Memory reserved in the local variable space of the callers stack frame.
      unsigned LocMemOffset = ByValVA.getLocMemOffset();

      SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
      PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
                           StackPtr, PtrOff);

      // Create a copy of the argument in the local area of the current
      // stack frame.
      SDValue MemcpyCall =
        CreateCopyOfByValArgument(Arg, PtrOff,
                                  CallSeqStart.getNode()->getOperand(0),
                                  Flags, DAG, dl);

      // This must go outside the CALLSEQ_START..END.
      SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
                                                     SDLoc(MemcpyCall));
      DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
                             NewCallSeqStart.getNode());
      Chain = CallSeqStart = NewCallSeqStart;

      // Pass the address of the aggregate copy on the stack either in a
      // physical register or in the parameter list area of the current stack
      // frame to the callee.
      Arg = PtrOff;
    }

    // When useCRBits() is true, there can be i1 arguments.
    // It is because getRegisterType(MVT::i1) => MVT::i1,
    // and for other integer types getRegisterType() => MVT::i32.
    // Extend i1 and ensure callee will get i32.
    if (Arg.getValueType() == MVT::i1)
      Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
                        dl, MVT::i32, Arg);

    if (VA.isRegLoc()) {
      seenFloatArg |= VA.getLocVT().isFloatingPoint();
      // Put argument in a physical register.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      // Put argument in the parameter list area of the current stack frame.
      assert(VA.isMemLoc());
      unsigned LocMemOffset = VA.getLocMemOffset();

      if (!isTailCall) {
        SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
        PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
                             StackPtr, PtrOff);

        MemOpChains.push_back(
            DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
      } else {
        // Calculate and remember argument location.
        CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
                                 TailCallArguments);
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  // Set CR bit 6 to true if this is a vararg call with floating args passed in
  // registers.
  if (isVarArg) {
    SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue Ops[] = { Chain, InFlag };

    Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
                        dl, VTs, makeArrayRef(Ops, InFlag.getNode() ?
                                              2 : 1));

    InFlag = Chain.getValue(1);
  }

  if (isTailCall)
    PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
                    TailCallArguments);

  return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
                    /* unused except on PPC64 ELFv1 */ false, DAG,
                    RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
                    NumBytes, Ins, InVals, CS);
}

// Copy an argument into memory, being careful to do this outside the
// call sequence for the call to which the argument belongs. Returns the
// replacement CALLSEQ_START so the caller can continue chaining off it.
SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
    SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
    SelectionDAG &DAG, const SDLoc &dl) const {
  // Chain the memcpy off CALLSEQ_START's incoming chain so it is emitted
  // before the call sequence begins.
  SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
                      CallSeqStart.getNode()->getOperand(0),
                      Flags, DAG, dl);
  // The MEMCPY must go outside the CALLSEQ_START..END.
  int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
  SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
                                                 SDLoc(MemcpyCall));
  DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
                         NewCallSeqStart.getNode());
  return NewCallSeqStart;
}

// Lower an outgoing call for the 64-bit SVR4 (ELFv1/ELFv2) ABI.
SDValue PPCTargetLowering::LowerCall_64SVR4(
    SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
    bool isTailCall, bool isPatchPoint,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    ImmutableCallSite CS) const {
  bool isELFv2ABI = Subtarget.isELFv2ABI();
  bool isLittleEndian = Subtarget.isLittleEndian();
  unsigned NumOps = Outs.size();
  bool hasNest = false;
  bool IsSibCall = false;

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  unsigned PtrByteSize = 8;

  MachineFunction &MF = DAG.getMachineFunction();

5508 if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt) 5509 IsSibCall = true; 5510 5511 // Mark this function as potentially containing a function that contains a 5512 // tail call. As a consequence the frame pointer will be used for dynamicalloc 5513 // and restoring the callers stack pointer in this functions epilog. This is 5514 // done because by tail calling the called function might overwrite the value 5515 // in this function's (MF) stack pointer stack slot 0(SP). 5516 if (getTargetMachine().Options.GuaranteedTailCallOpt && 5517 CallConv == CallingConv::Fast) 5518 MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); 5519 5520 assert(!(CallConv == CallingConv::Fast && isVarArg) && 5521 "fastcc not supported on varargs functions"); 5522 5523 // Count how many bytes are to be pushed on the stack, including the linkage 5524 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes 5525 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage 5526 // area is 32 bytes reserved space for [SP][CR][LR][TOC]. 5527 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); 5528 unsigned NumBytes = LinkageSize; 5529 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 5530 unsigned &QFPR_idx = FPR_idx; 5531 5532 static const MCPhysReg GPR[] = { 5533 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 5534 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 5535 }; 5536 static const MCPhysReg VR[] = { 5537 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 5538 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 5539 }; 5540 5541 const unsigned NumGPRs = array_lengthof(GPR); 5542 const unsigned NumFPRs = useSoftFloat() ? 0 : 13; 5543 const unsigned NumVRs = array_lengthof(VR); 5544 const unsigned NumQFPRs = NumFPRs; 5545 5546 // On ELFv2, we can avoid allocating the parameter area if all the arguments 5547 // can be passed to the callee in registers. 5548 // For the fast calling convention, there is another check below. 
5549 // Note: We should keep consistent with LowerFormalArguments_64SVR4() 5550 bool HasParameterArea = !isELFv2ABI || isVarArg || CallConv == CallingConv::Fast; 5551 if (!HasParameterArea) { 5552 unsigned ParamAreaSize = NumGPRs * PtrByteSize; 5553 unsigned AvailableFPRs = NumFPRs; 5554 unsigned AvailableVRs = NumVRs; 5555 unsigned NumBytesTmp = NumBytes; 5556 for (unsigned i = 0; i != NumOps; ++i) { 5557 if (Outs[i].Flags.isNest()) continue; 5558 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags, 5559 PtrByteSize, LinkageSize, ParamAreaSize, 5560 NumBytesTmp, AvailableFPRs, AvailableVRs, 5561 Subtarget.hasQPX())) 5562 HasParameterArea = true; 5563 } 5564 } 5565 5566 // When using the fast calling convention, we don't provide backing for 5567 // arguments that will be in registers. 5568 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0; 5569 5570 // Avoid allocating parameter area for fastcc functions if all the arguments 5571 // can be passed in the registers. 5572 if (CallConv == CallingConv::Fast) 5573 HasParameterArea = false; 5574 5575 // Add up all the space actually used. 
5576 for (unsigned i = 0; i != NumOps; ++i) { 5577 ISD::ArgFlagsTy Flags = Outs[i].Flags; 5578 EVT ArgVT = Outs[i].VT; 5579 EVT OrigVT = Outs[i].ArgVT; 5580 5581 if (Flags.isNest()) 5582 continue; 5583 5584 if (CallConv == CallingConv::Fast) { 5585 if (Flags.isByVal()) { 5586 NumGPRsUsed += (Flags.getByValSize()+7)/8; 5587 if (NumGPRsUsed > NumGPRs) 5588 HasParameterArea = true; 5589 } else { 5590 switch (ArgVT.getSimpleVT().SimpleTy) { 5591 default: llvm_unreachable("Unexpected ValueType for argument!"); 5592 case MVT::i1: 5593 case MVT::i32: 5594 case MVT::i64: 5595 if (++NumGPRsUsed <= NumGPRs) 5596 continue; 5597 break; 5598 case MVT::v4i32: 5599 case MVT::v8i16: 5600 case MVT::v16i8: 5601 case MVT::v2f64: 5602 case MVT::v2i64: 5603 case MVT::v1i128: 5604 case MVT::f128: 5605 if (++NumVRsUsed <= NumVRs) 5606 continue; 5607 break; 5608 case MVT::v4f32: 5609 // When using QPX, this is handled like a FP register, otherwise, it 5610 // is an Altivec register. 5611 if (Subtarget.hasQPX()) { 5612 if (++NumFPRsUsed <= NumFPRs) 5613 continue; 5614 } else { 5615 if (++NumVRsUsed <= NumVRs) 5616 continue; 5617 } 5618 break; 5619 case MVT::f32: 5620 case MVT::f64: 5621 case MVT::v4f64: // QPX 5622 case MVT::v4i1: // QPX 5623 if (++NumFPRsUsed <= NumFPRs) 5624 continue; 5625 break; 5626 } 5627 HasParameterArea = true; 5628 } 5629 } 5630 5631 /* Respect alignment of argument on the stack. 
*/ 5632 unsigned Align = 5633 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); 5634 NumBytes = ((NumBytes + Align - 1) / Align) * Align; 5635 5636 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); 5637 if (Flags.isInConsecutiveRegsLast()) 5638 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 5639 } 5640 5641 unsigned NumBytesActuallyUsed = NumBytes; 5642 5643 // In the old ELFv1 ABI, 5644 // the prolog code of the callee may store up to 8 GPR argument registers to 5645 // the stack, allowing va_start to index over them in memory if its varargs. 5646 // Because we cannot tell if this is needed on the caller side, we have to 5647 // conservatively assume that it is needed. As such, make sure we have at 5648 // least enough stack space for the caller to store the 8 GPRs. 5649 // In the ELFv2 ABI, we allocate the parameter area iff a callee 5650 // really requires memory operands, e.g. a vararg function. 5651 if (HasParameterArea) 5652 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize); 5653 else 5654 NumBytes = LinkageSize; 5655 5656 // Tail call needs the stack to be aligned. 5657 if (getTargetMachine().Options.GuaranteedTailCallOpt && 5658 CallConv == CallingConv::Fast) 5659 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes); 5660 5661 int SPDiff = 0; 5662 5663 // Calculate by how many bytes the stack has to be adjusted in case of tail 5664 // call optimization. 5665 if (!IsSibCall) 5666 SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); 5667 5668 // To protect arguments on the stack from being clobbered in a tail call, 5669 // force all the loads to happen before doing any other lowering. 5670 if (isTailCall) 5671 Chain = DAG.getStackArgumentTokenFactor(Chain); 5672 5673 // Adjust the stack pointer for the new arguments... 
5674 // These operations are automatically eliminated by the prolog/epilog pass 5675 if (!IsSibCall) 5676 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl); 5677 SDValue CallSeqStart = Chain; 5678 5679 // Load the return address and frame pointer so it can be move somewhere else 5680 // later. 5681 SDValue LROp, FPOp; 5682 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl); 5683 5684 // Set up a copy of the stack pointer for use loading and storing any 5685 // arguments that may not fit in the registers available for argument 5686 // passing. 5687 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 5688 5689 // Figure out which arguments are going to go in registers, and which in 5690 // memory. Also, if this is a vararg function, floating point operations 5691 // must be stored to our stack, and loaded into integer regs as well, if 5692 // any integer regs are available for argument passing. 5693 unsigned ArgOffset = LinkageSize; 5694 5695 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 5696 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 5697 5698 SmallVector<SDValue, 8> MemOpChains; 5699 for (unsigned i = 0; i != NumOps; ++i) { 5700 SDValue Arg = OutVals[i]; 5701 ISD::ArgFlagsTy Flags = Outs[i].Flags; 5702 EVT ArgVT = Outs[i].VT; 5703 EVT OrigVT = Outs[i].ArgVT; 5704 5705 // PtrOff will be used to store the current argument to the stack if a 5706 // register cannot be found for it. 5707 SDValue PtrOff; 5708 5709 // We re-align the argument offset for each argument, except when using the 5710 // fast calling convention, when we need to make sure we do that only when 5711 // we'll actually use a stack slot. 5712 auto ComputePtrOff = [&]() { 5713 /* Respect alignment of argument on the stack. 
*/ 5714 unsigned Align = 5715 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); 5716 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; 5717 5718 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType()); 5719 5720 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 5721 }; 5722 5723 if (CallConv != CallingConv::Fast) { 5724 ComputePtrOff(); 5725 5726 /* Compute GPR index associated with argument offset. */ 5727 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; 5728 GPR_idx = std::min(GPR_idx, NumGPRs); 5729 } 5730 5731 // Promote integers to 64-bit values. 5732 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) { 5733 // FIXME: Should this use ANY_EXTEND if neither sext nor zext? 5734 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 5735 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); 5736 } 5737 5738 // FIXME memcpy is used way more than necessary. Correctness first. 5739 // Note: "by value" is code for passing a structure by value, not 5740 // basic types. 5741 if (Flags.isByVal()) { 5742 // Note: Size includes alignment padding, so 5743 // struct x { short a; char b; } 5744 // will have Size = 4. With #pragma pack(1), it will have Size = 3. 5745 // These are the proper values we need for right-justifying the 5746 // aggregate in a parameter register. 5747 unsigned Size = Flags.getByValSize(); 5748 5749 // An empty aggregate parameter takes up no storage and no 5750 // registers. 5751 if (Size == 0) 5752 continue; 5753 5754 if (CallConv == CallingConv::Fast) 5755 ComputePtrOff(); 5756 5757 // All aggregates smaller than 8 bytes must be passed right-justified. 5758 if (Size==1 || Size==2 || Size==4) { 5759 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? 
MVT::i16 : MVT::i32); 5760 if (GPR_idx != NumGPRs) { 5761 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, 5762 MachinePointerInfo(), VT); 5763 MemOpChains.push_back(Load.getValue(1)); 5764 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5765 5766 ArgOffset += PtrByteSize; 5767 continue; 5768 } 5769 } 5770 5771 if (GPR_idx == NumGPRs && Size < 8) { 5772 SDValue AddPtr = PtrOff; 5773 if (!isLittleEndian) { 5774 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl, 5775 PtrOff.getValueType()); 5776 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 5777 } 5778 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 5779 CallSeqStart, 5780 Flags, DAG, dl); 5781 ArgOffset += PtrByteSize; 5782 continue; 5783 } 5784 // Copy entire object into memory. There are cases where gcc-generated 5785 // code assumes it is there, even if it could be put entirely into 5786 // registers. (This is not what the doc says.) 5787 5788 // FIXME: The above statement is likely due to a misunderstanding of the 5789 // documents. All arguments must be copied into the parameter area BY 5790 // THE CALLEE in the event that the callee takes the address of any 5791 // formal argument. That has not yet been implemented. However, it is 5792 // reasonable to use the stack area as a staging area for the register 5793 // load. 5794 5795 // Skip this for small aggregates, as we will use the same slot for a 5796 // right-justified copy, below. 5797 if (Size >= 8) 5798 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, 5799 CallSeqStart, 5800 Flags, DAG, dl); 5801 5802 // When a register is available, pass a small aggregate right-justified. 5803 if (Size < 8 && GPR_idx != NumGPRs) { 5804 // The easiest way to get this right-justified in a register 5805 // is to copy the structure into the rightmost portion of a 5806 // local variable slot, then load the whole slot into the 5807 // register. 
5808 // FIXME: The memcpy seems to produce pretty awful code for 5809 // small aggregates, particularly for packed ones. 5810 // FIXME: It would be preferable to use the slot in the 5811 // parameter save area instead of a new local variable. 5812 SDValue AddPtr = PtrOff; 5813 if (!isLittleEndian) { 5814 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType()); 5815 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 5816 } 5817 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 5818 CallSeqStart, 5819 Flags, DAG, dl); 5820 5821 // Load the slot into the register. 5822 SDValue Load = 5823 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo()); 5824 MemOpChains.push_back(Load.getValue(1)); 5825 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5826 5827 // Done with this argument. 5828 ArgOffset += PtrByteSize; 5829 continue; 5830 } 5831 5832 // For aggregates larger than PtrByteSize, copy the pieces of the 5833 // object that fit into registers from the parameter save area. 5834 for (unsigned j=0; j<Size; j+=PtrByteSize) { 5835 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType()); 5836 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); 5837 if (GPR_idx != NumGPRs) { 5838 SDValue Load = 5839 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo()); 5840 MemOpChains.push_back(Load.getValue(1)); 5841 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5842 ArgOffset += PtrByteSize; 5843 } else { 5844 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize; 5845 break; 5846 } 5847 } 5848 continue; 5849 } 5850 5851 switch (Arg.getSimpleValueType().SimpleTy) { 5852 default: llvm_unreachable("Unexpected ValueType for argument!"); 5853 case MVT::i1: 5854 case MVT::i32: 5855 case MVT::i64: 5856 if (Flags.isNest()) { 5857 // The 'nest' parameter, if any, is passed in R11. 
5858 RegsToPass.push_back(std::make_pair(PPC::X11, Arg)); 5859 hasNest = true; 5860 break; 5861 } 5862 5863 // These can be scalar arguments or elements of an integer array type 5864 // passed directly. Clang may use those instead of "byval" aggregate 5865 // types to avoid forcing arguments to memory unnecessarily. 5866 if (GPR_idx != NumGPRs) { 5867 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); 5868 } else { 5869 if (CallConv == CallingConv::Fast) 5870 ComputePtrOff(); 5871 5872 assert(HasParameterArea && 5873 "Parameter area must exist to pass an argument in memory."); 5874 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5875 true, isTailCall, false, MemOpChains, 5876 TailCallArguments, dl); 5877 if (CallConv == CallingConv::Fast) 5878 ArgOffset += PtrByteSize; 5879 } 5880 if (CallConv != CallingConv::Fast) 5881 ArgOffset += PtrByteSize; 5882 break; 5883 case MVT::f32: 5884 case MVT::f64: { 5885 // These can be scalar arguments or elements of a float array type 5886 // passed directly. The latter are used to implement ELFv2 homogenous 5887 // float aggregates. 5888 5889 // Named arguments go into FPRs first, and once they overflow, the 5890 // remaining arguments go into GPRs and then the parameter save area. 5891 // Unnamed arguments for vararg functions always go to GPRs and 5892 // then the parameter save area. For now, put all arguments to vararg 5893 // routines always in both locations (FPR *and* GPR or stack slot). 5894 bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs; 5895 bool NeededLoad = false; 5896 5897 // First load the argument into the next available FPR. 5898 if (FPR_idx != NumFPRs) 5899 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); 5900 5901 // Next, load the argument into GPR or stack slot if needed. 
5902 if (!NeedGPROrStack) 5903 ; 5904 else if (GPR_idx != NumGPRs && CallConv != CallingConv::Fast) { 5905 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8 5906 // once we support fp <-> gpr moves. 5907 5908 // In the non-vararg case, this can only ever happen in the 5909 // presence of f32 array types, since otherwise we never run 5910 // out of FPRs before running out of GPRs. 5911 SDValue ArgVal; 5912 5913 // Double values are always passed in a single GPR. 5914 if (Arg.getValueType() != MVT::f32) { 5915 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg); 5916 5917 // Non-array float values are extended and passed in a GPR. 5918 } else if (!Flags.isInConsecutiveRegs()) { 5919 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); 5920 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal); 5921 5922 // If we have an array of floats, we collect every odd element 5923 // together with its predecessor into one GPR. 5924 } else if (ArgOffset % PtrByteSize != 0) { 5925 SDValue Lo, Hi; 5926 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]); 5927 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); 5928 if (!isLittleEndian) 5929 std::swap(Lo, Hi); 5930 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); 5931 5932 // The final element, if even, goes into the first half of a GPR. 5933 } else if (Flags.isInConsecutiveRegsLast()) { 5934 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); 5935 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal); 5936 if (!isLittleEndian) 5937 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal, 5938 DAG.getConstant(32, dl, MVT::i32)); 5939 5940 // Non-final even elements are skipped; they will be handled 5941 // together the with subsequent argument on the next go-around. 
5942 } else 5943 ArgVal = SDValue(); 5944 5945 if (ArgVal.getNode()) 5946 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal)); 5947 } else { 5948 if (CallConv == CallingConv::Fast) 5949 ComputePtrOff(); 5950 5951 // Single-precision floating-point values are mapped to the 5952 // second (rightmost) word of the stack doubleword. 5953 if (Arg.getValueType() == MVT::f32 && 5954 !isLittleEndian && !Flags.isInConsecutiveRegs()) { 5955 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType()); 5956 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); 5957 } 5958 5959 assert(HasParameterArea && 5960 "Parameter area must exist to pass an argument in memory."); 5961 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5962 true, isTailCall, false, MemOpChains, 5963 TailCallArguments, dl); 5964 5965 NeededLoad = true; 5966 } 5967 // When passing an array of floats, the array occupies consecutive 5968 // space in the argument area; only round up to the next doubleword 5969 // at the end of the array. Otherwise, each float takes 8 bytes. 5970 if (CallConv != CallingConv::Fast || NeededLoad) { 5971 ArgOffset += (Arg.getValueType() == MVT::f32 && 5972 Flags.isInConsecutiveRegs()) ? 4 : 8; 5973 if (Flags.isInConsecutiveRegsLast()) 5974 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 5975 } 5976 break; 5977 } 5978 case MVT::v4f32: 5979 case MVT::v4i32: 5980 case MVT::v8i16: 5981 case MVT::v16i8: 5982 case MVT::v2f64: 5983 case MVT::v2i64: 5984 case MVT::v1i128: 5985 case MVT::f128: 5986 if (!Subtarget.hasQPX()) { 5987 // These can be scalar arguments or elements of a vector array type 5988 // passed directly. The latter are used to implement ELFv2 homogenous 5989 // vector aggregates. 5990 5991 // For a varargs call, named arguments go into VRs or on the stack as 5992 // usual; unnamed arguments always go to the stack or the corresponding 5993 // GPRs when within range. 
For now, we always put the value in both 5994 // locations (or even all three). 5995 if (isVarArg) { 5996 assert(HasParameterArea && 5997 "Parameter area must exist if we have a varargs call."); 5998 // We could elide this store in the case where the object fits 5999 // entirely in R registers. Maybe later. 6000 SDValue Store = 6001 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()); 6002 MemOpChains.push_back(Store); 6003 if (VR_idx != NumVRs) { 6004 SDValue Load = 6005 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo()); 6006 MemOpChains.push_back(Load.getValue(1)); 6007 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); 6008 } 6009 ArgOffset += 16; 6010 for (unsigned i=0; i<16; i+=PtrByteSize) { 6011 if (GPR_idx == NumGPRs) 6012 break; 6013 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, 6014 DAG.getConstant(i, dl, PtrVT)); 6015 SDValue Load = 6016 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo()); 6017 MemOpChains.push_back(Load.getValue(1)); 6018 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 6019 } 6020 break; 6021 } 6022 6023 // Non-varargs Altivec params go into VRs or on the stack. 
6024 if (VR_idx != NumVRs) { 6025 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg)); 6026 } else { 6027 if (CallConv == CallingConv::Fast) 6028 ComputePtrOff(); 6029 6030 assert(HasParameterArea && 6031 "Parameter area must exist to pass an argument in memory."); 6032 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 6033 true, isTailCall, true, MemOpChains, 6034 TailCallArguments, dl); 6035 if (CallConv == CallingConv::Fast) 6036 ArgOffset += 16; 6037 } 6038 6039 if (CallConv != CallingConv::Fast) 6040 ArgOffset += 16; 6041 break; 6042 } // not QPX 6043 6044 assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 && 6045 "Invalid QPX parameter type"); 6046 6047 LLVM_FALLTHROUGH; 6048 case MVT::v4f64: 6049 case MVT::v4i1: { 6050 bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32; 6051 if (isVarArg) { 6052 assert(HasParameterArea && 6053 "Parameter area must exist if we have a varargs call."); 6054 // We could elide this store in the case where the object fits 6055 // entirely in R registers. Maybe later. 6056 SDValue Store = 6057 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()); 6058 MemOpChains.push_back(Store); 6059 if (QFPR_idx != NumQFPRs) { 6060 SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl, Store, 6061 PtrOff, MachinePointerInfo()); 6062 MemOpChains.push_back(Load.getValue(1)); 6063 RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load)); 6064 } 6065 ArgOffset += (IsF32 ? 16 : 32); 6066 for (unsigned i = 0; i < (IsF32 ? 16U : 32U); i += PtrByteSize) { 6067 if (GPR_idx == NumGPRs) 6068 break; 6069 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, 6070 DAG.getConstant(i, dl, PtrVT)); 6071 SDValue Load = 6072 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo()); 6073 MemOpChains.push_back(Load.getValue(1)); 6074 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 6075 } 6076 break; 6077 } 6078 6079 // Non-varargs QPX params go into registers or on the stack. 
6080 if (QFPR_idx != NumQFPRs) { 6081 RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg)); 6082 } else { 6083 if (CallConv == CallingConv::Fast) 6084 ComputePtrOff(); 6085 6086 assert(HasParameterArea && 6087 "Parameter area must exist to pass an argument in memory."); 6088 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 6089 true, isTailCall, true, MemOpChains, 6090 TailCallArguments, dl); 6091 if (CallConv == CallingConv::Fast) 6092 ArgOffset += (IsF32 ? 16 : 32); 6093 } 6094 6095 if (CallConv != CallingConv::Fast) 6096 ArgOffset += (IsF32 ? 16 : 32); 6097 break; 6098 } 6099 } 6100 } 6101 6102 assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) && 6103 "mismatch in size of parameter area"); 6104 (void)NumBytesActuallyUsed; 6105 6106 if (!MemOpChains.empty()) 6107 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); 6108 6109 // Check if this is an indirect call (MTCTR/BCTRL). 6110 // See PrepareCall() for more information about calls through function 6111 // pointers in the 64-bit SVR4 ABI. 6112 if (!isTailCall && !isPatchPoint && 6113 !isFunctionGlobalAddress(Callee) && 6114 !isa<ExternalSymbolSDNode>(Callee)) { 6115 // Load r2 into a virtual register and store it to the TOC save area. 6116 setUsesTOCBasePtr(DAG); 6117 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64); 6118 // TOC save area offset. 6119 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); 6120 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl); 6121 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 6122 Chain = DAG.getStore( 6123 Val.getValue(1), dl, Val, AddPtr, 6124 MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset)); 6125 // In the ELFv2 ABI, R12 must contain the address of an indirect callee. 6126 // This does not mean the MTCTR instruction must use R12; it's easier 6127 // to model this as an extra parameter, so do that. 
6128 if (isELFv2ABI && !isPatchPoint) 6129 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee)); 6130 } 6131 6132 // Build a sequence of copy-to-reg nodes chained together with token chain 6133 // and flag operands which copy the outgoing args into the appropriate regs. 6134 SDValue InFlag; 6135 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 6136 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 6137 RegsToPass[i].second, InFlag); 6138 InFlag = Chain.getValue(1); 6139 } 6140 6141 if (isTailCall && !IsSibCall) 6142 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp, 6143 TailCallArguments); 6144 6145 return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, hasNest, 6146 DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee, 6147 SPDiff, NumBytes, Ins, InVals, CS); 6148 } 6149 6150 SDValue PPCTargetLowering::LowerCall_Darwin( 6151 SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, 6152 bool isTailCall, bool isPatchPoint, 6153 const SmallVectorImpl<ISD::OutputArg> &Outs, 6154 const SmallVectorImpl<SDValue> &OutVals, 6155 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 6156 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, 6157 ImmutableCallSite CS) const { 6158 unsigned NumOps = Outs.size(); 6159 6160 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 6161 bool isPPC64 = PtrVT == MVT::i64; 6162 unsigned PtrByteSize = isPPC64 ? 8 : 4; 6163 6164 MachineFunction &MF = DAG.getMachineFunction(); 6165 6166 // Mark this function as potentially containing a function that contains a 6167 // tail call. As a consequence the frame pointer will be used for dynamicalloc 6168 // and restoring the callers stack pointer in this functions epilog. This is 6169 // done because by tail calling the called function might overwrite the value 6170 // in this function's (MF) stack pointer stack slot 0(SP). 
6171 if (getTargetMachine().Options.GuaranteedTailCallOpt && 6172 CallConv == CallingConv::Fast) 6173 MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); 6174 6175 // Count how many bytes are to be pushed on the stack, including the linkage 6176 // area, and parameter passing area. We start with 24/48 bytes, which is 6177 // prereserved space for [SP][CR][LR][3 x unused]. 6178 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); 6179 unsigned NumBytes = LinkageSize; 6180 6181 // Add up all the space actually used. 6182 // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually 6183 // they all go in registers, but we must reserve stack space for them for 6184 // possible use by the caller. In varargs or 64-bit calls, parameters are 6185 // assigned stack space in order, with padding so Altivec parameters are 6186 // 16-byte aligned. 6187 unsigned nAltivecParamsAtEnd = 0; 6188 for (unsigned i = 0; i != NumOps; ++i) { 6189 ISD::ArgFlagsTy Flags = Outs[i].Flags; 6190 EVT ArgVT = Outs[i].VT; 6191 // Varargs Altivec parameters are padded to a 16 byte boundary. 6192 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || 6193 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || 6194 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) { 6195 if (!isVarArg && !isPPC64) { 6196 // Non-varargs Altivec parameters go after all the non-Altivec 6197 // parameters; handle those later so we know how much padding we need. 6198 nAltivecParamsAtEnd++; 6199 continue; 6200 } 6201 // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary. 6202 NumBytes = ((NumBytes+15)/16)*16; 6203 } 6204 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); 6205 } 6206 6207 // Allow for Altivec parameters at the end, if needed. 
6208 if (nAltivecParamsAtEnd) { 6209 NumBytes = ((NumBytes+15)/16)*16; 6210 NumBytes += 16*nAltivecParamsAtEnd; 6211 } 6212 6213 // The prolog code of the callee may store up to 8 GPR argument registers to 6214 // the stack, allowing va_start to index over them in memory if its varargs. 6215 // Because we cannot tell if this is needed on the caller side, we have to 6216 // conservatively assume that it is needed. As such, make sure we have at 6217 // least enough stack space for the caller to store the 8 GPRs. 6218 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize); 6219 6220 // Tail call needs the stack to be aligned. 6221 if (getTargetMachine().Options.GuaranteedTailCallOpt && 6222 CallConv == CallingConv::Fast) 6223 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes); 6224 6225 // Calculate by how many bytes the stack has to be adjusted in case of tail 6226 // call optimization. 6227 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); 6228 6229 // To protect arguments on the stack from being clobbered in a tail call, 6230 // force all the loads to happen before doing any other lowering. 6231 if (isTailCall) 6232 Chain = DAG.getStackArgumentTokenFactor(Chain); 6233 6234 // Adjust the stack pointer for the new arguments... 6235 // These operations are automatically eliminated by the prolog/epilog pass 6236 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl); 6237 SDValue CallSeqStart = Chain; 6238 6239 // Load the return address and frame pointer so it can be move somewhere else 6240 // later. 6241 SDValue LROp, FPOp; 6242 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl); 6243 6244 // Set up a copy of the stack pointer for use loading and storing any 6245 // arguments that may not fit in the registers available for argument 6246 // passing. 
6247 SDValue StackPtr; 6248 if (isPPC64) 6249 StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 6250 else 6251 StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 6252 6253 // Figure out which arguments are going to go in registers, and which in 6254 // memory. Also, if this is a vararg function, floating point operations 6255 // must be stored to our stack, and loaded into integer regs as well, if 6256 // any integer regs are available for argument passing. 6257 unsigned ArgOffset = LinkageSize; 6258 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 6259 6260 static const MCPhysReg GPR_32[] = { // 32-bit registers. 6261 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 6262 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 6263 }; 6264 static const MCPhysReg GPR_64[] = { // 64-bit registers. 6265 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 6266 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 6267 }; 6268 static const MCPhysReg VR[] = { 6269 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 6270 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 6271 }; 6272 const unsigned NumGPRs = array_lengthof(GPR_32); 6273 const unsigned NumFPRs = 13; 6274 const unsigned NumVRs = array_lengthof(VR); 6275 6276 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32; 6277 6278 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 6279 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 6280 6281 SmallVector<SDValue, 8> MemOpChains; 6282 for (unsigned i = 0; i != NumOps; ++i) { 6283 SDValue Arg = OutVals[i]; 6284 ISD::ArgFlagsTy Flags = Outs[i].Flags; 6285 6286 // PtrOff will be used to store the current argument to the stack if a 6287 // register cannot be found for it. 6288 SDValue PtrOff; 6289 6290 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType()); 6291 6292 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 6293 6294 // On PPC64, promote integers to 64-bit values. 6295 if (isPPC64 && Arg.getValueType() == MVT::i32) { 6296 // FIXME: Should this use ANY_EXTEND if neither sext nor zext? 
6297 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 6298 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); 6299 } 6300 6301 // FIXME memcpy is used way more than necessary. Correctness first. 6302 // Note: "by value" is code for passing a structure by value, not 6303 // basic types. 6304 if (Flags.isByVal()) { 6305 unsigned Size = Flags.getByValSize(); 6306 // Very small objects are passed right-justified. Everything else is 6307 // passed left-justified. 6308 if (Size==1 || Size==2) { 6309 EVT VT = (Size==1) ? MVT::i8 : MVT::i16; 6310 if (GPR_idx != NumGPRs) { 6311 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, 6312 MachinePointerInfo(), VT); 6313 MemOpChains.push_back(Load.getValue(1)); 6314 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 6315 6316 ArgOffset += PtrByteSize; 6317 } else { 6318 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl, 6319 PtrOff.getValueType()); 6320 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 6321 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 6322 CallSeqStart, 6323 Flags, DAG, dl); 6324 ArgOffset += PtrByteSize; 6325 } 6326 continue; 6327 } 6328 // Copy entire object into memory. There are cases where gcc-generated 6329 // code assumes it is there, even if it could be put entirely into 6330 // registers. (This is not what the doc says.) 6331 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, 6332 CallSeqStart, 6333 Flags, DAG, dl); 6334 6335 // For small aggregates (Darwin only) and aggregates >= PtrByteSize, 6336 // copy the pieces of the object that fit into registers from the 6337 // parameter save area. 
6338 for (unsigned j=0; j<Size; j+=PtrByteSize) { 6339 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType()); 6340 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); 6341 if (GPR_idx != NumGPRs) { 6342 SDValue Load = 6343 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo()); 6344 MemOpChains.push_back(Load.getValue(1)); 6345 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 6346 ArgOffset += PtrByteSize; 6347 } else { 6348 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize; 6349 break; 6350 } 6351 } 6352 continue; 6353 } 6354 6355 switch (Arg.getSimpleValueType().SimpleTy) { 6356 default: llvm_unreachable("Unexpected ValueType for argument!"); 6357 case MVT::i1: 6358 case MVT::i32: 6359 case MVT::i64: 6360 if (GPR_idx != NumGPRs) { 6361 if (Arg.getValueType() == MVT::i1) 6362 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg); 6363 6364 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); 6365 } else { 6366 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 6367 isPPC64, isTailCall, false, MemOpChains, 6368 TailCallArguments, dl); 6369 } 6370 ArgOffset += PtrByteSize; 6371 break; 6372 case MVT::f32: 6373 case MVT::f64: 6374 if (FPR_idx != NumFPRs) { 6375 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); 6376 6377 if (isVarArg) { 6378 SDValue Store = 6379 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()); 6380 MemOpChains.push_back(Store); 6381 6382 // Float varargs are always shadowed in available integer registers 6383 if (GPR_idx != NumGPRs) { 6384 SDValue Load = 6385 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo()); 6386 MemOpChains.push_back(Load.getValue(1)); 6387 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 6388 } 6389 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){ 6390 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType()); 6391 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); 6392 SDValue Load = 
6393 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo()); 6394 MemOpChains.push_back(Load.getValue(1)); 6395 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 6396 } 6397 } else { 6398 // If we have any FPRs remaining, we may also have GPRs remaining. 6399 // Args passed in FPRs consume either 1 (f32) or 2 (f64) available 6400 // GPRs. 6401 if (GPR_idx != NumGPRs) 6402 ++GPR_idx; 6403 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && 6404 !isPPC64) // PPC64 has 64-bit GPR's obviously :) 6405 ++GPR_idx; 6406 } 6407 } else 6408 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 6409 isPPC64, isTailCall, false, MemOpChains, 6410 TailCallArguments, dl); 6411 if (isPPC64) 6412 ArgOffset += 8; 6413 else 6414 ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8; 6415 break; 6416 case MVT::v4f32: 6417 case MVT::v4i32: 6418 case MVT::v8i16: 6419 case MVT::v16i8: 6420 if (isVarArg) { 6421 // These go aligned on the stack, or in the corresponding R registers 6422 // when within range. The Darwin PPC ABI doc claims they also go in 6423 // V registers; in fact gcc does this only for arguments that are 6424 // prototyped, not for those that match the ... We do it for all 6425 // arguments, seems to work. 6426 while (ArgOffset % 16 !=0) { 6427 ArgOffset += PtrByteSize; 6428 if (GPR_idx != NumGPRs) 6429 GPR_idx++; 6430 } 6431 // We could elide this store in the case where the object fits 6432 // entirely in R registers. Maybe later. 
6433 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, 6434 DAG.getConstant(ArgOffset, dl, PtrVT)); 6435 SDValue Store = 6436 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()); 6437 MemOpChains.push_back(Store); 6438 if (VR_idx != NumVRs) { 6439 SDValue Load = 6440 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo()); 6441 MemOpChains.push_back(Load.getValue(1)); 6442 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); 6443 } 6444 ArgOffset += 16; 6445 for (unsigned i=0; i<16; i+=PtrByteSize) { 6446 if (GPR_idx == NumGPRs) 6447 break; 6448 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, 6449 DAG.getConstant(i, dl, PtrVT)); 6450 SDValue Load = 6451 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo()); 6452 MemOpChains.push_back(Load.getValue(1)); 6453 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 6454 } 6455 break; 6456 } 6457 6458 // Non-varargs Altivec params generally go in registers, but have 6459 // stack space allocated at the end. 6460 if (VR_idx != NumVRs) { 6461 // Doesn't have GPR space allocated. 6462 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg)); 6463 } else if (nAltivecParamsAtEnd==0) { 6464 // We are emitting Altivec params in order. 6465 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 6466 isPPC64, isTailCall, true, MemOpChains, 6467 TailCallArguments, dl); 6468 ArgOffset += 16; 6469 } 6470 break; 6471 } 6472 } 6473 // If all Altivec parameters fit in registers, as they usually do, 6474 // they get stack space following the non-Altivec parameters. We 6475 // don't track this here because nobody below needs it. 6476 // If there are more Altivec parameters than fit in registers emit 6477 // the stores here. 6478 if (!isVarArg && nAltivecParamsAtEnd > NumVRs) { 6479 unsigned j = 0; 6480 // Offset is aligned; skip 1st 12 params which go in V registers. 
6481 ArgOffset = ((ArgOffset+15)/16)*16; 6482 ArgOffset += 12*16; 6483 for (unsigned i = 0; i != NumOps; ++i) { 6484 SDValue Arg = OutVals[i]; 6485 EVT ArgType = Outs[i].VT; 6486 if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 || 6487 ArgType==MVT::v8i16 || ArgType==MVT::v16i8) { 6488 if (++j > NumVRs) { 6489 SDValue PtrOff; 6490 // We are emitting Altivec params in order. 6491 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 6492 isPPC64, isTailCall, true, MemOpChains, 6493 TailCallArguments, dl); 6494 ArgOffset += 16; 6495 } 6496 } 6497 } 6498 } 6499 6500 if (!MemOpChains.empty()) 6501 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); 6502 6503 // On Darwin, R12 must contain the address of an indirect callee. This does 6504 // not mean the MTCTR instruction must use R12; it's easier to model this as 6505 // an extra parameter, so do that. 6506 if (!isTailCall && 6507 !isFunctionGlobalAddress(Callee) && 6508 !isa<ExternalSymbolSDNode>(Callee) && 6509 !isBLACompatibleAddress(Callee, DAG)) 6510 RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 : 6511 PPC::R12), Callee)); 6512 6513 // Build a sequence of copy-to-reg nodes chained together with token chain 6514 // and flag operands which copy the outgoing args into the appropriate regs. 
  // (Tail of LowerCall_Darwin.) Glue together the copies of outgoing
  // arguments into their registers, then emit the call itself.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  if (isTailCall)
    PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
                    TailCallArguments);

  return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
                    /* unused except on PPC64 ELFv1 */ false, DAG,
                    RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
                    NumBytes, Ins, InVals, CS);
}

/// Return true if every value in Outs can be assigned to a return register
/// by the return calling convention (RetCC_PPC, or RetCC_PPC_Cold for the
/// coldcc convention on SVR4 ABIs).
bool
PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
                                  MachineFunction &MF, bool isVarArg,
                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
                                  LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
  return CCInfo.CheckReturn(
      Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
                ? RetCC_PPC_Cold
                : RetCC_PPC);
}

/// Lower an IR 'ret' into copies of the return values into the registers
/// assigned by the return calling convention, finished by a PPCISD::RET_FLAG
/// node that carries those registers (and the glue chain) as operands.
SDValue
PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               const SDLoc &dl, SelectionDAG &DAG) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());
  // Use the same CC selection as CanLowerReturn above so the two agree.
  CCInfo.AnalyzeReturn(Outs,
                       (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
                           ? RetCC_PPC_Cold
                           : RetCC_PPC);

  SDValue Flag;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Arg = OutVals[i];

    // Widen the value to the location's register type as the CC requested.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    }

    // Chain the copies through the glue value so they stay adjacent to the
    // return.
    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
    Flag = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  // If some callee-saved registers are preserved via explicit copies rather
  // than spills, list them as operands of the return so they are kept live.
  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const MCPhysReg *I =
      TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
  if (I) {
    for (; *I; ++I) {

      if (PPC::G8RCRegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::i64));
      else if (PPC::F8RCRegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
      else if (PPC::CRRCRegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::i1));
      else if (PPC::VRRCRegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::Other));
      else
        llvm_unreachable("Unexpected register class in CSRsViaCopy!");
    }
  }

  RetOps[0] = Chain;  // Update chain.

  // Add the flag if we have it.
  if (Flag.getNode())
    RetOps.push_back(Flag);

  return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
}

/// Lower llvm.get.dynamic.area.offset by emitting a PPCISD::DYNAREAOFFSET
/// node anchored on the frame-pointer save slot.
SDValue
PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc dl(Op);

  // Get the correct type for integers.
  EVT IntVT = Op.getValueType();

  // Get the inputs.
  SDValue Chain = Op.getOperand(0);
  // The frame-pointer save slot is materialized lazily below in
  // getFramePointerFrameIndex.
  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
  // Build a DYNAREAOFFSET node.
  SDValue Ops[2] = {Chain, FPSIdx};
  SDVTList VTs = DAG.getVTList(IntVT);
  return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
}

/// Lower ISD::STACKRESTORE.  Besides moving SaveSP back into the stack
/// pointer register, the back-chain word stored at the old stack top must be
/// preserved, so it is loaded first and re-stored at the restored location.
SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
                                             SelectionDAG &DAG) const {
  // When we pop the dynamic allocation we need to restore the SP link.
  SDLoc dl(Op);

  // Get the correct type for pointers.
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  // Construct the stack pointer operand (R1/X1 depending on word size).
  bool isPPC64 = Subtarget.isPPC64();
  unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
  SDValue StackPtr = DAG.getRegister(SP, PtrVT);

  // Get the operands for the STACKRESTORE.
  SDValue Chain = Op.getOperand(0);
  SDValue SaveSP = Op.getOperand(1);

  // Load the old link SP (the back-chain value at the current stack top).
  SDValue LoadLinkSP =
      DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());

  // Restore the stack pointer.
  Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);

  // Store the old link SP at the new stack top.
  return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
}

/// Return (creating on first use) the fixed frame index for the return
/// address save slot, as a frame-index node of pointer type.
SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool isPPC64 = Subtarget.isPPC64();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Get the current return-address save index, if one was already created.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  int RASI = FI->getReturnAddrSaveIndex();

  // If the return-address save index hasn't been defined yet.
  if (!RASI) {
    // Find out the fixed offset of the return-address save area (LR slot).
    int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
    // Allocate the frame index for the return-address save area.
    RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
    // Save the result.
    FI->setReturnAddrSaveIndex(RASI);
  }
  return DAG.getFrameIndex(RASI, PtrVT);
}

/// Return (creating on first use) the fixed frame index for the frame
/// pointer save slot, as a frame-index node of pointer type.
SDValue
PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool isPPC64 = Subtarget.isPPC64();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Get current frame pointer save index.  The users of this index will be
  // primarily DYNALLOC instructions.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  int FPSI = FI->getFramePointerSaveIndex();

  // If the frame pointer save index hasn't been defined yet.
  if (!FPSI) {
    // Find out what the fix offset of the frame pointer save area.
    int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
    // Allocate the frame index for frame pointer save area.
    FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
    // Save the result.
    FI->setFramePointerSaveIndex(FPSI);
  }
  return DAG.getFrameIndex(FPSI, PtrVT);
}

/// Lower ISD::DYNAMIC_STACKALLOC into a PPCISD::DYNALLOC node that takes the
/// negated allocation size and the frame-pointer save slot.
SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                                   SelectionDAG &DAG) const {
  // Get the inputs.
  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);
  SDLoc dl(Op);

  // Get the correct type for pointers.
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  // Negate the size (the stack grows downward).
  SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
                                DAG.getConstant(0, dl, PtrVT), Size);
  // Construct a node for the frame pointer save index.
  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
  // Build a DYNALLOC node.
  // DYNALLOC produces the new allocation's address plus an output chain.
  SDValue Ops[3] = { Chain, NegSize, FPSIdx };
  SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
  return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
}

/// Lower ISD::EH_DWARF_CFA by creating a fixed object at offset 0 of the
/// frame (the canonical frame address) and returning its frame index.
SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
                                             SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();

  bool isPPC64 = Subtarget.isPPC64();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
  return DAG.getFrameIndex(FI, PtrVT);
}

/// Lower ISD::EH_SJLJ_SETJMP to the PPC-specific node; produces the i32
/// setjmp result plus a chain.
SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
                     DAG.getVTList(MVT::i32, MVT::Other),
                     Op.getOperand(0), Op.getOperand(1));
}

/// Lower ISD::EH_SJLJ_LONGJMP to the PPC-specific node (chain only; does not
/// return).
SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
                     Op.getOperand(0), Op.getOperand(1));
}

/// Custom-lower loads: vector loads are handled elsewhere; i1 loads are
/// widened to an i8 extending load followed by a truncate to i1.
SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  if (Op.getValueType().isVector())
    return LowerVectorLoad(Op, DAG);

  assert(Op.getValueType() == MVT::i1 &&
         "Custom lowering only for i1 loads");

  // First, load 8 bits into 32 bits, then truncate to 1 bit.

  SDLoc dl(Op);
  LoadSDNode *LD = cast<LoadSDNode>(Op);

  SDValue Chain = LD->getChain();
  SDValue BasePtr = LD->getBasePtr();
  MachineMemOperand *MMO = LD->getMemOperand();

  SDValue NewLD =
      DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
                     BasePtr, MVT::i8, MMO);
  SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);

  // Return both the truncated value and the new load's chain.
  SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
  return DAG.getMergeValues(Ops, dl);
}

/// Custom-lower stores: vector stores are handled elsewhere; i1 stores are
/// zero-extended and emitted as a truncating i8 store.
SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  if (Op.getOperand(1).getValueType().isVector())
    return LowerVectorStore(Op, DAG);

  assert(Op.getOperand(1).getValueType() == MVT::i1 &&
         "Custom lowering only for i1 stores");

  // First, zero extend to 32 bits, then use a truncating store to 8 bits.

  SDLoc dl(Op);
  StoreSDNode *ST = cast<StoreSDNode>(Op);

  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Value = ST->getValue();
  MachineMemOperand *MMO = ST->getMemOperand();

  Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
                      Value);
  return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
}

// FIXME: Remove this once the ANDI glue bug is fixed:
SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
  assert(Op.getValueType() == MVT::i1 &&
         "Custom lowering only for i1 results");

  SDLoc DL(Op);
  return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1,
                     Op.getOperand(0));
}

SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
                                               SelectionDAG &DAG) const {

  // Implements a vector truncate that fits in a vector register as a shuffle.
  // We want to legalize vector truncates down to where the source fits in
  // a vector register (and target is therefore smaller than vector register
  // size).  At that point legalization will try to custom lower the sub-legal
  // result and get here - where we can contain the truncate as a single target
  // operation.

  // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
  //   <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
  //
  // We will implement it for big-endian ordering as this (where x denotes
  // undefined):
  //   < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
  //   < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
  //
  // The same operation in little-endian ordering will be:
  //   <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
  //   <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>

  assert(Op.getValueType().isVector() && "Vector type expected.");

  SDLoc DL(Op);
  SDValue N1 = Op.getOperand(0);
  unsigned SrcSize = N1.getValueType().getSizeInBits();
  assert(SrcSize <= 128 && "Source must fit in an Altivec/VSX vector");
  // Widen sub-128-bit sources up to a full vector register.
  SDValue WideSrc = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);

  EVT TrgVT = Op.getValueType();
  unsigned TrgNumElts = TrgVT.getVectorNumElements();
  EVT EltVT = TrgVT.getVectorElementType();
  unsigned WideNumElts = 128 / EltVT.getSizeInBits();
  EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);

  // First list the elements we want to keep.  SizeMult is how many narrow
  // elements one source element spans after the bitcast below; the low part
  // is the first narrow element on LE and the last one on BE.
  unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
  SmallVector<int, 16> ShuffV;
  if (Subtarget.isLittleEndian())
    for (unsigned i = 0; i < TrgNumElts; ++i)
      ShuffV.push_back(i * SizeMult);
  else
    for (unsigned i = 1; i <= TrgNumElts; ++i)
      ShuffV.push_back(i * SizeMult - 1);

  // Populate the remaining elements with undefs.
6856 for (unsigned i = TrgNumElts; i < WideNumElts; ++i) 6857 // ShuffV.push_back(i + WideNumElts); 6858 ShuffV.push_back(WideNumElts + 1); 6859 6860 SDValue Conv = DAG.getNode(ISD::BITCAST, DL, WideVT, WideSrc); 6861 return DAG.getVectorShuffle(WideVT, DL, Conv, DAG.getUNDEF(WideVT), ShuffV); 6862 } 6863 6864 /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when 6865 /// possible. 6866 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { 6867 // Not FP? Not a fsel. 6868 if (!Op.getOperand(0).getValueType().isFloatingPoint() || 6869 !Op.getOperand(2).getValueType().isFloatingPoint()) 6870 return Op; 6871 6872 // We might be able to do better than this under some circumstances, but in 6873 // general, fsel-based lowering of select is a finite-math-only optimization. 6874 // For more information, see section F.3 of the 2.06 ISA specification. 6875 if (!DAG.getTarget().Options.NoInfsFPMath || 6876 !DAG.getTarget().Options.NoNaNsFPMath) 6877 return Op; 6878 // TODO: Propagate flags from the select rather than global settings. 6879 SDNodeFlags Flags; 6880 Flags.setNoInfs(true); 6881 Flags.setNoNaNs(true); 6882 6883 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); 6884 6885 EVT ResVT = Op.getValueType(); 6886 EVT CmpVT = Op.getOperand(0).getValueType(); 6887 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 6888 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3); 6889 SDLoc dl(Op); 6890 6891 // If the RHS of the comparison is a 0.0, we don't need to do the 6892 // subtraction at all. 6893 SDValue Sel1; 6894 if (isFloatingPointZero(RHS)) 6895 switch (CC) { 6896 default: break; // SETUO etc aren't handled by fsel. 
    // Zero-RHS cases: fsel selects on (operand >= 0), so the comparison
    // value is the (possibly extended) LHS itself.
    case ISD::SETNE:
      std::swap(TV, FV);
      LLVM_FALLTHROUGH;
    case ISD::SETEQ:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
      if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
        Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
      // Equality needs two fsels: LHS >= 0 and -LHS >= 0.
      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
    case ISD::SETULT:
    case ISD::SETLT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
      LLVM_FALLTHROUGH;
    case ISD::SETOGE:
    case ISD::SETGE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
    case ISD::SETUGT:
    case ISD::SETGT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
      LLVM_FALLTHROUGH;
    case ISD::SETOLE:
    case ISD::SETLE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
    }

  // General case: materialize LHS - RHS (or RHS - LHS) and fsel on its sign.
  SDValue Cmp;
  switch (CC) {
  default: break;       // SETUO etc aren't handled by fsel.
  case ISD::SETNE:
    std::swap(TV, FV);
    LLVM_FALLTHROUGH;
  case ISD::SETEQ:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
    if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                       DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
  case ISD::SETULT:
  case ISD::SETLT:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOGE:
  case ISD::SETGE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  case ISD::SETUGT:
  case ISD::SETGT:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOLE:
  case ISD::SETLE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  }
  return Op;
}

/// Lower an FP-to-int conversion to an fcti* followed by a store to a stack
/// temporary, recording the resulting chain/pointer in RLI so callers can
/// load the integer back (possibly reusing the address).
void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
                                               SelectionDAG &DAG,
                                               const SDLoc &dl) const {
  assert(Op.getOperand(0).getValueType().isFloatingPoint());
  SDValue Src = Op.getOperand(0);
  // The fcti* nodes below operate on f64.
  if (Src.getValueType() == MVT::f32)
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);

  SDValue Tmp;
  switch (Op.getSimpleValueType().SimpleTy) {
  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
  case MVT::i32:
    // Without FPCVT there is no fctiwuz; fall back to fctidz for unsigned.
    Tmp = DAG.getNode(
        Op.getOpcode() == ISD::FP_TO_SINT
            ? PPCISD::FCTIWZ
            : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
        dl, MVT::f64, Src);
    break;
  case MVT::i64:
    assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
           "i64 FP_TO_UINT is supported only with FPCVT");
    Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
                                                        PPCISD::FCTIDUZ,
                      dl, MVT::f64, Src);
    break;
  }

  // Convert the FP value to an int value through memory.
  bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
    (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
  SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
  int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
  MachinePointerInfo MPI =
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);

  // Emit a store to the stack slot.
  SDValue Chain;
  if (i32Stack) {
    // Use stfiwx to store just the low 32 bits of the FP register.
    MachineFunction &MF = DAG.getMachineFunction();
    MachineMemOperand *MMO =
      MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
    SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
    Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
              DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
  } else
    Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, MPI);

  // Result is a load from the stack slot.  If loading 4 bytes, make sure to
  // add in a bias on big endian.
  if (Op.getValueType() == MVT::i32 && !i32Stack) {
    FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
                        DAG.getConstant(4, dl, FIPtr.getValueType()));
    MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
  }

  RLI.Chain = Chain;
  RLI.Ptr = FIPtr;
  RLI.MPI = MPI;
}

/// Custom lowers floating point to integer conversions to use
/// the direct move instructions available in ISA 2.07 to avoid the
/// need for load/store combinations.
SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
                                                    SelectionDAG &DAG,
                                                    const SDLoc &dl) const {
  assert(Op.getOperand(0).getValueType().isFloatingPoint());
  SDValue Src = Op.getOperand(0);

  // The fcti* nodes below operate on f64.
  if (Src.getValueType() == MVT::f32)
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);

  SDValue Tmp;
  switch (Op.getSimpleValueType().SimpleTy) {
  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
  case MVT::i32:
    Tmp = DAG.getNode(
        Op.getOpcode() == ISD::FP_TO_SINT
            ? PPCISD::FCTIWZ
            : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
        dl, MVT::f64, Src);
    // Move the converted value from the VSX register with mfvsr*.
    Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp);
    break;
  case MVT::i64:
    assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
           "i64 FP_TO_UINT is supported only with FPCVT");
    Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
                                                        PPCISD::FCTIDUZ,
                      dl, MVT::f64, Src);
    Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp);
    break;
  }
  return Tmp;
}

SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                                          const SDLoc &dl) const {

  // FP to INT conversions are legal for f128.
  if (EnableQuadPrecision && (Op->getOperand(0).getValueType() == MVT::f128))
    return Op;

  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
  // PPC (the libcall is not available).
  if (Op.getOperand(0).getValueType() == MVT::ppcf128) {
    if (Op.getValueType() == MVT::i32) {
      if (Op.getOpcode() == ISD::FP_TO_SINT) {
        // Split the double-double into its two f64 halves.
        SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
                                 MVT::f64, Op.getOperand(0),
                                 DAG.getIntPtrConstant(0, dl));
        SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
                                 MVT::f64, Op.getOperand(0),
                                 DAG.getIntPtrConstant(1, dl));

        // Add the two halves of the long double in round-to-zero mode.
        SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);

        // Now use a smaller FP_TO_SINT.
        return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
      }
      if (Op.getOpcode() == ISD::FP_TO_UINT) {
        // 2^31 as a ppcf128 constant.
        const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
        APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
        SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128);
        //  X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
        // FIXME: generated code sucks.
        // TODO: Are there fast-math-flags to propagate to this FSUB?
        SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128,
                                   Op.getOperand(0), Tmp);
        True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
        True = DAG.getNode(ISD::ADD, dl, MVT::i32, True,
                           DAG.getConstant(0x80000000, dl, MVT::i32));
        SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32,
                                    Op.getOperand(0));
        return DAG.getSelectCC(dl, Op.getOperand(0), Tmp, True, False,
                               ISD::SETGE);
      }
    }

    // Other ppcf128 conversions are left to the default expansion/libcall.
    return SDValue();
  }

  // ISA 2.07 direct moves avoid the store/reload round-trip entirely.
  if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
    return LowerFP_TO_INTDirectMove(Op, DAG, dl);

  ReuseLoadInfo RLI;
  LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);

  return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
                     RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
}

// We're trying to insert a regular store, S, and then a load, L.  If the
// incoming value, O, is a load, we might just be able to have our load use the
// address used by O.  However, we don't know if anything else will store to
// that address before we can load from it.  To prevent this situation, we need
// to insert our load, L, into the chain as a peer of O.  To do this, we give L
// the same chain operand as O, we create a token factor from the chain results
// of O and L, and we replace all uses of O's chain result with that token
// factor (see spliceIntoChain below for this last part).
/// Determine whether Op's memory address can be reused for a load of MemVT
/// with extension type ET, filling in RLI (pointer, chain, memory-operand
/// info) when it can.  Returns false if no safe reuse is possible.
bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
                                            ReuseLoadInfo &RLI,
                                            SelectionDAG &DAG,
                                            ISD::LoadExtType ET) const {
  SDLoc dl(Op);
  // An FP_TO_*INT we would custom-lower anyway can be lowered "for reuse":
  // its stack-temporary address becomes the reusable address.
  if (ET == ISD::NON_EXTLOAD &&
      (Op.getOpcode() == ISD::FP_TO_UINT ||
       Op.getOpcode() == ISD::FP_TO_SINT) &&
      isOperationLegalOrCustom(Op.getOpcode(),
                               Op.getOperand(0).getValueType())) {

    LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
    return true;
  }

  // Otherwise Op must itself be a plain load of the right kind.
  LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
  if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
      LD->isNonTemporal())
    return false;
  if (LD->getMemoryVT() != MemVT)
    return false;

  RLI.Ptr = LD->getBasePtr();
  if (LD->isIndexed() && !LD->getOffset().isUndef()) {
    assert(LD->getAddressingMode() == ISD::PRE_INC &&
           "Non-pre-inc AM on PPC?");
    // Fold the pre-increment offset into the reused pointer.
    RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
                          LD->getOffset());
  }

  RLI.Chain = LD->getChain();
  RLI.MPI = LD->getPointerInfo();
  RLI.IsDereferenceable = LD->isDereferenceable();
  RLI.IsInvariant = LD->isInvariant();
  RLI.Alignment = LD->getAlignment();
  RLI.AAInfo = LD->getAAInfo();
  RLI.Ranges = LD->getRanges();

  // Chain result is value 2 for indexed loads (value 1 is the new pointer).
  RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
  return true;
}

// Given the head of the old chain, ResChain, insert a token factor containing
// it and NewResChain, and make users of ResChain now be users of that token
// factor.
// TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
                                        SDValue NewResChain,
                                        SelectionDAG &DAG) const {
  if (!ResChain)
    return;

  SDLoc dl(NewResChain);

  // Build the token factor with an UNDEF placeholder first, then patch in
  // ResChain, so RAUW below cannot create a cycle through the new node.
  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                           NewResChain, DAG.getUNDEF(MVT::Other));
  assert(TF.getNode() != NewResChain.getNode() &&
         "A new TF really is required here");

  DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
  DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
}

/// Analyze profitability of direct move
/// prefer float load to int load plus direct move
/// when there is no integer use of int load
bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
  SDNode *Origin = Op.getOperand(0).getNode();
  // Non-load sources always use the direct move.
  if (Origin->getOpcode() != ISD::LOAD)
    return true;

  // If there is no LXSIBZX/LXSIHZX, like Power8,
  // prefer direct move if the memory size is 1 or 2 bytes.
  MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
  if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)
    return true;

  for (SDNode::use_iterator UI = Origin->use_begin(),
                            UE = Origin->use_end();
       UI != UE; ++UI) {

    // Only look at the users of the loaded value.
    if (UI.getUse().get().getResNo() != 0)
      continue;

    // Any non-conversion user means the integer load is needed anyway, so
    // the direct move costs nothing extra.
    if (UI->getOpcode() != ISD::SINT_TO_FP &&
        UI->getOpcode() != ISD::UINT_TO_FP)
      return true;
  }

  return false;
}

/// Custom lowers integer to floating point conversions to use
/// the direct move instructions available in ISA 2.07 to avoid the
/// need for load/store combinations.
SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
                                                    SelectionDAG &DAG,
                                                    const SDLoc &dl) const {
  assert((Op.getValueType() == MVT::f32 ||
          Op.getValueType() == MVT::f64) &&
         "Invalid floating point type as target of conversion");
  assert(Subtarget.hasFPCVT() &&
         "Int to FP conversions with direct moves require FPCVT");
  SDValue FP;
  SDValue Src = Op.getOperand(0);
  bool SinglePrec = Op.getValueType() == MVT::f32;
  bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
  bool Signed = Op.getOpcode() == ISD::SINT_TO_FP;
  // Pick the fcfid* variant matching signedness and result precision.
  unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) :
                             (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU);

  if (WordInt) {
    // i32 source: sign- or zero-extending move to the VSX register, then
    // convert.
    FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ,
                     dl, MVT::f64, Src);
    FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
  }
  else {
    // i64 source: plain move, the fcfid* variant handles signedness.
    FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src);
    FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
  }

  return FP;
}

/// Widen a sub-128-bit vector to a full 128-bit vector of the same element
/// type by concatenating it with undef vectors.
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {

  EVT VecVT = Vec.getValueType();
  assert(VecVT.isVector() && "Expected a vector type.");
  assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");

  EVT EltVT = VecVT.getVectorElementType();
  unsigned WideNumElts = 128 / EltVT.getSizeInBits();
  EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);

  unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
  SmallVector<SDValue, 16> Ops(NumConcat);
  Ops[0] = Vec;
  SDValue UndefVec = DAG.getUNDEF(VecVT);
  for (unsigned i = 1; i < NumConcat; ++i)
    Ops[i] = UndefVec;

  return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
}

/// Lower vector int-to-FP conversions to v2f64/v4f32 by widening the source,
/// shuffling the kept lanes into position, extending them to the result's
/// integer width, and converting.
SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
                                                const SDLoc &dl) const {

  unsigned Opc = Op.getOpcode();
  assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP) &&
         "Unexpected conversion type");
  assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
         "Supports conversions to v2f64/v4f32 only.");

  bool SignedConv = Opc == ISD::SINT_TO_FP;
  bool FourEltRes = Op.getValueType() == MVT::v4f32;

  SDValue Wide = widenVec(DAG, Op.getOperand(0), dl);
  EVT WideVT = Wide.getValueType();
  unsigned WideNumElts = WideVT.getVectorNumElements();
  MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;

  // Default every mask lane to the second shuffle operand (undef or zero),
  // then patch in the lanes we keep below.
  SmallVector<int, 16> ShuffV;
  for (unsigned i = 0; i < WideNumElts; ++i)
    ShuffV.push_back(i + WideNumElts);

  // Stride is the number of narrow lanes per result element; the payload
  // lane is the first of each group on LE and the last on BE.
  int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
  int SaveElts = FourEltRes ? 4 : 2;
  if (Subtarget.isLittleEndian())
    for (int i = 0; i < SaveElts; i++)
      ShuffV[i * Stride] = i;
  else
    for (int i = 1; i <= SaveElts; i++)
      ShuffV[i * Stride - 1] = i - 1;

  // For unsigned conversions shuffle in zeros so the extension is a bitcast;
  // signed conversions sign-extend the elements instead.
  SDValue ShuffleSrc2 =
      SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
  SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
  unsigned ExtendOp =
      SignedConv ? (unsigned)PPCISD::SExtVElems : (unsigned)ISD::BITCAST;

  SDValue Extend;
  if (!Subtarget.hasP9Altivec() && SignedConv) {
    // No SExtVElems before Power9 Altivec; use SIGN_EXTEND_INREG instead.
    Arrange = DAG.getBitcast(IntermediateVT, Arrange);
    Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
                         DAG.getValueType(Op.getOperand(0).getValueType()));
  } else
    Extend = DAG.getNode(ExtendOp, dl, IntermediateVT, Arrange);

  return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
}

SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc dl(Op);

  EVT InVT = Op.getOperand(0).getValueType();
  EVT OutVT = Op.getValueType();
  if (OutVT.isVector() && OutVT.isFloatingPoint() &&
      isOperationCustom(Op.getOpcode(), InVT))
    return LowerINT_TO_FPVector(Op, DAG, dl);

  // Conversions to f128 are legal.
  if (EnableQuadPrecision && (Op.getValueType() == MVT::f128))
    return Op;

  if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) {
    if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64)
      return SDValue();

    SDValue Value = Op.getOperand(0);
    // The values are now known to be -1 (false) or 1 (true). To convert this
    // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
7345 // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5 7346 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value); 7347 7348 SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64); 7349 7350 Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); 7351 7352 if (Op.getValueType() != MVT::v4f64) 7353 Value = DAG.getNode(ISD::FP_ROUND, dl, 7354 Op.getValueType(), Value, 7355 DAG.getIntPtrConstant(1, dl)); 7356 return Value; 7357 } 7358 7359 // Don't handle ppc_fp128 here; let it be lowered to a libcall. 7360 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) 7361 return SDValue(); 7362 7363 if (Op.getOperand(0).getValueType() == MVT::i1) 7364 return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0), 7365 DAG.getConstantFP(1.0, dl, Op.getValueType()), 7366 DAG.getConstantFP(0.0, dl, Op.getValueType())); 7367 7368 // If we have direct moves, we can do all the conversion, skip the store/load 7369 // however, without FPCVT we can't do most conversions. 7370 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) && 7371 Subtarget.isPPC64() && Subtarget.hasFPCVT()) 7372 return LowerINT_TO_FPDirectMove(Op, DAG, dl); 7373 7374 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) && 7375 "UINT_TO_FP is supported only with FPCVT"); 7376 7377 // If we have FCFIDS, then use it when converting to single-precision. 7378 // Otherwise, convert to double-precision and then round. 7379 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) 7380 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS 7381 : PPCISD::FCFIDS) 7382 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU 7383 : PPCISD::FCFID); 7384 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) 7385 ? 
MVT::f32 7386 : MVT::f64; 7387 7388 if (Op.getOperand(0).getValueType() == MVT::i64) { 7389 SDValue SINT = Op.getOperand(0); 7390 // When converting to single-precision, we actually need to convert 7391 // to double-precision first and then round to single-precision. 7392 // To avoid double-rounding effects during that operation, we have 7393 // to prepare the input operand. Bits that might be truncated when 7394 // converting to double-precision are replaced by a bit that won't 7395 // be lost at this stage, but is below the single-precision rounding 7396 // position. 7397 // 7398 // However, if -enable-unsafe-fp-math is in effect, accept double 7399 // rounding to avoid the extra overhead. 7400 if (Op.getValueType() == MVT::f32 && 7401 !Subtarget.hasFPCVT() && 7402 !DAG.getTarget().Options.UnsafeFPMath) { 7403 7404 // Twiddle input to make sure the low 11 bits are zero. (If this 7405 // is the case, we are guaranteed the value will fit into the 53 bit 7406 // mantissa of an IEEE double-precision value without rounding.) 7407 // If any of those low 11 bits were not zero originally, make sure 7408 // bit 12 (value 2048) is set instead, so that the final rounding 7409 // to single-precision gets the correct result. 7410 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64, 7411 SINT, DAG.getConstant(2047, dl, MVT::i64)); 7412 Round = DAG.getNode(ISD::ADD, dl, MVT::i64, 7413 Round, DAG.getConstant(2047, dl, MVT::i64)); 7414 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT); 7415 Round = DAG.getNode(ISD::AND, dl, MVT::i64, 7416 Round, DAG.getConstant(-2048, dl, MVT::i64)); 7417 7418 // However, we cannot use that value unconditionally: if the magnitude 7419 // of the input value is small, the bit-twiddling we did above might 7420 // end up visibly changing the output. Fortunately, in that case, we 7421 // don't need to twiddle bits since the original input will convert 7422 // exactly to double-precision floating-point already. 
Therefore, 7423 // construct a conditional to use the original value if the top 11 7424 // bits are all sign-bit copies, and use the rounded value computed 7425 // above otherwise. 7426 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64, 7427 SINT, DAG.getConstant(53, dl, MVT::i32)); 7428 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64, 7429 Cond, DAG.getConstant(1, dl, MVT::i64)); 7430 Cond = DAG.getSetCC(dl, MVT::i32, 7431 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT); 7432 7433 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT); 7434 } 7435 7436 ReuseLoadInfo RLI; 7437 SDValue Bits; 7438 7439 MachineFunction &MF = DAG.getMachineFunction(); 7440 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) { 7441 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI, 7442 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges); 7443 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); 7444 } else if (Subtarget.hasLFIWAX() && 7445 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) { 7446 MachineMemOperand *MMO = 7447 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, 7448 RLI.Alignment, RLI.AAInfo, RLI.Ranges); 7449 SDValue Ops[] = { RLI.Chain, RLI.Ptr }; 7450 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl, 7451 DAG.getVTList(MVT::f64, MVT::Other), 7452 Ops, MVT::i32, MMO); 7453 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); 7454 } else if (Subtarget.hasFPCVT() && 7455 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) { 7456 MachineMemOperand *MMO = 7457 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, 7458 RLI.Alignment, RLI.AAInfo, RLI.Ranges); 7459 SDValue Ops[] = { RLI.Chain, RLI.Ptr }; 7460 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl, 7461 DAG.getVTList(MVT::f64, MVT::Other), 7462 Ops, MVT::i32, MMO); 7463 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); 7464 } else if (((Subtarget.hasLFIWAX() && 7465 SINT.getOpcode() == ISD::SIGN_EXTEND) || 7466 (Subtarget.hasFPCVT() && 7467 
SINT.getOpcode() == ISD::ZERO_EXTEND)) && 7468 SINT.getOperand(0).getValueType() == MVT::i32) { 7469 MachineFrameInfo &MFI = MF.getFrameInfo(); 7470 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 7471 7472 int FrameIdx = MFI.CreateStackObject(4, 4, false); 7473 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 7474 7475 SDValue Store = 7476 DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx, 7477 MachinePointerInfo::getFixedStack( 7478 DAG.getMachineFunction(), FrameIdx)); 7479 7480 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 && 7481 "Expected an i32 store"); 7482 7483 RLI.Ptr = FIdx; 7484 RLI.Chain = Store; 7485 RLI.MPI = 7486 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); 7487 RLI.Alignment = 4; 7488 7489 MachineMemOperand *MMO = 7490 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, 7491 RLI.Alignment, RLI.AAInfo, RLI.Ranges); 7492 SDValue Ops[] = { RLI.Chain, RLI.Ptr }; 7493 Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ? 7494 PPCISD::LFIWZX : PPCISD::LFIWAX, 7495 dl, DAG.getVTList(MVT::f64, MVT::Other), 7496 Ops, MVT::i32, MMO); 7497 } else 7498 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT); 7499 7500 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits); 7501 7502 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) 7503 FP = DAG.getNode(ISD::FP_ROUND, dl, 7504 MVT::f32, FP, DAG.getIntPtrConstant(0, dl)); 7505 return FP; 7506 } 7507 7508 assert(Op.getOperand(0).getValueType() == MVT::i32 && 7509 "Unhandled INT_TO_FP type in custom expander!"); 7510 // Since we only generate this in 64-bit mode, we can take advantage of 7511 // 64-bit registers. In particular, sign extend the input value into the 7512 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack 7513 // then lfd it and fcfid it. 
7514 MachineFunction &MF = DAG.getMachineFunction(); 7515 MachineFrameInfo &MFI = MF.getFrameInfo(); 7516 EVT PtrVT = getPointerTy(MF.getDataLayout()); 7517 7518 SDValue Ld; 7519 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) { 7520 ReuseLoadInfo RLI; 7521 bool ReusingLoad; 7522 if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI, 7523 DAG))) { 7524 int FrameIdx = MFI.CreateStackObject(4, 4, false); 7525 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 7526 7527 SDValue Store = 7528 DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx, 7529 MachinePointerInfo::getFixedStack( 7530 DAG.getMachineFunction(), FrameIdx)); 7531 7532 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 && 7533 "Expected an i32 store"); 7534 7535 RLI.Ptr = FIdx; 7536 RLI.Chain = Store; 7537 RLI.MPI = 7538 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); 7539 RLI.Alignment = 4; 7540 } 7541 7542 MachineMemOperand *MMO = 7543 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, 7544 RLI.Alignment, RLI.AAInfo, RLI.Ranges); 7545 SDValue Ops[] = { RLI.Chain, RLI.Ptr }; 7546 Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ? 7547 PPCISD::LFIWZX : PPCISD::LFIWAX, 7548 dl, DAG.getVTList(MVT::f64, MVT::Other), 7549 Ops, MVT::i32, MMO); 7550 if (ReusingLoad) 7551 spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG); 7552 } else { 7553 assert(Subtarget.isPPC64() && 7554 "i32->FP without LFIWAX supported only on PPC64"); 7555 7556 int FrameIdx = MFI.CreateStackObject(8, 8, false); 7557 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 7558 7559 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, 7560 Op.getOperand(0)); 7561 7562 // STD the extended value into the stack slot. 7563 SDValue Store = DAG.getStore( 7564 DAG.getEntryNode(), dl, Ext64, FIdx, 7565 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx)); 7566 7567 // Load the value as a double. 
7568 Ld = DAG.getLoad( 7569 MVT::f64, dl, Store, FIdx, 7570 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx)); 7571 } 7572 7573 // FCFID it and return it. 7574 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld); 7575 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) 7576 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, 7577 DAG.getIntPtrConstant(0, dl)); 7578 return FP; 7579 } 7580 7581 SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, 7582 SelectionDAG &DAG) const { 7583 SDLoc dl(Op); 7584 /* 7585 The rounding mode is in bits 30:31 of FPSR, and has the following 7586 settings: 7587 00 Round to nearest 7588 01 Round to 0 7589 10 Round to +inf 7590 11 Round to -inf 7591 7592 FLT_ROUNDS, on the other hand, expects the following: 7593 -1 Undefined 7594 0 Round to 0 7595 1 Round to nearest 7596 2 Round to +inf 7597 3 Round to -inf 7598 7599 To perform the conversion, we do: 7600 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1)) 7601 */ 7602 7603 MachineFunction &MF = DAG.getMachineFunction(); 7604 EVT VT = Op.getValueType(); 7605 EVT PtrVT = getPointerTy(MF.getDataLayout()); 7606 7607 // Save FP Control Word to register 7608 EVT NodeTys[] = { 7609 MVT::f64, // return register 7610 MVT::Glue // unused in this context 7611 }; 7612 SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None); 7613 7614 // Save FP register to stack slot 7615 int SSFI = MF.getFrameInfo().CreateStackObject(8, 8, false); 7616 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); 7617 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, StackSlot, 7618 MachinePointerInfo()); 7619 7620 // Load FP Control Word from low 32 bits of stack slot. 
7621 SDValue Four = DAG.getConstant(4, dl, PtrVT); 7622 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four); 7623 SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo()); 7624 7625 // Transform as necessary 7626 SDValue CWD1 = 7627 DAG.getNode(ISD::AND, dl, MVT::i32, 7628 CWD, DAG.getConstant(3, dl, MVT::i32)); 7629 SDValue CWD2 = 7630 DAG.getNode(ISD::SRL, dl, MVT::i32, 7631 DAG.getNode(ISD::AND, dl, MVT::i32, 7632 DAG.getNode(ISD::XOR, dl, MVT::i32, 7633 CWD, DAG.getConstant(3, dl, MVT::i32)), 7634 DAG.getConstant(3, dl, MVT::i32)), 7635 DAG.getConstant(1, dl, MVT::i32)); 7636 7637 SDValue RetVal = 7638 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2); 7639 7640 return DAG.getNode((VT.getSizeInBits() < 16 ? 7641 ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal); 7642 } 7643 7644 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const { 7645 EVT VT = Op.getValueType(); 7646 unsigned BitWidth = VT.getSizeInBits(); 7647 SDLoc dl(Op); 7648 assert(Op.getNumOperands() == 3 && 7649 VT == Op.getOperand(1).getValueType() && 7650 "Unexpected SHL!"); 7651 7652 // Expand into a bunch of logical ops. Note that these ops 7653 // depend on the PPC behavior for oversized shift amounts. 
7654 SDValue Lo = Op.getOperand(0); 7655 SDValue Hi = Op.getOperand(1); 7656 SDValue Amt = Op.getOperand(2); 7657 EVT AmtVT = Amt.getValueType(); 7658 7659 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 7660 DAG.getConstant(BitWidth, dl, AmtVT), Amt); 7661 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt); 7662 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1); 7663 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3); 7664 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 7665 DAG.getConstant(-BitWidth, dl, AmtVT)); 7666 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5); 7667 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); 7668 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt); 7669 SDValue OutOps[] = { OutLo, OutHi }; 7670 return DAG.getMergeValues(OutOps, dl); 7671 } 7672 7673 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const { 7674 EVT VT = Op.getValueType(); 7675 SDLoc dl(Op); 7676 unsigned BitWidth = VT.getSizeInBits(); 7677 assert(Op.getNumOperands() == 3 && 7678 VT == Op.getOperand(1).getValueType() && 7679 "Unexpected SRL!"); 7680 7681 // Expand into a bunch of logical ops. Note that these ops 7682 // depend on the PPC behavior for oversized shift amounts. 
7683 SDValue Lo = Op.getOperand(0); 7684 SDValue Hi = Op.getOperand(1); 7685 SDValue Amt = Op.getOperand(2); 7686 EVT AmtVT = Amt.getValueType(); 7687 7688 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 7689 DAG.getConstant(BitWidth, dl, AmtVT), Amt); 7690 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); 7691 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); 7692 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); 7693 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 7694 DAG.getConstant(-BitWidth, dl, AmtVT)); 7695 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5); 7696 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); 7697 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt); 7698 SDValue OutOps[] = { OutLo, OutHi }; 7699 return DAG.getMergeValues(OutOps, dl); 7700 } 7701 7702 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const { 7703 SDLoc dl(Op); 7704 EVT VT = Op.getValueType(); 7705 unsigned BitWidth = VT.getSizeInBits(); 7706 assert(Op.getNumOperands() == 3 && 7707 VT == Op.getOperand(1).getValueType() && 7708 "Unexpected SRA!"); 7709 7710 // Expand into a bunch of logical ops, followed by a select_cc. 
7711 SDValue Lo = Op.getOperand(0); 7712 SDValue Hi = Op.getOperand(1); 7713 SDValue Amt = Op.getOperand(2); 7714 EVT AmtVT = Amt.getValueType(); 7715 7716 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 7717 DAG.getConstant(BitWidth, dl, AmtVT), Amt); 7718 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); 7719 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); 7720 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); 7721 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 7722 DAG.getConstant(-BitWidth, dl, AmtVT)); 7723 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5); 7724 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt); 7725 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT), 7726 Tmp4, Tmp6, ISD::SETLE); 7727 SDValue OutOps[] = { OutLo, OutHi }; 7728 return DAG.getMergeValues(OutOps, dl); 7729 } 7730 7731 //===----------------------------------------------------------------------===// 7732 // Vector related lowering. 7733 // 7734 7735 /// BuildSplatI - Build a canonical splati of Val with an element size of 7736 /// SplatSize. Cast the result to VT. 7737 static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT, 7738 SelectionDAG &DAG, const SDLoc &dl) { 7739 assert(Val >= -16 && Val <= 15 && "vsplti is out of range!"); 7740 7741 static const MVT VTys[] = { // canonical VT to use for each size. 7742 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32 7743 }; 7744 7745 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1]; 7746 7747 // Force vspltis[hw] -1 to vspltisb -1 to canonicalize. 7748 if (Val == -1) 7749 SplatSize = 1; 7750 7751 EVT CanonicalVT = VTys[SplatSize-1]; 7752 7753 // Build a canonical splat for this value. 7754 return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT)); 7755 } 7756 7757 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the 7758 /// specified intrinsic ID. 
7759 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, 7760 const SDLoc &dl, EVT DestVT = MVT::Other) { 7761 if (DestVT == MVT::Other) DestVT = Op.getValueType(); 7762 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 7763 DAG.getConstant(IID, dl, MVT::i32), Op); 7764 } 7765 7766 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the 7767 /// specified intrinsic ID. 7768 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS, 7769 SelectionDAG &DAG, const SDLoc &dl, 7770 EVT DestVT = MVT::Other) { 7771 if (DestVT == MVT::Other) DestVT = LHS.getValueType(); 7772 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 7773 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS); 7774 } 7775 7776 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the 7777 /// specified intrinsic ID. 7778 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1, 7779 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl, 7780 EVT DestVT = MVT::Other) { 7781 if (DestVT == MVT::Other) DestVT = Op0.getValueType(); 7782 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 7783 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2); 7784 } 7785 7786 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified 7787 /// amount. The result has the specified value type. 7788 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, 7789 SelectionDAG &DAG, const SDLoc &dl) { 7790 // Force LHS/RHS to be the right type. 7791 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS); 7792 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS); 7793 7794 int Ops[16]; 7795 for (unsigned i = 0; i != 16; ++i) 7796 Ops[i] = i + Amt; 7797 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops); 7798 return DAG.getNode(ISD::BITCAST, dl, VT, T); 7799 } 7800 7801 /// Do we have an efficient pattern in a .td file for this node? 
7802 /// 7803 /// \param V - pointer to the BuildVectorSDNode being matched 7804 /// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves? 7805 /// 7806 /// There are some patterns where it is beneficial to keep a BUILD_VECTOR 7807 /// node as a BUILD_VECTOR node rather than expanding it. The patterns where 7808 /// the opposite is true (expansion is beneficial) are: 7809 /// - The node builds a vector out of integers that are not 32 or 64-bits 7810 /// - The node builds a vector out of constants 7811 /// - The node is a "load-and-splat" 7812 /// In all other cases, we will choose to keep the BUILD_VECTOR. 7813 static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, 7814 bool HasDirectMove, 7815 bool HasP8Vector) { 7816 EVT VecVT = V->getValueType(0); 7817 bool RightType = VecVT == MVT::v2f64 || 7818 (HasP8Vector && VecVT == MVT::v4f32) || 7819 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32)); 7820 if (!RightType) 7821 return false; 7822 7823 bool IsSplat = true; 7824 bool IsLoad = false; 7825 SDValue Op0 = V->getOperand(0); 7826 7827 // This function is called in a block that confirms the node is not a constant 7828 // splat. So a constant BUILD_VECTOR here means the vector is built out of 7829 // different constants. 7830 if (V->isConstant()) 7831 return false; 7832 for (int i = 0, e = V->getNumOperands(); i < e; ++i) { 7833 if (V->getOperand(i).isUndef()) 7834 return false; 7835 // We want to expand nodes that represent load-and-splat even if the 7836 // loaded value is a floating point truncation or conversion to int. 
7837 if (V->getOperand(i).getOpcode() == ISD::LOAD || 7838 (V->getOperand(i).getOpcode() == ISD::FP_ROUND && 7839 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) || 7840 (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT && 7841 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) || 7842 (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT && 7843 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD)) 7844 IsLoad = true; 7845 // If the operands are different or the input is not a load and has more 7846 // uses than just this BV node, then it isn't a splat. 7847 if (V->getOperand(i) != Op0 || 7848 (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode()))) 7849 IsSplat = false; 7850 } 7851 return !(IsSplat && IsLoad); 7852 } 7853 7854 // Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128. 7855 SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const { 7856 7857 SDLoc dl(Op); 7858 SDValue Op0 = Op->getOperand(0); 7859 7860 if (!EnableQuadPrecision || 7861 (Op.getValueType() != MVT::f128 ) || 7862 (Op0.getOpcode() != ISD::BUILD_PAIR) || 7863 (Op0.getOperand(0).getValueType() != MVT::i64) || 7864 (Op0.getOperand(1).getValueType() != MVT::i64)) 7865 return SDValue(); 7866 7867 return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0), 7868 Op0.getOperand(1)); 7869 } 7870 7871 // If this is a case we can't handle, return null and let the default 7872 // expansion code take care of it. If we CAN select this case, and if it 7873 // selects to a single instruction, return Op. Otherwise, if we can codegen 7874 // this case more efficiently than a constant pool load, lower it to the 7875 // sequence of ops that should be used. 
SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc dl(Op);
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
  assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");

  if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) {
    // We first build an i32 vector, load it into a QPX register,
    // then convert it to a floating-point vector and compare it
    // to a zero vector to get the boolean result.
    MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
    int FrameIdx = MFI.CreateStackObject(16, 16, false);
    MachinePointerInfo PtrInfo =
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

    assert(BVN->getNumOperands() == 4 &&
           "BUILD_VECTOR for v4i1 does not have 4 operands");

    // Determine whether all (defined) elements are constants; if so we can
    // materialize the mask from the constant pool instead of the stack.
    bool IsConst = true;
    for (unsigned i = 0; i < 4; ++i) {
      if (BVN->getOperand(i).isUndef()) continue;
      if (!isa<ConstantSDNode>(BVN->getOperand(i))) {
        IsConst = false;
        break;
      }
    }

    if (IsConst) {
      // QPX booleans are encoded as floats: -1.0 for false, 1.0 for true.
      Constant *One =
          ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0);
      Constant *NegOne =
          ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0);

      Constant *CV[4];
      for (unsigned i = 0; i < 4; ++i) {
        if (BVN->getOperand(i).isUndef())
          CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext()));
        else if (isNullConstant(BVN->getOperand(i)))
          CV[i] = NegOne;
        else
          CV[i] = One;
      }

      Constant *CP = ConstantVector::get(CV);
      SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()),
                                          16 /* alignment */);

      SDValue Ops[] = {DAG.getEntryNode(), CPIdx};
      SDVTList VTs = DAG.getVTList({MVT::v4i1, /*chain*/ MVT::Other});
      return DAG.getMemIntrinsicNode(
          PPCISD::QVLFSb, dl, VTs, Ops, MVT::v4f32,
          MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
    }

    // Non-constant elements: spill each defined element as an i32 to a
    // 16-byte stack slot (undef elements are simply not stored).
    SmallVector<SDValue, 4> Stores;
    for (unsigned i = 0; i < 4; ++i) {
      if (BVN->getOperand(i).isUndef()) continue;

      unsigned Offset = 4*i;
      SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
      Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);

      unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize();
      if (StoreSize > 4) {
        Stores.push_back(
            DAG.getTruncStore(DAG.getEntryNode(), dl, BVN->getOperand(i), Idx,
                              PtrInfo.getWithOffset(Offset), MVT::i32));
      } else {
        SDValue StoreValue = BVN->getOperand(i);
        if (StoreSize < 4)
          StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue);

        Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, StoreValue, Idx,
                                      PtrInfo.getWithOffset(Offset)));
      }
    }

    SDValue StoreChain;
    if (!Stores.empty())
      StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
    else
      StoreChain = DAG.getEntryNode();

    // Now load from v4i32 into the QPX register; this will extend it to
    // v4i64 but not yet convert it to a floating point.  Nevertheless, this
    // is typed as v4f64 because the QPX register integer states are not
    // explicitly represented.

    SDValue Ops[] = {StoreChain,
                     DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, dl, MVT::i32),
                     FIdx};
    SDVTList VTs = DAG.getVTList({MVT::v4f64, /*chain*/ MVT::Other});

    SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN,
      dl, VTs, Ops, MVT::v4i32, PtrInfo);
    // Convert the loaded (unsigned) integers to floating point.
    LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
      DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, dl, MVT::i32),
      LoadedVect);

    SDValue FPZeros = DAG.getConstantFP(0.0, dl, MVT::v4f64);

    // Elements stored as 0 compare equal to 0.0 -> false; non-zero -> true.
    return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ);
  }

  // All other QPX vectors are handled by generic code.
  if (Subtarget.hasQPX())
    return SDValue();

  // Check if this is a splat of a constant value.
  APInt APSplatBits, APSplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                             HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
      SplatBitSize > 32) {
    // BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be
    // lowered to VSX instructions under certain conditions.
    // Without VSX, there is no pattern more efficient than expanding the node.
    if (Subtarget.hasVSX() &&
        haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
                                        Subtarget.hasP8Vector()))
      return Op;
    return SDValue();
  }

  unsigned SplatBits = APSplatBits.getZExtValue();
  unsigned SplatUndef = APSplatUndef.getZExtValue();
  unsigned SplatSize = SplatBitSize / 8;

  // First, handle single instruction cases.

  // All zeros?
  if (SplatBits == 0) {
    // Canonicalize all zero vectors to be v4i32.
    if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
      SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
      Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
    }
    return Op;
  }

  // We have XXSPLTIB for constant splats one byte wide
  if (Subtarget.hasP9Vector() && SplatSize == 1) {
    // This is a splat of 1-byte elements with some elements potentially undef.
    // Rather than trying to match undef in the SDAG patterns, ensure that all
    // elements are the same constant.
    if (HasAnyUndefs || ISD::isBuildVectorAllOnes(BVN)) {
      SmallVector<SDValue, 16> Ops(16, DAG.getConstant(SplatBits,
                                                       dl, MVT::i32));
      SDValue NewBV = DAG.getBuildVector(MVT::v16i8, dl, Ops);
      if (Op.getValueType() != MVT::v16i8)
        return DAG.getBitcast(Op.getValueType(), NewBV);
      return NewBV;
    }

    // BuildVectorSDNode::isConstantSplat() is actually pretty smart. It'll
    // detect that constant splats like v8i16: 0xABAB are really just splats
    // of a 1-byte constant. In this case, we need to convert the node to a
    // splat of v16i8 and a bitcast.
    if (Op.getValueType() != MVT::v16i8)
      return DAG.getBitcast(Op.getValueType(),
                            DAG.getConstant(SplatBits, dl, MVT::v16i8));

    return Op;
  }

  // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
  int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
                    (32-SplatBitSize));
  if (SextVal >= -16 && SextVal <= 15)
    return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);

  // Two instruction sequences.

  // If this value is in the range [-32,30] and is even, use:
  //     VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
  // If this value is in the range [17,31] and is odd, use:
  //     VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
  // If this value is in the range [-31,-17] and is odd, use:
  //     VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
  // Note the last two are three-instruction sequences.
  if (SextVal >= -32 && SextVal <= 31) {
    // To avoid having these optimizations undone by constant folding,
    // we convert to a pseudo that will be expanded later into one of
    // the above forms.
    SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
    EVT VT = (SplatSize == 1 ? MVT::v16i8 :
              (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
    SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
    SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
    if (VT == Op.getValueType())
      return RetVal;
    else
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
  }

  // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
  // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
  // for fneg/fabs.
  if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
    // Make -1 and vspltisw -1:
    SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);

    // Make the VSLW intrinsic, computing 0x8000_0000.
    SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
                                   OnesV, DAG, dl);

    // xor by OnesV to invert it.
    Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
  }

  // Check to see if this is a wide variety of vsplti*, binop self cases.
  static const signed char SplatCsts[] = {
    -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
    -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
  };

  for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
    // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
    // cases which are ambiguous (e.g. formation of 0x8000_0000).  'vsplti -1'
    int i = SplatCsts[idx];

    // Figure out what shift amount will be used by altivec if shifted by i in
    // this splat size.
    unsigned TypeShiftAmt = i & (SplatBitSize-1);

    // vsplti + shl self.
    if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
        Intrinsic::ppc_altivec_vslw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // vsplti + srl self.
    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
        Intrinsic::ppc_altivec_vsrw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // vsplti + sra self.
    // NOTE(review): this condition is byte-identical to the 'srl self' check
    // above, so this branch can never be taken (the srl case matches first).
    // Confirm whether an arithmetic shift of i was intended here.
    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
        Intrinsic::ppc_altivec_vsraw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // vsplti + rol self.
    if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
                         ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
        Intrinsic::ppc_altivec_vrlw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // t = vsplti c, result = vsldoi t, t, 1
    if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
    }
    // t = vsplti c, result = vsldoi t, t, 2
    if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
    }
    // t = vsplti c, result = vsldoi t, t, 3
    if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
    }
  }

  return SDValue();
}

/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
/// the specified operations to build the shuffle.
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
/// the specified operations to build the shuffle.
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
                                      SDValue RHS, SelectionDAG &DAG,
                                      const SDLoc &dl) {
  // Decode the packed table entry: bits [29:26] pick the operation below,
  // and the two 13-bit fields name the left/right operands.  Each operand ID
  // is either another PerfectShuffleTable index or an OP_COPY leaf.
  unsigned OpNum = (PFEntry >> 26) & 0x0F;
  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
  unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);

  enum {
    OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
    OP_VMRGHW,
    OP_VMRGLW,
    OP_VSPLTISW0,
    OP_VSPLTISW1,
    OP_VSPLTISW2,
    OP_VSPLTISW3,
    OP_VSLDOI4,
    OP_VSLDOI8,
    OP_VSLDOI12
  };

  if (OpNum == OP_COPY) {
    // Leaf node: the ID is a base-9 encoding of the four word indices.
    // 0123 (== (1*9+2)*9+3 with the leading 0 digit dropped) selects LHS
    // unchanged; 4567 selects RHS unchanged.  No other leaf is legal.
    if (LHSID == (1*9+2)*9+3) return LHS;
    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
    return RHS;
  }

  // Recursively materialize both operand sub-shuffles first.
  SDValue OpLHS, OpRHS;
  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);

  // For the merge/splat operations, express the result as an explicit v16i8
  // byte-shuffle mask so the selector can pick the matching instruction.
  // The vsldoi cases return directly instead.
  int ShufIdxs[16];
  switch (OpNum) {
  default: llvm_unreachable("Unknown i32 permute!");
  case OP_VMRGHW:
    // Interleave the high (first two) words of each input.
    ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
    ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
    ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
    ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
    break;
  case OP_VMRGLW:
    // Interleave the low (last two) words of each input.
    ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
    ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
    ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
    ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
    break;
  case OP_VSPLTISW0:
    // Splat word 0 (bytes 0..3) across the whole vector.
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+0;
    break;
  case OP_VSPLTISW1:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+4;
    break;
  case OP_VSPLTISW2:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+8;
    break;
  case OP_VSPLTISW3:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+12;
    break;
  case OP_VSLDOI4:
    return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
  case OP_VSLDOI8:
    return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
  case OP_VSLDOI12:
    return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
  }
  // Perform the byte shuffle in v16i8 and bitcast back to the operands'
  // original type.
  EVT VT = OpLHS.getValueType();
  OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
  OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
  return DAG.getNode(ISD::BITCAST, dl, VT, T);
}
/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
/// SDValue.
SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
                                           SelectionDAG &DAG) const {
  const unsigned BytesInVector = 16;
  bool IsLE = Subtarget.isLittleEndian();
  SDLoc dl(N);
  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);
  unsigned ShiftElts = 0, InsertAtByte = 0;
  bool Swap = false;

  // Shifts required to get the byte we want at element 7.
  // Indexed by the source byte position (mod 16).
  unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
                                   0, 15, 14, 13, 12, 11, 10, 9};
  unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
                                1, 2, 3, 4, 5, 6, 7, 8};

  ArrayRef<int> Mask = N->getMask();
  // The identity permutation of one 16-byte input, used to check that every
  // element other than the inserted one is left in place.
  int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};

  // For each mask element, find out if we're just inserting something
  // from V2 into V1 or vice versa.
  // Possible permutations inserting an element from V2 into V1:
  //   X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
  //   0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
  //   ...
  //   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
  // Inserting from V1 into V2 will be similar, except mask range will be
  // [16,31].

  bool FoundCandidate = false;
  // If both vector operands for the shuffle are the same vector, the mask
  // will contain only elements from the first one and the second one will be
  // undef.
  unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
  // Go through the mask of bytes to find an element that's being moved
  // from one vector to the other.
  for (unsigned i = 0; i < BytesInVector; ++i) {
    unsigned CurrentElement = Mask[i];
    // If 2nd operand is undefined, we should only look for element 7 in the
    // Mask.
    if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
      continue;

    bool OtherElementsInOrder = true;
    // Examine the other elements in the Mask to see if they're in original
    // order.
    for (unsigned j = 0; j < BytesInVector; ++j) {
      if (j == i)
        continue;
      // If CurrentElement is from V1 [0,15], then we need the rest of the
      // Mask to be from V2 [16,31] and vice versa.  Unless the 2nd operand
      // is undefined, in which case we always assume we're picking from the
      // 1st operand.
      int MaskOffset =
          (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
      if (Mask[j] != OriginalOrder[j] + MaskOffset) {
        OtherElementsInOrder = false;
        break;
      }
    }
    // If other elements are in original order, we record the number of shifts
    // we need to get the element we want into element 7. Also record which byte
    // in the vector we should insert into.
    if (OtherElementsInOrder) {
      // If 2nd operand is undefined, we assume no shifts and no swapping.
      if (V2.isUndef()) {
        ShiftElts = 0;
        Swap = false;
      } else {
        // Only need the last 4 bits for shifts because operands will be
        // swapped if CurrentElement is >= 2^4.
        ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
                         : BigEndianShifts[CurrentElement & 0xF];
        Swap = CurrentElement < BytesInVector;
      }
      InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
      FoundCandidate = true;
      break;
    }
  }

  if (!FoundCandidate)
    return SDValue();

  // Candidate found, construct the proper SDAG sequence with VINSERTB,
  // optionally with VECSHL if shift is required.
  if (Swap)
    std::swap(V1, V2);
  if (V2.isUndef())
    V2 = V1;
  if (ShiftElts) {
    // Rotate the source so the wanted byte lands in the VINSERTB source slot.
    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
                              DAG.getConstant(ShiftElts, dl, MVT::i32));
    return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
                       DAG.getConstant(InsertAtByte, dl, MVT::i32));
  }
  return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
                     DAG.getConstant(InsertAtByte, dl, MVT::i32));
}
/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
/// SDValue.
SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
                                           SelectionDAG &DAG) const {
  const unsigned NumHalfWords = 8;
  const unsigned BytesInVector = NumHalfWords * 2;
  // Check that the shuffle is on half-words.
  if (!isNByteElemShuffleMask(N, 2, 1))
    return SDValue();

  bool IsLE = Subtarget.isLittleEndian();
  SDLoc dl(N);
  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);
  unsigned ShiftElts = 0, InsertAtByte = 0;
  bool Swap = false;

  // Shifts required to get the half-word we want at element 3.
  // Indexed by the source half-word position (mod 8).
  unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
  unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};

  uint32_t Mask = 0;
  // Identity orders for the two inputs, packed one nibble per half-word:
  // 0x01234567 (leading zero dropped) for V1, 0x89ABCDEF for V2.
  uint32_t OriginalOrderLow = 0x1234567;
  uint32_t OriginalOrderHigh = 0x89ABCDEF;
  // Now we look at mask elements 0,2,4,6,8,10,12,14.  Pack the mask into a
  // 32-bit space, only need 4-bit nibbles per element.
  for (unsigned i = 0; i < NumHalfWords; ++i) {
    unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
    Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
  }

  // For each mask element, find out if we're just inserting something
  // from V2 into V1 or vice versa.  Possible permutations inserting an element
  // from V2 into V1:
  //   X, 1, 2, 3, 4, 5, 6, 7
  //   0, X, 2, 3, 4, 5, 6, 7
  //   0, 1, X, 3, 4, 5, 6, 7
  //   0, 1, 2, X, 4, 5, 6, 7
  //   0, 1, 2, 3, X, 5, 6, 7
  //   0, 1, 2, 3, 4, X, 6, 7
  //   0, 1, 2, 3, 4, 5, X, 7
  //   0, 1, 2, 3, 4, 5, 6, X
  // Inserting from V1 into V2 will be similar, except mask range will be
  // [8,15].

  bool FoundCandidate = false;
  // Go through the mask of half-words to find an element that's being moved
  // from one vector to the other.
  for (unsigned i = 0; i < NumHalfWords; ++i) {
    unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
    uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
    // Mask that clears this element's nibble so we can compare the rest
    // against the expected identity order.
    uint32_t MaskOtherElts = ~(0xF << MaskShift);
    uint32_t TargetOrder = 0x0;

    // If both vector operands for the shuffle are the same vector, the mask
    // will contain only elements from the first one and the second one will be
    // undef.
    if (V2.isUndef()) {
      ShiftElts = 0;
      unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
      TargetOrder = OriginalOrderLow;
      Swap = false;
      // Skip if not the correct element or mask of other elements don't equal
      // to our expected order.
      if (MaskOneElt == VINSERTHSrcElem &&
          (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
        InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
        FoundCandidate = true;
        break;
      }
    } else { // If both operands are defined.
      // Target order is [8,15] if the current mask is between [0,7].
      TargetOrder =
          (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
      // Skip if mask of other elements don't equal our expected order.
      if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
        // We only need the last 3 bits for the number of shifts.
        ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
                         : BigEndianShifts[MaskOneElt & 0x7];
        InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
        Swap = MaskOneElt < NumHalfWords;
        FoundCandidate = true;
        break;
      }
    }
  }

  if (!FoundCandidate)
    return SDValue();

  // Candidate found, construct the proper SDAG sequence with VINSERTH,
  // optionally with VECSHL if shift is required.
  if (Swap)
    std::swap(V1, V2);
  if (V2.isUndef())
    V2 = V1;
  SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
  if (ShiftElts) {
    // Double ShiftElts because we're left shifting on v16i8 type.
    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
                              DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
    SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
    SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
                              DAG.getConstant(InsertAtByte, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
  }
  SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
  SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
                            DAG.getConstant(InsertAtByte, dl, MVT::i32));
  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
}
/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
/// is a shuffle we can handle in a single instruction, return it.  Otherwise,
/// return the code it can be lowered into.  Worst case, it can always be
/// lowered into a vperm.
SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
  EVT VT = Op.getValueType();
  bool isLittleEndian = Subtarget.isLittleEndian();

  // The matchers below are tried roughly in order of decreasing specificity:
  // single-instruction ISA 3.0 / VSX patterns first, then the generic
  // AltiVec immediate shuffles, the perfect-shuffle table, and finally vperm.
  unsigned ShiftElts, InsertAtByte;
  bool Swap = false;
  if (Subtarget.hasP9Vector() &&
      PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
                           isLittleEndian)) {
    if (Swap)
      std::swap(V1, V2);
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
    SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
    if (ShiftElts) {
      // Rotate the source word into position before inserting.
      SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
                                DAG.getConstant(ShiftElts, dl, MVT::i32));
      SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
                                DAG.getConstant(InsertAtByte, dl, MVT::i32));
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
    }
    SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
                              DAG.getConstant(InsertAtByte, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
  }

  if (Subtarget.hasP9Altivec()) {
    // Try the ISA 3.0 half-word and byte insert forms.
    SDValue NewISDNode;
    if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
      return NewISDNode;

    if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
      return NewISDNode;
  }

  if (Subtarget.hasVSX() &&
      PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
    if (Swap)
      std::swap(V1, V2);
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
    SDValue Conv2 =
        DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);

    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
                              DAG.getConstant(ShiftElts, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
  }

  if (Subtarget.hasVSX() &&
      PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
    if (Swap)
      std::swap(V1, V2);
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
    SDValue Conv2 =
        DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);

    SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
                                 DAG.getConstant(ShiftElts, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
  }

  if (Subtarget.hasP9Vector()) {
    // Byte-reverse-within-element shuffles, lowered to XXREVERSE at the
    // matching element width.
    if (PPC::isXXBRHShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
      SDValue ReveHWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v8i16, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
    } else if (PPC::isXXBRWShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
      SDValue ReveWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v4i32, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
    } else if (PPC::isXXBRDShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
      SDValue ReveDWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
    } else if (PPC::isXXBRQShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
      SDValue ReveQWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v1i128, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
    }
  }

  if (Subtarget.hasVSX()) {
    if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
      int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);

      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
      SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
                                  DAG.getConstant(SplatIdx, dl, MVT::i32));
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
    }

    // Left shifts of 8 bytes are actually swaps. Convert accordingly.
    if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
      SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
    }
  }

  if (Subtarget.hasQPX()) {
    if (VT.getVectorNumElements() != 4)
      return SDValue();

    if (V2.isUndef()) V2 = V1;

    int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp);
    if (AlignIdx != -1) {
      return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2,
                         DAG.getConstant(AlignIdx, dl, MVT::i32));
    } else if (SVOp->isSplat()) {
      int SplatIdx = SVOp->getSplatIndex();
      // Splat indices >= 4 refer to the second operand; normalize.
      if (SplatIdx >= 4) {
        std::swap(V1, V2);
        SplatIdx -= 4;
      }

      return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1,
                         DAG.getConstant(SplatIdx, dl, MVT::i32));
    }

    // Lower this into a qvgpci/qvfperm pair.

    // Compute the qvgpci literal: 3 bits per element, packed high-to-low;
    // undef mask slots default to the identity index.
    unsigned idx = 0;
    for (unsigned i = 0; i < 4; ++i) {
      int m = SVOp->getMaskElt(i);
      unsigned mm = m >= 0 ? (unsigned) m : i;
      idx |= mm << (3-i)*3;
    }

    SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64,
                             DAG.getConstant(idx, dl, MVT::i32));
    return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3);
  }

  // Cases that are handled by instructions that take permute immediates
  // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
  // selected by the instruction selector.
  if (V2.isUndef()) {
    if (PPC::isSplatShuffleMask(SVOp, 1) ||
        PPC::isSplatShuffleMask(SVOp, 2) ||
        PPC::isSplatShuffleMask(SVOp, 4) ||
        PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
        PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
        PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
        PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
        PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
        PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
        (Subtarget.hasP8Altivec() && (
         PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
         PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
         PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
      return Op;
    }
  }

  // Altivec has a variety of "shuffle immediates" that take two vector inputs
  // and produce a fixed permutation.  If any of these match, do not lower to
  // VPERM.
  unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
  if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
      PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
      PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
      PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
      PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
      PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
      (Subtarget.hasP8Altivec() && (
       PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
       PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
       PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
    return Op;

  // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
  // perfect shuffle table to emit an optimal matching sequence.
  ArrayRef<int> PermMask = SVOp->getMask();

  unsigned PFIndexes[4];
  bool isFourElementShuffle = true;
  for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
    unsigned EltNo = 8;   // Start out undef.
    for (unsigned j = 0; j != 4; ++j) {  // Intra-element byte.
      if (PermMask[i*4+j] < 0)
        continue;   // Undef, ignore it.

      unsigned ByteSource = PermMask[i*4+j];
      // All four bytes of a word must come from consecutive bytes of the
      // same source word, in order.
      if ((ByteSource & 3) != j) {
        isFourElementShuffle = false;
        break;
      }

      if (EltNo == 8) {
        EltNo = ByteSource/4;
      } else if (EltNo != ByteSource/4) {
        isFourElementShuffle = false;
        break;
      }
    }
    PFIndexes[i] = EltNo;
  }

  // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
  // perfect shuffle vector to determine if it is cost effective to do this as
  // discrete instructions, or whether we should use a vperm.
  // For now, we skip this for little endian until such time as we have a
  // little-endian perfect shuffle table.
  if (isFourElementShuffle && !isLittleEndian) {
    // Compute the index in the perfect shuffle table (base-9 digits, since
    // each element index is 0..7 or 8 for undef).
    unsigned PFTableIndex =
      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];

    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
    unsigned Cost  = (PFEntry >> 30);

    // Determining when to avoid vperm is tricky.  Many things affect the cost
    // of vperm, particularly how many times the perm mask needs to be
    // computed.  For example, if the perm mask can be hoisted out of a loop or
    // is already used (perhaps because there are multiple permutes with the
    // same shuffle mask?) the vperm has a cost of 1.  OTOH, hoisting the
    // permute mask out of the loop requires an extra register.
    //
    // As a compromise, we only emit discrete instructions if the shuffle can
    // be generated in 3 or fewer operations.  When we have loop information
    // available, if this block is within a loop, we should avoid using vperm
    // for 3-operation perms and use a constant pool load instead.
    if (Cost < 3)
      return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
  }

  // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
  // vector that will get spilled to the constant pool.
  if (V2.isUndef()) V2 = V1;

  // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
  // that it is in input element units, not in bytes.  Convert now.

  // For little endian, the order of the input vectors is reversed, and
  // the permutation mask is complemented with respect to 31.  This is
  // necessary to produce proper semantics with the big-endian-biased vperm
  // instruction.
  EVT EltVT = V1.getValueType().getVectorElementType();
  unsigned BytesPerElement = EltVT.getSizeInBits()/8;

  SmallVector<SDValue, 16> ResultMask;
  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
    unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];

    for (unsigned j = 0; j != BytesPerElement; ++j)
      if (isLittleEndian)
        ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
                                             dl, MVT::i32));
      else
        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
                                             MVT::i32));
  }

  SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
  if (isLittleEndian)
    return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
                       V2, V1, VPermMask);
  else
    return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
                       V1, V2, VPermMask);
}
/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
/// vector comparison.  If it is, return true and fill in Opc/isDot with
/// information about the intrinsic.
///
/// CompareOpc receives the numeric instruction opcode (the XO field used by
/// the VCMP/VCMPo patterns); isDot is set for the record-form ("_p")
/// predicate intrinsics that also set CR6.  Intrinsics that need an ISA
/// feature (P8/P9 AltiVec, VSX) the subtarget lacks return false.
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
                                 bool &isDot, const PPCSubtarget &Subtarget) {
  unsigned IntrinsicID =
      cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
  CompareOpc = -1;
  isDot = false;
  switch (IntrinsicID) {
  default:
    return false;
  // Comparison predicates.
  case Intrinsic::ppc_altivec_vcmpbfp_p:
    CompareOpc = 966;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpeqfp_p:
    CompareOpc = 198;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequb_p:
    CompareOpc = 6;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequh_p:
    CompareOpc = 70;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequw_p:
    CompareOpc = 134;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequd_p:
    // Doubleword compare requires P8 AltiVec.
    if (Subtarget.hasP8Altivec()) {
      CompareOpc = 199;
      isDot = true;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpneb_p:
  case Intrinsic::ppc_altivec_vcmpneh_p:
  case Intrinsic::ppc_altivec_vcmpnew_p:
  case Intrinsic::ppc_altivec_vcmpnezb_p:
  case Intrinsic::ppc_altivec_vcmpnezh_p:
  case Intrinsic::ppc_altivec_vcmpnezw_p:
    // Not-equal (and not-equal-or-zero) compares require P9 AltiVec.
    if (Subtarget.hasP9Altivec()) {
      switch (IntrinsicID) {
      default:
        llvm_unreachable("Unknown comparison intrinsic.");
      case Intrinsic::ppc_altivec_vcmpneb_p:
        CompareOpc = 7;
        break;
      case Intrinsic::ppc_altivec_vcmpneh_p:
        CompareOpc = 71;
        break;
      case Intrinsic::ppc_altivec_vcmpnew_p:
        CompareOpc = 135;
        break;
      case Intrinsic::ppc_altivec_vcmpnezb_p:
        CompareOpc = 263;
        break;
      case Intrinsic::ppc_altivec_vcmpnezh_p:
        CompareOpc = 327;
        break;
      case Intrinsic::ppc_altivec_vcmpnezw_p:
        CompareOpc = 391;
        break;
      }
      isDot = true;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgefp_p:
    CompareOpc = 454;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtfp_p:
    CompareOpc = 710;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsb_p:
    CompareOpc = 774;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsh_p:
    CompareOpc = 838;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsw_p:
    CompareOpc = 902;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsd_p:
    if (Subtarget.hasP8Altivec()) {
      CompareOpc = 967;
      isDot = true;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgtub_p:
    CompareOpc = 518;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuh_p:
    CompareOpc = 582;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuw_p:
    CompareOpc = 646;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtud_p:
    if (Subtarget.hasP8Altivec()) {
      CompareOpc = 711;
      isDot = true;
    } else
      return false;
    break;

  // VSX predicate comparisons use the same infrastructure
  case Intrinsic::ppc_vsx_xvcmpeqdp_p:
  case Intrinsic::ppc_vsx_xvcmpgedp_p:
  case Intrinsic::ppc_vsx_xvcmpgtdp_p:
  case Intrinsic::ppc_vsx_xvcmpeqsp_p:
  case Intrinsic::ppc_vsx_xvcmpgesp_p:
  case Intrinsic::ppc_vsx_xvcmpgtsp_p:
    if (Subtarget.hasVSX()) {
      switch (IntrinsicID) {
      case Intrinsic::ppc_vsx_xvcmpeqdp_p:
        CompareOpc = 99;
        break;
      case Intrinsic::ppc_vsx_xvcmpgedp_p:
        CompareOpc = 115;
        break;
      case Intrinsic::ppc_vsx_xvcmpgtdp_p:
        CompareOpc = 107;
        break;
      case Intrinsic::ppc_vsx_xvcmpeqsp_p:
        CompareOpc = 67;
        break;
      case Intrinsic::ppc_vsx_xvcmpgesp_p:
        CompareOpc = 83;
        break;
      case Intrinsic::ppc_vsx_xvcmpgtsp_p:
        CompareOpc = 75;
        break;
      }
      isDot = true;
    } else
      return false;
    break;

  // Normal Comparisons.  Same opcodes as the "_p" forms above, but isDot
  // stays false because these do not set CR6.
  case Intrinsic::ppc_altivec_vcmpbfp:
    CompareOpc = 966;
    break;
  case Intrinsic::ppc_altivec_vcmpeqfp:
    CompareOpc = 198;
    break;
  case Intrinsic::ppc_altivec_vcmpequb:
    CompareOpc = 6;
    break;
  case Intrinsic::ppc_altivec_vcmpequh:
    CompareOpc = 70;
    break;
  case Intrinsic::ppc_altivec_vcmpequw:
    CompareOpc = 134;
    break;
  case Intrinsic::ppc_altivec_vcmpequd:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 199;
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpneb:
  case Intrinsic::ppc_altivec_vcmpneh:
  case Intrinsic::ppc_altivec_vcmpnew:
  case Intrinsic::ppc_altivec_vcmpnezb:
  case Intrinsic::ppc_altivec_vcmpnezh:
  case Intrinsic::ppc_altivec_vcmpnezw:
    if (Subtarget.hasP9Altivec())
      switch (IntrinsicID) {
      default:
        llvm_unreachable("Unknown comparison intrinsic.");
      case Intrinsic::ppc_altivec_vcmpneb:
        CompareOpc = 7;
        break;
      case Intrinsic::ppc_altivec_vcmpneh:
        CompareOpc = 71;
        break;
      case Intrinsic::ppc_altivec_vcmpnew:
        CompareOpc = 135;
        break;
      case Intrinsic::ppc_altivec_vcmpnezb:
        CompareOpc = 263;
        break;
      case Intrinsic::ppc_altivec_vcmpnezh:
        CompareOpc = 327;
        break;
      case Intrinsic::ppc_altivec_vcmpnezw:
        CompareOpc = 391;
        break;
      }
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgefp:
    CompareOpc = 454;
    break;
  case Intrinsic::ppc_altivec_vcmpgtfp:
    CompareOpc = 710;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsb:
    CompareOpc = 774;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsh:
    CompareOpc = 838;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsw:
    CompareOpc = 902;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsd:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 967;
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgtub:
    CompareOpc = 518;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuh:
    CompareOpc = 582;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuw:
    CompareOpc = 646;
    break;
  case Intrinsic::ppc_altivec_vcmpgtud:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 711;
    else
      return false;
    break;
  }
  return true;
}
9020 SDValue Ops[] = { 9021 Op.getOperand(2), // LHS 9022 Op.getOperand(3), // RHS 9023 DAG.getConstant(CompareOpc, dl, MVT::i32) 9024 }; 9025 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue }; 9026 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops); 9027 9028 // Now that we have the comparison, emit a copy from the CR to a GPR. 9029 // This is flagged to the above dot comparison. 9030 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32, 9031 DAG.getRegister(PPC::CR6, MVT::i32), 9032 CompNode.getValue(1)); 9033 9034 // Unpack the result based on how the target uses it. 9035 unsigned BitNo; // Bit # of CR6. 9036 bool InvertBit; // Invert result? 9037 switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) { 9038 default: // Can't happen, don't crash on invalid number though. 9039 case 0: // Return the value of the EQ bit of CR6. 9040 BitNo = 0; InvertBit = false; 9041 break; 9042 case 1: // Return the inverted value of the EQ bit of CR6. 9043 BitNo = 0; InvertBit = true; 9044 break; 9045 case 2: // Return the value of the LT bit of CR6. 9046 BitNo = 2; InvertBit = false; 9047 break; 9048 case 3: // Return the inverted value of the LT bit of CR6. 9049 BitNo = 2; InvertBit = true; 9050 break; 9051 } 9052 9053 // Shift the bit into the low position. 9054 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags, 9055 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32)); 9056 // Isolate the bit. 9057 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags, 9058 DAG.getConstant(1, dl, MVT::i32)); 9059 9060 // If we are supposed to, toggle the bit. 9061 if (InvertBit) 9062 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags, 9063 DAG.getConstant(1, dl, MVT::i32)); 9064 return Flags; 9065 } 9066 9067 SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op, 9068 SelectionDAG &DAG) const { 9069 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to 9070 // the beginning of the argument list. 
9071 int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1; 9072 SDLoc DL(Op); 9073 switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) { 9074 case Intrinsic::ppc_cfence: { 9075 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument."); 9076 assert(Subtarget.isPPC64() && "Only 64-bit is supported for now."); 9077 return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other, 9078 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, 9079 Op.getOperand(ArgStart + 1)), 9080 Op.getOperand(0)), 9081 0); 9082 } 9083 default: 9084 break; 9085 } 9086 return SDValue(); 9087 } 9088 9089 SDValue PPCTargetLowering::LowerREM(SDValue Op, SelectionDAG &DAG) const { 9090 // Check for a DIV with the same operands as this REM. 9091 for (auto UI : Op.getOperand(1)->uses()) { 9092 if ((Op.getOpcode() == ISD::SREM && UI->getOpcode() == ISD::SDIV) || 9093 (Op.getOpcode() == ISD::UREM && UI->getOpcode() == ISD::UDIV)) 9094 if (UI->getOperand(0) == Op.getOperand(0) && 9095 UI->getOperand(1) == Op.getOperand(1)) 9096 return SDValue(); 9097 } 9098 return Op; 9099 } 9100 9101 // Lower scalar BSWAP64 to xxbrd. 9102 SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const { 9103 SDLoc dl(Op); 9104 // MTVSRDD 9105 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0), 9106 Op.getOperand(0)); 9107 // XXBRD 9108 Op = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Op); 9109 // MFVSRD 9110 int VectorIndex = 0; 9111 if (Subtarget.isLittleEndian()) 9112 VectorIndex = 1; 9113 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op, 9114 DAG.getTargetConstant(VectorIndex, dl, MVT::i32)); 9115 return Op; 9116 } 9117 9118 // ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be 9119 // compared to a value that is atomically loaded (atomic loads zero-extend). 
SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
                                                SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
         "Expecting an atomic compare-and-swap here.");
  SDLoc dl(Op);
  auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
  EVT MemVT = AtomicNode->getMemoryVT();
  // Word and doubleword CAS need no fixup.
  if (MemVT.getSizeInBits() >= 32)
    return Op;

  SDValue CmpOp = Op.getOperand(2);
  // If this is already correctly zero-extended, leave it alone.
  auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
  if (DAG.MaskedValueIsZero(CmpOp, HighBits))
    return Op;

  // Clear the high bits of the compare operand.
  unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
  SDValue NewCmpOp =
    DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
                DAG.getConstant(MaskVal, dl, MVT::i32));

  // Replace the existing compare operand with the properly zero-extended one.
  SmallVector<SDValue, 4> Ops;
  for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
    Ops.push_back(AtomicNode->getOperand(i));
  Ops[2] = NewCmpOp;
  MachineMemOperand *MMO = AtomicNode->getMemOperand();
  SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
  auto NodeTy =
    (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
  // Rebuild the node with the same memory operand so alias info is preserved.
  return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
}

// Lower SCALAR_TO_VECTOR via a 16-byte-aligned stack temporary: store the
// scalar, then reload it as the vector type.
SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc dl(Op);
  // Create a stack slot that is 16-byte aligned.
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  int FrameIdx = MFI.CreateStackObject(16, 16, false);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

  // Store the input value into Value#0 of the stack slot.
  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
                               MachinePointerInfo());
  // Load it out.
  return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
}

SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                  SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
         "Should only be called for ISD::INSERT_VECTOR_ELT");

  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
  // We have legal lowering for constant indices but not for variable ones.
  if (!C)
    return SDValue();

  EVT VT = Op.getValueType();
  SDLoc dl(Op);
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
  if (VT == MVT::v8i16 || VT == MVT::v16i8) {
    SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
    unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
    unsigned InsertAtElement = C->getZExtValue();
    unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
    // VECINSERT takes a big-endian byte position; flip it for LE targets.
    if (Subtarget.isLittleEndian()) {
      InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
    }
    return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
                       DAG.getConstant(InsertAtByte, dl, MVT::i32));
  }
  return Op;
}

// Lowering for QPX v4i1 EXTRACT_VECTOR_ELT: round-trip through FP and a
// stack slot, then load the requested 32-bit lane.
SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDNode *N = Op.getNode();

  assert(N->getOperand(0).getValueType() == MVT::v4i1 &&
         "Unknown extract_vector_elt type");

  SDValue Value = N->getOperand(0);

  // The first part of this is like the store lowering except that we don't
  // need to track the chain.

  // The values are now known to be -1 (false) or 1 (true). To convert this
  // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
  // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
  Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);

  // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
  // understand how to form the extending load.
  SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);

  Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);

  // Now convert to an integer and store.
  Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
                      DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
                      Value);

  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  int FrameIdx = MFI.CreateStackObject(16, 16, false);
  MachinePointerInfo PtrInfo =
    MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

  SDValue StoreChain = DAG.getEntryNode();
  SDValue Ops[] = {StoreChain,
                   DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
                   Value, FIdx};
  SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);

  StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
                                       dl, VTs, Ops, MVT::v4i32, PtrInfo);

  // Extract the value requested.  Each stored lane occupies 4 bytes.
  unsigned Offset = 4*cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
  SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
  Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);

  SDValue IntVal =
    DAG.getLoad(MVT::i32, dl, StoreChain, Idx, PtrInfo.getWithOffset(Offset));

  if (!Subtarget.useCRBits())
    return IntVal;

  return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal);
}

/// Lowering for QPX v4i1 loads
SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc dl(Op);
  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
  SDValue LoadChain = LN->getChain();
  SDValue BasePtr = LN->getBasePtr();

  // Under-aligned v4f64/v4f32 loads are split into four scalar loads.
  if (Op.getValueType() == MVT::v4f64 ||
      Op.getValueType() == MVT::v4f32) {
    EVT MemVT = LN->getMemoryVT();
    unsigned Alignment = LN->getAlignment();

    // If this load is properly aligned, then it is legal.
    if (Alignment >= MemVT.getStoreSize())
      return Op;

    EVT ScalarVT = Op.getValueType().getScalarType(),
        ScalarMemVT = MemVT.getScalarType();
    unsigned Stride = ScalarMemVT.getStoreSize();

    SDValue Vals[4], LoadChains[4];
    for (unsigned Idx = 0; Idx < 4; ++Idx) {
      SDValue Load;
      if (ScalarVT != ScalarMemVT)
        Load = DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain,
                              BasePtr,
                              LN->getPointerInfo().getWithOffset(Idx * Stride),
                              ScalarMemVT, MinAlign(Alignment, Idx * Stride),
                              LN->getMemOperand()->getFlags(), LN->getAAInfo());
      else
        Load = DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr,
                           LN->getPointerInfo().getWithOffset(Idx * Stride),
                           MinAlign(Alignment, Idx * Stride),
                           LN->getMemOperand()->getFlags(), LN->getAAInfo());

      // Only the first scalar load carries the pre-increment semantics.
      if (Idx == 0 && LN->isIndexed()) {
        assert(LN->getAddressingMode() == ISD::PRE_INC &&
               "Unknown addressing mode on vector load");
        Load = DAG.getIndexedLoad(Load, dl,
                                  BasePtr, LN->getOffset(),
                                  LN->getAddressingMode());
      }

      Vals[Idx] = Load;
      LoadChains[Idx] = Load.getValue(1);

      // Advance to the next scalar element.
      BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                            DAG.getConstant(Stride, dl,
                                            BasePtr.getValueType()));
    }

    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
    SDValue Value = DAG.getBuildVector(Op.getValueType(), dl, Vals);

    // Indexed loads also return the updated base pointer (value 1 of the
    // first, pre-inc, load).
    if (LN->isIndexed()) {
      SDValue RetOps[] = { Value, Vals[0].getValue(1), TF };
      return DAG.getMergeValues(RetOps, dl);
    }

    SDValue RetOps[] = { Value, TF };
    return DAG.getMergeValues(RetOps, dl);
  }

  assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower");
  assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported");

  // To lower v4i1 from a byte array, we load the byte elements of the
  // vector and then reuse the BUILD_VECTOR logic.

  SDValue VectElmts[4], VectElmtChains[4];
  for (unsigned i = 0; i < 4; ++i) {
    SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
    Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);

    VectElmts[i] = DAG.getExtLoad(
        ISD::EXTLOAD, dl, MVT::i32, LoadChain, Idx,
        LN->getPointerInfo().getWithOffset(i), MVT::i8,
        /* Alignment = */ 1, LN->getMemOperand()->getFlags(), LN->getAAInfo());
    VectElmtChains[i] = VectElmts[i].getValue(1);
  }

  LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains);
  SDValue Value = DAG.getBuildVector(MVT::v4i1, dl, VectElmts);

  SDValue RVals[] = { Value, LoadChain };
  return DAG.getMergeValues(RVals, dl);
}

/// Lowering for QPX v4i1 stores
SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc dl(Op);
  StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
  SDValue StoreChain = SN->getChain();
  SDValue BasePtr = SN->getBasePtr();
  SDValue Value = SN->getValue();

  // Under-aligned v4f64/v4f32 stores are split into four scalar stores.
  if (Value.getValueType() == MVT::v4f64 ||
      Value.getValueType() == MVT::v4f32) {
    EVT MemVT = SN->getMemoryVT();
    unsigned Alignment = SN->getAlignment();

    // If this store is properly aligned, then it is legal.
    if (Alignment >= MemVT.getStoreSize())
      return Op;

    EVT ScalarVT = Value.getValueType().getScalarType(),
        ScalarMemVT = MemVT.getScalarType();
    unsigned Stride = ScalarMemVT.getStoreSize();

    SDValue Stores[4];
    for (unsigned Idx = 0; Idx < 4; ++Idx) {
      SDValue Ex = DAG.getNode(
          ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value,
          DAG.getConstant(Idx, dl, getVectorIdxTy(DAG.getDataLayout())));
      SDValue Store;
      if (ScalarVT != ScalarMemVT)
        Store =
          DAG.getTruncStore(StoreChain, dl, Ex, BasePtr,
                            SN->getPointerInfo().getWithOffset(Idx * Stride),
                            ScalarMemVT, MinAlign(Alignment, Idx * Stride),
                            SN->getMemOperand()->getFlags(), SN->getAAInfo());
      else
        Store = DAG.getStore(StoreChain, dl, Ex, BasePtr,
                             SN->getPointerInfo().getWithOffset(Idx * Stride),
                             MinAlign(Alignment, Idx * Stride),
                             SN->getMemOperand()->getFlags(), SN->getAAInfo());

      // Only the first scalar store carries the pre-increment semantics.
      if (Idx == 0 && SN->isIndexed()) {
        assert(SN->getAddressingMode() == ISD::PRE_INC &&
               "Unknown addressing mode on vector store");
        Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(),
                                    SN->getAddressingMode());
      }

      BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                            DAG.getConstant(Stride, dl,
                                            BasePtr.getValueType()));
      Stores[Idx] = Store;
    }

    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

    // Indexed stores also return the updated base pointer.
    if (SN->isIndexed()) {
      SDValue RetOps[] = { TF, Stores[0].getValue(1) };
      return DAG.getMergeValues(RetOps, dl);
    }

    return TF;
  }

  assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported");
  assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower");

  // The values are now known to be -1 (false) or 1 (true). To convert this
  // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
  // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
  Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);

  // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
  // understand how to form the extending load.
  SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);

  Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);

  // Now convert to an integer and store.
  Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
                      DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
                      Value);

  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  int FrameIdx = MFI.CreateStackObject(16, 16, false);
  MachinePointerInfo PtrInfo =
    MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

  SDValue Ops[] = {StoreChain,
                   DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
                   Value, FIdx};
  SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);

  StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
                                       dl, VTs, Ops, MVT::v4i32, PtrInfo);

  // Move data into the byte array: reload each 32-bit lane from the slot...
  SDValue Loads[4], LoadChains[4];
  for (unsigned i = 0; i < 4; ++i) {
    unsigned Offset = 4*i;
    SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
    Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);

    Loads[i] = DAG.getLoad(MVT::i32, dl, StoreChain, Idx,
                           PtrInfo.getWithOffset(Offset));
    LoadChains[i] = Loads[i].getValue(1);
  }

  StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);

  // ...then truncate-store each lane as one byte at the destination.
  SDValue Stores[4];
  for (unsigned i = 0; i < 4; ++i) {
    SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
    Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);

    Stores[i] = DAG.getTruncStore(
        StoreChain, dl, Loads[i], Idx, SN->getPointerInfo().getWithOffset(i),
        MVT::i8, /* Alignment = */ 1, SN->getMemOperand()->getFlags(),
        SN->getAAInfo());
  }

  StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

  return StoreChain;
}

// Custom lowering for vector multiplies (no direct vmul for these types);
// synthesized from the AltiVec even/odd multiply and multiply-sum ops.
SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  if (Op.getValueType() == MVT::v4i32) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);

    SDValue Zero  = BuildSplatI(0, 1, MVT::v4i32, DAG, dl);
    SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl); // +16 as shift amt.

    SDValue RHSSwap =   // = vrlw RHS, 16
      BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);

    // Shrinkify inputs to v8i16.
    LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
    RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
    RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);

    // Low parts multiplied together, generating 32-bit results (we ignore the
    // top parts).
    SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
                                      LHS, RHS, DAG, dl, MVT::v4i32);

    SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
                                      LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
    // Shift the high parts up 16 bits.
    HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
                              Neg16, DAG, dl);
    return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
  } else if (Op.getValueType() == MVT::v8i16) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);

    SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);

    // vmladduhm with a zero addend is a plain low-half multiply.
    return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
                            LHS, RHS, Zero, DAG, dl);
  } else if (Op.getValueType() == MVT::v16i8) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
    bool isLittleEndian = Subtarget.isLittleEndian();

    // Multiply the even 8-bit parts, producing 16-bit sums.
    SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
                                         LHS, RHS, DAG, dl, MVT::v8i16);
    EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);

    // Multiply the odd 8-bit parts, producing 16-bit sums.
    SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
                                        LHS, RHS, DAG, dl, MVT::v8i16);
    OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);

    // Merge the results together.  Because vmuleub and vmuloub are
    // instructions with a big-endian bias, we must reverse the
    // element numbering and reverse the meaning of "odd" and "even"
    // when generating little endian code.
    int Ops[16];
    for (unsigned i = 0; i != 8; ++i) {
      if (isLittleEndian) {
        Ops[i*2  ] = 2*i;
        Ops[i*2+1] = 2*i+16;
      } else {
        Ops[i*2  ] = 2*i+1;
        Ops[i*2+1] = 2*i+1+16;
      }
    }
    if (isLittleEndian)
      return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
    else
      return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
  } else {
    llvm_unreachable("Unknown mul to lower!");
  }
}

SDValue PPCTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {

  assert(Op.getOpcode() == ISD::ABS && "Should only be called for ISD::ABS");

  EVT VT = Op.getValueType();
  assert(VT.isVector() &&
         "Only set vector abs as custom, scalar abs shouldn't reach here!");
  assert((VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
          VT == MVT::v16i8) &&
         "Unexpected vector element type!");
  assert((VT != MVT::v2i64 || Subtarget.hasP8Altivec()) &&
         "Current subtarget doesn't support smax v2i64!");

  // For vector abs, it can be lowered to:
  // abs x
  // ==>
  // y = -x
  // smax(x, y)

  SDLoc dl(Op);
  SDValue X = Op.getOperand(0);
  SDValue Zero = DAG.getConstant(0, dl, VT);
  SDValue Y = DAG.getNode(ISD::SUB, dl, VT, Zero, X);

  // SMAX patch https://reviews.llvm.org/D47332
  // hasn't landed yet, so use intrinsic first here.
  // TODO: Should use SMAX directly once SMAX patch landed
  Intrinsic::ID BifID = Intrinsic::ppc_altivec_vmaxsw;
  if (VT == MVT::v2i64)
    BifID = Intrinsic::ppc_altivec_vmaxsd;
  else if (VT == MVT::v8i16)
    BifID = Intrinsic::ppc_altivec_vmaxsh;
  else if (VT == MVT::v16i8)
    BifID = Intrinsic::ppc_altivec_vmaxsb;

  return BuildIntrinsicOp(BifID, X, Y, DAG, dl, VT);
}

/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  // Dispatch on the opcode; each case forwards to the matching Lower* helper.
  switch (Op.getOpcode()) {
  default: llvm_unreachable("Wasn't expecting to be able to lower this!");
  case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
  case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
  case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
  case ISD::SETCC:              return LowerSETCC(Op, DAG);
  case ISD::INIT_TRAMPOLINE:    return LowerINIT_TRAMPOLINE(Op, DAG);
  case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);

  // Variable argument lowering.
  case ISD::VASTART:            return LowerVASTART(Op, DAG);
  case ISD::VAARG:              return LowerVAARG(Op, DAG);
  case ISD::VACOPY:             return LowerVACOPY(Op, DAG);

  case ISD::STACKRESTORE:       return LowerSTACKRESTORE(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
  case ISD::GET_DYNAMIC_AREA_OFFSET:
    return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);

  // Exception handling lowering.
  case ISD::EH_DWARF_CFA:       return LowerEH_DWARF_CFA(Op, DAG);
  case ISD::EH_SJLJ_SETJMP:     return lowerEH_SJLJ_SETJMP(Op, DAG);
  case ISD::EH_SJLJ_LONGJMP:    return lowerEH_SJLJ_LONGJMP(Op, DAG);

  case ISD::LOAD:               return LowerLOAD(Op, DAG);
  case ISD::STORE:              return LowerSTORE(Op, DAG);
  case ISD::TRUNCATE:           return LowerTRUNCATE(Op, DAG);
  case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
  case ISD::FP_TO_UINT:
  case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
  case ISD::UINT_TO_FP:
  case ISD::SINT_TO_FP:         return LowerINT_TO_FP(Op, DAG);
  case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);

  // Lower 64-bit shifts.
  case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
  case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
  case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);

  // Vector-related lowering.
  case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::MUL:                return LowerMUL(Op, DAG);
  case ISD::ABS:                return LowerABS(Op, DAG);

  // For counter-based loop handling.
  case ISD::INTRINSIC_W_CHAIN:  return SDValue();

  case ISD::BITCAST:            return LowerBITCAST(Op, DAG);

  // Frame & Return address.
  case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);

  case ISD::INTRINSIC_VOID:
    return LowerINTRINSIC_VOID(Op, DAG);
  case ISD::SREM:
  case ISD::UREM:
    return LowerREM(Op, DAG);
  case ISD::BSWAP:
    return LowerBSWAP(Op, DAG);
  case ISD::ATOMIC_CMP_SWAP:
    return LowerATOMIC_CMP_SWAP(Op, DAG);
  }
}

// Custom type-legalization: push replacement values for nodes whose result
// type is illegal.  Leaving Results empty means "use the default action".
void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue>&Results,
                                           SelectionDAG &DAG) const {
  SDLoc dl(N);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Do not know how to custom type legalize this operation!");
  case ISD::READCYCLECOUNTER: {
    // READ_TIME_BASE yields two i32 halves plus the chain.
    SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
    SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));

    Results.push_back(RTB);
    Results.push_back(RTB.getValue(1));
    Results.push_back(RTB.getValue(2));
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
        Intrinsic::ppc_is_decremented_ctr_nonzero)
      break;

    assert(N->getValueType(0) == MVT::i1 &&
           "Unexpected result type for CTR decrement intrinsic");
    // Re-issue the intrinsic with the legal setcc result type, then truncate
    // back down to the original i1.
    EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                                 N->getValueType(0));
    SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
    SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
                                 N->getOperand(1));

    Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
    Results.push_back(NewInt.getValue(1));
    break;
  }
  case ISD::VAARG: {
    // Only 32-bit SVR4 needs custom VAARG legalization here.
    if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
      return;

    EVT VT = N->getValueType(0);

    if (VT == MVT::i64) {
      // NOTE(review): SDValue(N, 1) hands the node's second result to
      // LowerVAARG — confirm this is the intended result index.
      SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);

      Results.push_back(NewNode);
      Results.push_back(NewNode.getValue(1));
    }
    return;
  }
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
    // LowerFP_TO_INT() can only handle f32 and f64.
    if (N->getOperand(0).getValueType() == MVT::ppcf128)
      return;
    Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
    return;
  case ISD::TRUNCATE: {
    EVT TrgVT = N->getValueType(0);
    if (TrgVT.isVector() &&
        isOperationCustom(N->getOpcode(), TrgVT) &&
        N->getOperand(0).getValueType().getSizeInBits() <= 128)
      Results.push_back(LowerTRUNCATEVector(SDValue(N, 0), DAG));
    return;
  }
  case ISD::BITCAST:
    // Don't handle bitcast here.
9721 return; 9722 } 9723 } 9724 9725 //===----------------------------------------------------------------------===// 9726 // Other Lowering Code 9727 //===----------------------------------------------------------------------===// 9728 9729 static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) { 9730 Module *M = Builder.GetInsertBlock()->getParent()->getParent(); 9731 Function *Func = Intrinsic::getDeclaration(M, Id); 9732 return Builder.CreateCall(Func, {}); 9733 } 9734 9735 // The mappings for emitLeading/TrailingFence is taken from 9736 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html 9737 Instruction *PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder, 9738 Instruction *Inst, 9739 AtomicOrdering Ord) const { 9740 if (Ord == AtomicOrdering::SequentiallyConsistent) 9741 return callIntrinsic(Builder, Intrinsic::ppc_sync); 9742 if (isReleaseOrStronger(Ord)) 9743 return callIntrinsic(Builder, Intrinsic::ppc_lwsync); 9744 return nullptr; 9745 } 9746 9747 Instruction *PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder, 9748 Instruction *Inst, 9749 AtomicOrdering Ord) const { 9750 if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) { 9751 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and 9752 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html 9753 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification. 9754 if (isa<LoadInst>(Inst) && Subtarget.isPPC64()) 9755 return Builder.CreateCall( 9756 Intrinsic::getDeclaration( 9757 Builder.GetInsertBlock()->getParent()->getParent(), 9758 Intrinsic::ppc_cfence, {Inst->getType()}), 9759 {Inst}); 9760 // FIXME: Can use isync for rmw operation. 
9761 return callIntrinsic(Builder, Intrinsic::ppc_lwsync); 9762 } 9763 return nullptr; 9764 } 9765 9766 MachineBasicBlock * 9767 PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB, 9768 unsigned AtomicSize, 9769 unsigned BinOpcode, 9770 unsigned CmpOpcode, 9771 unsigned CmpPred) const { 9772 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 9773 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 9774 9775 auto LoadMnemonic = PPC::LDARX; 9776 auto StoreMnemonic = PPC::STDCX; 9777 switch (AtomicSize) { 9778 default: 9779 llvm_unreachable("Unexpected size of atomic entity"); 9780 case 1: 9781 LoadMnemonic = PPC::LBARX; 9782 StoreMnemonic = PPC::STBCX; 9783 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4"); 9784 break; 9785 case 2: 9786 LoadMnemonic = PPC::LHARX; 9787 StoreMnemonic = PPC::STHCX; 9788 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4"); 9789 break; 9790 case 4: 9791 LoadMnemonic = PPC::LWARX; 9792 StoreMnemonic = PPC::STWCX; 9793 break; 9794 case 8: 9795 LoadMnemonic = PPC::LDARX; 9796 StoreMnemonic = PPC::STDCX; 9797 break; 9798 } 9799 9800 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 9801 MachineFunction *F = BB->getParent(); 9802 MachineFunction::iterator It = ++BB->getIterator(); 9803 9804 unsigned dest = MI.getOperand(0).getReg(); 9805 unsigned ptrA = MI.getOperand(1).getReg(); 9806 unsigned ptrB = MI.getOperand(2).getReg(); 9807 unsigned incr = MI.getOperand(3).getReg(); 9808 DebugLoc dl = MI.getDebugLoc(); 9809 9810 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); 9811 MachineBasicBlock *loop2MBB = 9812 CmpOpcode ? 
F->CreateMachineBasicBlock(LLVM_BB) : nullptr; 9813 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 9814 F->insert(It, loopMBB); 9815 if (CmpOpcode) 9816 F->insert(It, loop2MBB); 9817 F->insert(It, exitMBB); 9818 exitMBB->splice(exitMBB->begin(), BB, 9819 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 9820 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 9821 9822 MachineRegisterInfo &RegInfo = F->getRegInfo(); 9823 unsigned TmpReg = (!BinOpcode) ? incr : 9824 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass 9825 : &PPC::GPRCRegClass); 9826 9827 // thisMBB: 9828 // ... 9829 // fallthrough --> loopMBB 9830 BB->addSuccessor(loopMBB); 9831 9832 // loopMBB: 9833 // l[wd]arx dest, ptr 9834 // add r0, dest, incr 9835 // st[wd]cx. r0, ptr 9836 // bne- loopMBB 9837 // fallthrough --> exitMBB 9838 9839 // For max/min... 9840 // loopMBB: 9841 // l[wd]arx dest, ptr 9842 // cmpl?[wd] incr, dest 9843 // bgt exitMBB 9844 // loop2MBB: 9845 // st[wd]cx. dest, ptr 9846 // bne- loopMBB 9847 // fallthrough --> exitMBB 9848 9849 BB = loopMBB; 9850 BuildMI(BB, dl, TII->get(LoadMnemonic), dest) 9851 .addReg(ptrA).addReg(ptrB); 9852 if (BinOpcode) 9853 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest); 9854 if (CmpOpcode) { 9855 // Signed comparisons of byte or halfword values must be sign-extended. 9856 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) { 9857 unsigned ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); 9858 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? 
PPC::EXTSB : PPC::EXTSH), 9859 ExtReg).addReg(dest); 9860 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0) 9861 .addReg(incr).addReg(ExtReg); 9862 } else 9863 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0) 9864 .addReg(incr).addReg(dest); 9865 9866 BuildMI(BB, dl, TII->get(PPC::BCC)) 9867 .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB); 9868 BB->addSuccessor(loop2MBB); 9869 BB->addSuccessor(exitMBB); 9870 BB = loop2MBB; 9871 } 9872 BuildMI(BB, dl, TII->get(StoreMnemonic)) 9873 .addReg(TmpReg).addReg(ptrA).addReg(ptrB); 9874 BuildMI(BB, dl, TII->get(PPC::BCC)) 9875 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); 9876 BB->addSuccessor(loopMBB); 9877 BB->addSuccessor(exitMBB); 9878 9879 // exitMBB: 9880 // ... 9881 BB = exitMBB; 9882 return BB; 9883 } 9884 9885 MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary( 9886 MachineInstr &MI, MachineBasicBlock *BB, 9887 bool is8bit, // operation 9888 unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const { 9889 // If we support part-word atomic mnemonics, just use them 9890 if (Subtarget.hasPartwordAtomics()) 9891 return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode, 9892 CmpPred); 9893 9894 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 9895 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 9896 // In 64 bit mode we have to use 64 bits for addresses, even though the 9897 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address 9898 // registers without caring whether they're 32 or 64, but here we're 9899 // doing actual arithmetic on the addresses. 9900 bool is64bit = Subtarget.isPPC64(); 9901 bool isLittleEndian = Subtarget.isLittleEndian(); 9902 unsigned ZeroReg = is64bit ? 
PPC::ZERO8 : PPC::ZERO;

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineFunction::iterator It = ++BB->getIterator();

  // Pseudo operands: (dest, ptrA, ptrB, incr) — an X-form address pair plus
  // the value to combine into the word.
  unsigned dest = MI.getOperand(0).getReg();
  unsigned ptrA = MI.getOperand(1).getReg();
  unsigned ptrB = MI.getOperand(2).getReg();
  unsigned incr = MI.getOperand(3).getReg();
  DebugLoc dl = MI.getDebugLoc();

  // loop2MBB is only created for the compare-and-branch (min/max) form.
  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *loop2MBB =
      CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, loopMBB);
  if (CmpOpcode)
    F->insert(It, loop2MBB);
  F->insert(It, exitMBB);
  // Move the rest of BB (and its successor edges) into exitMBB.
  exitMBB->splice(exitMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  MachineRegisterInfo &RegInfo = F->getRegInfo();
  // Addresses need 64-bit registers in 64-bit mode; all the masking/shifting
  // arithmetic below is done on 32-bit values.
  const TargetRegisterClass *RC =
      is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;

  unsigned PtrReg = RegInfo.createVirtualRegister(RC);
  unsigned Shift1Reg = RegInfo.createVirtualRegister(GPRC);
  // On little-endian the byte offset is already the shift amount, so no
  // separate xor'd register is needed.
  unsigned ShiftReg =
      isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
  unsigned Incr2Reg = RegInfo.createVirtualRegister(GPRC);
  unsigned MaskReg = RegInfo.createVirtualRegister(GPRC);
  unsigned Mask2Reg = RegInfo.createVirtualRegister(GPRC);
  unsigned Mask3Reg = RegInfo.createVirtualRegister(GPRC);
  unsigned Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
  unsigned Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
  unsigned Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
  unsigned TmpDestReg = RegInfo.createVirtualRegister(GPRC);
  unsigned Ptr1Reg;
  // With no binary op (swap), the shifted incoming value is stored directly.
  unsigned TmpReg =
      (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  // The 4-byte load must be aligned, while a char or short may be
  // anywhere in the word.  Hence all this nasty bookkeeping code.
  //   add ptr1, ptrA, ptrB [copy if ptrA==0]
  //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
  //   xori shift, shift1, 24 [16]
  //   rlwinm ptr, ptr1, 0, 0, 29
  //   slw incr2, incr, shift
  //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
  //   slw mask, mask2, shift
  //  loopMBB:
  //   lwarx tmpDest, ptr
  //   add tmp, tmpDest, incr2
  //   andc tmp2, tmpDest, mask
  //   and tmp3, tmp, mask
  //   or tmp4, tmp3, tmp2
  //   stwcx. tmp4, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB
  //   srw dest, tmpDest, shift
  if (ptrA != ZeroReg) {
    Ptr1Reg = RegInfo.createVirtualRegister(RC);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
        .addReg(ptrA)
        .addReg(ptrB);
  } else {
    // ptrA is the zero register, so the effective address is just ptrB.
    Ptr1Reg = ptrB;
  }
  // We need use 32-bit subregister to avoid mismatch register class in 64-bit
  // mode.
  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
      .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
      .addImm(3)
      .addImm(27)
      .addImm(is8bit ? 28 : 27);
  // Big-endian needs the complementary bit position within the word.
  if (!isLittleEndian)
    BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
        .addReg(Shift1Reg)
        .addImm(is8bit ? 24 : 16);
  // Clear the low address bits to get the aligned word address.
  if (is64bit)
    BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
        .addReg(Ptr1Reg)
        .addImm(0)
        .addImm(61);
  else
    BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
        .addReg(Ptr1Reg)
        .addImm(0)
        .addImm(0)
        .addImm(29);
  // Position the operand value over the target byte/halfword lane.
  BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
  if (is8bit)
    BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
  else {
    BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
    BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
        .addReg(Mask3Reg)
        .addImm(65535);
  }
  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
      .addReg(Mask2Reg)
      .addReg(ShiftReg);

  BB = loopMBB;
  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
      .addReg(ZeroReg)
      .addReg(PtrReg);
  if (BinOpcode)
    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
        .addReg(Incr2Reg)
        .addReg(TmpDestReg);
  // Keep the untouched lanes of the word (tmp2) and the updated lane (tmp3).
  BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
      .addReg(TmpDestReg)
      .addReg(MaskReg);
  BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
  if (CmpOpcode) {
    // For unsigned comparisons, we can directly compare the shifted values.
    // For signed comparisons we shift and sign extend.
    unsigned SReg = RegInfo.createVirtualRegister(GPRC);
    BuildMI(BB, dl, TII->get(PPC::AND), SReg)
        .addReg(TmpDestReg)
        .addReg(MaskReg);
    unsigned ValueReg = SReg;
    unsigned CmpReg = Incr2Reg;
    if (CmpOpcode == PPC::CMPW) {
      // Bring the lane down to bit 0 and sign-extend, then compare against
      // the original (unshifted) incr.
      ValueReg = RegInfo.createVirtualRegister(GPRC);
      BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
          .addReg(SReg)
          .addReg(ShiftReg);
      unsigned ValueSReg = RegInfo.createVirtualRegister(GPRC);
      BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
          .addReg(ValueReg);
      ValueReg = ValueSReg;
      CmpReg = incr;
    }
    BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
        .addReg(CmpReg)
        .addReg(ValueReg);
    // Skip the store when the memory value already satisfies the min/max.
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(CmpPred)
        .addReg(PPC::CR0)
        .addMBB(exitMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(exitMBB);
    BB = loop2MBB;
  }
  BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
  // stwcx. sets CR0; retry the loop if the reservation was lost.
  BuildMI(BB, dl, TII->get(PPC::STWCX))
      .addReg(Tmp4Reg)
      .addReg(ZeroReg)
      .addReg(PtrReg);
  BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE)
      .addReg(PPC::CR0)
      .addMBB(loopMBB);
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
  // Shift the loaded old value back down so 'dest' holds the original
  // byte/halfword.
  BB = exitMBB;
  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
      .addReg(TmpDestReg)
      .addReg(ShiftReg);
  return BB;
}

// Expand the EH_SjLj_SetJmp pseudo: materialize the setjmp side of the
// builtin SjLj exception-handling setup, returning 0 on the direct path and
// 1 on the "returned via longjmp" path via a PHI in the sink block.
llvm::MachineBasicBlock *
PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
                                    MachineBasicBlock *MBB) const {
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const BasicBlock *BB = MBB->getBasicBlock();
  MachineFunction::iterator I = ++MBB->getIterator();

  unsigned DstReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
  assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
  unsigned mainDstReg = MRI.createVirtualRegister(RC);
  unsigned restoreDstReg = MRI.createVirtualRegister(RC);

  MVT PVT = getPointerTy(MF->getDataLayout());
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");
  // For v = setjmp(buf), we generate
  //
  // thisMBB:
  //  SjLjSetup mainMBB
  //  bl mainMBB
  //  v_restore = 1
  //  b sinkMBB
  //
  // mainMBB:
  //  buf[LabelOffset] = LR
  //  v_main = 0
  //
  // sinkMBB:
  //  v = phi(main, restore)
  //

  MachineBasicBlock *thisMBB = MBB;
  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
  MF->insert(I, mainMBB);
  MF->insert(I, sinkMBB);

  MachineInstrBuilder MIB;

  // Transfer the remainder of BB and its successor edges to sinkMBB.
  sinkMBB->splice(sinkMBB->begin(), MBB,
                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);

  // Note that the structure of the jmp_buf used here is not compatible
  // with that used by libc, and is not designed to be. Specifically, it
  // stores only those 'reserved' registers that LLVM does not otherwise
  // understand how to spill. Also, by convention, by the time this
  // intrinsic is called, Clang has already stored the frame address in the
  // first slot of the buffer and stack address in the third. Following the
  // X86 target code, we'll store the jump address in the second slot. We also
  // need to save the TOC pointer (R2) to handle jumps between shared
  // libraries, and that will be stored in the fourth slot. The thread
  // identifier (R13) is not affected.

  // thisMBB:
  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t TOCOffset = 3 * PVT.getStoreSize();
  const int64_t BPOffset = 4 * PVT.getStoreSize();

  // Prepare IP either in reg.
  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
  unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
  unsigned BufReg = MI.getOperand(1).getReg();

  // Save the TOC pointer (X2) into the buffer for cross-library jumps.
  if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) {
    setUsesTOCBasePtr(*MBB->getParent());
    MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
              .addReg(PPC::X2)
              .addImm(TOCOffset)
              .addReg(BufReg)
              .cloneMemRefs(MI);
  }

  // Naked functions never have a base pointer, and so we use r1. For all
  // other functions, this decision must be delayed until during PEI.
  unsigned BaseReg;
  if (MF->getFunction().hasFnAttribute(Attribute::Naked))
    BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
  else
    BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;

  MIB = BuildMI(*thisMBB, MI, DL,
                TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
            .addReg(BaseReg)
            .addImm(BPOffset)
            .addReg(BufReg)
            .cloneMemRefs(MI);

  // Setup
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
  // A longjmp back here may have clobbered any register; say so.
  MIB.addRegMask(TRI->getNoPreservedMask());

  BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);

  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
            .addMBB(mainMBB);
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);

  thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
  thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());

  // mainMBB:
  //  mainDstReg = 0
  // The BCLalways above put the return address in LR; read it back here.
  MIB =
      BuildMI(mainMBB, DL,
              TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);

  // Store IP
  if (Subtarget.isPPC64()) {
    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
              .addReg(LabelReg)
              .addImm(LabelOffset)
              .addReg(BufReg);
  } else {
    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
              .addReg(LabelReg)
              .addImm(LabelOffset)
              .addReg(BufReg);
  }
  MIB.cloneMemRefs(MI);

  BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
  mainMBB->addSuccessor(sinkMBB);

  // sinkMBB:
  // v = 0 when reached via mainMBB (direct setjmp return), 1 when reached
  // via thisMBB's restore path (longjmp).
  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
          TII->get(PPC::PHI), DstReg)
    .addReg(mainDstReg).addMBB(mainMBB)
    .addReg(restoreDstReg).addMBB(thisMBB);

  MI.eraseFromParent();
  return sinkMBB;
}

// Expand the EH_SjLj_LongJmp pseudo: restore FP/SP/BP/TOC from the buffer
// and branch indirectly to the saved IP.
MachineBasicBlock *
PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
                                     MachineBasicBlock *MBB) const {
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  MVT PVT = getPointerTy(MF->getDataLayout());
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");

  const TargetRegisterClass *RC =
      (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
  unsigned Tmp = MRI.createVirtualRegister(RC);
  // Since FP is only updated here but NOT referenced, it's treated as GPR.
  unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
  unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
  unsigned BP =
      (PVT == MVT::i64)
          ? PPC::X30
          : (Subtarget.isSVR4ABI() && isPositionIndependent() ?
PPC::R29
                 : PPC::R30);

  MachineInstrBuilder MIB;

  // Buffer slot layout matches emitEHSjLjSetJmp: frame address at 0, saved
  // IP at 1, stack address at 2, TOC at 3, base pointer at 4.
  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t SPOffset = 2 * PVT.getStoreSize();
  const int64_t TOCOffset = 3 * PVT.getStoreSize();
  const int64_t BPOffset = 4 * PVT.getStoreSize();

  unsigned BufReg = MI.getOperand(0).getReg();

  // Reload FP (the jumped-to function may not have had a
  // frame pointer, and if so, then its r31 will be restored
  // as necessary).
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
              .addImm(0)
              .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
              .addImm(0)
              .addReg(BufReg);
  }
  MIB.cloneMemRefs(MI);

  // Reload IP
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
              .addImm(LabelOffset)
              .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
              .addImm(LabelOffset)
              .addReg(BufReg);
  }
  MIB.cloneMemRefs(MI);

  // Reload SP
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
              .addImm(SPOffset)
              .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
              .addImm(SPOffset)
              .addReg(BufReg);
  }
  MIB.cloneMemRefs(MI);

  // Reload BP
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
              .addImm(BPOffset)
              .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
              .addImm(BPOffset)
              .addReg(BufReg);
  }
  MIB.cloneMemRefs(MI);

  // Reload TOC
  if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
    setUsesTOCBasePtr(*MBB->getParent());
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
              .addImm(TOCOffset)
              .addReg(BufReg)
              .cloneMemRefs(MI);
  }

  // Jump
  // Move the restored IP into CTR and branch to it.
  BuildMI(*MBB, MI, DL,
          TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
  BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));

  MI.eraseFromParent();
  return MBB;
}

// Custom-inserter dispatcher: expands pseudo instructions (stackmaps,
// SjLj setjmp/longjmp, selects, atomics, ...) into explicit control flow.
MachineBasicBlock *
PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                               MachineBasicBlock *BB) const {
  if (MI.getOpcode() == TargetOpcode::STACKMAP ||
      MI.getOpcode() == TargetOpcode::PATCHPOINT) {
    if (Subtarget.isPPC64() && Subtarget.isSVR4ABI() &&
        MI.getOpcode() == TargetOpcode::PATCHPOINT) {
      // Call lowering should have added an r2 operand to indicate a dependence
      // on the TOC base pointer value. It can't however, because there is no
      // way to mark the dependence as implicit there, and so the stackmap code
      // will confuse it with a regular operand. Instead, add the dependence
      // here.
      MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
    }

    return emitPatchPoint(MI, BB);
  }

  if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
      MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
    return emitEHSjLjSetJmp(MI, BB);
  } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
             MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
    return emitEHSjLjLongJmp(MI, BB);
  }

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  // To "insert" these instructions we actually have to insert their
  // control-flow patterns.
10352 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 10353 MachineFunction::iterator It = ++BB->getIterator(); 10354 10355 MachineFunction *F = BB->getParent(); 10356 10357 if (MI.getOpcode() == PPC::SELECT_CC_I4 || 10358 MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 || 10359 MI.getOpcode() == PPC::SELECT_I8) { 10360 SmallVector<MachineOperand, 2> Cond; 10361 if (MI.getOpcode() == PPC::SELECT_CC_I4 || 10362 MI.getOpcode() == PPC::SELECT_CC_I8) 10363 Cond.push_back(MI.getOperand(4)); 10364 else 10365 Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET)); 10366 Cond.push_back(MI.getOperand(1)); 10367 10368 DebugLoc dl = MI.getDebugLoc(); 10369 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond, 10370 MI.getOperand(2).getReg(), MI.getOperand(3).getReg()); 10371 } else if (MI.getOpcode() == PPC::SELECT_CC_I4 || 10372 MI.getOpcode() == PPC::SELECT_CC_I8 || 10373 MI.getOpcode() == PPC::SELECT_CC_F4 || 10374 MI.getOpcode() == PPC::SELECT_CC_F8 || 10375 MI.getOpcode() == PPC::SELECT_CC_F16 || 10376 MI.getOpcode() == PPC::SELECT_CC_QFRC || 10377 MI.getOpcode() == PPC::SELECT_CC_QSRC || 10378 MI.getOpcode() == PPC::SELECT_CC_QBRC || 10379 MI.getOpcode() == PPC::SELECT_CC_VRRC || 10380 MI.getOpcode() == PPC::SELECT_CC_VSFRC || 10381 MI.getOpcode() == PPC::SELECT_CC_VSSRC || 10382 MI.getOpcode() == PPC::SELECT_CC_VSRC || 10383 MI.getOpcode() == PPC::SELECT_CC_SPE4 || 10384 MI.getOpcode() == PPC::SELECT_CC_SPE || 10385 MI.getOpcode() == PPC::SELECT_I4 || 10386 MI.getOpcode() == PPC::SELECT_I8 || 10387 MI.getOpcode() == PPC::SELECT_F4 || 10388 MI.getOpcode() == PPC::SELECT_F8 || 10389 MI.getOpcode() == PPC::SELECT_F16 || 10390 MI.getOpcode() == PPC::SELECT_QFRC || 10391 MI.getOpcode() == PPC::SELECT_QSRC || 10392 MI.getOpcode() == PPC::SELECT_QBRC || 10393 MI.getOpcode() == PPC::SELECT_SPE || 10394 MI.getOpcode() == PPC::SELECT_SPE4 || 10395 MI.getOpcode() == PPC::SELECT_VRRC || 10396 MI.getOpcode() == PPC::SELECT_VSFRC || 10397 
MI.getOpcode() == PPC::SELECT_VSSRC || 10398 MI.getOpcode() == PPC::SELECT_VSRC) { 10399 // The incoming instruction knows the destination vreg to set, the 10400 // condition code register to branch on, the true/false values to 10401 // select between, and a branch opcode to use. 10402 10403 // thisMBB: 10404 // ... 10405 // TrueVal = ... 10406 // cmpTY ccX, r1, r2 10407 // bCC copy1MBB 10408 // fallthrough --> copy0MBB 10409 MachineBasicBlock *thisMBB = BB; 10410 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); 10411 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 10412 DebugLoc dl = MI.getDebugLoc(); 10413 F->insert(It, copy0MBB); 10414 F->insert(It, sinkMBB); 10415 10416 // Transfer the remainder of BB and its successor edges to sinkMBB. 10417 sinkMBB->splice(sinkMBB->begin(), BB, 10418 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 10419 sinkMBB->transferSuccessorsAndUpdatePHIs(BB); 10420 10421 // Next, add the true and fallthrough blocks as its successors. 
10422 BB->addSuccessor(copy0MBB); 10423 BB->addSuccessor(sinkMBB); 10424 10425 if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 || 10426 MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 || 10427 MI.getOpcode() == PPC::SELECT_F16 || 10428 MI.getOpcode() == PPC::SELECT_SPE4 || 10429 MI.getOpcode() == PPC::SELECT_SPE || 10430 MI.getOpcode() == PPC::SELECT_QFRC || 10431 MI.getOpcode() == PPC::SELECT_QSRC || 10432 MI.getOpcode() == PPC::SELECT_QBRC || 10433 MI.getOpcode() == PPC::SELECT_VRRC || 10434 MI.getOpcode() == PPC::SELECT_VSFRC || 10435 MI.getOpcode() == PPC::SELECT_VSSRC || 10436 MI.getOpcode() == PPC::SELECT_VSRC) { 10437 BuildMI(BB, dl, TII->get(PPC::BC)) 10438 .addReg(MI.getOperand(1).getReg()) 10439 .addMBB(sinkMBB); 10440 } else { 10441 unsigned SelectPred = MI.getOperand(4).getImm(); 10442 BuildMI(BB, dl, TII->get(PPC::BCC)) 10443 .addImm(SelectPred) 10444 .addReg(MI.getOperand(1).getReg()) 10445 .addMBB(sinkMBB); 10446 } 10447 10448 // copy0MBB: 10449 // %FalseValue = ... 10450 // # fallthrough to sinkMBB 10451 BB = copy0MBB; 10452 10453 // Update machine-CFG edges 10454 BB->addSuccessor(sinkMBB); 10455 10456 // sinkMBB: 10457 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 10458 // ... 10459 BB = sinkMBB; 10460 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg()) 10461 .addReg(MI.getOperand(3).getReg()) 10462 .addMBB(copy0MBB) 10463 .addReg(MI.getOperand(2).getReg()) 10464 .addMBB(thisMBB); 10465 } else if (MI.getOpcode() == PPC::ReadTB) { 10466 // To read the 64-bit time-base register on a 32-bit target, we read the 10467 // two halves. Should the counter have wrapped while it was being read, we 10468 // need to try again. 10469 // ... 
10470 // readLoop: 10471 // mfspr Rx,TBU # load from TBU 10472 // mfspr Ry,TB # load from TB 10473 // mfspr Rz,TBU # load from TBU 10474 // cmpw crX,Rx,Rz # check if 'old'='new' 10475 // bne readLoop # branch if they're not equal 10476 // ... 10477 10478 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB); 10479 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 10480 DebugLoc dl = MI.getDebugLoc(); 10481 F->insert(It, readMBB); 10482 F->insert(It, sinkMBB); 10483 10484 // Transfer the remainder of BB and its successor edges to sinkMBB. 10485 sinkMBB->splice(sinkMBB->begin(), BB, 10486 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 10487 sinkMBB->transferSuccessorsAndUpdatePHIs(BB); 10488 10489 BB->addSuccessor(readMBB); 10490 BB = readMBB; 10491 10492 MachineRegisterInfo &RegInfo = F->getRegInfo(); 10493 unsigned ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); 10494 unsigned LoReg = MI.getOperand(0).getReg(); 10495 unsigned HiReg = MI.getOperand(1).getReg(); 10496 10497 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269); 10498 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268); 10499 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269); 10500 10501 unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); 10502 10503 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg) 10504 .addReg(HiReg) 10505 .addReg(ReadAgainReg); 10506 BuildMI(BB, dl, TII->get(PPC::BCC)) 10507 .addImm(PPC::PRED_NE) 10508 .addReg(CmpReg) 10509 .addMBB(readMBB); 10510 10511 BB->addSuccessor(readMBB); 10512 BB->addSuccessor(sinkMBB); 10513 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8) 10514 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4); 10515 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16) 10516 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4); 10517 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32) 10518 BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4); 10519 else if (MI.getOpcode() == 
PPC::ATOMIC_LOAD_ADD_I64) 10520 BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8); 10521 10522 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8) 10523 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND); 10524 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16) 10525 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND); 10526 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32) 10527 BB = EmitAtomicBinary(MI, BB, 4, PPC::AND); 10528 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64) 10529 BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8); 10530 10531 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8) 10532 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR); 10533 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16) 10534 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR); 10535 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32) 10536 BB = EmitAtomicBinary(MI, BB, 4, PPC::OR); 10537 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64) 10538 BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8); 10539 10540 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8) 10541 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR); 10542 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16) 10543 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR); 10544 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32) 10545 BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR); 10546 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64) 10547 BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8); 10548 10549 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8) 10550 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND); 10551 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16) 10552 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND); 10553 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32) 10554 BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND); 10555 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64) 10556 BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8); 10557 10558 else if (MI.getOpcode() == 
PPC::ATOMIC_LOAD_SUB_I8) 10559 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF); 10560 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16) 10561 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF); 10562 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32) 10563 BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF); 10564 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64) 10565 BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8); 10566 10567 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8) 10568 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE); 10569 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16) 10570 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE); 10571 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32) 10572 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE); 10573 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64) 10574 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE); 10575 10576 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8) 10577 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE); 10578 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16) 10579 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE); 10580 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32) 10581 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE); 10582 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64) 10583 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE); 10584 10585 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8) 10586 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE); 10587 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16) 10588 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE); 10589 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32) 10590 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE); 10591 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64) 10592 BB = 
EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE); 10593 10594 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8) 10595 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE); 10596 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16) 10597 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE); 10598 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32) 10599 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE); 10600 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64) 10601 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE); 10602 10603 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8) 10604 BB = EmitPartwordAtomicBinary(MI, BB, true, 0); 10605 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16) 10606 BB = EmitPartwordAtomicBinary(MI, BB, false, 0); 10607 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32) 10608 BB = EmitAtomicBinary(MI, BB, 4, 0); 10609 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64) 10610 BB = EmitAtomicBinary(MI, BB, 8, 0); 10611 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 || 10612 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 || 10613 (Subtarget.hasPartwordAtomics() && 10614 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) || 10615 (Subtarget.hasPartwordAtomics() && 10616 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) { 10617 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64; 10618 10619 auto LoadMnemonic = PPC::LDARX; 10620 auto StoreMnemonic = PPC::STDCX; 10621 switch (MI.getOpcode()) { 10622 default: 10623 llvm_unreachable("Compare and swap of unknown size"); 10624 case PPC::ATOMIC_CMP_SWAP_I8: 10625 LoadMnemonic = PPC::LBARX; 10626 StoreMnemonic = PPC::STBCX; 10627 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics."); 10628 break; 10629 case PPC::ATOMIC_CMP_SWAP_I16: 10630 LoadMnemonic = PPC::LHARX; 10631 StoreMnemonic = PPC::STHCX; 10632 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics."); 10633 break; 10634 case 
PPC::ATOMIC_CMP_SWAP_I32: 10635 LoadMnemonic = PPC::LWARX; 10636 StoreMnemonic = PPC::STWCX; 10637 break; 10638 case PPC::ATOMIC_CMP_SWAP_I64: 10639 LoadMnemonic = PPC::LDARX; 10640 StoreMnemonic = PPC::STDCX; 10641 break; 10642 } 10643 unsigned dest = MI.getOperand(0).getReg(); 10644 unsigned ptrA = MI.getOperand(1).getReg(); 10645 unsigned ptrB = MI.getOperand(2).getReg(); 10646 unsigned oldval = MI.getOperand(3).getReg(); 10647 unsigned newval = MI.getOperand(4).getReg(); 10648 DebugLoc dl = MI.getDebugLoc(); 10649 10650 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); 10651 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); 10652 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); 10653 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 10654 F->insert(It, loop1MBB); 10655 F->insert(It, loop2MBB); 10656 F->insert(It, midMBB); 10657 F->insert(It, exitMBB); 10658 exitMBB->splice(exitMBB->begin(), BB, 10659 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 10660 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 10661 10662 // thisMBB: 10663 // ... 10664 // fallthrough --> loopMBB 10665 BB->addSuccessor(loop1MBB); 10666 10667 // loop1MBB: 10668 // l[bhwd]arx dest, ptr 10669 // cmp[wd] dest, oldval 10670 // bne- midMBB 10671 // loop2MBB: 10672 // st[bhwd]cx. newval, ptr 10673 // bne- loopMBB 10674 // b exitBB 10675 // midMBB: 10676 // st[bhwd]cx. dest, ptr 10677 // exitBB: 10678 BB = loop1MBB; 10679 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB); 10680 BuildMI(BB, dl, TII->get(is64bit ? 
PPC::CMPD : PPC::CMPW), PPC::CR0) 10681 .addReg(oldval) 10682 .addReg(dest); 10683 BuildMI(BB, dl, TII->get(PPC::BCC)) 10684 .addImm(PPC::PRED_NE) 10685 .addReg(PPC::CR0) 10686 .addMBB(midMBB); 10687 BB->addSuccessor(loop2MBB); 10688 BB->addSuccessor(midMBB); 10689 10690 BB = loop2MBB; 10691 BuildMI(BB, dl, TII->get(StoreMnemonic)) 10692 .addReg(newval) 10693 .addReg(ptrA) 10694 .addReg(ptrB); 10695 BuildMI(BB, dl, TII->get(PPC::BCC)) 10696 .addImm(PPC::PRED_NE) 10697 .addReg(PPC::CR0) 10698 .addMBB(loop1MBB); 10699 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); 10700 BB->addSuccessor(loop1MBB); 10701 BB->addSuccessor(exitMBB); 10702 10703 BB = midMBB; 10704 BuildMI(BB, dl, TII->get(StoreMnemonic)) 10705 .addReg(dest) 10706 .addReg(ptrA) 10707 .addReg(ptrB); 10708 BB->addSuccessor(exitMBB); 10709 10710 // exitMBB: 10711 // ... 10712 BB = exitMBB; 10713 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 || 10714 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) { 10715 // We must use 64-bit registers for addresses when targeting 64-bit, 10716 // since we're actually doing arithmetic on them. Other registers 10717 // can be 32-bit. 
10718 bool is64bit = Subtarget.isPPC64(); 10719 bool isLittleEndian = Subtarget.isLittleEndian(); 10720 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8; 10721 10722 unsigned dest = MI.getOperand(0).getReg(); 10723 unsigned ptrA = MI.getOperand(1).getReg(); 10724 unsigned ptrB = MI.getOperand(2).getReg(); 10725 unsigned oldval = MI.getOperand(3).getReg(); 10726 unsigned newval = MI.getOperand(4).getReg(); 10727 DebugLoc dl = MI.getDebugLoc(); 10728 10729 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); 10730 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); 10731 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); 10732 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 10733 F->insert(It, loop1MBB); 10734 F->insert(It, loop2MBB); 10735 F->insert(It, midMBB); 10736 F->insert(It, exitMBB); 10737 exitMBB->splice(exitMBB->begin(), BB, 10738 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 10739 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 10740 10741 MachineRegisterInfo &RegInfo = F->getRegInfo(); 10742 const TargetRegisterClass *RC = 10743 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; 10744 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; 10745 10746 unsigned PtrReg = RegInfo.createVirtualRegister(RC); 10747 unsigned Shift1Reg = RegInfo.createVirtualRegister(GPRC); 10748 unsigned ShiftReg = 10749 isLittleEndian ? 
Shift1Reg : RegInfo.createVirtualRegister(GPRC); 10750 unsigned NewVal2Reg = RegInfo.createVirtualRegister(GPRC); 10751 unsigned NewVal3Reg = RegInfo.createVirtualRegister(GPRC); 10752 unsigned OldVal2Reg = RegInfo.createVirtualRegister(GPRC); 10753 unsigned OldVal3Reg = RegInfo.createVirtualRegister(GPRC); 10754 unsigned MaskReg = RegInfo.createVirtualRegister(GPRC); 10755 unsigned Mask2Reg = RegInfo.createVirtualRegister(GPRC); 10756 unsigned Mask3Reg = RegInfo.createVirtualRegister(GPRC); 10757 unsigned Tmp2Reg = RegInfo.createVirtualRegister(GPRC); 10758 unsigned Tmp4Reg = RegInfo.createVirtualRegister(GPRC); 10759 unsigned TmpDestReg = RegInfo.createVirtualRegister(GPRC); 10760 unsigned Ptr1Reg; 10761 unsigned TmpReg = RegInfo.createVirtualRegister(GPRC); 10762 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; 10763 // thisMBB: 10764 // ... 10765 // fallthrough --> loopMBB 10766 BB->addSuccessor(loop1MBB); 10767 10768 // The 4-byte load must be aligned, while a char or short may be 10769 // anywhere in the word. Hence all this nasty bookkeeping code. 10770 // add ptr1, ptrA, ptrB [copy if ptrA==0] 10771 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] 10772 // xori shift, shift1, 24 [16] 10773 // rlwinm ptr, ptr1, 0, 0, 29 10774 // slw newval2, newval, shift 10775 // slw oldval2, oldval,shift 10776 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] 10777 // slw mask, mask2, shift 10778 // and newval3, newval2, mask 10779 // and oldval3, oldval2, mask 10780 // loop1MBB: 10781 // lwarx tmpDest, ptr 10782 // and tmp, tmpDest, mask 10783 // cmpw tmp, oldval3 10784 // bne- midMBB 10785 // loop2MBB: 10786 // andc tmp2, tmpDest, mask 10787 // or tmp4, tmp2, newval3 10788 // stwcx. tmp4, ptr 10789 // bne- loop1MBB 10790 // b exitBB 10791 // midMBB: 10792 // stwcx. tmpDest, ptr 10793 // exitBB: 10794 // srw dest, tmpDest, shift 10795 if (ptrA != ZeroReg) { 10796 Ptr1Reg = RegInfo.createVirtualRegister(RC); 10797 BuildMI(BB, dl, TII->get(is64bit ? 
PPC::ADD8 : PPC::ADD4), Ptr1Reg) 10798 .addReg(ptrA) 10799 .addReg(ptrB); 10800 } else { 10801 Ptr1Reg = ptrB; 10802 } 10803 10804 // We need use 32-bit subregister to avoid mismatch register class in 64-bit 10805 // mode. 10806 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg) 10807 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0) 10808 .addImm(3) 10809 .addImm(27) 10810 .addImm(is8bit ? 28 : 27); 10811 if (!isLittleEndian) 10812 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg) 10813 .addReg(Shift1Reg) 10814 .addImm(is8bit ? 24 : 16); 10815 if (is64bit) 10816 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) 10817 .addReg(Ptr1Reg) 10818 .addImm(0) 10819 .addImm(61); 10820 else 10821 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) 10822 .addReg(Ptr1Reg) 10823 .addImm(0) 10824 .addImm(0) 10825 .addImm(29); 10826 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg) 10827 .addReg(newval) 10828 .addReg(ShiftReg); 10829 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg) 10830 .addReg(oldval) 10831 .addReg(ShiftReg); 10832 if (is8bit) 10833 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); 10834 else { 10835 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); 10836 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg) 10837 .addReg(Mask3Reg) 10838 .addImm(65535); 10839 } 10840 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) 10841 .addReg(Mask2Reg) 10842 .addReg(ShiftReg); 10843 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg) 10844 .addReg(NewVal2Reg) 10845 .addReg(MaskReg); 10846 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg) 10847 .addReg(OldVal2Reg) 10848 .addReg(MaskReg); 10849 10850 BB = loop1MBB; 10851 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) 10852 .addReg(ZeroReg) 10853 .addReg(PtrReg); 10854 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg) 10855 .addReg(TmpDestReg) 10856 .addReg(MaskReg); 10857 BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0) 10858 .addReg(TmpReg) 10859 .addReg(OldVal3Reg); 10860 BuildMI(BB, dl, TII->get(PPC::BCC)) 10861 .addImm(PPC::PRED_NE) 10862 
.addReg(PPC::CR0) 10863 .addMBB(midMBB); 10864 BB->addSuccessor(loop2MBB); 10865 BB->addSuccessor(midMBB); 10866 10867 BB = loop2MBB; 10868 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg) 10869 .addReg(TmpDestReg) 10870 .addReg(MaskReg); 10871 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg) 10872 .addReg(Tmp2Reg) 10873 .addReg(NewVal3Reg); 10874 BuildMI(BB, dl, TII->get(PPC::STWCX)) 10875 .addReg(Tmp4Reg) 10876 .addReg(ZeroReg) 10877 .addReg(PtrReg); 10878 BuildMI(BB, dl, TII->get(PPC::BCC)) 10879 .addImm(PPC::PRED_NE) 10880 .addReg(PPC::CR0) 10881 .addMBB(loop1MBB); 10882 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); 10883 BB->addSuccessor(loop1MBB); 10884 BB->addSuccessor(exitMBB); 10885 10886 BB = midMBB; 10887 BuildMI(BB, dl, TII->get(PPC::STWCX)) 10888 .addReg(TmpDestReg) 10889 .addReg(ZeroReg) 10890 .addReg(PtrReg); 10891 BB->addSuccessor(exitMBB); 10892 10893 // exitMBB: 10894 // ... 10895 BB = exitMBB; 10896 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest) 10897 .addReg(TmpReg) 10898 .addReg(ShiftReg); 10899 } else if (MI.getOpcode() == PPC::FADDrtz) { 10900 // This pseudo performs an FADD with rounding mode temporarily forced 10901 // to round-to-zero. We emit this via custom inserter since the FPSCR 10902 // is not modeled at the SelectionDAG level. 10903 unsigned Dest = MI.getOperand(0).getReg(); 10904 unsigned Src1 = MI.getOperand(1).getReg(); 10905 unsigned Src2 = MI.getOperand(2).getReg(); 10906 DebugLoc dl = MI.getDebugLoc(); 10907 10908 MachineRegisterInfo &RegInfo = F->getRegInfo(); 10909 unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); 10910 10911 // Save FPSCR value. 10912 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg); 10913 10914 // Set rounding mode to round-to-zero. 10915 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31); 10916 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30); 10917 10918 // Perform addition. 
10919 BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2); 10920 10921 // Restore FPSCR value. 10922 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg); 10923 } else if (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT || 10924 MI.getOpcode() == PPC::ANDIo_1_GT_BIT || 10925 MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 || 10926 MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) { 10927 unsigned Opcode = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 || 10928 MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) 10929 ? PPC::ANDIo8 10930 : PPC::ANDIo; 10931 bool isEQ = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT || 10932 MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8); 10933 10934 MachineRegisterInfo &RegInfo = F->getRegInfo(); 10935 unsigned Dest = RegInfo.createVirtualRegister( 10936 Opcode == PPC::ANDIo ? &PPC::GPRCRegClass : &PPC::G8RCRegClass); 10937 10938 DebugLoc dl = MI.getDebugLoc(); 10939 BuildMI(*BB, MI, dl, TII->get(Opcode), Dest) 10940 .addReg(MI.getOperand(1).getReg()) 10941 .addImm(1); 10942 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), 10943 MI.getOperand(0).getReg()) 10944 .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT); 10945 } else if (MI.getOpcode() == PPC::TCHECK_RET) { 10946 DebugLoc Dl = MI.getDebugLoc(); 10947 MachineRegisterInfo &RegInfo = F->getRegInfo(); 10948 unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); 10949 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg); 10950 return BB; 10951 } else { 10952 llvm_unreachable("Unexpected instr type to insert"); 10953 } 10954 10955 MI.eraseFromParent(); // The pseudo instruction is gone now. 
10956 return BB; 10957 } 10958 10959 //===----------------------------------------------------------------------===// 10960 // Target Optimization Hooks 10961 //===----------------------------------------------------------------------===// 10962 10963 static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) { 10964 // For the estimates, convergence is quadratic, so we essentially double the 10965 // number of digits correct after every iteration. For both FRE and FRSQRTE, 10966 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(), 10967 // this is 2^-14. IEEE float has 23 digits and double has 52 digits. 10968 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3; 10969 if (VT.getScalarType() == MVT::f64) 10970 RefinementSteps++; 10971 return RefinementSteps; 10972 } 10973 10974 SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, 10975 int Enabled, int &RefinementSteps, 10976 bool &UseOneConstNR, 10977 bool Reciprocal) const { 10978 EVT VT = Operand.getValueType(); 10979 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) || 10980 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) || 10981 (VT == MVT::v4f32 && Subtarget.hasAltivec()) || 10982 (VT == MVT::v2f64 && Subtarget.hasVSX()) || 10983 (VT == MVT::v4f32 && Subtarget.hasQPX()) || 10984 (VT == MVT::v4f64 && Subtarget.hasQPX())) { 10985 if (RefinementSteps == ReciprocalEstimate::Unspecified) 10986 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget); 10987 10988 UseOneConstNR = true; 10989 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand); 10990 } 10991 return SDValue(); 10992 } 10993 10994 SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG, 10995 int Enabled, 10996 int &RefinementSteps) const { 10997 EVT VT = Operand.getValueType(); 10998 if ((VT == MVT::f32 && Subtarget.hasFRES()) || 10999 (VT == MVT::f64 && Subtarget.hasFRE()) || 11000 (VT == MVT::v4f32 && Subtarget.hasAltivec()) || 11001 (VT == 
MVT::v2f64 && Subtarget.hasVSX()) || 11002 (VT == MVT::v4f32 && Subtarget.hasQPX()) || 11003 (VT == MVT::v4f64 && Subtarget.hasQPX())) { 11004 if (RefinementSteps == ReciprocalEstimate::Unspecified) 11005 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget); 11006 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand); 11007 } 11008 return SDValue(); 11009 } 11010 11011 unsigned PPCTargetLowering::combineRepeatedFPDivisors() const { 11012 // Note: This functionality is used only when unsafe-fp-math is enabled, and 11013 // on cores with reciprocal estimates (which are used when unsafe-fp-math is 11014 // enabled for division), this functionality is redundant with the default 11015 // combiner logic (once the division -> reciprocal/multiply transformation 11016 // has taken place). As a result, this matters more for older cores than for 11017 // newer ones. 11018 11019 // Combine multiple FDIVs with the same divisor into multiple FMULs by the 11020 // reciprocal if there are two or more FDIVs (for embedded cores with only 11021 // one FP pipeline) for three or more FDIVs (for generic OOO cores). 11022 switch (Subtarget.getDarwinDirective()) { 11023 default: 11024 return 3; 11025 case PPC::DIR_440: 11026 case PPC::DIR_A2: 11027 case PPC::DIR_E500: 11028 case PPC::DIR_E500mc: 11029 case PPC::DIR_E5500: 11030 return 2; 11031 } 11032 } 11033 11034 // isConsecutiveLSLoc needs to work even if all adds have not yet been 11035 // collapsed, and so we need to look through chains of them. 11036 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base, 11037 int64_t& Offset, SelectionDAG &DAG) { 11038 if (DAG.isBaseWithConstantOffset(Loc)) { 11039 Base = Loc.getOperand(0); 11040 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue(); 11041 11042 // The base might itself be a base plus an offset, and if so, accumulate 11043 // that as well. 
11044 getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG); 11045 } 11046 } 11047 11048 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, 11049 unsigned Bytes, int Dist, 11050 SelectionDAG &DAG) { 11051 if (VT.getSizeInBits() / 8 != Bytes) 11052 return false; 11053 11054 SDValue BaseLoc = Base->getBasePtr(); 11055 if (Loc.getOpcode() == ISD::FrameIndex) { 11056 if (BaseLoc.getOpcode() != ISD::FrameIndex) 11057 return false; 11058 const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); 11059 int FI = cast<FrameIndexSDNode>(Loc)->getIndex(); 11060 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex(); 11061 int FS = MFI.getObjectSize(FI); 11062 int BFS = MFI.getObjectSize(BFI); 11063 if (FS != BFS || FS != (int)Bytes) return false; 11064 return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes); 11065 } 11066 11067 SDValue Base1 = Loc, Base2 = BaseLoc; 11068 int64_t Offset1 = 0, Offset2 = 0; 11069 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG); 11070 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG); 11071 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes)) 11072 return true; 11073 11074 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 11075 const GlobalValue *GV1 = nullptr; 11076 const GlobalValue *GV2 = nullptr; 11077 Offset1 = 0; 11078 Offset2 = 0; 11079 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1); 11080 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); 11081 if (isGA1 && isGA2 && GV1 == GV2) 11082 return Offset1 == (Offset2 + Dist*Bytes); 11083 return false; 11084 } 11085 11086 // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does 11087 // not enforce equality of the chain operands. 
11088 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, 11089 unsigned Bytes, int Dist, 11090 SelectionDAG &DAG) { 11091 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) { 11092 EVT VT = LS->getMemoryVT(); 11093 SDValue Loc = LS->getBasePtr(); 11094 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG); 11095 } 11096 11097 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) { 11098 EVT VT; 11099 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { 11100 default: return false; 11101 case Intrinsic::ppc_qpx_qvlfd: 11102 case Intrinsic::ppc_qpx_qvlfda: 11103 VT = MVT::v4f64; 11104 break; 11105 case Intrinsic::ppc_qpx_qvlfs: 11106 case Intrinsic::ppc_qpx_qvlfsa: 11107 VT = MVT::v4f32; 11108 break; 11109 case Intrinsic::ppc_qpx_qvlfcd: 11110 case Intrinsic::ppc_qpx_qvlfcda: 11111 VT = MVT::v2f64; 11112 break; 11113 case Intrinsic::ppc_qpx_qvlfcs: 11114 case Intrinsic::ppc_qpx_qvlfcsa: 11115 VT = MVT::v2f32; 11116 break; 11117 case Intrinsic::ppc_qpx_qvlfiwa: 11118 case Intrinsic::ppc_qpx_qvlfiwz: 11119 case Intrinsic::ppc_altivec_lvx: 11120 case Intrinsic::ppc_altivec_lvxl: 11121 case Intrinsic::ppc_vsx_lxvw4x: 11122 case Intrinsic::ppc_vsx_lxvw4x_be: 11123 VT = MVT::v4i32; 11124 break; 11125 case Intrinsic::ppc_vsx_lxvd2x: 11126 case Intrinsic::ppc_vsx_lxvd2x_be: 11127 VT = MVT::v2f64; 11128 break; 11129 case Intrinsic::ppc_altivec_lvebx: 11130 VT = MVT::i8; 11131 break; 11132 case Intrinsic::ppc_altivec_lvehx: 11133 VT = MVT::i16; 11134 break; 11135 case Intrinsic::ppc_altivec_lvewx: 11136 VT = MVT::i32; 11137 break; 11138 } 11139 11140 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG); 11141 } 11142 11143 if (N->getOpcode() == ISD::INTRINSIC_VOID) { 11144 EVT VT; 11145 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { 11146 default: return false; 11147 case Intrinsic::ppc_qpx_qvstfd: 11148 case Intrinsic::ppc_qpx_qvstfda: 11149 VT = MVT::v4f64; 11150 break; 11151 case Intrinsic::ppc_qpx_qvstfs: 11152 case 
Intrinsic::ppc_qpx_qvstfsa: 11153 VT = MVT::v4f32; 11154 break; 11155 case Intrinsic::ppc_qpx_qvstfcd: 11156 case Intrinsic::ppc_qpx_qvstfcda: 11157 VT = MVT::v2f64; 11158 break; 11159 case Intrinsic::ppc_qpx_qvstfcs: 11160 case Intrinsic::ppc_qpx_qvstfcsa: 11161 VT = MVT::v2f32; 11162 break; 11163 case Intrinsic::ppc_qpx_qvstfiw: 11164 case Intrinsic::ppc_qpx_qvstfiwa: 11165 case Intrinsic::ppc_altivec_stvx: 11166 case Intrinsic::ppc_altivec_stvxl: 11167 case Intrinsic::ppc_vsx_stxvw4x: 11168 VT = MVT::v4i32; 11169 break; 11170 case Intrinsic::ppc_vsx_stxvd2x: 11171 VT = MVT::v2f64; 11172 break; 11173 case Intrinsic::ppc_vsx_stxvw4x_be: 11174 VT = MVT::v4i32; 11175 break; 11176 case Intrinsic::ppc_vsx_stxvd2x_be: 11177 VT = MVT::v2f64; 11178 break; 11179 case Intrinsic::ppc_altivec_stvebx: 11180 VT = MVT::i8; 11181 break; 11182 case Intrinsic::ppc_altivec_stvehx: 11183 VT = MVT::i16; 11184 break; 11185 case Intrinsic::ppc_altivec_stvewx: 11186 VT = MVT::i32; 11187 break; 11188 } 11189 11190 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG); 11191 } 11192 11193 return false; 11194 } 11195 11196 // Return true is there is a nearyby consecutive load to the one provided 11197 // (regardless of alignment). We search up and down the chain, looking though 11198 // token factors and other loads (but nothing else). As a result, a true result 11199 // indicates that it is safe to create a new consecutive load adjacent to the 11200 // load provided. 11201 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) { 11202 SDValue Chain = LD->getChain(); 11203 EVT VT = LD->getMemoryVT(); 11204 11205 SmallSet<SDNode *, 16> LoadRoots; 11206 SmallVector<SDNode *, 8> Queue(1, Chain.getNode()); 11207 SmallSet<SDNode *, 16> Visited; 11208 11209 // First, search up the chain, branching to follow all token-factor operands. 
11210 // If we find a consecutive load, then we're done, otherwise, record all 11211 // nodes just above the top-level loads and token factors. 11212 while (!Queue.empty()) { 11213 SDNode *ChainNext = Queue.pop_back_val(); 11214 if (!Visited.insert(ChainNext).second) 11215 continue; 11216 11217 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) { 11218 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG)) 11219 return true; 11220 11221 if (!Visited.count(ChainLD->getChain().getNode())) 11222 Queue.push_back(ChainLD->getChain().getNode()); 11223 } else if (ChainNext->getOpcode() == ISD::TokenFactor) { 11224 for (const SDUse &O : ChainNext->ops()) 11225 if (!Visited.count(O.getNode())) 11226 Queue.push_back(O.getNode()); 11227 } else 11228 LoadRoots.insert(ChainNext); 11229 } 11230 11231 // Second, search down the chain, starting from the top-level nodes recorded 11232 // in the first phase. These top-level nodes are the nodes just above all 11233 // loads and token factors. Starting with their uses, recursively look though 11234 // all loads (just the chain uses) and token factors to find a consecutive 11235 // load. 
11236 Visited.clear(); 11237 Queue.clear(); 11238 11239 for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(), 11240 IE = LoadRoots.end(); I != IE; ++I) { 11241 Queue.push_back(*I); 11242 11243 while (!Queue.empty()) { 11244 SDNode *LoadRoot = Queue.pop_back_val(); 11245 if (!Visited.insert(LoadRoot).second) 11246 continue; 11247 11248 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot)) 11249 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG)) 11250 return true; 11251 11252 for (SDNode::use_iterator UI = LoadRoot->use_begin(), 11253 UE = LoadRoot->use_end(); UI != UE; ++UI) 11254 if (((isa<MemSDNode>(*UI) && 11255 cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) || 11256 UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI)) 11257 Queue.push_back(*UI); 11258 } 11259 } 11260 11261 return false; 11262 } 11263 11264 /// This function is called when we have proved that a SETCC node can be replaced 11265 /// by subtraction (and other supporting instructions) so that the result of 11266 /// comparison is kept in a GPR instead of CR. This function is purely for 11267 /// codegen purposes and has some flags to guide the codegen process. 11268 static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, 11269 bool Swap, SDLoc &DL, SelectionDAG &DAG) { 11270 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected."); 11271 11272 // Zero extend the operands to the largest legal integer. Originally, they 11273 // must be of a strictly smaller size. 11274 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0), 11275 DAG.getConstant(Size, DL, MVT::i32)); 11276 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1), 11277 DAG.getConstant(Size, DL, MVT::i32)); 11278 11279 // Swap if needed. Depends on the condition code. 11280 if (Swap) 11281 std::swap(Op0, Op1); 11282 11283 // Subtract extended integers. 
11284 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1); 11285 11286 // Move the sign bit to the least significant position and zero out the rest. 11287 // Now the least significant bit carries the result of original comparison. 11288 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode, 11289 DAG.getConstant(Size - 1, DL, MVT::i32)); 11290 auto Final = Shifted; 11291 11292 // Complement the result if needed. Based on the condition code. 11293 if (Complement) 11294 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted, 11295 DAG.getConstant(1, DL, MVT::i64)); 11296 11297 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final); 11298 } 11299 11300 SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N, 11301 DAGCombinerInfo &DCI) const { 11302 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected."); 11303 11304 SelectionDAG &DAG = DCI.DAG; 11305 SDLoc DL(N); 11306 11307 // Size of integers being compared has a critical role in the following 11308 // analysis, so we prefer to do this when all types are legal. 
11309 if (!DCI.isAfterLegalizeDAG()) 11310 return SDValue(); 11311 11312 // If all users of SETCC extend its value to a legal integer type 11313 // then we replace SETCC with a subtraction 11314 for (SDNode::use_iterator UI = N->use_begin(), 11315 UE = N->use_end(); UI != UE; ++UI) { 11316 if (UI->getOpcode() != ISD::ZERO_EXTEND) 11317 return SDValue(); 11318 } 11319 11320 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); 11321 auto OpSize = N->getOperand(0).getValueSizeInBits(); 11322 11323 unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits(); 11324 11325 if (OpSize < Size) { 11326 switch (CC) { 11327 default: break; 11328 case ISD::SETULT: 11329 return generateEquivalentSub(N, Size, false, false, DL, DAG); 11330 case ISD::SETULE: 11331 return generateEquivalentSub(N, Size, true, true, DL, DAG); 11332 case ISD::SETUGT: 11333 return generateEquivalentSub(N, Size, false, true, DL, DAG); 11334 case ISD::SETUGE: 11335 return generateEquivalentSub(N, Size, true, false, DL, DAG); 11336 } 11337 } 11338 11339 return SDValue(); 11340 } 11341 11342 SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, 11343 DAGCombinerInfo &DCI) const { 11344 SelectionDAG &DAG = DCI.DAG; 11345 SDLoc dl(N); 11346 11347 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits"); 11348 // If we're tracking CR bits, we need to be careful that we don't have: 11349 // trunc(binary-ops(zext(x), zext(y))) 11350 // or 11351 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...) 11352 // such that we're unnecessarily moving things into GPRs when it would be 11353 // better to keep them in CR bits. 11354 11355 // Note that trunc here can be an actual i1 trunc, or can be the effective 11356 // truncation that comes from a setcc or select_cc. 
11357 if (N->getOpcode() == ISD::TRUNCATE && 11358 N->getValueType(0) != MVT::i1) 11359 return SDValue(); 11360 11361 if (N->getOperand(0).getValueType() != MVT::i32 && 11362 N->getOperand(0).getValueType() != MVT::i64) 11363 return SDValue(); 11364 11365 if (N->getOpcode() == ISD::SETCC || 11366 N->getOpcode() == ISD::SELECT_CC) { 11367 // If we're looking at a comparison, then we need to make sure that the 11368 // high bits (all except for the first) don't matter the result. 11369 ISD::CondCode CC = 11370 cast<CondCodeSDNode>(N->getOperand( 11371 N->getOpcode() == ISD::SETCC ? 2 : 4))->get(); 11372 unsigned OpBits = N->getOperand(0).getValueSizeInBits(); 11373 11374 if (ISD::isSignedIntSetCC(CC)) { 11375 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits || 11376 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits) 11377 return SDValue(); 11378 } else if (ISD::isUnsignedIntSetCC(CC)) { 11379 if (!DAG.MaskedValueIsZero(N->getOperand(0), 11380 APInt::getHighBitsSet(OpBits, OpBits-1)) || 11381 !DAG.MaskedValueIsZero(N->getOperand(1), 11382 APInt::getHighBitsSet(OpBits, OpBits-1))) 11383 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI) 11384 : SDValue()); 11385 } else { 11386 // This is neither a signed nor an unsigned comparison, just make sure 11387 // that the high bits are equal. 11388 KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0)); 11389 KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1)); 11390 11391 // We don't really care about what is known about the first bit (if 11392 // anything), so clear it in all masks prior to comparing them. 
11393 Op1Known.Zero.clearBit(0); Op1Known.One.clearBit(0); 11394 Op2Known.Zero.clearBit(0); Op2Known.One.clearBit(0); 11395 11396 if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One) 11397 return SDValue(); 11398 } 11399 } 11400 11401 // We now know that the higher-order bits are irrelevant, we just need to 11402 // make sure that all of the intermediate operations are bit operations, and 11403 // all inputs are extensions. 11404 if (N->getOperand(0).getOpcode() != ISD::AND && 11405 N->getOperand(0).getOpcode() != ISD::OR && 11406 N->getOperand(0).getOpcode() != ISD::XOR && 11407 N->getOperand(0).getOpcode() != ISD::SELECT && 11408 N->getOperand(0).getOpcode() != ISD::SELECT_CC && 11409 N->getOperand(0).getOpcode() != ISD::TRUNCATE && 11410 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND && 11411 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND && 11412 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND) 11413 return SDValue(); 11414 11415 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) && 11416 N->getOperand(1).getOpcode() != ISD::AND && 11417 N->getOperand(1).getOpcode() != ISD::OR && 11418 N->getOperand(1).getOpcode() != ISD::XOR && 11419 N->getOperand(1).getOpcode() != ISD::SELECT && 11420 N->getOperand(1).getOpcode() != ISD::SELECT_CC && 11421 N->getOperand(1).getOpcode() != ISD::TRUNCATE && 11422 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND && 11423 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND && 11424 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND) 11425 return SDValue(); 11426 11427 SmallVector<SDValue, 4> Inputs; 11428 SmallVector<SDValue, 8> BinOps, PromOps; 11429 SmallPtrSet<SDNode *, 16> Visited; 11430 11431 for (unsigned i = 0; i < 2; ++i) { 11432 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND || 11433 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND || 11434 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) && 11435 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) || 11436 
isa<ConstantSDNode>(N->getOperand(i))) 11437 Inputs.push_back(N->getOperand(i)); 11438 else 11439 BinOps.push_back(N->getOperand(i)); 11440 11441 if (N->getOpcode() == ISD::TRUNCATE) 11442 break; 11443 } 11444 11445 // Visit all inputs, collect all binary operations (and, or, xor and 11446 // select) that are all fed by extensions. 11447 while (!BinOps.empty()) { 11448 SDValue BinOp = BinOps.back(); 11449 BinOps.pop_back(); 11450 11451 if (!Visited.insert(BinOp.getNode()).second) 11452 continue; 11453 11454 PromOps.push_back(BinOp); 11455 11456 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) { 11457 // The condition of the select is not promoted. 11458 if (BinOp.getOpcode() == ISD::SELECT && i == 0) 11459 continue; 11460 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3) 11461 continue; 11462 11463 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND || 11464 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND || 11465 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) && 11466 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) || 11467 isa<ConstantSDNode>(BinOp.getOperand(i))) { 11468 Inputs.push_back(BinOp.getOperand(i)); 11469 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || 11470 BinOp.getOperand(i).getOpcode() == ISD::OR || 11471 BinOp.getOperand(i).getOpcode() == ISD::XOR || 11472 BinOp.getOperand(i).getOpcode() == ISD::SELECT || 11473 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC || 11474 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || 11475 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND || 11476 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND || 11477 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) { 11478 BinOps.push_back(BinOp.getOperand(i)); 11479 } else { 11480 // We have an input that is not an extension or another binary 11481 // operation; we'll abort this transformation. 
11482 return SDValue(); 11483 } 11484 } 11485 } 11486 11487 // Make sure that this is a self-contained cluster of operations (which 11488 // is not quite the same thing as saying that everything has only one 11489 // use). 11490 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 11491 if (isa<ConstantSDNode>(Inputs[i])) 11492 continue; 11493 11494 for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(), 11495 UE = Inputs[i].getNode()->use_end(); 11496 UI != UE; ++UI) { 11497 SDNode *User = *UI; 11498 if (User != N && !Visited.count(User)) 11499 return SDValue(); 11500 11501 // Make sure that we're not going to promote the non-output-value 11502 // operand(s) or SELECT or SELECT_CC. 11503 // FIXME: Although we could sometimes handle this, and it does occur in 11504 // practice that one of the condition inputs to the select is also one of 11505 // the outputs, we currently can't deal with this. 11506 if (User->getOpcode() == ISD::SELECT) { 11507 if (User->getOperand(0) == Inputs[i]) 11508 return SDValue(); 11509 } else if (User->getOpcode() == ISD::SELECT_CC) { 11510 if (User->getOperand(0) == Inputs[i] || 11511 User->getOperand(1) == Inputs[i]) 11512 return SDValue(); 11513 } 11514 } 11515 } 11516 11517 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) { 11518 for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(), 11519 UE = PromOps[i].getNode()->use_end(); 11520 UI != UE; ++UI) { 11521 SDNode *User = *UI; 11522 if (User != N && !Visited.count(User)) 11523 return SDValue(); 11524 11525 // Make sure that we're not going to promote the non-output-value 11526 // operand(s) or SELECT or SELECT_CC. 11527 // FIXME: Although we could sometimes handle this, and it does occur in 11528 // practice that one of the condition inputs to the select is also one of 11529 // the outputs, we currently can't deal with this. 
11530 if (User->getOpcode() == ISD::SELECT) { 11531 if (User->getOperand(0) == PromOps[i]) 11532 return SDValue(); 11533 } else if (User->getOpcode() == ISD::SELECT_CC) { 11534 if (User->getOperand(0) == PromOps[i] || 11535 User->getOperand(1) == PromOps[i]) 11536 return SDValue(); 11537 } 11538 } 11539 } 11540 11541 // Replace all inputs with the extension operand. 11542 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 11543 // Constants may have users outside the cluster of to-be-promoted nodes, 11544 // and so we need to replace those as we do the promotions. 11545 if (isa<ConstantSDNode>(Inputs[i])) 11546 continue; 11547 else 11548 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0)); 11549 } 11550 11551 std::list<HandleSDNode> PromOpHandles; 11552 for (auto &PromOp : PromOps) 11553 PromOpHandles.emplace_back(PromOp); 11554 11555 // Replace all operations (these are all the same, but have a different 11556 // (i1) return type). DAG.getNode will validate that the types of 11557 // a binary operator match, so go through the list in reverse so that 11558 // we've likely promoted both operands first. Any intermediate truncations or 11559 // extensions disappear. 11560 while (!PromOpHandles.empty()) { 11561 SDValue PromOp = PromOpHandles.back().getValue(); 11562 PromOpHandles.pop_back(); 11563 11564 if (PromOp.getOpcode() == ISD::TRUNCATE || 11565 PromOp.getOpcode() == ISD::SIGN_EXTEND || 11566 PromOp.getOpcode() == ISD::ZERO_EXTEND || 11567 PromOp.getOpcode() == ISD::ANY_EXTEND) { 11568 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) && 11569 PromOp.getOperand(0).getValueType() != MVT::i1) { 11570 // The operand is not yet ready (see comment below). 
11571 PromOpHandles.emplace_front(PromOp); 11572 continue; 11573 } 11574 11575 SDValue RepValue = PromOp.getOperand(0); 11576 if (isa<ConstantSDNode>(RepValue)) 11577 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue); 11578 11579 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue); 11580 continue; 11581 } 11582 11583 unsigned C; 11584 switch (PromOp.getOpcode()) { 11585 default: C = 0; break; 11586 case ISD::SELECT: C = 1; break; 11587 case ISD::SELECT_CC: C = 2; break; 11588 } 11589 11590 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) && 11591 PromOp.getOperand(C).getValueType() != MVT::i1) || 11592 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) && 11593 PromOp.getOperand(C+1).getValueType() != MVT::i1)) { 11594 // The to-be-promoted operands of this node have not yet been 11595 // promoted (this should be rare because we're going through the 11596 // list backward, but if one of the operands has several users in 11597 // this cluster of to-be-promoted nodes, it is possible). 11598 PromOpHandles.emplace_front(PromOp); 11599 continue; 11600 } 11601 11602 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(), 11603 PromOp.getNode()->op_end()); 11604 11605 // If there are any constant inputs, make sure they're replaced now. 11606 for (unsigned i = 0; i < 2; ++i) 11607 if (isa<ConstantSDNode>(Ops[C+i])) 11608 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]); 11609 11610 DAG.ReplaceAllUsesOfValueWith(PromOp, 11611 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops)); 11612 } 11613 11614 // Now we're left with the initial truncation itself. 11615 if (N->getOpcode() == ISD::TRUNCATE) 11616 return N->getOperand(0); 11617 11618 // Otherwise, this is a comparison. The operands to be compared have just 11619 // changed type (to i1), but everything else is the same. 
11620 return SDValue(N, 0); 11621 } 11622 11623 SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, 11624 DAGCombinerInfo &DCI) const { 11625 SelectionDAG &DAG = DCI.DAG; 11626 SDLoc dl(N); 11627 11628 // If we're tracking CR bits, we need to be careful that we don't have: 11629 // zext(binary-ops(trunc(x), trunc(y))) 11630 // or 11631 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...) 11632 // such that we're unnecessarily moving things into CR bits that can more 11633 // efficiently stay in GPRs. Note that if we're not certain that the high 11634 // bits are set as required by the final extension, we still may need to do 11635 // some masking to get the proper behavior. 11636 11637 // This same functionality is important on PPC64 when dealing with 11638 // 32-to-64-bit extensions; these occur often when 32-bit values are used as 11639 // the return values of functions. Because it is so similar, it is handled 11640 // here as well. 11641 11642 if (N->getValueType(0) != MVT::i32 && 11643 N->getValueType(0) != MVT::i64) 11644 return SDValue(); 11645 11646 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) || 11647 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64()))) 11648 return SDValue(); 11649 11650 if (N->getOperand(0).getOpcode() != ISD::AND && 11651 N->getOperand(0).getOpcode() != ISD::OR && 11652 N->getOperand(0).getOpcode() != ISD::XOR && 11653 N->getOperand(0).getOpcode() != ISD::SELECT && 11654 N->getOperand(0).getOpcode() != ISD::SELECT_CC) 11655 return SDValue(); 11656 11657 SmallVector<SDValue, 4> Inputs; 11658 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps; 11659 SmallPtrSet<SDNode *, 16> Visited; 11660 11661 // Visit all inputs, collect all binary operations (and, or, xor and 11662 // select) that are all fed by truncations. 
11663 while (!BinOps.empty()) { 11664 SDValue BinOp = BinOps.back(); 11665 BinOps.pop_back(); 11666 11667 if (!Visited.insert(BinOp.getNode()).second) 11668 continue; 11669 11670 PromOps.push_back(BinOp); 11671 11672 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) { 11673 // The condition of the select is not promoted. 11674 if (BinOp.getOpcode() == ISD::SELECT && i == 0) 11675 continue; 11676 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3) 11677 continue; 11678 11679 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || 11680 isa<ConstantSDNode>(BinOp.getOperand(i))) { 11681 Inputs.push_back(BinOp.getOperand(i)); 11682 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || 11683 BinOp.getOperand(i).getOpcode() == ISD::OR || 11684 BinOp.getOperand(i).getOpcode() == ISD::XOR || 11685 BinOp.getOperand(i).getOpcode() == ISD::SELECT || 11686 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) { 11687 BinOps.push_back(BinOp.getOperand(i)); 11688 } else { 11689 // We have an input that is not a truncation or another binary 11690 // operation; we'll abort this transformation. 11691 return SDValue(); 11692 } 11693 } 11694 } 11695 11696 // The operands of a select that must be truncated when the select is 11697 // promoted because the operand is actually part of the to-be-promoted set. 11698 DenseMap<SDNode *, EVT> SelectTruncOp[2]; 11699 11700 // Make sure that this is a self-contained cluster of operations (which 11701 // is not quite the same thing as saying that everything has only one 11702 // use). 
11703 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 11704 if (isa<ConstantSDNode>(Inputs[i])) 11705 continue; 11706 11707 for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(), 11708 UE = Inputs[i].getNode()->use_end(); 11709 UI != UE; ++UI) { 11710 SDNode *User = *UI; 11711 if (User != N && !Visited.count(User)) 11712 return SDValue(); 11713 11714 // If we're going to promote the non-output-value operand(s) or SELECT or 11715 // SELECT_CC, record them for truncation. 11716 if (User->getOpcode() == ISD::SELECT) { 11717 if (User->getOperand(0) == Inputs[i]) 11718 SelectTruncOp[0].insert(std::make_pair(User, 11719 User->getOperand(0).getValueType())); 11720 } else if (User->getOpcode() == ISD::SELECT_CC) { 11721 if (User->getOperand(0) == Inputs[i]) 11722 SelectTruncOp[0].insert(std::make_pair(User, 11723 User->getOperand(0).getValueType())); 11724 if (User->getOperand(1) == Inputs[i]) 11725 SelectTruncOp[1].insert(std::make_pair(User, 11726 User->getOperand(1).getValueType())); 11727 } 11728 } 11729 } 11730 11731 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) { 11732 for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(), 11733 UE = PromOps[i].getNode()->use_end(); 11734 UI != UE; ++UI) { 11735 SDNode *User = *UI; 11736 if (User != N && !Visited.count(User)) 11737 return SDValue(); 11738 11739 // If we're going to promote the non-output-value operand(s) or SELECT or 11740 // SELECT_CC, record them for truncation. 
11741 if (User->getOpcode() == ISD::SELECT) { 11742 if (User->getOperand(0) == PromOps[i]) 11743 SelectTruncOp[0].insert(std::make_pair(User, 11744 User->getOperand(0).getValueType())); 11745 } else if (User->getOpcode() == ISD::SELECT_CC) { 11746 if (User->getOperand(0) == PromOps[i]) 11747 SelectTruncOp[0].insert(std::make_pair(User, 11748 User->getOperand(0).getValueType())); 11749 if (User->getOperand(1) == PromOps[i]) 11750 SelectTruncOp[1].insert(std::make_pair(User, 11751 User->getOperand(1).getValueType())); 11752 } 11753 } 11754 } 11755 11756 unsigned PromBits = N->getOperand(0).getValueSizeInBits(); 11757 bool ReallyNeedsExt = false; 11758 if (N->getOpcode() != ISD::ANY_EXTEND) { 11759 // If all of the inputs are not already sign/zero extended, then 11760 // we'll still need to do that at the end. 11761 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 11762 if (isa<ConstantSDNode>(Inputs[i])) 11763 continue; 11764 11765 unsigned OpBits = 11766 Inputs[i].getOperand(0).getValueSizeInBits(); 11767 assert(PromBits < OpBits && "Truncation not to a smaller bit count?"); 11768 11769 if ((N->getOpcode() == ISD::ZERO_EXTEND && 11770 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0), 11771 APInt::getHighBitsSet(OpBits, 11772 OpBits-PromBits))) || 11773 (N->getOpcode() == ISD::SIGN_EXTEND && 11774 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) < 11775 (OpBits-(PromBits-1)))) { 11776 ReallyNeedsExt = true; 11777 break; 11778 } 11779 } 11780 } 11781 11782 // Replace all inputs, either with the truncation operand, or a 11783 // truncation or extension to the final output type. 11784 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 11785 // Constant inputs need to be replaced with the to-be-promoted nodes that 11786 // use them because they might have users outside of the cluster of 11787 // promoted nodes. 
11788 if (isa<ConstantSDNode>(Inputs[i])) 11789 continue; 11790 11791 SDValue InSrc = Inputs[i].getOperand(0); 11792 if (Inputs[i].getValueType() == N->getValueType(0)) 11793 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc); 11794 else if (N->getOpcode() == ISD::SIGN_EXTEND) 11795 DAG.ReplaceAllUsesOfValueWith(Inputs[i], 11796 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0))); 11797 else if (N->getOpcode() == ISD::ZERO_EXTEND) 11798 DAG.ReplaceAllUsesOfValueWith(Inputs[i], 11799 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0))); 11800 else 11801 DAG.ReplaceAllUsesOfValueWith(Inputs[i], 11802 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0))); 11803 } 11804 11805 std::list<HandleSDNode> PromOpHandles; 11806 for (auto &PromOp : PromOps) 11807 PromOpHandles.emplace_back(PromOp); 11808 11809 // Replace all operations (these are all the same, but have a different 11810 // (promoted) return type). DAG.getNode will validate that the types of 11811 // a binary operator match, so go through the list in reverse so that 11812 // we've likely promoted both operands first. 11813 while (!PromOpHandles.empty()) { 11814 SDValue PromOp = PromOpHandles.back().getValue(); 11815 PromOpHandles.pop_back(); 11816 11817 unsigned C; 11818 switch (PromOp.getOpcode()) { 11819 default: C = 0; break; 11820 case ISD::SELECT: C = 1; break; 11821 case ISD::SELECT_CC: C = 2; break; 11822 } 11823 11824 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) && 11825 PromOp.getOperand(C).getValueType() != N->getValueType(0)) || 11826 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) && 11827 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) { 11828 // The to-be-promoted operands of this node have not yet been 11829 // promoted (this should be rare because we're going through the 11830 // list backward, but if one of the operands has several users in 11831 // this cluster of to-be-promoted nodes, it is possible). 
11832 PromOpHandles.emplace_front(PromOp); 11833 continue; 11834 } 11835 11836 // For SELECT and SELECT_CC nodes, we do a similar check for any 11837 // to-be-promoted comparison inputs. 11838 if (PromOp.getOpcode() == ISD::SELECT || 11839 PromOp.getOpcode() == ISD::SELECT_CC) { 11840 if ((SelectTruncOp[0].count(PromOp.getNode()) && 11841 PromOp.getOperand(0).getValueType() != N->getValueType(0)) || 11842 (SelectTruncOp[1].count(PromOp.getNode()) && 11843 PromOp.getOperand(1).getValueType() != N->getValueType(0))) { 11844 PromOpHandles.emplace_front(PromOp); 11845 continue; 11846 } 11847 } 11848 11849 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(), 11850 PromOp.getNode()->op_end()); 11851 11852 // If this node has constant inputs, then they'll need to be promoted here. 11853 for (unsigned i = 0; i < 2; ++i) { 11854 if (!isa<ConstantSDNode>(Ops[C+i])) 11855 continue; 11856 if (Ops[C+i].getValueType() == N->getValueType(0)) 11857 continue; 11858 11859 if (N->getOpcode() == ISD::SIGN_EXTEND) 11860 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); 11861 else if (N->getOpcode() == ISD::ZERO_EXTEND) 11862 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); 11863 else 11864 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); 11865 } 11866 11867 // If we've promoted the comparison inputs of a SELECT or SELECT_CC, 11868 // truncate them again to the original value type. 
11869 if (PromOp.getOpcode() == ISD::SELECT || 11870 PromOp.getOpcode() == ISD::SELECT_CC) { 11871 auto SI0 = SelectTruncOp[0].find(PromOp.getNode()); 11872 if (SI0 != SelectTruncOp[0].end()) 11873 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]); 11874 auto SI1 = SelectTruncOp[1].find(PromOp.getNode()); 11875 if (SI1 != SelectTruncOp[1].end()) 11876 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]); 11877 } 11878 11879 DAG.ReplaceAllUsesOfValueWith(PromOp, 11880 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops)); 11881 } 11882 11883 // Now we're left with the initial extension itself. 11884 if (!ReallyNeedsExt) 11885 return N->getOperand(0); 11886 11887 // To zero extend, just mask off everything except for the first bit (in the 11888 // i1 case). 11889 if (N->getOpcode() == ISD::ZERO_EXTEND) 11890 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0), 11891 DAG.getConstant(APInt::getLowBitsSet( 11892 N->getValueSizeInBits(0), PromBits), 11893 dl, N->getValueType(0))); 11894 11895 assert(N->getOpcode() == ISD::SIGN_EXTEND && 11896 "Invalid extension type"); 11897 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout()); 11898 SDValue ShiftCst = 11899 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy); 11900 return DAG.getNode( 11901 ISD::SRA, dl, N->getValueType(0), 11902 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst), 11903 ShiftCst); 11904 } 11905 11906 SDValue PPCTargetLowering::combineSetCC(SDNode *N, 11907 DAGCombinerInfo &DCI) const { 11908 assert(N->getOpcode() == ISD::SETCC && 11909 "Should be called with a SETCC node"); 11910 11911 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); 11912 if (CC == ISD::SETNE || CC == ISD::SETEQ) { 11913 SDValue LHS = N->getOperand(0); 11914 SDValue RHS = N->getOperand(1); 11915 11916 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS. 
11917 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) && 11918 LHS.hasOneUse()) 11919 std::swap(LHS, RHS); 11920 11921 // x == 0-y --> x+y == 0 11922 // x != 0-y --> x+y != 0 11923 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) && 11924 RHS.hasOneUse()) { 11925 SDLoc DL(N); 11926 SelectionDAG &DAG = DCI.DAG; 11927 EVT VT = N->getValueType(0); 11928 EVT OpVT = LHS.getValueType(); 11929 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1)); 11930 return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC); 11931 } 11932 } 11933 11934 return DAGCombineTruncBoolExt(N, DCI); 11935 } 11936 11937 // Is this an extending load from an f32 to an f64? 11938 static bool isFPExtLoad(SDValue Op) { 11939 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode())) 11940 return LD->getExtensionType() == ISD::EXTLOAD && 11941 Op.getValueType() == MVT::f64; 11942 return false; 11943 } 11944 11945 /// Reduces the number of fp-to-int conversion when building a vector. 11946 /// 11947 /// If this vector is built out of floating to integer conversions, 11948 /// transform it to a vector built out of floating point values followed by a 11949 /// single floating to integer conversion of the vector. 11950 /// Namely (build_vector (fptosi $A), (fptosi $B), ...) 11951 /// becomes (fptosi (build_vector ($A, $B, ...))) 11952 SDValue PPCTargetLowering:: 11953 combineElementTruncationToVectorTruncation(SDNode *N, 11954 DAGCombinerInfo &DCI) const { 11955 assert(N->getOpcode() == ISD::BUILD_VECTOR && 11956 "Should be called with a BUILD_VECTOR node"); 11957 11958 SelectionDAG &DAG = DCI.DAG; 11959 SDLoc dl(N); 11960 11961 SDValue FirstInput = N->getOperand(0); 11962 assert(FirstInput.getOpcode() == PPCISD::MFVSR && 11963 "The input operand must be an fp-to-int conversion."); 11964 11965 // This combine happens after legalization so the fp_to_[su]i nodes are 11966 // already converted to PPCSISD nodes. 
11967 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode(); 11968 if (FirstConversion == PPCISD::FCTIDZ || 11969 FirstConversion == PPCISD::FCTIDUZ || 11970 FirstConversion == PPCISD::FCTIWZ || 11971 FirstConversion == PPCISD::FCTIWUZ) { 11972 bool IsSplat = true; 11973 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ || 11974 FirstConversion == PPCISD::FCTIWUZ; 11975 EVT SrcVT = FirstInput.getOperand(0).getValueType(); 11976 SmallVector<SDValue, 4> Ops; 11977 EVT TargetVT = N->getValueType(0); 11978 for (int i = 0, e = N->getNumOperands(); i < e; ++i) { 11979 SDValue NextOp = N->getOperand(i); 11980 if (NextOp.getOpcode() != PPCISD::MFVSR) 11981 return SDValue(); 11982 unsigned NextConversion = NextOp.getOperand(0).getOpcode(); 11983 if (NextConversion != FirstConversion) 11984 return SDValue(); 11985 // If we are converting to 32-bit integers, we need to add an FP_ROUND. 11986 // This is not valid if the input was originally double precision. It is 11987 // also not profitable to do unless this is an extending load in which 11988 // case doing this combine will allow us to combine consecutive loads. 11989 if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0))) 11990 return SDValue(); 11991 if (N->getOperand(i) != FirstInput) 11992 IsSplat = false; 11993 } 11994 11995 // If this is a splat, we leave it as-is since there will be only a single 11996 // fp-to-int conversion followed by a splat of the integer. This is better 11997 // for 32-bit and smaller ints and neutral for 64-bit ints. 11998 if (IsSplat) 11999 return SDValue(); 12000 12001 // Now that we know we have the right type of node, get its operands 12002 for (int i = 0, e = N->getNumOperands(); i < e; ++i) { 12003 SDValue In = N->getOperand(i).getOperand(0); 12004 if (Is32Bit) { 12005 // For 32-bit values, we need to add an FP_ROUND node (if we made it 12006 // here, we know that all inputs are extending loads so this is safe). 
12007 if (In.isUndef()) 12008 Ops.push_back(DAG.getUNDEF(SrcVT)); 12009 else { 12010 SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl, 12011 MVT::f32, In.getOperand(0), 12012 DAG.getIntPtrConstant(1, dl)); 12013 Ops.push_back(Trunc); 12014 } 12015 } else 12016 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0)); 12017 } 12018 12019 unsigned Opcode; 12020 if (FirstConversion == PPCISD::FCTIDZ || 12021 FirstConversion == PPCISD::FCTIWZ) 12022 Opcode = ISD::FP_TO_SINT; 12023 else 12024 Opcode = ISD::FP_TO_UINT; 12025 12026 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32; 12027 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops); 12028 return DAG.getNode(Opcode, dl, TargetVT, BV); 12029 } 12030 return SDValue(); 12031 } 12032 12033 /// Reduce the number of loads when building a vector. 12034 /// 12035 /// Building a vector out of multiple loads can be converted to a load 12036 /// of the vector type if the loads are consecutive. If the loads are 12037 /// consecutive but in descending order, a shuffle is added at the end 12038 /// to reorder the vector. 12039 static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) { 12040 assert(N->getOpcode() == ISD::BUILD_VECTOR && 12041 "Should be called with a BUILD_VECTOR node"); 12042 12043 SDLoc dl(N); 12044 bool InputsAreConsecutiveLoads = true; 12045 bool InputsAreReverseConsecutive = true; 12046 unsigned ElemSize = N->getValueType(0).getScalarSizeInBits() / 8; 12047 SDValue FirstInput = N->getOperand(0); 12048 bool IsRoundOfExtLoad = false; 12049 12050 if (FirstInput.getOpcode() == ISD::FP_ROUND && 12051 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) { 12052 LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0)); 12053 IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD; 12054 } 12055 // Not a build vector of (possibly fp_rounded) loads. 
12056 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) || 12057 N->getNumOperands() == 1) 12058 return SDValue(); 12059 12060 for (int i = 1, e = N->getNumOperands(); i < e; ++i) { 12061 // If any inputs are fp_round(extload), they all must be. 12062 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND) 12063 return SDValue(); 12064 12065 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) : 12066 N->getOperand(i); 12067 if (NextInput.getOpcode() != ISD::LOAD) 12068 return SDValue(); 12069 12070 SDValue PreviousInput = 12071 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1); 12072 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput); 12073 LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput); 12074 12075 // If any inputs are fp_round(extload), they all must be. 12076 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD) 12077 return SDValue(); 12078 12079 if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG)) 12080 InputsAreConsecutiveLoads = false; 12081 if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG)) 12082 InputsAreReverseConsecutive = false; 12083 12084 // Exit early if the loads are neither consecutive nor reverse consecutive. 12085 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive) 12086 return SDValue(); 12087 } 12088 12089 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) && 12090 "The loads cannot be both consecutive and reverse consecutive."); 12091 12092 SDValue FirstLoadOp = 12093 IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput; 12094 SDValue LastLoadOp = 12095 IsRoundOfExtLoad ? 
N->getOperand(N->getNumOperands()-1).getOperand(0) : 12096 N->getOperand(N->getNumOperands()-1); 12097 12098 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp); 12099 LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp); 12100 if (InputsAreConsecutiveLoads) { 12101 assert(LD1 && "Input needs to be a LoadSDNode."); 12102 return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(), 12103 LD1->getBasePtr(), LD1->getPointerInfo(), 12104 LD1->getAlignment()); 12105 } 12106 if (InputsAreReverseConsecutive) { 12107 assert(LDL && "Input needs to be a LoadSDNode."); 12108 SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(), 12109 LDL->getBasePtr(), LDL->getPointerInfo(), 12110 LDL->getAlignment()); 12111 SmallVector<int, 16> Ops; 12112 for (int i = N->getNumOperands() - 1; i >= 0; i--) 12113 Ops.push_back(i); 12114 12115 return DAG.getVectorShuffle(N->getValueType(0), dl, Load, 12116 DAG.getUNDEF(N->getValueType(0)), Ops); 12117 } 12118 return SDValue(); 12119 } 12120 12121 // This function adds the required vector_shuffle needed to get 12122 // the elements of the vector extract in the correct position 12123 // as specified by the CorrectElems encoding. 12124 static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, 12125 SDValue Input, uint64_t Elems, 12126 uint64_t CorrectElems) { 12127 SDLoc dl(N); 12128 12129 unsigned NumElems = Input.getValueType().getVectorNumElements(); 12130 SmallVector<int, 16> ShuffleMask(NumElems, -1); 12131 12132 // Knowing the element indices being extracted from the original 12133 // vector and the order in which they're being inserted, just put 12134 // them at element indices required for the instruction. 
12135 for (unsigned i = 0; i < N->getNumOperands(); i++) { 12136 if (DAG.getDataLayout().isLittleEndian()) 12137 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF; 12138 else 12139 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4; 12140 CorrectElems = CorrectElems >> 8; 12141 Elems = Elems >> 8; 12142 } 12143 12144 SDValue Shuffle = 12145 DAG.getVectorShuffle(Input.getValueType(), dl, Input, 12146 DAG.getUNDEF(Input.getValueType()), ShuffleMask); 12147 12148 EVT Ty = N->getValueType(0); 12149 SDValue BV = DAG.getNode(PPCISD::SExtVElems, dl, Ty, Shuffle); 12150 return BV; 12151 } 12152 12153 // Look for build vector patterns where input operands come from sign 12154 // extended vector_extract elements of specific indices. If the correct indices 12155 // aren't used, add a vector shuffle to fix up the indices and create a new 12156 // PPCISD:SExtVElems node which selects the vector sign extend instructions 12157 // during instruction selection. 12158 static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) { 12159 // This array encodes the indices that the vector sign extend instructions 12160 // extract from when extending from one type to another for both BE and LE. 12161 // The right nibble of each byte corresponds to the LE incides. 12162 // and the left nibble of each byte corresponds to the BE incides. 
12163 // For example: 0x3074B8FC byte->word 12164 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC 12165 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF 12166 // For example: 0x000070F8 byte->double word 12167 // For LE: the allowed indices are: 0x0,0x8 12168 // For BE: the allowed indices are: 0x7,0xF 12169 uint64_t TargetElems[] = { 12170 0x3074B8FC, // b->w 12171 0x000070F8, // b->d 12172 0x10325476, // h->w 12173 0x00003074, // h->d 12174 0x00001032, // w->d 12175 }; 12176 12177 uint64_t Elems = 0; 12178 int Index; 12179 SDValue Input; 12180 12181 auto isSExtOfVecExtract = [&](SDValue Op) -> bool { 12182 if (!Op) 12183 return false; 12184 if (Op.getOpcode() != ISD::SIGN_EXTEND && 12185 Op.getOpcode() != ISD::SIGN_EXTEND_INREG) 12186 return false; 12187 12188 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value 12189 // of the right width. 12190 SDValue Extract = Op.getOperand(0); 12191 if (Extract.getOpcode() == ISD::ANY_EXTEND) 12192 Extract = Extract.getOperand(0); 12193 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT) 12194 return false; 12195 12196 ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1)); 12197 if (!ExtOp) 12198 return false; 12199 12200 Index = ExtOp->getZExtValue(); 12201 if (Input && Input != Extract.getOperand(0)) 12202 return false; 12203 12204 if (!Input) 12205 Input = Extract.getOperand(0); 12206 12207 Elems = Elems << 8; 12208 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4; 12209 Elems |= Index; 12210 12211 return true; 12212 }; 12213 12214 // If the build vector operands aren't sign extended vector extracts, 12215 // of the same input vector, then return. 12216 for (unsigned i = 0; i < N->getNumOperands(); i++) { 12217 if (!isSExtOfVecExtract(N->getOperand(i))) { 12218 return SDValue(); 12219 } 12220 } 12221 12222 // If the vector extract indicies are not correct, add the appropriate 12223 // vector_shuffle. 
12224 int TgtElemArrayIdx; 12225 int InputSize = Input.getValueType().getScalarSizeInBits(); 12226 int OutputSize = N->getValueType(0).getScalarSizeInBits(); 12227 if (InputSize + OutputSize == 40) 12228 TgtElemArrayIdx = 0; 12229 else if (InputSize + OutputSize == 72) 12230 TgtElemArrayIdx = 1; 12231 else if (InputSize + OutputSize == 48) 12232 TgtElemArrayIdx = 2; 12233 else if (InputSize + OutputSize == 80) 12234 TgtElemArrayIdx = 3; 12235 else if (InputSize + OutputSize == 96) 12236 TgtElemArrayIdx = 4; 12237 else 12238 return SDValue(); 12239 12240 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx]; 12241 CorrectElems = DAG.getDataLayout().isLittleEndian() 12242 ? CorrectElems & 0x0F0F0F0F0F0F0F0F 12243 : CorrectElems & 0xF0F0F0F0F0F0F0F0; 12244 if (Elems != CorrectElems) { 12245 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems); 12246 } 12247 12248 // Regular lowering will catch cases where a shuffle is not needed. 12249 return SDValue(); 12250 } 12251 12252 SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N, 12253 DAGCombinerInfo &DCI) const { 12254 assert(N->getOpcode() == ISD::BUILD_VECTOR && 12255 "Should be called with a BUILD_VECTOR node"); 12256 12257 SelectionDAG &DAG = DCI.DAG; 12258 SDLoc dl(N); 12259 12260 if (!Subtarget.hasVSX()) 12261 return SDValue(); 12262 12263 // The target independent DAG combiner will leave a build_vector of 12264 // float-to-int conversions intact. We can generate MUCH better code for 12265 // a float-to-int conversion of a vector of floats. 12266 SDValue FirstInput = N->getOperand(0); 12267 if (FirstInput.getOpcode() == PPCISD::MFVSR) { 12268 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI); 12269 if (Reduced) 12270 return Reduced; 12271 } 12272 12273 // If we're building a vector out of consecutive loads, just load that 12274 // vector type. 
12275 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG); 12276 if (Reduced) 12277 return Reduced; 12278 12279 // If we're building a vector out of extended elements from another vector 12280 // we have P9 vector integer extend instructions. The code assumes legal 12281 // input types (i.e. it can't handle things like v4i16) so do not run before 12282 // legalization. 12283 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) { 12284 Reduced = combineBVOfVecSExt(N, DAG); 12285 if (Reduced) 12286 return Reduced; 12287 } 12288 12289 12290 if (N->getValueType(0) != MVT::v2f64) 12291 return SDValue(); 12292 12293 // Looking for: 12294 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1)) 12295 if (FirstInput.getOpcode() != ISD::SINT_TO_FP && 12296 FirstInput.getOpcode() != ISD::UINT_TO_FP) 12297 return SDValue(); 12298 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP && 12299 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP) 12300 return SDValue(); 12301 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode()) 12302 return SDValue(); 12303 12304 SDValue Ext1 = FirstInput.getOperand(0); 12305 SDValue Ext2 = N->getOperand(1).getOperand(0); 12306 if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT || 12307 Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT) 12308 return SDValue(); 12309 12310 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1)); 12311 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1)); 12312 if (!Ext1Op || !Ext2Op) 12313 return SDValue(); 12314 if (Ext1.getValueType() != MVT::i32 || 12315 Ext2.getValueType() != MVT::i32) 12316 if (Ext1.getOperand(0) != Ext2.getOperand(0)) 12317 return SDValue(); 12318 12319 int FirstElem = Ext1Op->getZExtValue(); 12320 int SecondElem = Ext2Op->getZExtValue(); 12321 int SubvecIdx; 12322 if (FirstElem == 0 && SecondElem == 1) 12323 SubvecIdx = Subtarget.isLittleEndian() ? 
/// Combine an int -> FP conversion (SINT_TO_FP / UINT_TO_FP) into a direct
/// register-based conversion, avoiding the store/load sequence otherwise used
/// to move the integer into a floating-point register.
///
/// Two patterns are handled:
///  1. On Power9, (fp_to_* (load i8/i16)) becomes an LXSIZX load directly
///     into a VSR, optionally sign-extended in-register (VEXTS), followed by
///     an FCFID* conversion.
///  2. (*int_to_fp (fp_to_*int x)) round-trips are collapsed into
///     FCTID*Z + FCFID* with no memory traffic.
///
/// \param N    the SINT_TO_FP/UINT_TO_FP node being combined.
/// \param DCI  combiner context supplying the DAG and worklist.
/// \returns the replacement value, or an empty SDValue if no combine applies.
SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::SINT_TO_FP ||
          N->getOpcode() == ISD::UINT_TO_FP) &&
         "Need an int -> FP conversion node here");

  // All of the patterns below require hardware FP and 64-bit FP<->int moves.
  if (useSoftFloat() || !Subtarget.has64BitSupport())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Op(N, 0);

  // Don't handle ppc_fp128 here or conversions that are out-of-range capable
  // from the hardware (source narrower than i8 or wider than i64).
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();
  if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
      Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
    return SDValue();

  SDValue FirstOperand(Op.getOperand(0));
  bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
    (FirstOperand.getValueType() == MVT::i8 ||
     FirstOperand.getValueType() == MVT::i16);
  // Pattern 1: Power9 direct sub-word load into a VSR via LXSIZX.
  if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
    bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
    bool DstDouble = Op.getValueType() == MVT::f64;
    unsigned ConvOp = Signed ?
      (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
      (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
    // Width operand of LXSIZX/VEXTS: the number of bytes loaded (1 or 2).
    SDValue WidthConst =
      DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
                            dl, false);
    LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
    SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
    SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
                                         DAG.getVTList(MVT::f64, MVT::Other),
                                         Ops, MVT::i8, LDN->getMemOperand());

    // For signed conversion, we need to sign-extend the value in the VSR
    // (LXSIZX zero-fills) before converting.
    if (Signed) {
      SDValue ExtOps[] = { Ld, WidthConst };
      SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
      return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
    } else
      return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
  }

  // For i32 intermediate values, unfortunately, the conversion functions
  // leave the upper 32 bits of the value undefined. Within the set of
  // scalar instructions, we have no method for zero- or sign-extending the
  // value. Thus, we cannot handle i32 intermediate values here.
  if (Op.getOperand(0).getValueType() == MVT::i32)
    return SDValue();

  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                       ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
                                                            : PPCISD::FCFIDS)
                       : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
                                                            : PPCISD::FCFID);
  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                  ? MVT::f32
                  : MVT::f64;

  // Pattern 2: if we're converting from a float, to an int, and back to a
  // float again, then we don't need the store/load pair at all.
  // (FP_TO_UINT requires FPCVT; FP_TO_SINT is always available.)
  if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
       Subtarget.hasFPCVT()) ||
      (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
    SDValue Src = Op.getOperand(0).getOperand(0);
    if (Src.getValueType() == MVT::f32) {
      Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
      DCI.AddToWorklist(Src.getNode());
    } else if (Src.getValueType() != MVT::f64) {
      // Make sure that we don't pick up a ppc_fp128 source value.
      return SDValue();
    }

    unsigned FCTOp =
      Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
                                                        PPCISD::FCTIDUZ;

    SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
    SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);

    // Without FCFIDS, the conversion produced f64; round it back to f32.
    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
      FP = DAG.getNode(ISD::FP_ROUND, dl,
                       MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
      DCI.AddToWorklist(FP.getNode());
    }

    return FP;
  }

  return SDValue();
}
12461 if (MMO->getSize() < 16) 12462 return SDValue(); 12463 break; 12464 } 12465 case ISD::INTRINSIC_W_CHAIN: { 12466 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N); 12467 Chain = Intrin->getChain(); 12468 // Similarly to the store case below, Intrin->getBasePtr() doesn't get 12469 // us what we want. Get operand 2 instead. 12470 Base = Intrin->getOperand(2); 12471 MMO = Intrin->getMemOperand(); 12472 break; 12473 } 12474 } 12475 12476 MVT VecTy = N->getValueType(0).getSimpleVT(); 12477 12478 // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is 12479 // aligned and the type is a vector with elements up to 4 bytes 12480 if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment()%16) 12481 && VecTy.getScalarSizeInBits() <= 32 ) { 12482 return SDValue(); 12483 } 12484 12485 SDValue LoadOps[] = { Chain, Base }; 12486 SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl, 12487 DAG.getVTList(MVT::v2f64, MVT::Other), 12488 LoadOps, MVT::v2f64, MMO); 12489 12490 DCI.AddToWorklist(Load.getNode()); 12491 Chain = Load.getValue(1); 12492 SDValue Swap = DAG.getNode( 12493 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load); 12494 DCI.AddToWorklist(Swap.getNode()); 12495 12496 // Add a bitcast if the resulting load type doesn't match v2f64. 12497 if (VecTy != MVT::v2f64) { 12498 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap); 12499 DCI.AddToWorklist(N.getNode()); 12500 // Package {bitcast value, swap's chain} to match Load's shape. 12501 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other), 12502 N, Swap.getValue(1)); 12503 } 12504 12505 return Swap; 12506 } 12507 12508 // expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for 12509 // builtins) into stores with swaps. 
12510 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N, 12511 DAGCombinerInfo &DCI) const { 12512 SelectionDAG &DAG = DCI.DAG; 12513 SDLoc dl(N); 12514 SDValue Chain; 12515 SDValue Base; 12516 unsigned SrcOpnd; 12517 MachineMemOperand *MMO; 12518 12519 switch (N->getOpcode()) { 12520 default: 12521 llvm_unreachable("Unexpected opcode for little endian VSX store"); 12522 case ISD::STORE: { 12523 StoreSDNode *ST = cast<StoreSDNode>(N); 12524 Chain = ST->getChain(); 12525 Base = ST->getBasePtr(); 12526 MMO = ST->getMemOperand(); 12527 SrcOpnd = 1; 12528 // If the MMO suggests this isn't a store of a full vector, leave 12529 // things alone. For a built-in, we have to make the change for 12530 // correctness, so if there is a size problem that will be a bug. 12531 if (MMO->getSize() < 16) 12532 return SDValue(); 12533 break; 12534 } 12535 case ISD::INTRINSIC_VOID: { 12536 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N); 12537 Chain = Intrin->getChain(); 12538 // Intrin->getBasePtr() oddly does not get what we want. 12539 Base = Intrin->getOperand(3); 12540 MMO = Intrin->getMemOperand(); 12541 SrcOpnd = 2; 12542 break; 12543 } 12544 } 12545 12546 SDValue Src = N->getOperand(SrcOpnd); 12547 MVT VecTy = Src.getValueType().getSimpleVT(); 12548 12549 // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the load is 12550 // aligned and the type is a vector with elements up to 4 bytes 12551 if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment()%16) 12552 && VecTy.getScalarSizeInBits() <= 32 ) { 12553 return SDValue(); 12554 } 12555 12556 // All stores are done as v2f64 and possible bit cast. 
12557 if (VecTy != MVT::v2f64) { 12558 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src); 12559 DCI.AddToWorklist(Src.getNode()); 12560 } 12561 12562 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl, 12563 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src); 12564 DCI.AddToWorklist(Swap.getNode()); 12565 Chain = Swap.getValue(1); 12566 SDValue StoreOps[] = { Chain, Swap, Base }; 12567 SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl, 12568 DAG.getVTList(MVT::Other), 12569 StoreOps, VecTy, MMO); 12570 DCI.AddToWorklist(Store.getNode()); 12571 return Store; 12572 } 12573 12574 // Handle DAG combine for STORE (FP_TO_INT F). 12575 SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N, 12576 DAGCombinerInfo &DCI) const { 12577 12578 SelectionDAG &DAG = DCI.DAG; 12579 SDLoc dl(N); 12580 unsigned Opcode = N->getOperand(1).getOpcode(); 12581 12582 assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) 12583 && "Not a FP_TO_INT Instruction!"); 12584 12585 SDValue Val = N->getOperand(1).getOperand(0); 12586 EVT Op1VT = N->getOperand(1).getValueType(); 12587 EVT ResVT = Val.getValueType(); 12588 12589 // Floating point types smaller than 32 bits are not legal on Power. 12590 if (ResVT.getScalarSizeInBits() < 32) 12591 return SDValue(); 12592 12593 // Only perform combine for conversion to i64/i32 or power9 i16/i8. 12594 bool ValidTypeForStoreFltAsInt = 12595 (Op1VT == MVT::i32 || Op1VT == MVT::i64 || 12596 (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8))); 12597 12598 if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Altivec() || 12599 cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt) 12600 return SDValue(); 12601 12602 // Extend f32 values to f64 12603 if (ResVT.getScalarSizeInBits() == 32) { 12604 Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val); 12605 DCI.AddToWorklist(Val.getNode()); 12606 } 12607 12608 // Set signed or unsigned conversion opcode. 12609 unsigned ConvOpcode = (Opcode == ISD::FP_TO_SINT) ? 
12610 PPCISD::FP_TO_SINT_IN_VSR : 12611 PPCISD::FP_TO_UINT_IN_VSR; 12612 12613 Val = DAG.getNode(ConvOpcode, 12614 dl, ResVT == MVT::f128 ? MVT::f128 : MVT::f64, Val); 12615 DCI.AddToWorklist(Val.getNode()); 12616 12617 // Set number of bytes being converted. 12618 unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8; 12619 SDValue Ops[] = { N->getOperand(0), Val, N->getOperand(2), 12620 DAG.getIntPtrConstant(ByteSize, dl, false), 12621 DAG.getValueType(Op1VT) }; 12622 12623 Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl, 12624 DAG.getVTList(MVT::Other), Ops, 12625 cast<StoreSDNode>(N)->getMemoryVT(), 12626 cast<StoreSDNode>(N)->getMemOperand()); 12627 12628 DCI.AddToWorklist(Val.getNode()); 12629 return Val; 12630 } 12631 12632 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, 12633 DAGCombinerInfo &DCI) const { 12634 SelectionDAG &DAG = DCI.DAG; 12635 SDLoc dl(N); 12636 switch (N->getOpcode()) { 12637 default: break; 12638 case ISD::ADD: 12639 return combineADD(N, DCI); 12640 case ISD::SHL: 12641 return combineSHL(N, DCI); 12642 case ISD::SRA: 12643 return combineSRA(N, DCI); 12644 case ISD::SRL: 12645 return combineSRL(N, DCI); 12646 case PPCISD::SHL: 12647 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0. 12648 return N->getOperand(0); 12649 break; 12650 case PPCISD::SRL: 12651 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0. 12652 return N->getOperand(0); 12653 break; 12654 case PPCISD::SRA: 12655 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { 12656 if (C->isNullValue() || // 0 >>s V -> 0. 12657 C->isAllOnesValue()) // -1 >>s V -> -1. 
12658 return N->getOperand(0); 12659 } 12660 break; 12661 case ISD::SIGN_EXTEND: 12662 case ISD::ZERO_EXTEND: 12663 case ISD::ANY_EXTEND: 12664 return DAGCombineExtBoolTrunc(N, DCI); 12665 case ISD::TRUNCATE: 12666 return combineTRUNCATE(N, DCI); 12667 case ISD::SETCC: 12668 if (SDValue CSCC = combineSetCC(N, DCI)) 12669 return CSCC; 12670 LLVM_FALLTHROUGH; 12671 case ISD::SELECT_CC: 12672 return DAGCombineTruncBoolExt(N, DCI); 12673 case ISD::SINT_TO_FP: 12674 case ISD::UINT_TO_FP: 12675 return combineFPToIntToFP(N, DCI); 12676 case ISD::STORE: { 12677 12678 EVT Op1VT = N->getOperand(1).getValueType(); 12679 unsigned Opcode = N->getOperand(1).getOpcode(); 12680 12681 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) { 12682 SDValue Val= combineStoreFPToInt(N, DCI); 12683 if (Val) 12684 return Val; 12685 } 12686 12687 // Turn STORE (BSWAP) -> sthbrx/stwbrx. 12688 if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP && 12689 N->getOperand(1).getNode()->hasOneUse() && 12690 (Op1VT == MVT::i32 || Op1VT == MVT::i16 || 12691 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) { 12692 12693 // STBRX can only handle simple types and it makes no sense to store less 12694 // two bytes in byte-reversed order. 12695 EVT mVT = cast<StoreSDNode>(N)->getMemoryVT(); 12696 if (mVT.isExtended() || mVT.getSizeInBits() < 16) 12697 break; 12698 12699 SDValue BSwapOp = N->getOperand(1).getOperand(0); 12700 // Do an any-extend to 32-bits if this is a half-word input. 12701 if (BSwapOp.getValueType() == MVT::i16) 12702 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp); 12703 12704 // If the type of BSWAP operand is wider than stored memory width 12705 // it need to be shifted to the right side before STBRX. 
12706 if (Op1VT.bitsGT(mVT)) { 12707 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits(); 12708 BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp, 12709 DAG.getConstant(Shift, dl, MVT::i32)); 12710 // Need to truncate if this is a bswap of i64 stored as i32/i16. 12711 if (Op1VT == MVT::i64) 12712 BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp); 12713 } 12714 12715 SDValue Ops[] = { 12716 N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT) 12717 }; 12718 return 12719 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other), 12720 Ops, cast<StoreSDNode>(N)->getMemoryVT(), 12721 cast<StoreSDNode>(N)->getMemOperand()); 12722 } 12723 12724 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0> 12725 // So it can increase the chance of CSE constant construction. 12726 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() && 12727 isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) { 12728 // Need to sign-extended to 64-bits to handle negative values. 12729 EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT(); 12730 uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1), 12731 MemVT.getSizeInBits()); 12732 SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64); 12733 12734 // DAG.getTruncStore() can't be used here because it doesn't accept 12735 // the general (base + offset) addressing mode. 12736 // So we use UpdateNodeOperands and setTruncatingStore instead. 12737 DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2), 12738 N->getOperand(3)); 12739 cast<StoreSDNode>(N)->setTruncatingStore(true); 12740 return SDValue(N, 0); 12741 } 12742 12743 // For little endian, VSX stores require generating xxswapd/lxvd2x. 12744 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store. 
12745 if (Op1VT.isSimple()) { 12746 MVT StoreVT = Op1VT.getSimpleVT(); 12747 if (Subtarget.needsSwapsForVSXMemOps() && 12748 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 || 12749 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32)) 12750 return expandVSXStoreForLE(N, DCI); 12751 } 12752 break; 12753 } 12754 case ISD::LOAD: { 12755 LoadSDNode *LD = cast<LoadSDNode>(N); 12756 EVT VT = LD->getValueType(0); 12757 12758 // For little endian, VSX loads require generating lxvd2x/xxswapd. 12759 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load. 12760 if (VT.isSimple()) { 12761 MVT LoadVT = VT.getSimpleVT(); 12762 if (Subtarget.needsSwapsForVSXMemOps() && 12763 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 || 12764 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32)) 12765 return expandVSXLoadForLE(N, DCI); 12766 } 12767 12768 // We sometimes end up with a 64-bit integer load, from which we extract 12769 // two single-precision floating-point numbers. This happens with 12770 // std::complex<float>, and other similar structures, because of the way we 12771 // canonicalize structure copies. However, if we lack direct moves, 12772 // then the final bitcasts from the extracted integer values to the 12773 // floating-point numbers turn into store/load pairs. Even with direct moves, 12774 // just loading the two floating-point numbers is likely better. 
12775 auto ReplaceTwoFloatLoad = [&]() { 12776 if (VT != MVT::i64) 12777 return false; 12778 12779 if (LD->getExtensionType() != ISD::NON_EXTLOAD || 12780 LD->isVolatile()) 12781 return false; 12782 12783 // We're looking for a sequence like this: 12784 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64 12785 // t16: i64 = srl t13, Constant:i32<32> 12786 // t17: i32 = truncate t16 12787 // t18: f32 = bitcast t17 12788 // t19: i32 = truncate t13 12789 // t20: f32 = bitcast t19 12790 12791 if (!LD->hasNUsesOfValue(2, 0)) 12792 return false; 12793 12794 auto UI = LD->use_begin(); 12795 while (UI.getUse().getResNo() != 0) ++UI; 12796 SDNode *Trunc = *UI++; 12797 while (UI.getUse().getResNo() != 0) ++UI; 12798 SDNode *RightShift = *UI; 12799 if (Trunc->getOpcode() != ISD::TRUNCATE) 12800 std::swap(Trunc, RightShift); 12801 12802 if (Trunc->getOpcode() != ISD::TRUNCATE || 12803 Trunc->getValueType(0) != MVT::i32 || 12804 !Trunc->hasOneUse()) 12805 return false; 12806 if (RightShift->getOpcode() != ISD::SRL || 12807 !isa<ConstantSDNode>(RightShift->getOperand(1)) || 12808 RightShift->getConstantOperandVal(1) != 32 || 12809 !RightShift->hasOneUse()) 12810 return false; 12811 12812 SDNode *Trunc2 = *RightShift->use_begin(); 12813 if (Trunc2->getOpcode() != ISD::TRUNCATE || 12814 Trunc2->getValueType(0) != MVT::i32 || 12815 !Trunc2->hasOneUse()) 12816 return false; 12817 12818 SDNode *Bitcast = *Trunc->use_begin(); 12819 SDNode *Bitcast2 = *Trunc2->use_begin(); 12820 12821 if (Bitcast->getOpcode() != ISD::BITCAST || 12822 Bitcast->getValueType(0) != MVT::f32) 12823 return false; 12824 if (Bitcast2->getOpcode() != ISD::BITCAST || 12825 Bitcast2->getValueType(0) != MVT::f32) 12826 return false; 12827 12828 if (Subtarget.isLittleEndian()) 12829 std::swap(Bitcast, Bitcast2); 12830 12831 // Bitcast has the second float (in memory-layout order) and Bitcast2 12832 // has the first one. 
12833 12834 SDValue BasePtr = LD->getBasePtr(); 12835 if (LD->isIndexed()) { 12836 assert(LD->getAddressingMode() == ISD::PRE_INC && 12837 "Non-pre-inc AM on PPC?"); 12838 BasePtr = 12839 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, 12840 LD->getOffset()); 12841 } 12842 12843 auto MMOFlags = 12844 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile; 12845 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr, 12846 LD->getPointerInfo(), LD->getAlignment(), 12847 MMOFlags, LD->getAAInfo()); 12848 SDValue AddPtr = 12849 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), 12850 BasePtr, DAG.getIntPtrConstant(4, dl)); 12851 SDValue FloatLoad2 = DAG.getLoad( 12852 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr, 12853 LD->getPointerInfo().getWithOffset(4), 12854 MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo()); 12855 12856 if (LD->isIndexed()) { 12857 // Note that DAGCombine should re-form any pre-increment load(s) from 12858 // what is produced here if that makes sense. 12859 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr); 12860 } 12861 12862 DCI.CombineTo(Bitcast2, FloatLoad); 12863 DCI.CombineTo(Bitcast, FloatLoad2); 12864 12865 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1), 12866 SDValue(FloatLoad2.getNode(), 1)); 12867 return true; 12868 }; 12869 12870 if (ReplaceTwoFloatLoad()) 12871 return SDValue(N, 0); 12872 12873 EVT MemVT = LD->getMemoryVT(); 12874 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext()); 12875 unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty); 12876 Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext()); 12877 unsigned ScalarABIAlignment = DAG.getDataLayout().getABITypeAlignment(STy); 12878 if (LD->isUnindexed() && VT.isVector() && 12879 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) && 12880 // P8 and later hardware should just use LOAD. 
12881 !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 || 12882 VT == MVT::v4i32 || VT == MVT::v4f32)) || 12883 (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) && 12884 LD->getAlignment() >= ScalarABIAlignment)) && 12885 LD->getAlignment() < ABIAlignment) { 12886 // This is a type-legal unaligned Altivec or QPX load. 12887 SDValue Chain = LD->getChain(); 12888 SDValue Ptr = LD->getBasePtr(); 12889 bool isLittleEndian = Subtarget.isLittleEndian(); 12890 12891 // This implements the loading of unaligned vectors as described in 12892 // the venerable Apple Velocity Engine overview. Specifically: 12893 // https://developer.apple.com/hardwaredrivers/ve/alignment.html 12894 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html 12895 // 12896 // The general idea is to expand a sequence of one or more unaligned 12897 // loads into an alignment-based permutation-control instruction (lvsl 12898 // or lvsr), a series of regular vector loads (which always truncate 12899 // their input address to an aligned address), and a series of 12900 // permutations. The results of these permutations are the requested 12901 // loaded values. The trick is that the last "extra" load is not taken 12902 // from the address you might suspect (sizeof(vector) bytes after the 12903 // last requested load), but rather sizeof(vector) - 1 bytes after the 12904 // last requested vector. The point of this is to avoid a page fault if 12905 // the base address happened to be aligned. This works because if the 12906 // base address is aligned, then adding less than a full vector length 12907 // will cause the last vector in the sequence to be (re)loaded. 12908 // Otherwise, the next vector will be fetched as you might suspect was 12909 // necessary. 12910 12911 // We might be able to reuse the permutation generation from 12912 // a different base address offset from this one by an aligned amount. 
12913 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this 12914 // optimization later. 12915 Intrinsic::ID Intr, IntrLD, IntrPerm; 12916 MVT PermCntlTy, PermTy, LDTy; 12917 if (Subtarget.hasAltivec()) { 12918 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr : 12919 Intrinsic::ppc_altivec_lvsl; 12920 IntrLD = Intrinsic::ppc_altivec_lvx; 12921 IntrPerm = Intrinsic::ppc_altivec_vperm; 12922 PermCntlTy = MVT::v16i8; 12923 PermTy = MVT::v4i32; 12924 LDTy = MVT::v4i32; 12925 } else { 12926 Intr = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld : 12927 Intrinsic::ppc_qpx_qvlpcls; 12928 IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd : 12929 Intrinsic::ppc_qpx_qvlfs; 12930 IntrPerm = Intrinsic::ppc_qpx_qvfperm; 12931 PermCntlTy = MVT::v4f64; 12932 PermTy = MVT::v4f64; 12933 LDTy = MemVT.getSimpleVT(); 12934 } 12935 12936 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy); 12937 12938 // Create the new MMO for the new base load. It is like the original MMO, 12939 // but represents an area in memory almost twice the vector size centered 12940 // on the original address. If the address is unaligned, we might start 12941 // reading up to (sizeof(vector)-1) bytes below the address of the 12942 // original unaligned load. 12943 MachineFunction &MF = DAG.getMachineFunction(); 12944 MachineMemOperand *BaseMMO = 12945 MF.getMachineMemOperand(LD->getMemOperand(), 12946 -(long)MemVT.getStoreSize()+1, 12947 2*MemVT.getStoreSize()-1); 12948 12949 // Create the new base load. 
12950 SDValue LDXIntID = 12951 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout())); 12952 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr }; 12953 SDValue BaseLoad = 12954 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, 12955 DAG.getVTList(PermTy, MVT::Other), 12956 BaseLoadOps, LDTy, BaseMMO); 12957 12958 // Note that the value of IncOffset (which is provided to the next 12959 // load's pointer info offset value, and thus used to calculate the 12960 // alignment), and the value of IncValue (which is actually used to 12961 // increment the pointer value) are different! This is because we 12962 // require the next load to appear to be aligned, even though it 12963 // is actually offset from the base pointer by a lesser amount. 12964 int IncOffset = VT.getSizeInBits() / 8; 12965 int IncValue = IncOffset; 12966 12967 // Walk (both up and down) the chain looking for another load at the real 12968 // (aligned) offset (the alignment of the other load does not matter in 12969 // this case). If found, then do not use the offset reduction trick, as 12970 // that will prevent the loads from being later combined (as they would 12971 // otherwise be duplicates). 
12972 if (!findConsecutiveLoad(LD, DAG)) 12973 --IncValue; 12974 12975 SDValue Increment = 12976 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout())); 12977 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); 12978 12979 MachineMemOperand *ExtraMMO = 12980 MF.getMachineMemOperand(LD->getMemOperand(), 12981 1, 2*MemVT.getStoreSize()-1); 12982 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr }; 12983 SDValue ExtraLoad = 12984 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, 12985 DAG.getVTList(PermTy, MVT::Other), 12986 ExtraLoadOps, LDTy, ExtraMMO); 12987 12988 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 12989 BaseLoad.getValue(1), ExtraLoad.getValue(1)); 12990 12991 // Because vperm has a big-endian bias, we must reverse the order 12992 // of the input vectors and complement the permute control vector 12993 // when generating little endian code. We have already handled the 12994 // latter by using lvsr instead of lvsl, so just reverse BaseLoad 12995 // and ExtraLoad here. 12996 SDValue Perm; 12997 if (isLittleEndian) 12998 Perm = BuildIntrinsicOp(IntrPerm, 12999 ExtraLoad, BaseLoad, PermCntl, DAG, dl); 13000 else 13001 Perm = BuildIntrinsicOp(IntrPerm, 13002 BaseLoad, ExtraLoad, PermCntl, DAG, dl); 13003 13004 if (VT != PermTy) 13005 Perm = Subtarget.hasAltivec() ? 13006 DAG.getNode(ISD::BITCAST, dl, VT, Perm) : 13007 DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX 13008 DAG.getTargetConstant(1, dl, MVT::i64)); 13009 // second argument is 1 because this rounding 13010 // is always exact. 13011 13012 // The output of the permutation is our loaded result, the TokenFactor is 13013 // our new chain. 13014 DCI.CombineTo(N, Perm, TF); 13015 return SDValue(N, 0); 13016 } 13017 } 13018 break; 13019 case ISD::INTRINSIC_WO_CHAIN: { 13020 bool isLittleEndian = Subtarget.isLittleEndian(); 13021 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 13022 Intrinsic::ID Intr = (isLittleEndian ? 
Intrinsic::ppc_altivec_lvsr 13023 : Intrinsic::ppc_altivec_lvsl); 13024 if ((IID == Intr || 13025 IID == Intrinsic::ppc_qpx_qvlpcld || 13026 IID == Intrinsic::ppc_qpx_qvlpcls) && 13027 N->getOperand(1)->getOpcode() == ISD::ADD) { 13028 SDValue Add = N->getOperand(1); 13029 13030 int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ? 13031 5 /* 32 byte alignment */ : 4 /* 16 byte alignment */; 13032 13033 if (DAG.MaskedValueIsZero(Add->getOperand(1), 13034 APInt::getAllOnesValue(Bits /* alignment */) 13035 .zext(Add.getScalarValueSizeInBits()))) { 13036 SDNode *BasePtr = Add->getOperand(0).getNode(); 13037 for (SDNode::use_iterator UI = BasePtr->use_begin(), 13038 UE = BasePtr->use_end(); 13039 UI != UE; ++UI) { 13040 if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN && 13041 cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == IID) { 13042 // We've found another LVSL/LVSR, and this address is an aligned 13043 // multiple of that one. The results will be the same, so use the 13044 // one we've just found instead. 
13045 13046 return SDValue(*UI, 0); 13047 } 13048 } 13049 } 13050 13051 if (isa<ConstantSDNode>(Add->getOperand(1))) { 13052 SDNode *BasePtr = Add->getOperand(0).getNode(); 13053 for (SDNode::use_iterator UI = BasePtr->use_begin(), 13054 UE = BasePtr->use_end(); UI != UE; ++UI) { 13055 if (UI->getOpcode() == ISD::ADD && 13056 isa<ConstantSDNode>(UI->getOperand(1)) && 13057 (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() - 13058 cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) % 13059 (1ULL << Bits) == 0) { 13060 SDNode *OtherAdd = *UI; 13061 for (SDNode::use_iterator VI = OtherAdd->use_begin(), 13062 VE = OtherAdd->use_end(); VI != VE; ++VI) { 13063 if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN && 13064 cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) { 13065 return SDValue(*VI, 0); 13066 } 13067 } 13068 } 13069 } 13070 } 13071 } 13072 13073 // Combine vmaxsw/h/b(a, a's negation) to abs(a) 13074 // Expose the vabsduw/h/b opportunity for down stream 13075 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() && 13076 (IID == Intrinsic::ppc_altivec_vmaxsw || 13077 IID == Intrinsic::ppc_altivec_vmaxsh || 13078 IID == Intrinsic::ppc_altivec_vmaxsb)) { 13079 SDValue V1 = N->getOperand(1); 13080 SDValue V2 = N->getOperand(2); 13081 if ((V1.getSimpleValueType() == MVT::v4i32 || 13082 V1.getSimpleValueType() == MVT::v8i16 || 13083 V1.getSimpleValueType() == MVT::v16i8) && 13084 V1.getSimpleValueType() == V2.getSimpleValueType()) { 13085 // (0-a, a) 13086 if (V1.getOpcode() == ISD::SUB && 13087 ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) && 13088 V1.getOperand(1) == V2) { 13089 return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2); 13090 } 13091 // (a, 0-a) 13092 if (V2.getOpcode() == ISD::SUB && 13093 ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) && 13094 V2.getOperand(1) == V1) { 13095 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1); 13096 } 13097 // (x-y, y-x) 13098 if (V1.getOpcode() == ISD::SUB && 
V2.getOpcode() == ISD::SUB && 13099 V1.getOperand(0) == V2.getOperand(1) && 13100 V1.getOperand(1) == V2.getOperand(0)) { 13101 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1); 13102 } 13103 } 13104 } 13105 } 13106 13107 break; 13108 case ISD::INTRINSIC_W_CHAIN: 13109 // For little endian, VSX loads require generating lxvd2x/xxswapd. 13110 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load. 13111 if (Subtarget.needsSwapsForVSXMemOps()) { 13112 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { 13113 default: 13114 break; 13115 case Intrinsic::ppc_vsx_lxvw4x: 13116 case Intrinsic::ppc_vsx_lxvd2x: 13117 return expandVSXLoadForLE(N, DCI); 13118 } 13119 } 13120 break; 13121 case ISD::INTRINSIC_VOID: 13122 // For little endian, VSX stores require generating xxswapd/stxvd2x. 13123 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store. 13124 if (Subtarget.needsSwapsForVSXMemOps()) { 13125 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { 13126 default: 13127 break; 13128 case Intrinsic::ppc_vsx_stxvw4x: 13129 case Intrinsic::ppc_vsx_stxvd2x: 13130 return expandVSXStoreForLE(N, DCI); 13131 } 13132 } 13133 break; 13134 case ISD::BSWAP: 13135 // Turn BSWAP (LOAD) -> lhbrx/lwbrx. 13136 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && 13137 N->getOperand(0).hasOneUse() && 13138 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 || 13139 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && 13140 N->getValueType(0) == MVT::i64))) { 13141 SDValue Load = N->getOperand(0); 13142 LoadSDNode *LD = cast<LoadSDNode>(Load); 13143 // Create the byte-swapping load. 13144 SDValue Ops[] = { 13145 LD->getChain(), // Chain 13146 LD->getBasePtr(), // Ptr 13147 DAG.getValueType(N->getValueType(0)) // VT 13148 }; 13149 SDValue BSLoad = 13150 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl, 13151 DAG.getVTList(N->getValueType(0) == MVT::i64 ? 
13152 MVT::i64 : MVT::i32, MVT::Other), 13153 Ops, LD->getMemoryVT(), LD->getMemOperand()); 13154 13155 // If this is an i16 load, insert the truncate. 13156 SDValue ResVal = BSLoad; 13157 if (N->getValueType(0) == MVT::i16) 13158 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad); 13159 13160 // First, combine the bswap away. This makes the value produced by the 13161 // load dead. 13162 DCI.CombineTo(N, ResVal); 13163 13164 // Next, combine the load away, we give it a bogus result value but a real 13165 // chain result. The result value is dead because the bswap is dead. 13166 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1)); 13167 13168 // Return N so it doesn't get rechecked! 13169 return SDValue(N, 0); 13170 } 13171 break; 13172 case PPCISD::VCMP: 13173 // If a VCMPo node already exists with exactly the same operands as this 13174 // node, use its result instead of this node (VCMPo computes both a CR6 and 13175 // a normal output). 13176 // 13177 if (!N->getOperand(0).hasOneUse() && 13178 !N->getOperand(1).hasOneUse() && 13179 !N->getOperand(2).hasOneUse()) { 13180 13181 // Scan all of the users of the LHS, looking for VCMPo's that match. 13182 SDNode *VCMPoNode = nullptr; 13183 13184 SDNode *LHSN = N->getOperand(0).getNode(); 13185 for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end(); 13186 UI != E; ++UI) 13187 if (UI->getOpcode() == PPCISD::VCMPo && 13188 UI->getOperand(1) == N->getOperand(1) && 13189 UI->getOperand(2) == N->getOperand(2) && 13190 UI->getOperand(0) == N->getOperand(0)) { 13191 VCMPoNode = *UI; 13192 break; 13193 } 13194 13195 // If there is no VCMPo node, or if the flag value has a single use, don't 13196 // transform this. 13197 if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1)) 13198 break; 13199 13200 // Look at the (necessarily single) use of the flag value. If it has a 13201 // chain, this transformation is more complex. 
Note that multiple things 13202 // could use the value result, which we should ignore. 13203 SDNode *FlagUser = nullptr; 13204 for (SDNode::use_iterator UI = VCMPoNode->use_begin(); 13205 FlagUser == nullptr; ++UI) { 13206 assert(UI != VCMPoNode->use_end() && "Didn't find user!"); 13207 SDNode *User = *UI; 13208 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { 13209 if (User->getOperand(i) == SDValue(VCMPoNode, 1)) { 13210 FlagUser = User; 13211 break; 13212 } 13213 } 13214 } 13215 13216 // If the user is a MFOCRF instruction, we know this is safe. 13217 // Otherwise we give up for right now. 13218 if (FlagUser->getOpcode() == PPCISD::MFOCRF) 13219 return SDValue(VCMPoNode, 0); 13220 } 13221 break; 13222 case ISD::BRCOND: { 13223 SDValue Cond = N->getOperand(1); 13224 SDValue Target = N->getOperand(2); 13225 13226 if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN && 13227 cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() == 13228 Intrinsic::ppc_is_decremented_ctr_nonzero) { 13229 13230 // We now need to make the intrinsic dead (it cannot be instruction 13231 // selected). 13232 DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0)); 13233 assert(Cond.getNode()->hasOneUse() && 13234 "Counter decrement has more than one use"); 13235 13236 return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other, 13237 N->getOperand(0), Target); 13238 } 13239 } 13240 break; 13241 case ISD::BR_CC: { 13242 // If this is a branch on an altivec predicate comparison, lower this so 13243 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This 13244 // lowering is done pre-legalize, because the legalizer lowers the predicate 13245 // compare down to code that is difficult to reassemble. 13246 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get(); 13247 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3); 13248 13249 // Sometimes the promoted value of the intrinsic is ANDed by some non-zero 13250 // value. 
If so, pass-through the AND to get to the intrinsic. 13251 if (LHS.getOpcode() == ISD::AND && 13252 LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN && 13253 cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() == 13254 Intrinsic::ppc_is_decremented_ctr_nonzero && 13255 isa<ConstantSDNode>(LHS.getOperand(1)) && 13256 !isNullConstant(LHS.getOperand(1))) 13257 LHS = LHS.getOperand(0); 13258 13259 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN && 13260 cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() == 13261 Intrinsic::ppc_is_decremented_ctr_nonzero && 13262 isa<ConstantSDNode>(RHS)) { 13263 assert((CC == ISD::SETEQ || CC == ISD::SETNE) && 13264 "Counter decrement comparison is not EQ or NE"); 13265 13266 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue(); 13267 bool isBDNZ = (CC == ISD::SETEQ && Val) || 13268 (CC == ISD::SETNE && !Val); 13269 13270 // We now need to make the intrinsic dead (it cannot be instruction 13271 // selected). 13272 DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0)); 13273 assert(LHS.getNode()->hasOneUse() && 13274 "Counter decrement has more than one use"); 13275 13276 return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other, 13277 N->getOperand(0), N->getOperand(4)); 13278 } 13279 13280 int CompareOpc; 13281 bool isDot; 13282 13283 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN && 13284 isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) && 13285 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) { 13286 assert(isDot && "Can't compare against a vector result!"); 13287 13288 // If this is a comparison against something other than 0/1, then we know 13289 // that the condition is never/always true. 13290 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue(); 13291 if (Val != 0 && Val != 1) { 13292 if (CC == ISD::SETEQ) // Cond never true, remove branch. 13293 return N->getOperand(0); 13294 // Always !=, turn it into an unconditional branch. 
13295 return DAG.getNode(ISD::BR, dl, MVT::Other, 13296 N->getOperand(0), N->getOperand(4)); 13297 } 13298 13299 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0); 13300 13301 // Create the PPCISD altivec 'dot' comparison node. 13302 SDValue Ops[] = { 13303 LHS.getOperand(2), // LHS of compare 13304 LHS.getOperand(3), // RHS of compare 13305 DAG.getConstant(CompareOpc, dl, MVT::i32) 13306 }; 13307 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue }; 13308 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops); 13309 13310 // Unpack the result based on how the target uses it. 13311 PPC::Predicate CompOpc; 13312 switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) { 13313 default: // Can't happen, don't crash on invalid number though. 13314 case 0: // Branch on the value of the EQ bit of CR6. 13315 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE; 13316 break; 13317 case 1: // Branch on the inverted value of the EQ bit of CR6. 13318 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ; 13319 break; 13320 case 2: // Branch on the value of the LT bit of CR6. 13321 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE; 13322 break; 13323 case 3: // Branch on the inverted value of the LT bit of CR6. 13324 CompOpc = BranchOnWhenPredTrue ? 
                                PPC::PRED_GE : PPC::PRED_LT;
        break;
      }

      // Branch on CR6 directly instead of materializing the predicate with
      // MFOCRF.
      return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
                         DAG.getConstant(CompOpc, dl, MVT::i32),
                         DAG.getRegister(PPC::CR6, MVT::i32),
                         N->getOperand(4), CompNode.getValue(1));
    }
    break;
  }
  case ISD::BUILD_VECTOR:
    return DAGCombineBuildVector(N, DCI);
  case ISD::ABS:
    return combineABS(N, DCI);
  case ISD::VSELECT:
    return combineVSelect(N, DCI);
  }

  return SDValue();
}

// Fold (sdiv X, pow2) / (sdiv X, -pow2) into a shift-based sequence
// (PPCISD::SRA_ADDZE, optionally negated) instead of a full divide.
// Newly created nodes are recorded in \p Created for the caller.
SDValue
PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                 SelectionDAG &DAG,
                                 SmallVectorImpl<SDNode *> &Created) const {
  // fold (sdiv X, pow2)
  EVT VT = N->getValueType(0);
  // i64 division is only handled on 64-bit subtargets.
  if (VT == MVT::i64 && !Subtarget.isPPC64())
    return SDValue();
  if ((VT != MVT::i32 && VT != MVT::i64) ||
      !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
    return SDValue();

  SDLoc DL(N);
  SDValue N0 = N->getOperand(0);

  bool IsNegPow2 = (-Divisor).isPowerOf2();
  // log2 of the (absolute value of the) divisor, used as the shift amount.
  unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
  SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);

  // SRA_ADDZE performs the arithmetic shift plus the carry correction needed
  // for round-toward-zero signed division.
  SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
  Created.push_back(Op.getNode());

  // For a negative divisor, negate the quotient: 0 - (X >>s Lg2).
  if (IsNegPow2) {
    Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
    Created.push_back(Op.getNode());
  }

  return Op;
}

//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

// Report target-specific known-zero bits for PPC nodes so generic combines
// can exploit them.
void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                      KnownBits &Known,
                                                      const APInt &DemandedElts,
                                                      const SelectionDAG &DAG,
                                                      unsigned Depth) const {
  Known.resetAll();
  switch (Op.getOpcode()) {
  default: break;
  case PPCISD::LBRX: {
    // lhbrx is known to have the top bits cleared out.
    if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
      Known.Zero = 0xFFFF0000;
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    // The AltiVec predicate-compare intrinsics return 0 or 1.
    switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
    default: break;
    case Intrinsic::ppc_altivec_vcmpbfp_p:
    case Intrinsic::ppc_altivec_vcmpeqfp_p:
    case Intrinsic::ppc_altivec_vcmpequb_p:
    case Intrinsic::ppc_altivec_vcmpequh_p:
    case Intrinsic::ppc_altivec_vcmpequw_p:
    case Intrinsic::ppc_altivec_vcmpequd_p:
    case Intrinsic::ppc_altivec_vcmpgefp_p:
    case Intrinsic::ppc_altivec_vcmpgtfp_p:
    case Intrinsic::ppc_altivec_vcmpgtsb_p:
    case Intrinsic::ppc_altivec_vcmpgtsh_p:
    case Intrinsic::ppc_altivec_vcmpgtsw_p:
    case Intrinsic::ppc_altivec_vcmpgtsd_p:
    case Intrinsic::ppc_altivec_vcmpgtub_p:
    case Intrinsic::ppc_altivec_vcmpgtuh_p:
    case Intrinsic::ppc_altivec_vcmpgtuw_p:
    case Intrinsic::ppc_altivec_vcmpgtud_p:
      Known.Zero = ~1U;  // All bits but the low one are known to be zero.
      break;
    }
  }
  }
}

// Returns the log2 of the preferred loop alignment (5 => 32 bytes) for small
// loops on the listed cores; otherwise defers to the generic implementation.
unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
  switch (Subtarget.getDarwinDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8:
  case PPC::DIR_PWR9: {
    if (!ML)
      break;

    const PPCInstrInfo *TII = Subtarget.getInstrInfo();

    // For small loops (between 5 and 8 instructions), align to a 32-byte
    // boundary so that the entire loop fits in one instruction-cache line.
    uint64_t LoopSize = 0;
    for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
      for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
        LoopSize += TII->getInstSizeInBytes(*J);
        // NOTE(review): this break only leaves the inner (instruction) loop;
        // the outer block loop keeps accumulating. The final result is
        // unaffected since any LoopSize > 32 falls through below.
        if (LoopSize > 32)
          break;
      }

    if (LoopSize > 16 && LoopSize <= 32)
      return 5; // 2^5 == 32-byte alignment.

    break;
  }
  }

  return TargetLowering::getPrefLoopAlignment(ML);
}

/// getConstraintType - Given a constraint, return the type of
/// constraint it is for this target.
PPCTargetLowering::ConstraintType
PPCTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default: break;
    case 'b':
    case 'r':
    case 'f':
    case 'd':
    case 'v':
    case 'y':
      return C_RegisterClass;
    case 'Z':
      // FIXME: While Z does indicate a memory constraint, it specifically
      // indicates an r+r address (used in conjunction with the 'y' modifier
      // in the replacement string). Currently, we're forcing the base
      // register to be r0 in the asm printer (which is interpreted as zero)
      // and forming the complete address in the second register. This is
      // suboptimal.
      return C_Memory;
    }
  } else if (Constraint == "wc") { // individual CR bits.
    return C_RegisterClass;
  } else if (Constraint == "wa" || Constraint == "wd" ||
             Constraint == "wf" || Constraint == "ws" ||
             Constraint == "wi") {
    return C_RegisterClass; // VSX registers.
  }
  return TargetLowering::getConstraintType(Constraint);
}

/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
PPCTargetLowering::getSingleConstraintMatchWeight(
    AsmOperandInfo &info, const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (!CallOperandVal)
    return CW_Default;
  Type *type = CallOperandVal->getType();

  // Look at the constraint type. Multi-letter 'w' constraints are handled
  // before the single-letter switch below.
  if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
    return CW_Register; // an individual CR bit.
  else if ((StringRef(constraint) == "wa" ||
            StringRef(constraint) == "wd" ||
            StringRef(constraint) == "wf") &&
           type->isVectorTy())
    return CW_Register;
  else if (StringRef(constraint) == "ws" && type->isDoubleTy())
    return CW_Register;
  else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
    return CW_Register; // just hold 64-bit integers data.
  // Single-letter constraints: weight by whether the operand's IR type
  // matches the register class the letter names.
  switch (*constraint) {
  default:
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    break;
  case 'b': // base register (no r0)
    if (type->isIntegerTy())
      weight = CW_Register;
    break;
  case 'f': // single-precision FP register
    if (type->isFloatTy())
      weight = CW_Register;
    break;
  case 'd': // double-precision FP register
    if (type->isDoubleTy())
      weight = CW_Register;
    break;
  case 'v': // vector register
    if (type->isVectorTy())
      weight = CW_Register;
    break;
  case 'y': // condition register
    weight = CW_Register;
    break;
  case 'Z': // r+r memory operand
    weight = CW_Memory;
    break;
  }
  return weight;
}

// Map an inline-asm register constraint to a (register, register class) pair
// for the requested value type.
std::pair<unsigned, const TargetRegisterClass *>
PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                StringRef Constraint,
                                                MVT VT) const {
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
    case 'b':   // R1-R31
      if (VT == MVT::i64 && Subtarget.isPPC64())
        return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
      return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
    case 'r':   // R0-R31
      if (VT == MVT::i64 && Subtarget.isPPC64())
        return std::make_pair(0U, &PPC::G8RCRegClass);
      return std::make_pair(0U, &PPC::GPRCRegClass);
    // 'd' and 'f' constraints are both defined to be "the floating point
    // registers", where one is for 32-bit and the other for 64-bit. We don't
    // really care overly much here so just give them all the same reg classes.
    case 'd':
    case 'f':
      if (Subtarget.hasSPE()) {
        // SPE uses GPR-based FP; pick the 32- or 64-bit SPE class.
        if (VT == MVT::f32 || VT == MVT::i32)
          return std::make_pair(0U, &PPC::SPE4RCRegClass);
        if (VT == MVT::f64 || VT == MVT::i64)
          return std::make_pair(0U, &PPC::SPERCRegClass);
      } else {
        if (VT == MVT::f32 || VT == MVT::i32)
          return std::make_pair(0U, &PPC::F4RCRegClass);
        if (VT == MVT::f64 || VT == MVT::i64)
          return std::make_pair(0U, &PPC::F8RCRegClass);
        // QPX vector types also live in the FP constraint space.
        if (VT == MVT::v4f64 && Subtarget.hasQPX())
          return std::make_pair(0U, &PPC::QFRCRegClass);
        if (VT == MVT::v4f32 && Subtarget.hasQPX())
          return std::make_pair(0U, &PPC::QSRCRegClass);
      }
      break;
    case 'v':
      if (VT == MVT::v4f64 && Subtarget.hasQPX())
        return std::make_pair(0U, &PPC::QFRCRegClass);
      if (VT == MVT::v4f32 && Subtarget.hasQPX())
        return std::make_pair(0U, &PPC::QSRCRegClass);
      if (Subtarget.hasAltivec())
        return std::make_pair(0U, &PPC::VRRCRegClass);
      break;
    case 'y':   // crrc
      return std::make_pair(0U, &PPC::CRRCRegClass);
    }
  } else if (Constraint == "wc" && Subtarget.useCRBits()) {
    // An individual CR bit.
    return std::make_pair(0U, &PPC::CRBITRCRegClass);
  } else if ((Constraint == "wa" || Constraint == "wd" ||
             Constraint == "wf" || Constraint == "wi") &&
             Subtarget.hasVSX()) {
    return std::make_pair(0U, &PPC::VSRCRegClass);
  } else if (Constraint == "ws" && Subtarget.hasVSX()) {
    // f32 in a VSX register requires P8 vector support.
    if (VT == MVT::f32 && Subtarget.hasP8Vector())
      return std::make_pair(0U, &PPC::VSSRCRegClass);
    else
      return std::make_pair(0U, &PPC::VSFRCRegClass);
  }

  std::pair<unsigned, const TargetRegisterClass *> R =
      TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);

  // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
  // (which we call X[0-9]+).
  // If a 64-bit value has been requested, and a
  // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
  // register.
  // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
  // the AsmName field from *RegisterInfo.td, then this would not be necessary.
  if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
      PPC::GPRCRegClass.contains(R.first))
    return std::make_pair(TRI->getMatchingSuperReg(R.first,
                            PPC::sub_32, &PPC::G8RCRegClass),
                          &PPC::G8RCRegClass);

  // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
  if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
    R.first = PPC::CR0;
    R.second = &PPC::CRRCRegClass;
  }

  return R;
}

/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                     std::string &Constraint,
                                                     std::vector<SDValue> &Ops,
                                                     SelectionDAG &DAG) const {
  SDValue Result;

  // Only support length 1 constraints.
  if (Constraint.length() > 1) return;

  char Letter = Constraint[0];
  switch (Letter) {
  default: break;
  case 'I':
  case 'J':
  case 'K':
  case 'L':
  case 'M':
  case 'N':
  case 'O':
  case 'P': {
    // All of these constraints only accept immediates.
    ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
    if (!CST) return; // Must be an immediate to match.
    SDLoc dl(Op);
    int64_t Value = CST->getSExtValue();
    EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
                         // numbers are printed as such.
    switch (Letter) {
    default: llvm_unreachable("Unknown constraint letter!");
    case 'I': // "I" is a signed 16-bit constant.
      if (isInt<16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
      if (isShiftedUInt<16, 16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
      if (isShiftedInt<16, 16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
      if (isUInt<16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'M': // "M" is a constant that is greater than 31.
      if (Value > 31)
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'N': // "N" is a positive constant that is an exact power of two.
      if (Value > 0 && isPowerOf2_64(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'O': // "O" is the constant zero.
      if (Value == 0)
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
      if (isInt<16>(-Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    }
    break;
  }
  }

  if (Result.getNode()) {
    Ops.push_back(Result);
    return;
  }

  // Handle standard constraint letters.
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                              const AddrMode &AM, Type *Ty,
                                              unsigned AS, Instruction *I) const {
  // PPC does not allow r+i addressing modes for vectors!
  if (Ty->isVectorTy() && AM.BaseOffs != 0)
    return false;

  // PPC allows a sign-extended 16-bit immediate field.
  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
    return false;

  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // PPC only supports r+r addressing:
  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
      return false;
    // Otherwise we have r+r or r+i.
    break;
  case 2:
    if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
      return false;
    // Allow 2*r as r+r.
    break;
  default:
    // No other scales are supported.
    return false;
  }

  return true;
}

// Lower @llvm.returnaddress: load the saved LR value for the requested frame
// depth from the stack.
SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
                                           SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  SDLoc dl(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  // Make sure the function does not optimize away the store of the RA to
  // the stack.
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setLRStoreRequired();
  bool isPPC64 = Subtarget.isPPC64();
  auto PtrVT = getPointerTy(MF.getDataLayout());

  if (Depth > 0) {
    // Walk up Depth frames, then load LR from its ABI-defined save slot.
    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
    SDValue Offset =
        DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
                        isPPC64 ? MVT::i64 : MVT::i32);
    return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
                       MachinePointerInfo());
  }

  // Just load the return address off the stack.
  SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
                     MachinePointerInfo());
}

// Lower @llvm.frameaddress: start from the frame register and follow the
// back-chain once per requested depth level.
SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc dl(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setFrameAddressIsTaken(true);

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  bool isPPC64 = PtrVT == MVT::i64;

  // Naked functions never have a frame pointer, and so we use r1. For all
  // other functions, this decision must be delayed until during PEI.
  unsigned FrameReg;
  if (MF.getFunction().hasFnAttribute(Attribute::Naked))
    FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
  else
    FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;

  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
                                         PtrVT);
  while (Depth--)
    FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
                            FrameAddr, MachinePointerInfo());
  return FrameAddr;
}

// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
// Resolve a named register (for "register asm" global variables) to a
// physical register, or report a fatal error if the name/type is invalid.
unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT,
                                              SelectionDAG &DAG) const {
  bool isPPC64 = Subtarget.isPPC64();
  bool isDarwinABI = Subtarget.isDarwinABI();

  if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) ||
      (!isPPC64 && VT != MVT::i32))
    report_fatal_error("Invalid register global variable type");

  bool is64Bit = isPPC64 && VT == MVT::i64;
  // r2/r13 are reserved (TOC/thread pointer) on some ABIs; map those to 0 so
  // the request is rejected below.
  unsigned Reg = StringSwitch<unsigned>(RegName)
                   .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
                   .Case("r2", (isDarwinABI || isPPC64) ? 0 : PPC::R2)
                   .Case("r13", (!isPPC64 && isDarwinABI) ? 0 :
                                  (is64Bit ? PPC::X13 : PPC::R13))
                   .Default(0);

  if (Reg)
    return Reg;
  report_fatal_error("Invalid register name global variable");
}

// Returns true if a global/jump-table/block-address access must go through a
// GOT/TOC indirection under the current ABI and code model.
bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
  // 32-bit SVR4 ABI access everything as got-indirect.
  if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
    return true;

  CodeModel::Model CModel = getTargetMachine().getCodeModel();
  // If it is small or large code model, module locals are accessed
  // indirectly by loading their address from .toc/.got. The difference
  // is that for large code model we have ADDISTocHa + LDtocL and for
  // small code model we simply have LDtoc.
  if (CModel == CodeModel::Small || CModel == CodeModel::Large)
    return true;

  // JumpTable and BlockAddress are accessed as got-indirect.
  if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
    return true;

  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
    const GlobalValue *GV = G->getGlobal();
    unsigned char GVFlags = Subtarget.classifyGlobalReference(GV);
    // The NLP flag indicates that a global access has to use an
    // extra indirection.
    if (GVFlags & PPCII::MO_NLP_FLAG)
      return true;
  }

  return false;
}

bool
PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The PowerPC target isn't yet aware of offsets.
  return false;
}

// Describe the memory touched by target memory intrinsics so the optimizer
// can reason about aliasing and scheduling.
bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                           const CallInst &I,
                                           MachineFunction &MF,
                                           unsigned Intrinsic) const {
  switch (Intrinsic) {
  case Intrinsic::ppc_qpx_qvlfd:
  case Intrinsic::ppc_qpx_qvlfs:
  case Intrinsic::ppc_qpx_qvlfcd:
  case Intrinsic::ppc_qpx_qvlfcs:
  case Intrinsic::ppc_qpx_qvlfiwa:
  case Intrinsic::ppc_qpx_qvlfiwz:
  case Intrinsic::ppc_altivec_lvx:
  case Intrinsic::ppc_altivec_lvxl:
  case Intrinsic::ppc_altivec_lvebx:
  case Intrinsic::ppc_altivec_lvehx:
  case Intrinsic::ppc_altivec_lvewx:
  case Intrinsic::ppc_vsx_lxvd2x:
  case Intrinsic::ppc_vsx_lxvw4x: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_altivec_lvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_lvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_lvewx:
      VT = MVT::i32;
      break;
    case Intrinsic::ppc_vsx_lxvd2x:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvlfd:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvlfs:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvlfcd:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvlfcs:
      VT = MVT::v2f32;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(0);
    // These loads implicitly align the pointer down; conservatively report
    // the whole window [-size+1, +size-1] around the pointer as accessed.
    Info.offset = -VT.getStoreSize()+1;
    Info.size = 2*VT.getStoreSize()-1;
    Info.align = 1;
    Info.flags = MachineMemOperand::MOLoad;
    return true;
  }
  case
Intrinsic::ppc_qpx_qvlfda:
  case Intrinsic::ppc_qpx_qvlfsa:
  case Intrinsic::ppc_qpx_qvlfcda:
  case Intrinsic::ppc_qpx_qvlfcsa:
  case Intrinsic::ppc_qpx_qvlfiwaa:
  case Intrinsic::ppc_qpx_qvlfiwza: {
    // The 'a' (aligned) QPX load variants access exactly one element's worth
    // of memory at the given pointer.
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_qpx_qvlfda:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvlfsa:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvlfcda:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvlfcsa:
      VT = MVT::v2f32;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.size = VT.getStoreSize();
    Info.align = 1;
    Info.flags = MachineMemOperand::MOLoad;
    return true;
  }
  case Intrinsic::ppc_qpx_qvstfd:
  case Intrinsic::ppc_qpx_qvstfs:
  case Intrinsic::ppc_qpx_qvstfcd:
  case Intrinsic::ppc_qpx_qvstfcs:
  case Intrinsic::ppc_qpx_qvstfiw:
  case Intrinsic::ppc_altivec_stvx:
  case Intrinsic::ppc_altivec_stvxl:
  case Intrinsic::ppc_altivec_stvebx:
  case Intrinsic::ppc_altivec_stvehx:
  case Intrinsic::ppc_altivec_stvewx:
  case Intrinsic::ppc_vsx_stxvd2x:
  case Intrinsic::ppc_vsx_stxvw4x: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_altivec_stvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_stvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_stvewx:
      VT = MVT::i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvstfd:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvstfs:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvstfcd:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvstfcs:
      VT = MVT::v2f32;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_VOID;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(1);
    // As with the unaligned loads above, conservatively report the full
    // window the implicitly-aligning store may touch.
    Info.offset = -VT.getStoreSize()+1;
    Info.size = 2*VT.getStoreSize()-1;
    Info.align = 1;
    Info.flags = MachineMemOperand::MOStore;
    return true;
  }
  case Intrinsic::ppc_qpx_qvstfda:
  case Intrinsic::ppc_qpx_qvstfsa:
  case Intrinsic::ppc_qpx_qvstfcda:
  case Intrinsic::ppc_qpx_qvstfcsa:
  case Intrinsic::ppc_qpx_qvstfiwa: {
    // Aligned QPX store variants: exactly one element's worth of memory.
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_qpx_qvstfda:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvstfsa:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvstfcda:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvstfcsa:
      VT = MVT::v2f32;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_VOID;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(1);
    Info.offset = 0;
    Info.size = VT.getStoreSize();
    Info.align = 1;
    Info.flags = MachineMemOperand::MOStore;
    return true;
  }
  default:
    break;
  }

  return false;
}

/// getOptimalMemOpType - Returns the target specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
/// lowering. If DstAlign is zero that means any destination
/// alignment can satisfy the constraint. Similarly if SrcAlign is zero it
/// means there isn't a need to check it against alignment requirement,
/// probably because the source does not need to be loaded. If 'IsMemset' is
/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
/// means it's a memset of zero.
'MemcpyStrSrc' indicates whether the memcpy 14061 /// source is constant so it does not need to be loaded. 14062 /// It returns EVT::Other if the type should be determined using generic 14063 /// target-independent logic. 14064 EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, 14065 unsigned DstAlign, unsigned SrcAlign, 14066 bool IsMemset, bool ZeroMemset, 14067 bool MemcpyStrSrc, 14068 MachineFunction &MF) const { 14069 if (getTargetMachine().getOptLevel() != CodeGenOpt::None) { 14070 const Function &F = MF.getFunction(); 14071 // When expanding a memset, require at least two QPX instructions to cover 14072 // the cost of loading the value to be stored from the constant pool. 14073 if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) && 14074 (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) && 14075 !F.hasFnAttribute(Attribute::NoImplicitFloat)) { 14076 return MVT::v4f64; 14077 } 14078 14079 // We should use Altivec/VSX loads and stores when available. For unaligned 14080 // addresses, unaligned VSX loads are only fast starting with the P8. 14081 if (Subtarget.hasAltivec() && Size >= 16 && 14082 (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) || 14083 ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector()))) 14084 return MVT::v4i32; 14085 } 14086 14087 if (Subtarget.isPPC64()) { 14088 return MVT::i64; 14089 } 14090 14091 return MVT::i32; 14092 } 14093 14094 /// Returns true if it is beneficial to convert a load of a constant 14095 /// to just the constant itself. 
14096 bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, 14097 Type *Ty) const { 14098 assert(Ty->isIntegerTy()); 14099 14100 unsigned BitSize = Ty->getPrimitiveSizeInBits(); 14101 return !(BitSize == 0 || BitSize > 64); 14102 } 14103 14104 bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { 14105 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) 14106 return false; 14107 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); 14108 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); 14109 return NumBits1 == 64 && NumBits2 == 32; 14110 } 14111 14112 bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { 14113 if (!VT1.isInteger() || !VT2.isInteger()) 14114 return false; 14115 unsigned NumBits1 = VT1.getSizeInBits(); 14116 unsigned NumBits2 = VT2.getSizeInBits(); 14117 return NumBits1 == 64 && NumBits2 == 32; 14118 } 14119 14120 bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { 14121 // Generally speaking, zexts are not free, but they are free when they can be 14122 // folded with other operations. 14123 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) { 14124 EVT MemVT = LD->getMemoryVT(); 14125 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 || 14126 (Subtarget.isPPC64() && MemVT == MVT::i32)) && 14127 (LD->getExtensionType() == ISD::NON_EXTLOAD || 14128 LD->getExtensionType() == ISD::ZEXTLOAD)) 14129 return true; 14130 } 14131 14132 // FIXME: Add other cases... 14133 // - 32-bit shifts with a zext to i64 14134 // - zext after ctlz, bswap, etc. 14135 // - zext after and by a constant mask 14136 14137 return TargetLowering::isZExtFree(Val, VT2); 14138 } 14139 14140 bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const { 14141 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() && 14142 "invalid fpext types"); 14143 // Extending to float128 is not free. 
14144 if (DestVT == MVT::f128) 14145 return false; 14146 return true; 14147 } 14148 14149 bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 14150 return isInt<16>(Imm) || isUInt<16>(Imm); 14151 } 14152 14153 bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const { 14154 return isInt<16>(Imm) || isUInt<16>(Imm); 14155 } 14156 14157 bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, 14158 unsigned, 14159 unsigned, 14160 bool *Fast) const { 14161 if (DisablePPCUnaligned) 14162 return false; 14163 14164 // PowerPC supports unaligned memory access for simple non-vector types. 14165 // Although accessing unaligned addresses is not as efficient as accessing 14166 // aligned addresses, it is generally more efficient than manual expansion, 14167 // and generally only traps for software emulation when crossing page 14168 // boundaries. 14169 14170 if (!VT.isSimple()) 14171 return false; 14172 14173 if (VT.getSimpleVT().isVector()) { 14174 if (Subtarget.hasVSX()) { 14175 if (VT != MVT::v2f64 && VT != MVT::v2i64 && 14176 VT != MVT::v4f32 && VT != MVT::v4i32) 14177 return false; 14178 } else { 14179 return false; 14180 } 14181 } 14182 14183 if (VT == MVT::ppcf128) 14184 return false; 14185 14186 if (Fast) 14187 *Fast = true; 14188 14189 return true; 14190 } 14191 14192 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { 14193 VT = VT.getScalarType(); 14194 14195 if (!VT.isSimple()) 14196 return false; 14197 14198 switch (VT.getSimpleVT().SimpleTy) { 14199 case MVT::f32: 14200 case MVT::f64: 14201 return true; 14202 case MVT::f128: 14203 return (EnableQuadPrecision && Subtarget.hasP9Vector()); 14204 default: 14205 break; 14206 } 14207 14208 return false; 14209 } 14210 14211 const MCPhysReg * 14212 PPCTargetLowering::getScratchRegisters(CallingConv::ID) const { 14213 // LR is a callee-save register, but we must treat it as clobbered by any call 14214 // site. 
Hence we include LR in the scratch registers, which are in turn added 14215 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies 14216 // to CTR, which is used by any indirect call. 14217 static const MCPhysReg ScratchRegs[] = { 14218 PPC::X12, PPC::LR8, PPC::CTR8, 0 14219 }; 14220 14221 return ScratchRegs; 14222 } 14223 14224 unsigned PPCTargetLowering::getExceptionPointerRegister( 14225 const Constant *PersonalityFn) const { 14226 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3; 14227 } 14228 14229 unsigned PPCTargetLowering::getExceptionSelectorRegister( 14230 const Constant *PersonalityFn) const { 14231 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4; 14232 } 14233 14234 bool 14235 PPCTargetLowering::shouldExpandBuildVectorWithShuffles( 14236 EVT VT , unsigned DefinedValues) const { 14237 if (VT == MVT::v2i64) 14238 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves 14239 14240 if (Subtarget.hasVSX() || Subtarget.hasQPX()) 14241 return true; 14242 14243 return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues); 14244 } 14245 14246 Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const { 14247 if (DisableILPPref || Subtarget.enableMachineScheduler()) 14248 return TargetLowering::getSchedulingPreference(N); 14249 14250 return Sched::ILP; 14251 } 14252 14253 // Create a fast isel object. 
FastISel *
PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
                                  const TargetLibraryInfo *LibInfo) const {
  return PPC::createFastISel(FuncInfo, LibInfo);
}

// Record in PPCFunctionInfo that this function uses split CSR handling
// (see insertCopiesSplitCSR below). Only done for 64-bit non-Darwin ABIs.
void PPCTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
  if (Subtarget.isDarwinABI()) return;
  if (!Subtarget.isPPC64()) return;

  // Update IsSplitCSR in PPCFunctionInfo
  PPCFunctionInfo *PFI = Entry->getParent()->getInfo<PPCFunctionInfo>();
  PFI->setIsSplitCSR(true);
}

// For functions using split CSR handling, save each callee-saved register
// via a COPY into a fresh virtual register in the entry block, and restore
// it with a COPY back before the terminator of every exit block.
void PPCTargetLowering::insertCopiesSplitCSR(
    MachineBasicBlock *Entry,
    const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
  // Null-terminated list of registers to handle via copy; null means none.
  const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
  if (!IStart)
    return;

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
  MachineBasicBlock::iterator MBBI = Entry->begin();
  for (const MCPhysReg *I = IStart; *I; ++I) {
    // Pick the register class matching the physical register so the virtual
    // register can hold its value.
    const TargetRegisterClass *RC = nullptr;
    if (PPC::G8RCRegClass.contains(*I))
      RC = &PPC::G8RCRegClass;
    else if (PPC::F8RCRegClass.contains(*I))
      RC = &PPC::F8RCRegClass;
    else if (PPC::CRRCRegClass.contains(*I))
      RC = &PPC::CRRCRegClass;
    else if (PPC::VRRCRegClass.contains(*I))
      RC = &PPC::VRRCRegClass;
    else
      llvm_unreachable("Unexpected register class in CSRsViaCopy!");

    unsigned NewVR = MRI->createVirtualRegister(RC);
    // Create copy from CSR to a virtual register.
    // FIXME: this currently does not emit CFI pseudo-instructions, it works
    // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
    // nounwind. If we want to generalize this later, we may need to emit
    // CFI pseudo-instructions.
    assert(Entry->getParent()->getFunction().hasFnAttribute(
               Attribute::NoUnwind) &&
           "Function should be nounwind in insertCopiesSplitCSR!");
    Entry->addLiveIn(*I);
    BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
        .addReg(*I);

    // Insert the copy-back instructions right before the terminator
    for (auto *Exit : Exits)
      BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
              TII->get(TargetOpcode::COPY), *I)
          .addReg(NewVR);
  }
}

// Override to enable LOAD_STACK_GUARD lowering on Linux.
bool PPCTargetLowering::useLoadStackGuardNode() const {
  if (!Subtarget.isTargetLinux())
    return TargetLowering::useLoadStackGuardNode();
  return true;
}

// Override to disable global variable loading on Linux.
void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
  if (!Subtarget.isTargetLinux())
    return TargetLowering::insertSSPDeclarations(M);
}

// Returns true if the given FP immediate can be materialized cheaply.
// Only positive zero qualifies, and only with VSX available.
bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                     bool ForCodeSize) const {
  if (!VT.isSimple() || !Subtarget.hasVSX())
    return false;

  switch(VT.getSimpleVT().SimpleTy) {
  default:
    // For FP types that are currently not supported by PPC backend, return
    // false. Examples: f16, f80.
    return false;
  case MVT::f32:
  case MVT::f64:
  case MVT::ppcf128:
    return Imm.isPosZero();
  }
}

// For vector shift operation op, fold
// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
// The PPCISD shift nodes are presumed to use only the low log2(numbits(x))
// bits of the shift amount, so the masking AND is redundant.
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
                                  SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();
  unsigned Opcode = N->getOpcode();
  unsigned TargetOpcode;

  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected shift operation");
  case ISD::SHL:
    TargetOpcode = PPCISD::SHL;
    break;
  case ISD::SRL:
    TargetOpcode = PPCISD::SRL;
    break;
  case ISD::SRA:
    TargetOpcode = PPCISD::SRA;
    break;
  }

  // Match an AND by the all-low-bits mask on the shift amount and drop it.
  if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
      N1->getOpcode() == ISD::AND)
    if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
      if (Mask->getZExtValue() == OpSizeInBits - 1)
        return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));

  return SDValue();
}

SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
    return Value;

  // Otherwise, try to fold (shl (sign_extend i32 x), c) into a single
  // PPCISD::EXTSWSLI node. Requires ISA 3.0, an i64 result, and a constant
  // shift amount.
  SDValue N0 = N->getOperand(0);
  ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!Subtarget.isISA3_0() ||
      N0.getOpcode() != ISD::SIGN_EXTEND ||
      N0.getOperand(0).getValueType() != MVT::i32 ||
      CN1 == nullptr || N->getValueType(0) != MVT::i64)
    return SDValue();

  // We can't save an operation here if the value is already extended, and
  // the existing shift is easier to combine.
  SDValue ExtsSrc = N0.getOperand(0);
  if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
      ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
    return SDValue();

  SDLoc DL(N0);
  SDValue ShiftBy = SDValue(CN1, 0);
  // We want the shift amount to be i32 on the extswli, but the shift could
  // have an i64.
  if (ShiftBy.getValueType() == MVT::i64)
    ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);

  return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
                         ShiftBy);
}

SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
  // Only the modulo-mask strip applies to SRA for now.
  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
    return Value;

  return SDValue();
}

SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
  // Only the modulo-mask strip applies to SRL for now.
  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
    return Value;

  return SDValue();
}

// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
// When C is zero, the equation (addi Z, -C) can be simplified to Z
// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
                                 const PPCSubtarget &Subtarget) {
  if (!Subtarget.isPPC64())
    return SDValue();

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  // Matches (zext (setcc (i64 Z), C, cc)) where -C fits in a 16-bit signed
  // immediate, i.e. where an addi can fold the constant.
  auto isZextOfCompareWithConstant = [](SDValue Op) {
    if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
        Op.getValueType() != MVT::i64)
      return false;

    SDValue Cmp = Op.getOperand(0);
    if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
        Cmp.getOperand(0).getValueType() != MVT::i64)
      return false;

    if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
      int64_t NegConstant = 0 - Constant->getSExtValue();
      // Due to the limitations of the addi instruction,
      // -C is required to be [-32768, 32767].
      return isInt<16>(NegConstant);
    }

    return false;
  };

  bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
  bool RHSHasPattern = isZextOfCompareWithConstant(RHS);

  // If there is a pattern, canonicalize a zext operand to the RHS.
  if (LHSHasPattern && !RHSHasPattern)
    std::swap(LHS, RHS);
  else if (!LHSHasPattern && !RHSHasPattern)
    return SDValue();

  SDLoc DL(N);
  // Result plus glue to thread the carry into the ADDE below.
  SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
  SDValue Cmp = RHS.getOperand(0);
  SDValue Z = Cmp.getOperand(0);
  auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1));

  assert(Constant && "Constant Should not be a null pointer.");
  int64_t NegConstant = 0 - Constant->getSExtValue();

  switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
  default: break;
  case ISD::SETNE: {
    //                                 when C == 0
    //                             --> addze X, (addic Z, -1).carry
    //                            /
    // add X, (zext(setne Z, C))--
    //                            \    when -32768 <= -C <= 32767 && C != 0
    //                             --> addze X, (addic (addi Z, -C), -1).carry
    SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
                              DAG.getConstant(NegConstant, DL, MVT::i64));
    SDValue AddOrZ = NegConstant != 0 ? Add : Z;
    SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
                               AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
    return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
                       SDValue(Addc.getNode(), 1));
  }
  case ISD::SETEQ: {
    //                                 when C == 0
    //                             --> addze X, (subfic Z, 0).carry
    //                            /
    // add X, (zext(sete  Z, C))--
    //                            \    when -32768 <= -C <= 32767 && C != 0
    //                             --> addze X, (subfic (addi Z, -C), 0).carry
    SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
                              DAG.getConstant(NegConstant, DL, MVT::i64));
    SDValue AddOrZ = NegConstant != 0 ? Add : Z;
    SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
                               DAG.getConstant(0, DL, MVT::i64), AddOrZ);
    return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
                       SDValue(Subc.getNode(), 1));
  }
  }

  return SDValue();
}

SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
  if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
    return Value;

  return SDValue();
}

// Detect TRUNCATE operations on bitcasts of float128 values.
// What we are looking for here is the situation where we extract a subset
// of bits from a 128 bit float.
// This can be of two forms:
// 1) BITCAST of f128 feeding TRUNCATE
// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
// The reason this is required is because we do not have a legal i128 type
// and so we want to prevent having to store the f128 and then reload part
// of it.
SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
                                           DAGCombinerInfo &DCI) const {
  // If we are using CRBits then try that first.
  if (Subtarget.useCRBits()) {
    // Check if CRBits did anything and return that if it did.
    if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
      return CRTruncValue;
  }

  SDLoc dl(N);
  SDValue Op0 = N->getOperand(0);

  // Looking for a truncate of i128 to i64.
  if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
    return SDValue();

  // Which 64-bit half of the i128 holds the low bits depends on endianness.
  int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;

  // SRL feeding TRUNCATE.
  if (Op0.getOpcode() == ISD::SRL) {
    ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
    // The right shift has to be by 64 bits.
    if (!ConstNode || ConstNode->getZExtValue() != 64)
      return SDValue();

    // Switch the element number to extract.
    EltToExtract = EltToExtract ? 0 : 1;
    // Update Op0 past the SRL.
    Op0 = Op0.getOperand(0);
  }

  // BITCAST feeding a TRUNCATE possibly via SRL.
  if (Op0.getOpcode() == ISD::BITCAST &&
      Op0.getValueType() == MVT::i128 &&
      Op0.getOperand(0).getValueType() == MVT::f128) {
    // Reinterpret the f128 as v2i64 and extract the desired half directly,
    // avoiding a store of the f128 followed by a partial reload.
    SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
    return DCI.DAG.getNode(
        ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
        DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
  }
  return SDValue();
}

// Returns true if this call site may profitably be duplicated (e.g. by tail
// duplication) to expose more tail calls.
bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
  if (!Subtarget.isSVR4ABI() || !Subtarget.isPPC64())
    return false;

  // If not a tail call then no need to proceed.
  if (!CI->isTailCall())
    return false;

  // If tail calls are disabled for the caller then we are done.
  const Function *Caller = CI->getParent()->getParent();
  auto Attr = Caller->getFnAttribute("disable-tail-calls");
  if (Attr.getValueAsString() == "true")
    return false;

  // If sibling calls have been disabled and tail-calls aren't guaranteed
  // there is no reason to duplicate.
  auto &TM = getTargetMachine();
  if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
    return false;

  // Can't tail call a function called indirectly, or if it has variadic args.
  const Function *Callee = CI->getCalledFunction();
  if (!Callee || Callee->isVarArg())
    return false;

  // Make sure the callee and caller calling conventions are eligible for tco.
  if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
                                           CI->getCallingConv()))
    return false;

  // If the function is local then we have a good chance at tail-calling it
  return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
}

// FP/vector types on which bitwise logic can be applied via a bitcast:
// requires VSX, and f128 additionally requires P9 vector support.
bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
  if (!Subtarget.hasVSX())
    return false;
  if (Subtarget.hasP9Vector() && VT == MVT::f128)
    return true;
  return VT == MVT::f32 || VT == MVT::f64 ||
         VT == MVT::v4f32 || VT == MVT::v2f64;
}

bool PPCTargetLowering::
isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
  const Value *Mask = AndI.getOperand(1);
  // If the mask is suitable for andi. or andis. we should sink the and.
  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
    // Can't handle constants wider than 64-bits.
    if (CI->getBitWidth() > 64)
      return false;
    int64_t ConstVal = CI->getZExtValue();
    // Either a 16-bit unsigned immediate (andi.) or the same shifted left by
    // 16 bits with the low half clear (andis.).
    return isUInt<16>(ConstVal) ||
      (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
  }

  // For non-constant masks, we can always use the record-form and.
  return true;
}

// Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0)
// Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0)
// Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0)
// Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0)
// Transform (abs (sub a, b) to (vabsd a b 1)) if a & b of type v4i32
SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::ABS) && "Need ABS node here");
  assert(Subtarget.hasP9Altivec() &&
         "Only combine this when P9 altivec supported!");
  // VABSD is only produced for these element types.
  EVT VT = N->getValueType(0);
  if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  if (N->getOperand(0).getOpcode() == ISD::SUB) {
    // Even for signed integers, if it's known to be positive (as signed
    // integer) due to zero-extended inputs.
    unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode();
    unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode();
    if ((SubOpcd0 == ISD::ZERO_EXTEND ||
         SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) &&
        (SubOpcd1 == ISD::ZERO_EXTEND ||
         SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) {
      // Third operand 0: inputs are unsigned, no operand adjustment needed.
      return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
                         N->getOperand(0)->getOperand(0),
                         N->getOperand(0)->getOperand(1),
                         DAG.getTargetConstant(0, dl, MVT::i32));
    }

    // For type v4i32, it can be optimized with xvnegsp + vabsduw
    if (N->getOperand(0).getValueType() == MVT::v4i32 &&
        N->getOperand(0).hasOneUse()) {
      // Third operand 1: operands may be signed; lowering is expected to
      // bias them (xvnegsp) before the unsigned absolute difference.
      return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
                         N->getOperand(0)->getOperand(0),
                         N->getOperand(0)->getOperand(1),
                         DAG.getTargetConstant(1, dl, MVT::i32));
    }
  }

  return SDValue();
}

// For type v4i32/v8i16/v16i8, transform
// from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b)
// from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b)
// from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b)
// from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b)
SDValue PPCTargetLowering::combineVSelect(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here");
  assert(Subtarget.hasP9Altivec() &&
         "Only combine this when P9 altivec supported!");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Cond = N->getOperand(0);
  SDValue TrueOpnd = N->getOperand(1);
  SDValue FalseOpnd = N->getOperand(2);
  EVT VT = N->getOperand(1).getValueType();

  if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||
      FalseOpnd.getOpcode() != ISD::SUB)
    return SDValue();

  // ABSD only available for type v4i32/v8i16/v16i8
  if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
    return SDValue();

  // At least to save one more dependent computation
  // NOTE(review): this bails only when *none* of the three nodes is
  // single-use; confirm the intent was not to require all of them to be.
  if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))
    return SDValue();

  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();

  // Can only handle unsigned comparison here
  switch (CC) {
  default:
    return SDValue();
  case ISD::SETUGT:
  case ISD::SETUGE:
    break;
  case ISD::SETULT:
  case ISD::SETULE:
    // Canonicalize so TrueOpnd is (a - b) for the comparison (a CC b).
    std::swap(TrueOpnd, FalseOpnd);
    break;
  }

  SDValue CmpOpnd1 = Cond.getOperand(0);
  SDValue CmpOpnd2 = Cond.getOperand(1);

  // SETCC CmpOpnd1 CmpOpnd2 cond
  // TrueOpnd = CmpOpnd1 - CmpOpnd2
  // FalseOpnd = CmpOpnd2 - CmpOpnd1
  if (TrueOpnd.getOperand(0) == CmpOpnd1 &&
      TrueOpnd.getOperand(1) == CmpOpnd2 &&
      FalseOpnd.getOperand(0) == CmpOpnd2 &&
      FalseOpnd.getOperand(1) == CmpOpnd1) {
    return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(1).getValueType(),
                       CmpOpnd1, CmpOpnd2,
                       DAG.getTargetConstant(0, dl, MVT::i32));
  }

  return SDValue();
}