1 //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the PPCISelLowering class. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "PPCISelLowering.h" 15 #include "MCTargetDesc/PPCPredicates.h" 16 #include "PPCCallingConv.h" 17 #include "PPCCCState.h" 18 #include "PPCMachineFunctionInfo.h" 19 #include "PPCPerfectShuffle.h" 20 #include "PPCTargetMachine.h" 21 #include "PPCTargetObjectFile.h" 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/ADT/Statistic.h" 24 #include "llvm/ADT/StringSwitch.h" 25 #include "llvm/ADT/Triple.h" 26 #include "llvm/CodeGen/CallingConvLower.h" 27 #include "llvm/CodeGen/MachineFrameInfo.h" 28 #include "llvm/CodeGen/MachineFunction.h" 29 #include "llvm/CodeGen/MachineInstrBuilder.h" 30 #include "llvm/CodeGen/MachineLoopInfo.h" 31 #include "llvm/CodeGen/MachineRegisterInfo.h" 32 #include "llvm/CodeGen/SelectionDAG.h" 33 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 34 #include "llvm/IR/CallingConv.h" 35 #include "llvm/IR/Constants.h" 36 #include "llvm/IR/DerivedTypes.h" 37 #include "llvm/IR/Function.h" 38 #include "llvm/IR/Intrinsics.h" 39 #include "llvm/Support/CommandLine.h" 40 #include "llvm/Support/ErrorHandling.h" 41 #include "llvm/Support/Format.h" 42 #include "llvm/Support/MathExtras.h" 43 #include "llvm/Support/raw_ostream.h" 44 #include "llvm/Target/TargetOptions.h" 45 #include <list> 46 47 using namespace llvm; 48 49 #define DEBUG_TYPE "ppc-lowering" 50 51 static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc", 52 cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden); 53 54 static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref", 55 cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden); 56 57 static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned", 58 cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden); 59 60 static cl::opt<bool> DisableSCO("disable-ppc-sco", 61 cl::desc("disable sibling call optimization on ppc"), cl::Hidden); 62 63 STATISTIC(NumTailCalls, "Number of tail calls"); 64 STATISTIC(NumSiblingCalls, "Number of sibling calls"); 65 66 // FIXME: Remove this once the bug has been fixed! 67 extern cl::opt<bool> ANDIGlueBug; 68 69 PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, 70 const PPCSubtarget &STI) 71 : TargetLowering(TM), Subtarget(STI) { 72 // Use _setjmp/_longjmp instead of setjmp/longjmp. 73 setUseUnderscoreSetJmp(true); 74 setUseUnderscoreLongJmp(true); 75 76 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all 77 // arguments are at least 4/8 bytes aligned. 78 bool isPPC64 = Subtarget.isPPC64(); 79 setMinStackArgumentAlignment(isPPC64 ? 8:4); 80 81 // Set up the register classes. 
82 addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
83 if (!useSoftFloat()) {
84 addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
85 addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
86 }
87
88 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
89 for (MVT VT : MVT::integer_valuetypes()) {
90 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
91 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
92 }
93
94 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
95
96 // PowerPC has pre-inc loads and stores.
97 setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
98 setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
99 setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
100 setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
101 setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
102 setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
103 setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
104 setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
105 setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
106 setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
107 setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
108 setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
109 setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
110 setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
111
112 if (Subtarget.useCRBits()) {
113 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
114
115 if (isPPC64 || Subtarget.hasFPCVT()) {
116 setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
117 AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
118 isPPC64 ? MVT::i64 : MVT::i32);
119 setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
120 AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
121 isPPC64 ? MVT::i64 : MVT::i32);
122 } else {
123 setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
124 setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
125 }
126
127 // PowerPC does not support direct load / store of condition registers
128 setOperationAction(ISD::LOAD, MVT::i1, Custom);
129 setOperationAction(ISD::STORE, MVT::i1, Custom);
130
131 // FIXME: Remove this once the ANDI glue bug is fixed:
132 if (ANDIGlueBug)
133 setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
134
135 for (MVT VT : MVT::integer_valuetypes()) {
136 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
137 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
138 setTruncStoreAction(VT, MVT::i1, Expand);
139 }
140
141 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
142 }
143
144 // This is used in the ppcf128->int sequence. Note it has different semantics
145 // from FP_ROUND: that rounds to nearest, this rounds to zero.
146 setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
147
148 // We do not currently implement these libm ops for PowerPC.
149 setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
150 setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
151 setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
152 setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
153 setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
154 setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
155
156 // PowerPC has no SREM/UREM instructions
157 setOperationAction(ISD::SREM, MVT::i32, Expand);
158 setOperationAction(ISD::UREM, MVT::i32, Expand);
159 setOperationAction(ISD::SREM, MVT::i64, Expand);
160 setOperationAction(ISD::UREM, MVT::i64, Expand);
161
162 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
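// (A minimal sketch of what marking these Expand buys us: with the combined
// nodes unavailable, the legalizer lowers e.g. "srem %a, %b" as
//   %q = sdiv %a, %b ; %p = mul %q, %b ; %r = sub %a, %p
// i.e. a divide followed by a multiply and subtract, rather than expecting
// a single divrem- or mul_lohi-style node from the target.)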
163 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
164 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
165 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
166 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
167 setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
168 setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
169 setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
170 setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
171
172 // We don't support sin/cos/sqrt/fmod/pow
173 setOperationAction(ISD::FSIN , MVT::f64, Expand);
174 setOperationAction(ISD::FCOS , MVT::f64, Expand);
175 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
176 setOperationAction(ISD::FREM , MVT::f64, Expand);
177 setOperationAction(ISD::FPOW , MVT::f64, Expand);
178 setOperationAction(ISD::FMA , MVT::f64, Legal);
179 setOperationAction(ISD::FSIN , MVT::f32, Expand);
180 setOperationAction(ISD::FCOS , MVT::f32, Expand);
181 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
182 setOperationAction(ISD::FREM , MVT::f32, Expand);
183 setOperationAction(ISD::FPOW , MVT::f32, Expand);
184 setOperationAction(ISD::FMA , MVT::f32, Legal);
185
186 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
187
188 // If we're enabling GP optimizations, use hardware square root
189 if (!Subtarget.hasFSQRT() &&
190 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
191 Subtarget.hasFRE()))
192 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
193
194 if (!Subtarget.hasFSQRT() &&
195 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
196 Subtarget.hasFRES()))
197 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
198
199 if (Subtarget.hasFCPSGN()) {
200 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
201 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
202 } else {
203 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
204 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
205 }
206
207 if (Subtarget.hasFPRND()) {
208 setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
209 setOperationAction(ISD::FCEIL, MVT::f64, Legal);
210 setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
211 setOperationAction(ISD::FROUND, MVT::f64, Legal);
212
213 setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
214 setOperationAction(ISD::FCEIL, MVT::f32, Legal);
215 setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
216 setOperationAction(ISD::FROUND, MVT::f32, Legal);
217 }
218
219 // PowerPC does not have BSWAP
220 // CTPOP and CTTZ were introduced in P8/P9 respectively
221 setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
222 setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
223 if (Subtarget.isISA3_0()) {
224 setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
225 setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
226 } else {
227 setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
228 setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
229 }
230
231 if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
232 setOperationAction(ISD::CTPOP, MVT::i32 , Legal);
233 setOperationAction(ISD::CTPOP, MVT::i64 , Legal);
234 } else {
235 setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
236 setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
237 }
238
239 // PowerPC does not have ROTR
240 setOperationAction(ISD::ROTR, MVT::i32 , Expand);
241 setOperationAction(ISD::ROTR, MVT::i64 , Expand);
242
243 if (!Subtarget.useCRBits()) {
244 // PowerPC does not have Select
245 setOperationAction(ISD::SELECT, MVT::i32, Expand);
246 setOperationAction(ISD::SELECT, MVT::i64, Expand);
247 setOperationAction(ISD::SELECT,
MVT::f32, Expand);
248 setOperationAction(ISD::SELECT, MVT::f64, Expand);
249 }
250
251 // PowerPC wants to turn select_cc of FP into fsel when possible.
252 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
253 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
254
255 // PowerPC wants to optimize integer setcc a bit
256 if (!Subtarget.useCRBits())
257 setOperationAction(ISD::SETCC, MVT::i32, Custom);
258
259 // PowerPC does not have BRCOND which requires SetCC
260 if (!Subtarget.useCRBits())
261 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
262
263 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
264
265 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
266 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
267
268 // PowerPC does not have [U|S]INT_TO_FP
269 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
270 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
271
272 if (Subtarget.hasDirectMove() && isPPC64) {
273 setOperationAction(ISD::BITCAST, MVT::f32, Legal);
274 setOperationAction(ISD::BITCAST, MVT::i32, Legal);
275 setOperationAction(ISD::BITCAST, MVT::i64, Legal);
276 setOperationAction(ISD::BITCAST, MVT::f64, Legal);
277 } else {
278 setOperationAction(ISD::BITCAST, MVT::f32, Expand);
279 setOperationAction(ISD::BITCAST, MVT::i32, Expand);
280 setOperationAction(ISD::BITCAST, MVT::i64, Expand);
281 setOperationAction(ISD::BITCAST, MVT::f64, Expand);
282 }
283
284 // We cannot sextinreg(i1). Expand to shifts.
285 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
286
287 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
288 // SjLj exception handling but a light-weight setjmp/longjmp replacement to
289 // support continuations, user-level threading, etc. As a result, no
290 // other SjLj exception interfaces are implemented; please don't build
291 // your own exception handling based on them.
292 // LLVM/Clang supports zero-cost DWARF exception handling.
293 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
294 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
295
296 // We want to legalize GlobalAddress and ConstantPool nodes into the
297 // appropriate instructions to materialize the address.
298 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
299 setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
300 setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
301 setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
302 setOperationAction(ISD::JumpTable, MVT::i32, Custom);
303 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
304 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
305 setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
306 setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
307 setOperationAction(ISD::JumpTable, MVT::i64, Custom);
308
309 // TRAP is legal.
310 setOperationAction(ISD::TRAP, MVT::Other, Legal);
311
312 // TRAMPOLINE is custom lowered.
313 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
314 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
315
316 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
317 setOperationAction(ISD::VASTART , MVT::Other, Custom);
318
319 if (Subtarget.isSVR4ABI()) {
320 if (isPPC64) {
321 // VAARG always uses double-word chunks, so promote anything smaller.
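// (For example, a va_arg of type i8 under this ABI is read as a full i64
// chunk from the argument save area and then truncated back to i8, which is
// what the Promote + AddPromotedToType(..., MVT::i64) pairs below request.)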
322 setOperationAction(ISD::VAARG, MVT::i1, Promote); 323 AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64); 324 setOperationAction(ISD::VAARG, MVT::i8, Promote); 325 AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64); 326 setOperationAction(ISD::VAARG, MVT::i16, Promote); 327 AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64); 328 setOperationAction(ISD::VAARG, MVT::i32, Promote); 329 AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64); 330 setOperationAction(ISD::VAARG, MVT::Other, Expand); 331 } else { 332 // VAARG is custom lowered with the 32-bit SVR4 ABI. 333 setOperationAction(ISD::VAARG, MVT::Other, Custom); 334 setOperationAction(ISD::VAARG, MVT::i64, Custom); 335 } 336 } else 337 setOperationAction(ISD::VAARG, MVT::Other, Expand); 338 339 if (Subtarget.isSVR4ABI() && !isPPC64) 340 // VACOPY is custom lowered with the 32-bit SVR4 ABI. 341 setOperationAction(ISD::VACOPY , MVT::Other, Custom); 342 else 343 setOperationAction(ISD::VACOPY , MVT::Other, Expand); 344 345 // Use the default implementation. 346 setOperationAction(ISD::VAEND , MVT::Other, Expand); 347 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); 348 setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom); 349 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom); 350 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom); 351 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom); 352 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom); 353 setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom); 354 setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom); 355 356 // We want to custom lower some of our intrinsics. 357 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 358 359 // To handle counter-based loop conditions. 360 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom); 361 362 // Comparisons that require checking two conditions. 363 setCondCodeAction(ISD::SETULT, MVT::f32, Expand); 364 setCondCodeAction(ISD::SETULT, MVT::f64, Expand); 365 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand); 366 setCondCodeAction(ISD::SETUGT, MVT::f64, Expand); 367 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand); 368 setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand); 369 setCondCodeAction(ISD::SETOGE, MVT::f32, Expand); 370 setCondCodeAction(ISD::SETOGE, MVT::f64, Expand); 371 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand); 372 setCondCodeAction(ISD::SETOLE, MVT::f64, Expand); 373 setCondCodeAction(ISD::SETONE, MVT::f32, Expand); 374 setCondCodeAction(ISD::SETONE, MVT::f64, Expand); 375 376 if (Subtarget.has64BitSupport()) { 377 // They also have instructions for converting between i64 and fp. 378 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); 379 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); 380 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); 381 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); 382 // This is just the low 32 bits of a (signed) fp->i64 conversion. 383 // We cannot do this with Promote because i64 is not a legal type. 384 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); 385 386 if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) 387 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); 388 } else { 389 // PowerPC does not have FP_TO_UINT on 32-bit implementations. 390 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); 391 } 392 393 // With the instructions enabled under FPCVT, we can do everything. 
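// (Specifically, the fcfid/fctiw conversion family gains unsigned and
// single-precision forms under FPCVT, which is what lets every combination
// of signedness and width below be custom-lowered rather than expanded.)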
394 if (Subtarget.hasFPCVT()) { 395 if (Subtarget.has64BitSupport()) { 396 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); 397 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); 398 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); 399 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); 400 } 401 402 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); 403 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); 404 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); 405 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); 406 } 407 408 if (Subtarget.use64BitRegs()) { 409 // 64-bit PowerPC implementations can support i64 types directly 410 addRegisterClass(MVT::i64, &PPC::G8RCRegClass); 411 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or 412 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); 413 // 64-bit PowerPC wants to expand i128 shifts itself. 414 setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom); 415 setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom); 416 setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom); 417 } else { 418 // 32-bit PowerPC wants to expand i64 shifts itself. 419 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); 420 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); 421 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); 422 } 423 424 if (Subtarget.hasAltivec()) { 425 // First set operation action for all vector types to expand. Then we 426 // will selectively turn on ones that can be effectively codegen'd. 427 for (MVT VT : MVT::vector_valuetypes()) { 428 // add/sub are legal for all supported vector VT's. 429 setOperationAction(ISD::ADD, VT, Legal); 430 setOperationAction(ISD::SUB, VT, Legal); 431 432 // Vector instructions introduced in P8 433 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) { 434 setOperationAction(ISD::CTPOP, VT, Legal); 435 setOperationAction(ISD::CTLZ, VT, Legal); 436 } 437 else { 438 setOperationAction(ISD::CTPOP, VT, Expand); 439 setOperationAction(ISD::CTLZ, VT, Expand); 440 } 441 442 // Vector instructions introduced in P9 443 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128)) 444 setOperationAction(ISD::CTTZ, VT, Legal); 445 else 446 setOperationAction(ISD::CTTZ, VT, Expand); 447 448 // We promote all shuffles to v16i8. 449 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote); 450 AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8); 451 452 // We promote all non-typed operations to v4i32. 453 setOperationAction(ISD::AND , VT, Promote); 454 AddPromotedToType (ISD::AND , VT, MVT::v4i32); 455 setOperationAction(ISD::OR , VT, Promote); 456 AddPromotedToType (ISD::OR , VT, MVT::v4i32); 457 setOperationAction(ISD::XOR , VT, Promote); 458 AddPromotedToType (ISD::XOR , VT, MVT::v4i32); 459 setOperationAction(ISD::LOAD , VT, Promote); 460 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32); 461 setOperationAction(ISD::SELECT, VT, Promote); 462 AddPromotedToType (ISD::SELECT, VT, MVT::v4i32); 463 setOperationAction(ISD::SELECT_CC, VT, Promote); 464 AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32); 465 setOperationAction(ISD::STORE, VT, Promote); 466 AddPromotedToType (ISD::STORE, VT, MVT::v4i32); 467 468 // No other operations are legal. 
469 setOperationAction(ISD::MUL , VT, Expand); 470 setOperationAction(ISD::SDIV, VT, Expand); 471 setOperationAction(ISD::SREM, VT, Expand); 472 setOperationAction(ISD::UDIV, VT, Expand); 473 setOperationAction(ISD::UREM, VT, Expand); 474 setOperationAction(ISD::FDIV, VT, Expand); 475 setOperationAction(ISD::FREM, VT, Expand); 476 setOperationAction(ISD::FNEG, VT, Expand); 477 setOperationAction(ISD::FSQRT, VT, Expand); 478 setOperationAction(ISD::FLOG, VT, Expand); 479 setOperationAction(ISD::FLOG10, VT, Expand); 480 setOperationAction(ISD::FLOG2, VT, Expand); 481 setOperationAction(ISD::FEXP, VT, Expand); 482 setOperationAction(ISD::FEXP2, VT, Expand); 483 setOperationAction(ISD::FSIN, VT, Expand); 484 setOperationAction(ISD::FCOS, VT, Expand); 485 setOperationAction(ISD::FABS, VT, Expand); 486 setOperationAction(ISD::FPOWI, VT, Expand); 487 setOperationAction(ISD::FFLOOR, VT, Expand); 488 setOperationAction(ISD::FCEIL, VT, Expand); 489 setOperationAction(ISD::FTRUNC, VT, Expand); 490 setOperationAction(ISD::FRINT, VT, Expand); 491 setOperationAction(ISD::FNEARBYINT, VT, Expand); 492 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand); 493 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand); 494 setOperationAction(ISD::BUILD_VECTOR, VT, Expand); 495 setOperationAction(ISD::MULHU, VT, Expand); 496 setOperationAction(ISD::MULHS, VT, Expand); 497 setOperationAction(ISD::UMUL_LOHI, VT, Expand); 498 setOperationAction(ISD::SMUL_LOHI, VT, Expand); 499 setOperationAction(ISD::UDIVREM, VT, Expand); 500 setOperationAction(ISD::SDIVREM, VT, Expand); 501 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand); 502 setOperationAction(ISD::FPOW, VT, Expand); 503 setOperationAction(ISD::BSWAP, VT, Expand); 504 setOperationAction(ISD::VSELECT, VT, Expand); 505 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); 506 setOperationAction(ISD::ROTL, VT, Expand); 507 setOperationAction(ISD::ROTR, VT, Expand); 508 509 for (MVT InnerVT : MVT::vector_valuetypes()) { 510 setTruncStoreAction(VT, InnerVT, Expand); 511 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); 512 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); 513 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); 514 } 515 } 516 517 // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle 518 // with merges, splats, etc. 519 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom); 520 521 setOperationAction(ISD::AND , MVT::v4i32, Legal); 522 setOperationAction(ISD::OR , MVT::v4i32, Legal); 523 setOperationAction(ISD::XOR , MVT::v4i32, Legal); 524 setOperationAction(ISD::LOAD , MVT::v4i32, Legal); 525 setOperationAction(ISD::SELECT, MVT::v4i32, 526 Subtarget.useCRBits() ? 
Legal : Expand); 527 setOperationAction(ISD::STORE , MVT::v4i32, Legal); 528 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); 529 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); 530 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); 531 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); 532 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); 533 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); 534 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); 535 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal); 536 537 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass); 538 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass); 539 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass); 540 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass); 541 542 setOperationAction(ISD::MUL, MVT::v4f32, Legal); 543 setOperationAction(ISD::FMA, MVT::v4f32, Legal); 544 545 if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) { 546 setOperationAction(ISD::FDIV, MVT::v4f32, Legal); 547 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); 548 } 549 550 if (Subtarget.hasP8Altivec()) 551 setOperationAction(ISD::MUL, MVT::v4i32, Legal); 552 else 553 setOperationAction(ISD::MUL, MVT::v4i32, Custom); 554 555 setOperationAction(ISD::MUL, MVT::v8i16, Custom); 556 setOperationAction(ISD::MUL, MVT::v16i8, Custom); 557 558 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom); 559 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom); 560 561 setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom); 562 setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom); 563 setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom); 564 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); 565 if (Subtarget.hasP8Altivec()) 566 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom); 567 if (Subtarget.hasVSX()) 568 setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); 569 570 // Altivec does not contain unordered floating-point compare instructions 571 setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand); 572 setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand); 573 setCondCodeAction(ISD::SETO, MVT::v4f32, Expand); 574 setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand); 575 576 if (Subtarget.hasVSX()) { 577 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal); 578 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal); 579 if (Subtarget.hasP8Vector()) { 580 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal); 581 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal); 582 } 583 if (Subtarget.hasDirectMove() && isPPC64) { 584 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal); 585 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal); 586 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal); 587 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal); 588 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal); 589 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal); 590 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal); 591 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal); 592 } 593 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal); 594 595 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); 596 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); 597 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); 598 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal); 599 setOperationAction(ISD::FROUND, MVT::v2f64, Legal); 600 601 setOperationAction(ISD::FROUND, MVT::v4f32, Legal); 602 603 
setOperationAction(ISD::MUL, MVT::v2f64, Legal); 604 setOperationAction(ISD::FMA, MVT::v2f64, Legal); 605 606 setOperationAction(ISD::FDIV, MVT::v2f64, Legal); 607 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); 608 609 setOperationAction(ISD::VSELECT, MVT::v16i8, Legal); 610 setOperationAction(ISD::VSELECT, MVT::v8i16, Legal); 611 setOperationAction(ISD::VSELECT, MVT::v4i32, Legal); 612 setOperationAction(ISD::VSELECT, MVT::v4f32, Legal); 613 setOperationAction(ISD::VSELECT, MVT::v2f64, Legal); 614 615 // Share the Altivec comparison restrictions. 616 setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand); 617 setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand); 618 setCondCodeAction(ISD::SETO, MVT::v2f64, Expand); 619 setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand); 620 621 setOperationAction(ISD::LOAD, MVT::v2f64, Legal); 622 setOperationAction(ISD::STORE, MVT::v2f64, Legal); 623 624 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal); 625 626 if (Subtarget.hasP8Vector()) 627 addRegisterClass(MVT::f32, &PPC::VSSRCRegClass); 628 629 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass); 630 631 addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass); 632 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass); 633 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass); 634 635 if (Subtarget.hasP8Altivec()) { 636 setOperationAction(ISD::SHL, MVT::v2i64, Legal); 637 setOperationAction(ISD::SRA, MVT::v2i64, Legal); 638 setOperationAction(ISD::SRL, MVT::v2i64, Legal); 639 640 setOperationAction(ISD::SETCC, MVT::v2i64, Legal); 641 } 642 else { 643 setOperationAction(ISD::SHL, MVT::v2i64, Expand); 644 setOperationAction(ISD::SRA, MVT::v2i64, Expand); 645 setOperationAction(ISD::SRL, MVT::v2i64, Expand); 646 647 setOperationAction(ISD::SETCC, MVT::v2i64, Custom); 648 649 // VSX v2i64 only supports non-arithmetic operations. 650 setOperationAction(ISD::ADD, MVT::v2i64, Expand); 651 setOperationAction(ISD::SUB, MVT::v2i64, Expand); 652 } 653 654 setOperationAction(ISD::LOAD, MVT::v2i64, Promote); 655 AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64); 656 setOperationAction(ISD::STORE, MVT::v2i64, Promote); 657 AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64); 658 659 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal); 660 661 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal); 662 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal); 663 setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal); 664 setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal); 665 666 // Vector operation legalization checks the result type of 667 // SIGN_EXTEND_INREG, overall legalization checks the inner type. 
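// (For example, (sign_extend_inreg x:v2i64, v2i16) is inspected twice: the
// vector legalizer keys on the v2i64 result type, while the DAG legalizer
// keys on the v2i16 inner type, so an action must be recorded for both.)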
668 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal); 669 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal); 670 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom); 671 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom); 672 673 setOperationAction(ISD::FNEG, MVT::v4f32, Legal); 674 setOperationAction(ISD::FNEG, MVT::v2f64, Legal); 675 setOperationAction(ISD::FABS, MVT::v4f32, Legal); 676 setOperationAction(ISD::FABS, MVT::v2f64, Legal); 677 678 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass); 679 } 680 681 if (Subtarget.hasP8Altivec()) { 682 addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass); 683 addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass); 684 } 685 686 if (Subtarget.hasP9Vector()) { 687 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); 688 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); 689 } 690 691 if (Subtarget.isISA3_0() && Subtarget.hasDirectMove()) 692 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom); 693 } 694 695 if (Subtarget.hasQPX()) { 696 setOperationAction(ISD::FADD, MVT::v4f64, Legal); 697 setOperationAction(ISD::FSUB, MVT::v4f64, Legal); 698 setOperationAction(ISD::FMUL, MVT::v4f64, Legal); 699 setOperationAction(ISD::FREM, MVT::v4f64, Expand); 700 701 setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal); 702 setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand); 703 704 setOperationAction(ISD::LOAD , MVT::v4f64, Custom); 705 setOperationAction(ISD::STORE , MVT::v4f64, Custom); 706 707 setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom); 708 setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom); 709 710 if (!Subtarget.useCRBits()) 711 setOperationAction(ISD::SELECT, MVT::v4f64, Expand); 712 setOperationAction(ISD::VSELECT, MVT::v4f64, Legal); 713 714 setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal); 715 setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand); 716 setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand); 717 setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand); 718 setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom); 719 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal); 720 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom); 721 722 setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal); 723 setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand); 724 725 setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal); 726 setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand); 727 setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal); 728 729 setOperationAction(ISD::FNEG , MVT::v4f64, Legal); 730 setOperationAction(ISD::FABS , MVT::v4f64, Legal); 731 setOperationAction(ISD::FSIN , MVT::v4f64, Expand); 732 setOperationAction(ISD::FCOS , MVT::v4f64, Expand); 733 setOperationAction(ISD::FPOWI , MVT::v4f64, Expand); 734 setOperationAction(ISD::FPOW , MVT::v4f64, Expand); 735 setOperationAction(ISD::FLOG , MVT::v4f64, Expand); 736 setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand); 737 setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand); 738 setOperationAction(ISD::FEXP , MVT::v4f64, Expand); 739 setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand); 740 741 setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal); 742 setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal); 743 744 setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal); 745 setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal); 746 747 addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass); 748 749 setOperationAction(ISD::FADD, MVT::v4f32, 
Legal); 750 setOperationAction(ISD::FSUB, MVT::v4f32, Legal); 751 setOperationAction(ISD::FMUL, MVT::v4f32, Legal); 752 setOperationAction(ISD::FREM, MVT::v4f32, Expand); 753 754 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal); 755 setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand); 756 757 setOperationAction(ISD::LOAD , MVT::v4f32, Custom); 758 setOperationAction(ISD::STORE , MVT::v4f32, Custom); 759 760 if (!Subtarget.useCRBits()) 761 setOperationAction(ISD::SELECT, MVT::v4f32, Expand); 762 setOperationAction(ISD::VSELECT, MVT::v4f32, Legal); 763 764 setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal); 765 setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand); 766 setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand); 767 setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand); 768 setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom); 769 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal); 770 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); 771 772 setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal); 773 setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand); 774 775 setOperationAction(ISD::FNEG , MVT::v4f32, Legal); 776 setOperationAction(ISD::FABS , MVT::v4f32, Legal); 777 setOperationAction(ISD::FSIN , MVT::v4f32, Expand); 778 setOperationAction(ISD::FCOS , MVT::v4f32, Expand); 779 setOperationAction(ISD::FPOWI , MVT::v4f32, Expand); 780 setOperationAction(ISD::FPOW , MVT::v4f32, Expand); 781 setOperationAction(ISD::FLOG , MVT::v4f32, Expand); 782 setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand); 783 setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand); 784 setOperationAction(ISD::FEXP , MVT::v4f32, Expand); 785 setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand); 786 787 setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal); 788 setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal); 789 790 setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal); 791 setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal); 792 793 addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass); 794 795 setOperationAction(ISD::AND , MVT::v4i1, Legal); 796 setOperationAction(ISD::OR , MVT::v4i1, Legal); 797 setOperationAction(ISD::XOR , MVT::v4i1, Legal); 798 799 if (!Subtarget.useCRBits()) 800 setOperationAction(ISD::SELECT, MVT::v4i1, Expand); 801 setOperationAction(ISD::VSELECT, MVT::v4i1, Legal); 802 803 setOperationAction(ISD::LOAD , MVT::v4i1, Custom); 804 setOperationAction(ISD::STORE , MVT::v4i1, Custom); 805 806 setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom); 807 setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand); 808 setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand); 809 setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand); 810 setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom); 811 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand); 812 setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom); 813 814 setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom); 815 setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom); 816 817 addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass); 818 819 setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal); 820 setOperationAction(ISD::FCEIL, MVT::v4f64, Legal); 821 setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal); 822 setOperationAction(ISD::FROUND, MVT::v4f64, Legal); 823 824 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); 825 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); 826 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); 
827 setOperationAction(ISD::FROUND, MVT::v4f32, Legal); 828 829 setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand); 830 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand); 831 832 // These need to set FE_INEXACT, and so cannot be vectorized here. 833 setOperationAction(ISD::FRINT, MVT::v4f64, Expand); 834 setOperationAction(ISD::FRINT, MVT::v4f32, Expand); 835 836 if (TM.Options.UnsafeFPMath) { 837 setOperationAction(ISD::FDIV, MVT::v4f64, Legal); 838 setOperationAction(ISD::FSQRT, MVT::v4f64, Legal); 839 840 setOperationAction(ISD::FDIV, MVT::v4f32, Legal); 841 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); 842 } else { 843 setOperationAction(ISD::FDIV, MVT::v4f64, Expand); 844 setOperationAction(ISD::FSQRT, MVT::v4f64, Expand); 845 846 setOperationAction(ISD::FDIV, MVT::v4f32, Expand); 847 setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); 848 } 849 } 850 851 if (Subtarget.has64BitSupport()) 852 setOperationAction(ISD::PREFETCH, MVT::Other, Legal); 853 854 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom); 855 856 if (!isPPC64) { 857 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand); 858 setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); 859 } 860 861 setBooleanContents(ZeroOrOneBooleanContent); 862 863 if (Subtarget.hasAltivec()) { 864 // Altivec instructions set fields to all zeros or all ones. 865 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); 866 } 867 868 if (!isPPC64) { 869 // These libcalls are not available in 32-bit. 870 setLibcallName(RTLIB::SHL_I128, nullptr); 871 setLibcallName(RTLIB::SRL_I128, nullptr); 872 setLibcallName(RTLIB::SRA_I128, nullptr); 873 } 874 875 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1); 876 877 // We have target-specific dag combine patterns for the following nodes: 878 setTargetDAGCombine(ISD::SINT_TO_FP); 879 setTargetDAGCombine(ISD::BUILD_VECTOR); 880 if (Subtarget.hasFPCVT()) 881 setTargetDAGCombine(ISD::UINT_TO_FP); 882 setTargetDAGCombine(ISD::LOAD); 883 setTargetDAGCombine(ISD::STORE); 884 setTargetDAGCombine(ISD::BR_CC); 885 if (Subtarget.useCRBits()) 886 setTargetDAGCombine(ISD::BRCOND); 887 setTargetDAGCombine(ISD::BSWAP); 888 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); 889 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); 890 setTargetDAGCombine(ISD::INTRINSIC_VOID); 891 892 setTargetDAGCombine(ISD::SIGN_EXTEND); 893 setTargetDAGCombine(ISD::ZERO_EXTEND); 894 setTargetDAGCombine(ISD::ANY_EXTEND); 895 896 if (Subtarget.useCRBits()) { 897 setTargetDAGCombine(ISD::TRUNCATE); 898 setTargetDAGCombine(ISD::SETCC); 899 setTargetDAGCombine(ISD::SELECT_CC); 900 } 901 902 // Use reciprocal estimates. 903 if (TM.Options.UnsafeFPMath) { 904 setTargetDAGCombine(ISD::FDIV); 905 setTargetDAGCombine(ISD::FSQRT); 906 } 907 908 // Darwin long double math library functions have $LDBL128 appended. 
909 if (Subtarget.isDarwin()) {
910 setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
911 setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
912 setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
913 setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
914 setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
915 setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
916 setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
917 setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
918 setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
919 setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
920 }
921
922 // With 32 condition bits, we don't need to sink (and duplicate) compares
923 // aggressively in CodeGenPrep.
924 if (Subtarget.useCRBits()) {
925 setHasMultipleConditionRegisters();
926 setJumpIsExpensive();
927 }
928
929 setMinFunctionAlignment(2);
930 if (Subtarget.isDarwin())
931 setPrefFunctionAlignment(4);
932
933 switch (Subtarget.getDarwinDirective()) {
934 default: break;
935 case PPC::DIR_970:
936 case PPC::DIR_A2:
937 case PPC::DIR_E500mc:
938 case PPC::DIR_E5500:
939 case PPC::DIR_PWR4:
940 case PPC::DIR_PWR5:
941 case PPC::DIR_PWR5X:
942 case PPC::DIR_PWR6:
943 case PPC::DIR_PWR6X:
944 case PPC::DIR_PWR7:
945 case PPC::DIR_PWR8:
946 case PPC::DIR_PWR9:
947 setPrefFunctionAlignment(4);
948 setPrefLoopAlignment(4);
949 break;
950 }
951
952 if (Subtarget.enableMachineScheduler())
953 setSchedulingPreference(Sched::Source);
954 else
955 setSchedulingPreference(Sched::Hybrid);
956
957 computeRegisterProperties(STI.getRegisterInfo());
958
959 // The Freescale cores do better with aggressive inlining of memcpy and
960 // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
961 if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
962 Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
963 MaxStoresPerMemset = 32;
964 MaxStoresPerMemsetOptSize = 16;
965 MaxStoresPerMemcpy = 32;
966 MaxStoresPerMemcpyOptSize = 8;
967 MaxStoresPerMemmove = 32;
968 MaxStoresPerMemmoveOptSize = 8;
969 } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
970 // The A2 also benefits from (very) aggressive inlining of memcpy and
971 // friends. The overhead of the function call, even when warm, can be
972 // over one hundred cycles.
973 MaxStoresPerMemset = 128;
974 MaxStoresPerMemcpy = 128;
975 MaxStoresPerMemmove = 128;
976 }
977 }
978
979 /// getMaxByValAlign - Helper for getByValTypeAlignment to determine
980 /// the desired ByVal argument alignment.
981 static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
982 unsigned MaxMaxAlign) {
983 if (MaxAlign == MaxMaxAlign)
984 return;
985 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
986 if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
987 MaxAlign = 32;
988 else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
989 MaxAlign = 16;
990 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
991 unsigned EltAlign = 0;
992 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
993 if (EltAlign > MaxAlign)
994 MaxAlign = EltAlign;
995 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
996 for (auto *EltTy : STy->elements()) {
997 unsigned EltAlign = 0;
998 getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
999 if (EltAlign > MaxAlign)
1000 MaxAlign = EltAlign;
1001 if (MaxAlign == MaxMaxAlign)
1002 break;
1003 }
1004 }
1005 }
1006
1007 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1008 /// function arguments in the caller parameter area.
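/// For example, with Altivec available a struct containing a v4f32 member
/// gets a 16-byte ByVal alignment via getMaxByValAlign, and QPX raises the
/// cap to 32 bytes for 256-bit and wider vector members.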
1009 unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty, 1010 const DataLayout &DL) const { 1011 // Darwin passes everything on 4 byte boundary. 1012 if (Subtarget.isDarwin()) 1013 return 4; 1014 1015 // 16byte and wider vectors are passed on 16byte boundary. 1016 // The rest is 8 on PPC64 and 4 on PPC32 boundary. 1017 unsigned Align = Subtarget.isPPC64() ? 8 : 4; 1018 if (Subtarget.hasAltivec() || Subtarget.hasQPX()) 1019 getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16); 1020 return Align; 1021 } 1022 1023 bool PPCTargetLowering::useSoftFloat() const { 1024 return Subtarget.useSoftFloat(); 1025 } 1026 1027 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { 1028 switch ((PPCISD::NodeType)Opcode) { 1029 case PPCISD::FIRST_NUMBER: break; 1030 case PPCISD::FSEL: return "PPCISD::FSEL"; 1031 case PPCISD::FCFID: return "PPCISD::FCFID"; 1032 case PPCISD::FCFIDU: return "PPCISD::FCFIDU"; 1033 case PPCISD::FCFIDS: return "PPCISD::FCFIDS"; 1034 case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS"; 1035 case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ"; 1036 case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ"; 1037 case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ"; 1038 case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ"; 1039 case PPCISD::FRE: return "PPCISD::FRE"; 1040 case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE"; 1041 case PPCISD::STFIWX: return "PPCISD::STFIWX"; 1042 case PPCISD::VMADDFP: return "PPCISD::VMADDFP"; 1043 case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP"; 1044 case PPCISD::VPERM: return "PPCISD::VPERM"; 1045 case PPCISD::XXSPLT: return "PPCISD::XXSPLT"; 1046 case PPCISD::XXINSERT: return "PPCISD::XXINSERT"; 1047 case PPCISD::VECSHL: return "PPCISD::VECSHL"; 1048 case PPCISD::CMPB: return "PPCISD::CMPB"; 1049 case PPCISD::Hi: return "PPCISD::Hi"; 1050 case PPCISD::Lo: return "PPCISD::Lo"; 1051 case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY"; 1052 case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC"; 1053 case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET"; 1054 case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg"; 1055 case PPCISD::SRL: return "PPCISD::SRL"; 1056 case PPCISD::SRA: return "PPCISD::SRA"; 1057 case PPCISD::SHL: return "PPCISD::SHL"; 1058 case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE"; 1059 case PPCISD::CALL: return "PPCISD::CALL"; 1060 case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP"; 1061 case PPCISD::MTCTR: return "PPCISD::MTCTR"; 1062 case PPCISD::BCTRL: return "PPCISD::BCTRL"; 1063 case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC"; 1064 case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG"; 1065 case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE"; 1066 case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP"; 1067 case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP"; 1068 case PPCISD::MFOCRF: return "PPCISD::MFOCRF"; 1069 case PPCISD::MFVSR: return "PPCISD::MFVSR"; 1070 case PPCISD::MTVSRA: return "PPCISD::MTVSRA"; 1071 case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ"; 1072 case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP"; 1073 case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP"; 1074 case PPCISD::ANDIo_1_EQ_BIT: return "PPCISD::ANDIo_1_EQ_BIT"; 1075 case PPCISD::ANDIo_1_GT_BIT: return "PPCISD::ANDIo_1_GT_BIT"; 1076 case PPCISD::VCMP: return "PPCISD::VCMP"; 1077 case PPCISD::VCMPo: return "PPCISD::VCMPo"; 1078 case PPCISD::LBRX: return "PPCISD::LBRX"; 1079 case PPCISD::STBRX: return "PPCISD::STBRX"; 1080 case PPCISD::LFIWAX: return "PPCISD::LFIWAX"; 1081 case PPCISD::LFIWZX: return 
"PPCISD::LFIWZX"; 1082 case PPCISD::LXSIZX: return "PPCISD::LXSIZX"; 1083 case PPCISD::STXSIX: return "PPCISD::STXSIX"; 1084 case PPCISD::VEXTS: return "PPCISD::VEXTS"; 1085 case PPCISD::LXVD2X: return "PPCISD::LXVD2X"; 1086 case PPCISD::STXVD2X: return "PPCISD::STXVD2X"; 1087 case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH"; 1088 case PPCISD::BDNZ: return "PPCISD::BDNZ"; 1089 case PPCISD::BDZ: return "PPCISD::BDZ"; 1090 case PPCISD::MFFS: return "PPCISD::MFFS"; 1091 case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ"; 1092 case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN"; 1093 case PPCISD::CR6SET: return "PPCISD::CR6SET"; 1094 case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET"; 1095 case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT"; 1096 case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT"; 1097 case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA"; 1098 case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L"; 1099 case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS"; 1100 case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA"; 1101 case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L"; 1102 case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR"; 1103 case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR"; 1104 case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA"; 1105 case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L"; 1106 case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR"; 1107 case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR"; 1108 case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA"; 1109 case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L"; 1110 case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT"; 1111 case PPCISD::SC: return "PPCISD::SC"; 1112 case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB"; 1113 case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE"; 1114 case PPCISD::RFEBB: return "PPCISD::RFEBB"; 1115 case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD"; 1116 case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN"; 1117 case PPCISD::QVFPERM: return "PPCISD::QVFPERM"; 1118 case PPCISD::QVGPCI: return "PPCISD::QVGPCI"; 1119 case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI"; 1120 case PPCISD::QVESPLATI: return "PPCISD::QVESPLATI"; 1121 case PPCISD::QBFLT: return "PPCISD::QBFLT"; 1122 case PPCISD::QVLFSb: return "PPCISD::QVLFSb"; 1123 } 1124 return nullptr; 1125 } 1126 1127 EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C, 1128 EVT VT) const { 1129 if (!VT.isVector()) 1130 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32; 1131 1132 if (Subtarget.hasQPX()) 1133 return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements()); 1134 1135 return VT.changeVectorElementTypeToInteger(); 1136 } 1137 1138 bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const { 1139 assert(VT.isFloatingPoint() && "Non-floating-point FMA?"); 1140 return true; 1141 } 1142 1143 //===----------------------------------------------------------------------===// 1144 // Node matching predicates, for use by the tblgen matching code. 1145 //===----------------------------------------------------------------------===// 1146 1147 /// isFloatingPointZero - Return true if this is 0.0 or -0.0. 1148 static bool isFloatingPointZero(SDValue Op) { 1149 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) 1150 return CFP->getValueAPF().isZero(); 1151 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { 1152 // Maybe this has already been legalized into the constant pool? 
1153 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1))) 1154 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) 1155 return CFP->getValueAPF().isZero(); 1156 } 1157 return false; 1158 } 1159 1160 /// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return 1161 /// true if Op is undef or if it matches the specified value. 1162 static bool isConstantOrUndef(int Op, int Val) { 1163 return Op < 0 || Op == Val; 1164 } 1165 1166 /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a 1167 /// VPKUHUM instruction. 1168 /// The ShuffleKind distinguishes between big-endian operations with 1169 /// two different inputs (0), either-endian operations with two identical 1170 /// inputs (1), and little-endian operations with two different inputs (2). 1171 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). 1172 bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, 1173 SelectionDAG &DAG) { 1174 bool IsLE = DAG.getDataLayout().isLittleEndian(); 1175 if (ShuffleKind == 0) { 1176 if (IsLE) 1177 return false; 1178 for (unsigned i = 0; i != 16; ++i) 1179 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1)) 1180 return false; 1181 } else if (ShuffleKind == 2) { 1182 if (!IsLE) 1183 return false; 1184 for (unsigned i = 0; i != 16; ++i) 1185 if (!isConstantOrUndef(N->getMaskElt(i), i*2)) 1186 return false; 1187 } else if (ShuffleKind == 1) { 1188 unsigned j = IsLE ? 0 : 1; 1189 for (unsigned i = 0; i != 8; ++i) 1190 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) || 1191 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)) 1192 return false; 1193 } 1194 return true; 1195 } 1196 1197 /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a 1198 /// VPKUWUM instruction. 1199 /// The ShuffleKind distinguishes between big-endian operations with 1200 /// two different inputs (0), either-endian operations with two identical 1201 /// inputs (1), and little-endian operations with two different inputs (2). 1202 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). 1203 bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, 1204 SelectionDAG &DAG) { 1205 bool IsLE = DAG.getDataLayout().isLittleEndian(); 1206 if (ShuffleKind == 0) { 1207 if (IsLE) 1208 return false; 1209 for (unsigned i = 0; i != 16; i += 2) 1210 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) || 1211 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3)) 1212 return false; 1213 } else if (ShuffleKind == 2) { 1214 if (!IsLE) 1215 return false; 1216 for (unsigned i = 0; i != 16; i += 2) 1217 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) || 1218 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1)) 1219 return false; 1220 } else if (ShuffleKind == 1) { 1221 unsigned j = IsLE ? 0 : 2; 1222 for (unsigned i = 0; i != 8; i += 2) 1223 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) || 1224 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) || 1225 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) || 1226 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1)) 1227 return false; 1228 } 1229 return true; 1230 } 1231 1232 /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a 1233 /// VPKUDUM instruction, AND the VPKUDUM instruction exists for the 1234 /// current subtarget. 
1235 /// 1236 /// The ShuffleKind distinguishes between big-endian operations with 1237 /// two different inputs (0), either-endian operations with two identical 1238 /// inputs (1), and little-endian operations with two different inputs (2). 1239 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). 1240 bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, 1241 SelectionDAG &DAG) { 1242 const PPCSubtarget& Subtarget = 1243 static_cast<const PPCSubtarget&>(DAG.getSubtarget()); 1244 if (!Subtarget.hasP8Vector()) 1245 return false; 1246 1247 bool IsLE = DAG.getDataLayout().isLittleEndian(); 1248 if (ShuffleKind == 0) { 1249 if (IsLE) 1250 return false; 1251 for (unsigned i = 0; i != 16; i += 4) 1252 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) || 1253 !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) || 1254 !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) || 1255 !isConstantOrUndef(N->getMaskElt(i+3), i*2+7)) 1256 return false; 1257 } else if (ShuffleKind == 2) { 1258 if (!IsLE) 1259 return false; 1260 for (unsigned i = 0; i != 16; i += 4) 1261 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) || 1262 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) || 1263 !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) || 1264 !isConstantOrUndef(N->getMaskElt(i+3), i*2+3)) 1265 return false; 1266 } else if (ShuffleKind == 1) { 1267 unsigned j = IsLE ? 0 : 4; 1268 for (unsigned i = 0; i != 8; i += 4) 1269 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) || 1270 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) || 1271 !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) || 1272 !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) || 1273 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) || 1274 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) || 1275 !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) || 1276 !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3)) 1277 return false; 1278 } 1279 return true; 1280 } 1281 1282 /// isVMerge - Common function, used to match vmrg* shuffles. 1283 /// 1284 static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, 1285 unsigned LHSStart, unsigned RHSStart) { 1286 if (N->getValueType(0) != MVT::v16i8) 1287 return false; 1288 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) && 1289 "Unsupported merge size!"); 1290 1291 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units 1292 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit 1293 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j), 1294 LHSStart+j+i*UnitSize) || 1295 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j), 1296 RHSStart+j+i*UnitSize)) 1297 return false; 1298 } 1299 return true; 1300 } 1301 1302 /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for 1303 /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes). 1304 /// The ShuffleKind distinguishes between big-endian merges with two 1305 /// different inputs (0), either-endian merges with two identical inputs (1), 1306 /// and little-endian merges with two different inputs (2). For the latter, 1307 /// the input operands are swapped (see PPCInstrAltivec.td). 
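/// For example, with UnitSize == 4 the big-endian "normal" (two-input) form
/// accepts the mask <8,9,10,11, 24,25,26,27, 12,13,14,15, 28,29,30,31>,
/// which is exactly what isVMerge(N, 4, 8, 24) checks below.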
1308 bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1309 unsigned ShuffleKind, SelectionDAG &DAG) {
1310 if (DAG.getDataLayout().isLittleEndian()) {
1311 if (ShuffleKind == 1) // unary
1312 return isVMerge(N, UnitSize, 0, 0);
1313 else if (ShuffleKind == 2) // swapped
1314 return isVMerge(N, UnitSize, 0, 16);
1315 else
1316 return false;
1317 } else {
1318 if (ShuffleKind == 1) // unary
1319 return isVMerge(N, UnitSize, 8, 8);
1320 else if (ShuffleKind == 0) // normal
1321 return isVMerge(N, UnitSize, 8, 24);
1322 else
1323 return false;
1324 }
1325 }
1326
1327 /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1328 /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1329 /// The ShuffleKind distinguishes between big-endian merges with two
1330 /// different inputs (0), either-endian merges with two identical inputs (1),
1331 /// and little-endian merges with two different inputs (2). For the latter,
1332 /// the input operands are swapped (see PPCInstrAltivec.td).
1333 bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1334 unsigned ShuffleKind, SelectionDAG &DAG) {
1335 if (DAG.getDataLayout().isLittleEndian()) {
1336 if (ShuffleKind == 1) // unary
1337 return isVMerge(N, UnitSize, 8, 8);
1338 else if (ShuffleKind == 2) // swapped
1339 return isVMerge(N, UnitSize, 8, 24);
1340 else
1341 return false;
1342 } else {
1343 if (ShuffleKind == 1) // unary
1344 return isVMerge(N, UnitSize, 0, 0);
1345 else if (ShuffleKind == 0) // normal
1346 return isVMerge(N, UnitSize, 0, 16);
1347 else
1348 return false;
1349 }
1350 }
1351
1352 /**
1353 * \brief Common function used to match vmrgew and vmrgow shuffles
1354 *
1355 * The indexOffset determines whether to look for even or odd words in
1356 * the shuffle mask. This is based on the endianness of the target
1357 * machine.
1358 * - Little Endian:
1359 * - Use offset of 0 to check for odd elements
1360 * - Use offset of 4 to check for even elements
1361 * - Big Endian:
1362 * - Use offset of 0 to check for even elements
1363 * - Use offset of 4 to check for odd elements
1364 * A detailed description of the vector element ordering for little endian and
1365 * big endian can be found at
1366 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
1367 * Targeting your applications - what little endian and big endian IBM XL C/C++
1368 * compiler differences mean to you
1369 *
1370 * The mask to the shuffle vector instruction specifies the indices of the
1371 * elements from the two input vectors to place in the result. The elements are
1372 * numbered in array-access order, starting with the first vector. These vectors
1373 * are always of type v16i8, thus each vector will contain 16 elements of size
1374 * 8 bits. More info on the shuffle vector can be found in the
1375 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
1376 * Language Reference.
1377 *
1378 * The RHSStartValue indicates whether the same input vectors are used (unary)
1379 * or two different input vectors are used, based on the following:
1380 * - If the instruction uses the same vector for both inputs, the range of the
1381 * indices will be 0 to 15. In this case, the RHSStart value passed should
1382 * be 0.
1383 * - If the instruction has two different vectors then the range of the
1384 * indices will be 0 to 31.
In this case, the RHSStart value passed should 1385 * be 16 (indices 0-15 specify elements in the first vector while indices 16 1386 * to 31 specify elements in the second vector). 1387 * 1388 * \param[in] N The shuffle vector SD Node to analyze 1389 * \param[in] IndexOffset Specifies whether to look for even or odd elements 1390 * \param[in] RHSStartValue Specifies the starting index for the righthand input 1391 * vector to the shuffle_vector instruction 1392 * \return true iff this shuffle vector represents an even or odd word merge 1393 */ 1394 static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset, 1395 unsigned RHSStartValue) { 1396 if (N->getValueType(0) != MVT::v16i8) 1397 return false; 1398 1399 for (unsigned i = 0; i < 2; ++i) 1400 for (unsigned j = 0; j < 4; ++j) 1401 if (!isConstantOrUndef(N->getMaskElt(i*4+j), 1402 i*RHSStartValue+j+IndexOffset) || 1403 !isConstantOrUndef(N->getMaskElt(i*4+j+8), 1404 i*RHSStartValue+j+IndexOffset+8)) 1405 return false; 1406 return true; 1407 } 1408 1409 /** 1410 * \brief Determine if the specified shuffle mask is suitable for the vmrgew or 1411 * vmrgow instructions. 1412 * 1413 * \param[in] N The shuffle vector SD Node to analyze 1414 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false) 1415 * \param[in] ShuffleKind Identify the type of merge: 1416 * - 0 = big-endian merge with two different inputs; 1417 * - 1 = either-endian merge with two identical inputs; 1418 * - 2 = little-endian merge with two different inputs (inputs are swapped for 1419 * little-endian merges). 1420 * \param[in] DAG The current SelectionDAG 1421 * \return true iff this shuffle mask 1422 */ 1423 bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, 1424 unsigned ShuffleKind, SelectionDAG &DAG) { 1425 if (DAG.getDataLayout().isLittleEndian()) { 1426 unsigned indexOffset = CheckEven ? 4 : 0; 1427 if (ShuffleKind == 1) // Unary 1428 return isVMerge(N, indexOffset, 0); 1429 else if (ShuffleKind == 2) // swapped 1430 return isVMerge(N, indexOffset, 16); 1431 else 1432 return false; 1433 } 1434 else { 1435 unsigned indexOffset = CheckEven ? 0 : 4; 1436 if (ShuffleKind == 1) // Unary 1437 return isVMerge(N, indexOffset, 0); 1438 else if (ShuffleKind == 0) // Normal 1439 return isVMerge(N, indexOffset, 16); 1440 else 1441 return false; 1442 } 1443 return false; 1444 } 1445 1446 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift 1447 /// amount, otherwise return -1. 1448 /// The ShuffleKind distinguishes between big-endian operations with two 1449 /// different inputs (0), either-endian operations with two identical inputs 1450 /// (1), and little-endian operations with two different inputs (2). For the 1451 /// latter, the input operands are swapped (see PPCInstrAltivec.td). 1452 int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, 1453 SelectionDAG &DAG) { 1454 if (N->getValueType(0) != MVT::v16i8) 1455 return -1; 1456 1457 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); 1458 1459 // Find the first non-undef value in the shuffle mask. 1460 unsigned i; 1461 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i) 1462 /*search*/; 1463 1464 if (i == 16) return -1; // all undef. 1465 1466 // Otherwise, check to see if the rest of the elements are consecutively 1467 // numbered from this value. 
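// For example, with ShuffleKind 0 on a big-endian target, the mask
// <4, 5, 6, ..., 18, 19> is consecutive starting at 4, giving a vsldoi shift
// amount of 4 bytes.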
1468 unsigned ShiftAmt = SVOp->getMaskElt(i); 1469 if (ShiftAmt < i) return -1; 1470 1471 ShiftAmt -= i; 1472 bool isLE = DAG.getDataLayout().isLittleEndian(); 1473 1474 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) { 1475 // Check the rest of the elements to see if they are consecutive. 1476 for (++i; i != 16; ++i) 1477 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i)) 1478 return -1; 1479 } else if (ShuffleKind == 1) { 1480 // Check the rest of the elements to see if they are consecutive. 1481 for (++i; i != 16; ++i) 1482 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15)) 1483 return -1; 1484 } else 1485 return -1; 1486 1487 if (isLE) 1488 ShiftAmt = 16 - ShiftAmt; 1489 1490 return ShiftAmt; 1491 } 1492 1493 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand 1494 /// specifies a splat of a single element that is suitable for input to 1495 /// VSPLTB/VSPLTH/VSPLTW. 1496 bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { 1497 assert(N->getValueType(0) == MVT::v16i8 && 1498 (EltSize == 1 || EltSize == 2 || EltSize == 4)); 1499 1500 // The consecutive indices need to specify an element, not part of two 1501 // different elements. So abandon ship early if this isn't the case. 1502 if (N->getMaskElt(0) % EltSize != 0) 1503 return false; 1504 1505 // This is a splat operation if each element of the permute is the same, and 1506 // if the value doesn't reference the second vector. 1507 unsigned ElementBase = N->getMaskElt(0); 1508 1509 // FIXME: Handle UNDEF elements too! 1510 if (ElementBase >= 16) 1511 return false; 1512 1513 // Check that the indices are consecutive, in the case of a multi-byte element 1514 // splatted with a v16i8 mask. 1515 for (unsigned i = 1; i != EltSize; ++i) 1516 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase)) 1517 return false; 1518 1519 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) { 1520 if (N->getMaskElt(i) < 0) continue; 1521 for (unsigned j = 0; j != EltSize; ++j) 1522 if (N->getMaskElt(i+j) != N->getMaskElt(j)) 1523 return false; 1524 } 1525 return true; 1526 } 1527 1528 bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, 1529 unsigned &InsertAtByte, bool &Swap, bool IsLE) { 1530 1531 // Check that the mask is shuffling words 1532 for (unsigned i = 0; i < 4; ++i) { 1533 unsigned B0 = N->getMaskElt(i*4); 1534 unsigned B1 = N->getMaskElt(i*4+1); 1535 unsigned B2 = N->getMaskElt(i*4+2); 1536 unsigned B3 = N->getMaskElt(i*4+3); 1537 if (B0 % 4) 1538 return false; 1539 if (B1 != B0+1 || B2 != B1+1 || B3 != B2+1) 1540 return false; 1541 } 1542 1543 // Now we look at mask elements 0,4,8,12 1544 unsigned M0 = N->getMaskElt(0) / 4; 1545 unsigned M1 = N->getMaskElt(4) / 4; 1546 unsigned M2 = N->getMaskElt(8) / 4; 1547 unsigned M3 = N->getMaskElt(12) / 4; 1548 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 }; 1549 unsigned BigEndianShifts[] = { 3, 0, 1, 2 }; 1550 1551 // Below, let H and L be arbitrary elements of the shuffle mask 1552 // where H is in the range [4,7] and L is in the range [0,3]. 1553 // H, 1, 2, 3 or L, 5, 6, 7 1554 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) || 1555 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) { 1556 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3]; 1557 InsertAtByte = IsLE ? 
12 : 0; 1558 Swap = M0 < 4; 1559 return true; 1560 } 1561 // 0, H, 2, 3 or 4, L, 6, 7 1562 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) || 1563 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) { 1564 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3]; 1565 InsertAtByte = IsLE ? 8 : 4; 1566 Swap = M1 < 4; 1567 return true; 1568 } 1569 // 0, 1, H, 3 or 4, 5, L, 7 1570 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) || 1571 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) { 1572 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3]; 1573 InsertAtByte = IsLE ? 4 : 8; 1574 Swap = M2 < 4; 1575 return true; 1576 } 1577 // 0, 1, 2, H or 4, 5, 6, L 1578 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) || 1579 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) { 1580 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3]; 1581 InsertAtByte = IsLE ? 0 : 12; 1582 Swap = M3 < 4; 1583 return true; 1584 } 1585 1586 // If both vector operands for the shuffle are the same vector, the mask will 1587 // contain only elements from the first one and the second one will be undef. 1588 if (N->getOperand(1).isUndef()) { 1589 ShiftElts = 0; 1590 Swap = true; 1591 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1; 1592 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) { 1593 InsertAtByte = IsLE ? 12 : 0; 1594 return true; 1595 } 1596 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) { 1597 InsertAtByte = IsLE ? 8 : 4; 1598 return true; 1599 } 1600 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) { 1601 InsertAtByte = IsLE ? 4 : 8; 1602 return true; 1603 } 1604 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) { 1605 InsertAtByte = IsLE ? 0 : 12; 1606 return true; 1607 } 1608 } 1609 1610 return false; 1611 } 1612 1613 /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the 1614 /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. 1615 unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize, 1616 SelectionDAG &DAG) { 1617 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); 1618 assert(isSplatShuffleMask(SVOp, EltSize)); 1619 if (DAG.getDataLayout().isLittleEndian()) 1620 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize); 1621 else 1622 return SVOp->getMaskElt(0) / EltSize; 1623 } 1624 1625 /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed 1626 /// by using a vspltis[bhw] instruction of the specified element size, return 1627 /// the constant being splatted. The ByteSize field indicates the number of 1628 /// bytes of each element [124] -> [bhw]. 1629 SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { 1630 SDValue OpVal(nullptr, 0); 1631 1632 // If ByteSize of the splat is bigger than the element size of the 1633 // build_vector, then we have a case where we are checking for a splat where 1634 // multiple elements of the buildvector are folded together into a single 1635 // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8). 1636 unsigned EltSize = 16/N->getNumOperands(); 1637 if (EltSize < ByteSize) { 1638 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval. 1639 SDValue UniquedVals[4]; 1640 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?"); 1641 1642 // See if all of the elements in the buildvector agree across. 1643 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 1644 if (N->getOperand(i).isUndef()) continue; 1645 // If the element isn't a constant, bail fully out. 
1646 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue(); 1647 1648 1649 if (!UniquedVals[i&(Multiple-1)].getNode()) 1650 UniquedVals[i&(Multiple-1)] = N->getOperand(i); 1651 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i)) 1652 return SDValue(); // no match. 1653 } 1654 1655 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains 1656 // either constant or undef values that are identical for each chunk. See 1657 // if these chunks can form into a larger vspltis*. 1658 1659 // Check to see if all of the leading entries are either 0 or -1. If 1660 // neither, then this won't fit into the immediate field. 1661 bool LeadingZero = true; 1662 bool LeadingOnes = true; 1663 for (unsigned i = 0; i != Multiple-1; ++i) { 1664 if (!UniquedVals[i].getNode()) continue; // Must have been undefs. 1665 1666 LeadingZero &= isNullConstant(UniquedVals[i]); 1667 LeadingOnes &= isAllOnesConstant(UniquedVals[i]); 1668 } 1669 // Finally, check the least significant entry. 1670 if (LeadingZero) { 1671 if (!UniquedVals[Multiple-1].getNode()) 1672 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef 1673 int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue(); 1674 if (Val < 16) // 0,0,0,4 -> vspltisw(4) 1675 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32); 1676 } 1677 if (LeadingOnes) { 1678 if (!UniquedVals[Multiple-1].getNode()) 1679 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef 1680 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue(); 1681 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2) 1682 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32); 1683 } 1684 1685 return SDValue(); 1686 } 1687 1688 // Check to see if this buildvec has a single non-undef value in its elements. 1689 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 1690 if (N->getOperand(i).isUndef()) continue; 1691 if (!OpVal.getNode()) 1692 OpVal = N->getOperand(i); 1693 else if (OpVal != N->getOperand(i)) 1694 return SDValue(); 1695 } 1696 1697 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def. 1698 1699 unsigned ValSizeInBytes = EltSize; 1700 uint64_t Value = 0; 1701 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) { 1702 Value = CN->getZExtValue(); 1703 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) { 1704 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!"); 1705 Value = FloatToBits(CN->getValueAPF().convertToFloat()); 1706 } 1707 1708 // If the splat value is larger than the element value, then we can never do 1709 // this splat. The only case that we could fit the replicated bits into our 1710 // immediate field for would be zero, and we prefer to use vxor for it. 1711 if (ValSizeInBytes < ByteSize) return SDValue(); 1712 1713 // If the element value is larger than the splat value, check if it consists 1714 // of a repeated bit pattern of size ByteSize. 1715 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8)) 1716 return SDValue(); 1717 1718 // Properly sign extend the value. 1719 int MaskVal = SignExtend32(Value, ByteSize * 8); 1720 1721 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros. 
1722 if (MaskVal == 0) return SDValue(); 1723 1724 // Finally, if this value fits in a 5 bit sext field, return it 1725 if (SignExtend32<5>(MaskVal) == MaskVal) 1726 return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32); 1727 return SDValue(); 1728 } 1729 1730 /// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift 1731 /// amount, otherwise return -1. 1732 int PPC::isQVALIGNIShuffleMask(SDNode *N) { 1733 EVT VT = N->getValueType(0); 1734 if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1) 1735 return -1; 1736 1737 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); 1738 1739 // Find the first non-undef value in the shuffle mask. 1740 unsigned i; 1741 for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i) 1742 /*search*/; 1743 1744 if (i == 4) return -1; // all undef. 1745 1746 // Otherwise, check to see if the rest of the elements are consecutively 1747 // numbered from this value. 1748 unsigned ShiftAmt = SVOp->getMaskElt(i); 1749 if (ShiftAmt < i) return -1; 1750 ShiftAmt -= i; 1751 1752 // Check the rest of the elements to see if they are consecutive. 1753 for (++i; i != 4; ++i) 1754 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i)) 1755 return -1; 1756 1757 return ShiftAmt; 1758 } 1759 1760 //===----------------------------------------------------------------------===// 1761 // Addressing Mode Selection 1762 //===----------------------------------------------------------------------===// 1763 1764 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit 1765 /// or 64-bit immediate, and if the value can be accurately represented as a 1766 /// sign extension from a 16-bit value. If so, this returns true and the 1767 /// immediate. 1768 static bool isIntS16Immediate(SDNode *N, short &Imm) { 1769 if (!isa<ConstantSDNode>(N)) 1770 return false; 1771 1772 Imm = (short)cast<ConstantSDNode>(N)->getZExtValue(); 1773 if (N->getValueType(0) == MVT::i32) 1774 return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue(); 1775 else 1776 return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue(); 1777 } 1778 static bool isIntS16Immediate(SDValue Op, short &Imm) { 1779 return isIntS16Immediate(Op.getNode(), Imm); 1780 } 1781 1782 /// SelectAddressRegReg - Given the specified address, check to see if it 1783 /// can be represented as an indexed [r+r] operation. Returns false if it 1784 /// can be more efficiently represented with [r+imm]. 1785 bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base, 1786 SDValue &Index, 1787 SelectionDAG &DAG) const { 1788 short imm = 0; 1789 if (N.getOpcode() == ISD::ADD) { 1790 if (isIntS16Immediate(N.getOperand(1), imm)) 1791 return false; // r+i 1792 if (N.getOperand(1).getOpcode() == PPCISD::Lo) 1793 return false; // r+i 1794 1795 Base = N.getOperand(0); 1796 Index = N.getOperand(1); 1797 return true; 1798 } else if (N.getOpcode() == ISD::OR) { 1799 if (isIntS16Immediate(N.getOperand(1), imm)) 1800 return false; // r+i can fold it if we can. 1801 1802 // If this is an or of disjoint bitfields, we can codegen this as an add 1803 // (for better address arithmetic) if the LHS and RHS of the OR are provably 1804 // disjoint.
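// For example, (or (shl X, 4), (and Y, 15)) behaves like an add: the two
// operands can never have a set bit in the same position, so no carry can
// occur.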
1805 APInt LHSKnownZero, LHSKnownOne; 1806 APInt RHSKnownZero, RHSKnownOne; 1807 DAG.computeKnownBits(N.getOperand(0), 1808 LHSKnownZero, LHSKnownOne); 1809 1810 if (LHSKnownZero.getBoolValue()) { 1811 DAG.computeKnownBits(N.getOperand(1), 1812 RHSKnownZero, RHSKnownOne); 1813 // If all of the bits are known zero on the LHS or RHS, the add won't 1814 // carry. 1815 if (~(LHSKnownZero | RHSKnownZero) == 0) { 1816 Base = N.getOperand(0); 1817 Index = N.getOperand(1); 1818 return true; 1819 } 1820 } 1821 } 1822 1823 return false; 1824 } 1825 1826 // If we happen to be doing an i64 load or store into a stack slot that has 1827 // less than a 4-byte alignment, then the frame-index elimination may need to 1828 // use an indexed load or store instruction (because the offset may not be a 1829 // multiple of 4). The extra register needed to hold the offset comes from the 1830 // register scavenger, and it is possible that the scavenger will need to use 1831 // an emergency spill slot. As a result, we need to make sure that a spill slot 1832 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned 1833 // stack slot. 1834 static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) { 1835 // FIXME: This does not handle the LWA case. 1836 if (VT != MVT::i64) 1837 return; 1838 1839 // NOTE: We'll exclude negative FIs here, which come from argument 1840 // lowering, because there are no known test cases triggering this problem 1841 // using packed structures (or similar). We can remove this exclusion if 1842 // we find such a test case. The reason why this is so test-case driven is 1843 // because this entire 'fixup' is only to prevent crashes (from the 1844 // register scavenger) on not-really-valid inputs. For example, if we have: 1845 // %a = alloca i1 1846 // %b = bitcast i1* %a to i64* 1847 // store i64 0, i64* %b 1848 // then the store should really be marked as 'align 1', but is not. If it 1849 // were marked as 'align 1' then the indexed form would have been 1850 // instruction-selected initially, and the problem this 'fixup' is preventing 1851 // won't happen regardless. 1852 if (FrameIdx < 0) 1853 return; 1854 1855 MachineFunction &MF = DAG.getMachineFunction(); 1856 MachineFrameInfo &MFI = MF.getFrameInfo(); 1857 1858 unsigned Align = MFI.getObjectAlignment(FrameIdx); 1859 if (Align >= 4) 1860 return; 1861 1862 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 1863 FuncInfo->setHasNonRISpills(); 1864 } 1865 1866 /// Returns true if the address N can be represented by a base register plus 1867 /// a signed 16-bit displacement [r+imm], and if it is not better 1868 /// represented as reg+reg. If Aligned is true, only accept displacements 1869 /// suitable for STD and friends, i.e. multiples of 4. 1870 bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, 1871 SDValue &Base, 1872 SelectionDAG &DAG, 1873 bool Aligned) const { 1874 // FIXME dl should come from parent load or store, not from address 1875 SDLoc dl(N); 1876 // If this can be more profitably realized as r+r, fail.
1877 if (SelectAddressRegReg(N, Disp, Base, DAG)) 1878 return false; 1879 1880 if (N.getOpcode() == ISD::ADD) { 1881 short imm = 0; 1882 if (isIntS16Immediate(N.getOperand(1), imm) && 1883 (!Aligned || (imm & 3) == 0)) { 1884 Disp = DAG.getTargetConstant(imm, dl, N.getValueType()); 1885 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) { 1886 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); 1887 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); 1888 } else { 1889 Base = N.getOperand(0); 1890 } 1891 return true; // [r+i] 1892 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) { 1893 // Match LOAD (ADD (X, Lo(G))). 1894 assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue() 1895 && "Cannot handle constant offsets yet!"); 1896 Disp = N.getOperand(1).getOperand(0); // The global address. 1897 assert(Disp.getOpcode() == ISD::TargetGlobalAddress || 1898 Disp.getOpcode() == ISD::TargetGlobalTLSAddress || 1899 Disp.getOpcode() == ISD::TargetConstantPool || 1900 Disp.getOpcode() == ISD::TargetJumpTable); 1901 Base = N.getOperand(0); 1902 return true; // [&g+r] 1903 } 1904 } else if (N.getOpcode() == ISD::OR) { 1905 short imm = 0; 1906 if (isIntS16Immediate(N.getOperand(1), imm) && 1907 (!Aligned || (imm & 3) == 0)) { 1908 // If this is an or of disjoint bitfields, we can codegen this as an add 1909 // (for better address arithmetic) if the LHS and RHS of the OR are 1910 // provably disjoint. 1911 APInt LHSKnownZero, LHSKnownOne; 1912 DAG.computeKnownBits(N.getOperand(0), LHSKnownZero, LHSKnownOne); 1913 1914 if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) { 1915 // If all of the bits are known zero on the LHS or RHS, the add won't 1916 // carry. 1917 if (FrameIndexSDNode *FI = 1918 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) { 1919 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); 1920 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); 1921 } else { 1922 Base = N.getOperand(0); 1923 } 1924 Disp = DAG.getTargetConstant(imm, dl, N.getValueType()); 1925 return true; 1926 } 1927 } 1928 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) { 1929 // Loading from a constant address. 1930 1931 // If this address fits entirely in a 16-bit sext immediate field, codegen 1932 // this as "d, 0" 1933 short Imm; 1934 if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) { 1935 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0)); 1936 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, 1937 CN->getValueType(0)); 1938 return true; 1939 } 1940 1941 // Handle 32-bit sext immediates with LIS + addr mode. 1942 if ((CN->getValueType(0) == MVT::i32 || 1943 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) && 1944 (!Aligned || (CN->getZExtValue() & 3) == 0)) { 1945 int Addr = (int)CN->getZExtValue(); 1946 1947 // Otherwise, break this down into an LIS + disp. 1948 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32); 1949 1950 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl, 1951 MVT::i32); 1952 unsigned Opc = CN->getValueType(0) == MVT::i32 ? 
PPC::LIS : PPC::LIS8; 1953 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0); 1954 return true; 1955 } 1956 } 1957 1958 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout())); 1959 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) { 1960 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); 1961 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); 1962 } else 1963 Base = N; 1964 return true; // [r+0] 1965 } 1966 1967 /// SelectAddressRegRegOnly - Given the specified address, force it to be 1968 /// represented as an indexed [r+r] operation. 1969 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, 1970 SDValue &Index, 1971 SelectionDAG &DAG) const { 1972 // Check to see if we can easily represent this as an [r+r] address. This 1973 // will fail if it thinks that the address is more profitably represented as 1974 // reg+imm, e.g. where imm = 0. 1975 if (SelectAddressRegReg(N, Base, Index, DAG)) 1976 return true; 1977 1978 // If the operand is an addition, always emit this as [r+r], since this is 1979 // better (for code size, and execution, as the memop does the add for free) 1980 // than emitting an explicit add. 1981 if (N.getOpcode() == ISD::ADD) { 1982 Base = N.getOperand(0); 1983 Index = N.getOperand(1); 1984 return true; 1985 } 1986 1987 // Otherwise, do it the hard way, using R0 as the base register. 1988 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, 1989 N.getValueType()); 1990 Index = N; 1991 return true; 1992 } 1993 1994 /// getPreIndexedAddressParts - Returns true by value, and sets the base 1995 /// pointer, offset pointer and addressing mode by reference, if the node's 1996 /// address can be legally represented as a pre-indexed load / store address. 1997 bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, 1998 SDValue &Offset, 1999 ISD::MemIndexedMode &AM, 2000 SelectionDAG &DAG) const { 2001 if (DisablePPCPreinc) return false; 2002 2003 bool isLoad = true; 2004 SDValue Ptr; 2005 EVT VT; 2006 unsigned Alignment; 2007 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 2008 Ptr = LD->getBasePtr(); 2009 VT = LD->getMemoryVT(); 2010 Alignment = LD->getAlignment(); 2011 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 2012 Ptr = ST->getBasePtr(); 2013 VT = ST->getMemoryVT(); 2014 Alignment = ST->getAlignment(); 2015 isLoad = false; 2016 } else 2017 return false; 2018 2019 // PowerPC doesn't have preinc load/store instructions for vectors (except 2020 // for QPX, which does have preinc r+r forms). 2021 if (VT.isVector()) { 2022 if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) { 2023 return false; 2024 } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) { 2025 AM = ISD::PRE_INC; 2026 return true; 2027 } 2028 } 2029 2030 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) { 2031 2032 // Common code will reject creating a pre-inc form if the base pointer 2033 // is a frame index, or if N is a store and the base pointer is either 2034 // the same as or a predecessor of the value being stored. Check for 2035 // those situations here, and try with swapped Base/Offset instead.
2036 bool Swap = false; 2037 2038 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base)) 2039 Swap = true; 2040 else if (!isLoad) { 2041 SDValue Val = cast<StoreSDNode>(N)->getValue(); 2042 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode())) 2043 Swap = true; 2044 } 2045 2046 if (Swap) 2047 std::swap(Base, Offset); 2048 2049 AM = ISD::PRE_INC; 2050 return true; 2051 } 2052 2053 // LDU/STU can only handle immediates that are a multiple of 4. 2054 if (VT != MVT::i64) { 2055 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false)) 2056 return false; 2057 } else { 2058 // LDU/STU need an address with at least 4-byte alignment. 2059 if (Alignment < 4) 2060 return false; 2061 2062 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true)) 2063 return false; 2064 } 2065 2066 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 2067 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of 2068 // sext i32 to i64 when addr mode is r+i. 2069 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 && 2070 LD->getExtensionType() == ISD::SEXTLOAD && 2071 isa<ConstantSDNode>(Offset)) 2072 return false; 2073 } 2074 2075 AM = ISD::PRE_INC; 2076 return true; 2077 } 2078 2079 //===----------------------------------------------------------------------===// 2080 // LowerOperation implementation 2081 //===----------------------------------------------------------------------===// 2082 2083 /// Return true if we should reference labels using a PICBase, set the HiOpFlags 2084 /// and LoOpFlags to the target MO flags. 2085 static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, 2086 unsigned &HiOpFlags, unsigned &LoOpFlags, 2087 const GlobalValue *GV = nullptr) { 2088 HiOpFlags = PPCII::MO_HA; 2089 LoOpFlags = PPCII::MO_LO; 2090 2091 // Don't use the pic base if not in PIC relocation model. 2092 if (IsPIC) { 2093 HiOpFlags |= PPCII::MO_PIC_FLAG; 2094 LoOpFlags |= PPCII::MO_PIC_FLAG; 2095 } 2096 2097 // If this is a reference to a global value that requires a non-lazy-ptr, make 2098 // sure that instruction lowering adds it. 2099 if (GV && Subtarget.hasLazyResolverStub(GV)) { 2100 HiOpFlags |= PPCII::MO_NLP_FLAG; 2101 LoOpFlags |= PPCII::MO_NLP_FLAG; 2102 2103 if (GV->hasHiddenVisibility()) { 2104 HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG; 2105 LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG; 2106 } 2107 } 2108 } 2109 2110 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, 2111 SelectionDAG &DAG) { 2112 SDLoc DL(HiPart); 2113 EVT PtrVT = HiPart.getValueType(); 2114 SDValue Zero = DAG.getConstant(0, DL, PtrVT); 2115 2116 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero); 2117 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero); 2118 2119 // With PIC, the first instruction is actually "GR+hi(&G)". 2120 if (isPIC) 2121 Hi = DAG.getNode(ISD::ADD, DL, PtrVT, 2122 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi); 2123 2124 // Generate non-pic code that has direct accesses to the constant pool. 2125 // The address of the global is just (hi(&g)+lo(&g)). 2126 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo); 2127 } 2128 2129 static void setUsesTOCBasePtr(MachineFunction &MF) { 2130 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 2131 FuncInfo->setUsesTOCBasePtr(); 2132 } 2133 2134 static void setUsesTOCBasePtr(SelectionDAG &DAG) { 2135 setUsesTOCBasePtr(DAG.getMachineFunction()); 2136 } 2137 2138 static SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, bool Is64Bit, 2139 SDValue GA) { 2140 EVT VT = Is64Bit ? 
MVT::i64 : MVT::i32; 2141 SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) : 2142 DAG.getNode(PPCISD::GlobalBaseReg, dl, VT); 2143 2144 SDValue Ops[] = { GA, Reg }; 2145 return DAG.getMemIntrinsicNode( 2146 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT, 2147 MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0, false, true, 2148 false, 0); 2149 } 2150 2151 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, 2152 SelectionDAG &DAG) const { 2153 EVT PtrVT = Op.getValueType(); 2154 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 2155 const Constant *C = CP->getConstVal(); 2156 2157 // 64-bit SVR4 ABI code is always position-independent. 2158 // The actual address of the GlobalValue is stored in the TOC. 2159 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { 2160 setUsesTOCBasePtr(DAG); 2161 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0); 2162 return getTOCEntry(DAG, SDLoc(CP), true, GA); 2163 } 2164 2165 unsigned MOHiFlag, MOLoFlag; 2166 bool IsPIC = isPositionIndependent(); 2167 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag); 2168 2169 if (IsPIC && Subtarget.isSVR4ABI()) { 2170 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 2171 PPCII::MO_PIC_FLAG); 2172 return getTOCEntry(DAG, SDLoc(CP), false, GA); 2173 } 2174 2175 SDValue CPIHi = 2176 DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag); 2177 SDValue CPILo = 2178 DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag); 2179 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG); 2180 } 2181 2182 SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { 2183 EVT PtrVT = Op.getValueType(); 2184 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); 2185 2186 // 64-bit SVR4 ABI code is always position-independent. 2187 // The actual address of the GlobalValue is stored in the TOC. 2188 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { 2189 setUsesTOCBasePtr(DAG); 2190 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); 2191 return getTOCEntry(DAG, SDLoc(JT), true, GA); 2192 } 2193 2194 unsigned MOHiFlag, MOLoFlag; 2195 bool IsPIC = isPositionIndependent(); 2196 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag); 2197 2198 if (IsPIC && Subtarget.isSVR4ABI()) { 2199 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, 2200 PPCII::MO_PIC_FLAG); 2201 return getTOCEntry(DAG, SDLoc(GA), false, GA); 2202 } 2203 2204 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag); 2205 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag); 2206 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG); 2207 } 2208 2209 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, 2210 SelectionDAG &DAG) const { 2211 EVT PtrVT = Op.getValueType(); 2212 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op); 2213 const BlockAddress *BA = BASDN->getBlockAddress(); 2214 2215 // 64-bit SVR4 ABI code is always position-independent. 2216 // The actual BlockAddress is stored in the TOC. 
2217 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { 2218 setUsesTOCBasePtr(DAG); 2219 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()); 2220 return getTOCEntry(DAG, SDLoc(BASDN), true, GA); 2221 } 2222 2223 unsigned MOHiFlag, MOLoFlag; 2224 bool IsPIC = isPositionIndependent(); 2225 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag); 2226 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag); 2227 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag); 2228 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG); 2229 } 2230 2231 SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, 2232 SelectionDAG &DAG) const { 2233 2234 // FIXME: TLS addresses currently use medium model code sequences, 2235 // which is the most useful form. Eventually support for small and 2236 // large models could be added if users need it, at the cost of 2237 // additional complexity. 2238 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); 2239 if (DAG.getTarget().Options.EmulatedTLS) 2240 return LowerToTLSEmulatedModel(GA, DAG); 2241 2242 SDLoc dl(GA); 2243 const GlobalValue *GV = GA->getGlobal(); 2244 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2245 bool is64bit = Subtarget.isPPC64(); 2246 const Module *M = DAG.getMachineFunction().getFunction()->getParent(); 2247 PICLevel::Level picLevel = M->getPICLevel(); 2248 2249 TLSModel::Model Model = getTargetMachine().getTLSModel(GV); 2250 2251 if (Model == TLSModel::LocalExec) { 2252 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 2253 PPCII::MO_TPREL_HA); 2254 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 2255 PPCII::MO_TPREL_LO); 2256 SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2, 2257 is64bit ? MVT::i64 : MVT::i32); 2258 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg); 2259 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi); 2260 } 2261 2262 if (Model == TLSModel::InitialExec) { 2263 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); 2264 SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 2265 PPCII::MO_TLS); 2266 SDValue GOTPtr; 2267 if (is64bit) { 2268 setUsesTOCBasePtr(DAG); 2269 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); 2270 GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, 2271 PtrVT, GOTReg, TGA); 2272 } else 2273 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT); 2274 SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, 2275 PtrVT, TGA, GOTPtr); 2276 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS); 2277 } 2278 2279 if (Model == TLSModel::GeneralDynamic) { 2280 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); 2281 SDValue GOTPtr; 2282 if (is64bit) { 2283 setUsesTOCBasePtr(DAG); 2284 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); 2285 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT, 2286 GOTReg, TGA); 2287 } else { 2288 if (picLevel == PICLevel::SmallPIC) 2289 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT); 2290 else 2291 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); 2292 } 2293 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT, 2294 GOTPtr, TGA, TGA); 2295 } 2296 2297 if (Model == TLSModel::LocalDynamic) { 2298 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); 2299 SDValue GOTPtr; 2300 if (is64bit) { 2301 setUsesTOCBasePtr(DAG); 2302 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); 2303 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT, 2304 GOTReg, TGA); 2305 } else { 2306 if (picLevel == 
PICLevel::SmallPIC) 2307 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT); 2308 else 2309 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); 2310 } 2311 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl, 2312 PtrVT, GOTPtr, TGA, TGA); 2313 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, 2314 PtrVT, TLSAddr, TGA); 2315 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA); 2316 } 2317 2318 llvm_unreachable("Unknown TLS model!"); 2319 } 2320 2321 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, 2322 SelectionDAG &DAG) const { 2323 EVT PtrVT = Op.getValueType(); 2324 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); 2325 SDLoc DL(GSDN); 2326 const GlobalValue *GV = GSDN->getGlobal(); 2327 2328 // 64-bit SVR4 ABI code is always position-independent. 2329 // The actual address of the GlobalValue is stored in the TOC. 2330 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { 2331 setUsesTOCBasePtr(DAG); 2332 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset()); 2333 return getTOCEntry(DAG, DL, true, GA); 2334 } 2335 2336 unsigned MOHiFlag, MOLoFlag; 2337 bool IsPIC = isPositionIndependent(); 2338 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV); 2339 2340 if (IsPIC && Subtarget.isSVR4ABI()) { 2341 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 2342 GSDN->getOffset(), 2343 PPCII::MO_PIC_FLAG); 2344 return getTOCEntry(DAG, DL, false, GA); 2345 } 2346 2347 SDValue GAHi = 2348 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag); 2349 SDValue GALo = 2350 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag); 2351 2352 SDValue Ptr = LowerLabelRef(GAHi, GALo, IsPIC, DAG); 2353 2354 // If the global reference is actually to a non-lazy-pointer, we have to do an 2355 // extra load to get the address of the global. 2356 if (MOHiFlag & PPCII::MO_NLP_FLAG) 2357 Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo()); 2358 return Ptr; 2359 } 2360 2361 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { 2362 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); 2363 SDLoc dl(Op); 2364 2365 if (Op.getValueType() == MVT::v2i64) { 2366 // When the operands themselves are v2i64 values, we need to do something 2367 // special because VSX has no underlying comparison operations for these. 2368 if (Op.getOperand(0).getValueType() == MVT::v2i64) { 2369 // Equality can be handled by casting to the legal type for Altivec 2370 // comparisons, everything else needs to be expanded. 2371 if (CC == ISD::SETEQ || CC == ISD::SETNE) { 2372 return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, 2373 DAG.getSetCC(dl, MVT::v4i32, 2374 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)), 2375 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)), 2376 CC)); 2377 } 2378 2379 return SDValue(); 2380 } 2381 2382 // We handle most of these in the usual way. 2383 return Op; 2384 } 2385 2386 // If we're comparing for equality to zero, expose the fact that this is 2387 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can 2388 // fold the new nodes. 2389 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG)) 2390 return V; 2391 2392 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 2393 // Leave comparisons against 0 and -1 alone for now, since they're usually 2394 // optimized. FIXME: revisit this when we can custom lower all setcc 2395 // optimizations. 
2396 if (C->isAllOnesValue() || C->isNullValue()) 2397 return SDValue(); 2398 } 2399 2400 // If we have an integer seteq/setne, turn it into a compare against zero 2401 // by xor'ing the rhs with the lhs, which is faster than setting a 2402 // condition register, reading it back out, and masking the correct bit. The 2403 // normal approach here uses sub to do this instead of xor. Using xor exposes 2404 // the result to other bit-twiddling opportunities. 2405 EVT LHSVT = Op.getOperand(0).getValueType(); 2406 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { 2407 EVT VT = Op.getValueType(); 2408 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0), 2409 Op.getOperand(1)); 2410 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC); 2411 } 2412 return SDValue(); 2413 } 2414 2415 SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { 2416 SDNode *Node = Op.getNode(); 2417 EVT VT = Node->getValueType(0); 2418 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2419 SDValue InChain = Node->getOperand(0); 2420 SDValue VAListPtr = Node->getOperand(1); 2421 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); 2422 SDLoc dl(Node); 2423 2424 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only"); 2425 2426 // gpr_index 2427 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, 2428 VAListPtr, MachinePointerInfo(SV), MVT::i8); 2429 InChain = GprIndex.getValue(1); 2430 2431 if (VT == MVT::i64) { 2432 // Check if GprIndex is even 2433 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex, 2434 DAG.getConstant(1, dl, MVT::i32)); 2435 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd, 2436 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE); 2437 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex, 2438 DAG.getConstant(1, dl, MVT::i32)); 2439 // Align GprIndex to be even if it isn't 2440 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne, 2441 GprIndex); 2442 } 2443 2444 // fpr index is 1 byte after gpr 2445 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, 2446 DAG.getConstant(1, dl, MVT::i32)); 2447 2448 // fpr 2449 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, 2450 FprPtr, MachinePointerInfo(SV), MVT::i8); 2451 InChain = FprIndex.getValue(1); 2452 2453 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, 2454 DAG.getConstant(8, dl, MVT::i32)); 2455 2456 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, 2457 DAG.getConstant(4, dl, MVT::i32)); 2458 2459 // areas 2460 SDValue OverflowArea = 2461 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo()); 2462 InChain = OverflowArea.getValue(1); 2463 2464 SDValue RegSaveArea = 2465 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo()); 2466 InChain = RegSaveArea.getValue(1); 2467 2468 // select overflow_area if index > 8 2469 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex, 2470 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT); 2471 2472 // adjustment constant gpr_index * 4/8 2473 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32, 2474 VT.isInteger() ? GprIndex : FprIndex, 2475 DAG.getConstant(VT.isInteger() ? 
4 : 8, dl, 2476 MVT::i32)); 2477 2478 // OurReg = RegSaveArea + RegConstant 2479 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea, 2480 RegConstant); 2481 2482 // Floating types are 32 bytes into RegSaveArea 2483 if (VT.isFloatingPoint()) 2484 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg, 2485 DAG.getConstant(32, dl, MVT::i32)); 2486 2487 // increase {f,g}pr_index by 1 (or 2 if VT is i64) 2488 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32, 2489 VT.isInteger() ? GprIndex : FprIndex, 2490 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl, 2491 MVT::i32)); 2492 2493 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1, 2494 VT.isInteger() ? VAListPtr : FprPtr, 2495 MachinePointerInfo(SV), MVT::i8); 2496 2497 // determine if we should load from reg_save_area or overflow_area 2498 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea); 2499 2500 // increase overflow_area by 4/8 if gpr/fpr > 8 2501 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea, 2502 DAG.getConstant(VT.isInteger() ? 4 : 8, 2503 dl, MVT::i32)); 2504 2505 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea, 2506 OverflowAreaPlusN); 2507 2508 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr, 2509 MachinePointerInfo(), MVT::i32); 2510 2511 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo()); 2512 } 2513 2514 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { 2515 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only"); 2516 2517 // We have to copy the entire va_list struct: 2518 // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte 2519 return DAG.getMemcpy(Op.getOperand(0), Op, 2520 Op.getOperand(1), Op.getOperand(2), 2521 DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, false, true, 2522 false, MachinePointerInfo(), MachinePointerInfo()); 2523 } 2524 2525 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, 2526 SelectionDAG &DAG) const { 2527 return Op.getOperand(0); 2528 } 2529 2530 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, 2531 SelectionDAG &DAG) const { 2532 SDValue Chain = Op.getOperand(0); 2533 SDValue Trmp = Op.getOperand(1); // trampoline 2534 SDValue FPtr = Op.getOperand(2); // nested function 2535 SDValue Nest = Op.getOperand(3); // 'nest' parameter value 2536 SDLoc dl(Op); 2537 2538 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2539 bool isPPC64 = (PtrVT == MVT::i64); 2540 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); 2541 2542 TargetLowering::ArgListTy Args; 2543 TargetLowering::ArgListEntry Entry; 2544 2545 Entry.Ty = IntPtrTy; 2546 Entry.Node = Trmp; Args.push_back(Entry); 2547 2548 // TrampSize == (isPPC64 ? 48 : 40); 2549 Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl, 2550 isPPC64 ? 
MVT::i64 : MVT::i32); 2551 Args.push_back(Entry); 2552 2553 Entry.Node = FPtr; Args.push_back(Entry); 2554 Entry.Node = Nest; Args.push_back(Entry); 2555 2556 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) 2557 TargetLowering::CallLoweringInfo CLI(DAG); 2558 CLI.setDebugLoc(dl).setChain(Chain) 2559 .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), 2560 DAG.getExternalSymbol("__trampoline_setup", PtrVT), 2561 std::move(Args)); 2562 2563 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); 2564 return CallResult.second; 2565 } 2566 2567 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { 2568 MachineFunction &MF = DAG.getMachineFunction(); 2569 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 2570 EVT PtrVT = getPointerTy(MF.getDataLayout()); 2571 2572 SDLoc dl(Op); 2573 2574 if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) { 2575 // vastart just stores the address of the VarArgsFrameIndex slot into the 2576 // memory location argument. 2577 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 2578 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 2579 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), 2580 MachinePointerInfo(SV)); 2581 } 2582 2583 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct. 2584 // We suppose the given va_list is already allocated. 2585 // 2586 // typedef struct { 2587 // char gpr; /* index into the array of 8 GPRs 2588 // * stored in the register save area 2589 // * gpr=0 corresponds to r3, 2590 // * gpr=1 to r4, etc. 2591 // */ 2592 // char fpr; /* index into the array of 8 FPRs 2593 // * stored in the register save area 2594 // * fpr=0 corresponds to f1, 2595 // * fpr=1 to f2, etc. 
2596 // */ 2597 // char *overflow_arg_area; 2598 // /* location on stack that holds 2599 // * the next overflow argument 2600 // */ 2601 // char *reg_save_area; 2602 // /* where r3:r10 and f1:f8 (if saved) 2603 // * are stored 2604 // */ 2605 // } va_list[1]; 2606 2607 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32); 2608 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32); 2609 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(), 2610 PtrVT); 2611 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), 2612 PtrVT); 2613 2614 uint64_t FrameOffset = PtrVT.getSizeInBits()/8; 2615 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT); 2616 2617 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1; 2618 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT); 2619 2620 uint64_t FPROffset = 1; 2621 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT); 2622 2623 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 2624 2625 // Store first byte : number of int regs 2626 SDValue firstStore = 2627 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1), 2628 MachinePointerInfo(SV), MVT::i8); 2629 uint64_t nextOffset = FPROffset; 2630 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1), 2631 ConstFPROffset); 2632 2633 // Store second byte : number of float regs 2634 SDValue secondStore = 2635 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, 2636 MachinePointerInfo(SV, nextOffset), MVT::i8); 2637 nextOffset += StackOffset; 2638 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset); 2639 2640 // Store second word : arguments given on stack 2641 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, 2642 MachinePointerInfo(SV, nextOffset)); 2643 nextOffset += FrameOffset; 2644 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset); 2645 2646 // Store third word : arguments given in registers 2647 return DAG.getStore(thirdStore, dl, FR, nextPtr, 2648 MachinePointerInfo(SV, nextOffset)); 2649 } 2650 2651 #include "PPCGenCallingConv.inc" 2652 2653 // Function whose sole purpose is to kill compiler warnings 2654 // stemming from unused functions included from PPCGenCallingConv.inc. 2655 CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const { 2656 return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS; 2657 } 2658 2659 bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, 2660 CCValAssign::LocInfo &LocInfo, 2661 ISD::ArgFlagsTy &ArgFlags, 2662 CCState &State) { 2663 return true; 2664 } 2665 2666 bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, 2667 MVT &LocVT, 2668 CCValAssign::LocInfo &LocInfo, 2669 ISD::ArgFlagsTy &ArgFlags, 2670 CCState &State) { 2671 static const MCPhysReg ArgRegs[] = { 2672 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 2673 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 2674 }; 2675 const unsigned NumArgRegs = array_lengthof(ArgRegs); 2676 2677 unsigned RegNum = State.getFirstUnallocated(ArgRegs); 2678 2679 // Skip one register if the first unallocated register has an even register 2680 // number and there are still argument registers available which have not been 2681 // allocated yet. RegNum is actually an index into ArgRegs, which means we 2682 // need to skip a register if RegNum is odd. 
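// For example, if r3 already holds an earlier argument, the first unallocated
// register is r4 (RegNum == 1); r4 is skipped so that the 64-bit argument is
// passed in the aligned r5/r6 pair.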
2683 if (RegNum != NumArgRegs && RegNum % 2 == 1) { 2684 State.AllocateReg(ArgRegs[RegNum]); 2685 } 2686 2687 // Always return false here, as this function only makes sure that the first 2688 // unallocated register has an odd register number and does not actually 2689 // allocate a register for the current argument. 2690 return false; 2691 } 2692 2693 bool 2694 llvm::CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT, 2695 MVT &LocVT, 2696 CCValAssign::LocInfo &LocInfo, 2697 ISD::ArgFlagsTy &ArgFlags, 2698 CCState &State) { 2699 static const MCPhysReg ArgRegs[] = { 2700 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 2701 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 2702 }; 2703 const unsigned NumArgRegs = array_lengthof(ArgRegs); 2704 2705 unsigned RegNum = State.getFirstUnallocated(ArgRegs); 2706 int RegsLeft = NumArgRegs - RegNum; 2707 2708 // Skip if there are not enough registers left for the long double type (4 gpr 2709 // regs in soft float mode), and put the long double argument on the stack. 2710 if (RegNum != NumArgRegs && RegsLeft < 4) { 2711 for (int i = 0; i < RegsLeft; i++) { 2712 State.AllocateReg(ArgRegs[RegNum + i]); 2713 } 2714 } 2715 2716 return false; 2717 } 2718 2719 bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, 2720 MVT &LocVT, 2721 CCValAssign::LocInfo &LocInfo, 2722 ISD::ArgFlagsTy &ArgFlags, 2723 CCState &State) { 2724 static const MCPhysReg ArgRegs[] = { 2725 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, 2726 PPC::F8 2727 }; 2728 2729 const unsigned NumArgRegs = array_lengthof(ArgRegs); 2730 2731 unsigned RegNum = State.getFirstUnallocated(ArgRegs); 2732 2733 // If there is only one Floating-point register left we need to put both f64 2734 // values of a split ppc_fp128 value on the stack. 2735 if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) { 2736 State.AllocateReg(ArgRegs[RegNum]); 2737 } 2738 2739 // Always return false here, as this function only makes sure that the two f64 2740 // values a ppc_fp128 value is split into are both passed in registers or both 2741 // passed on the stack and does not actually allocate a register for the 2742 // current argument. 2743 return false; 2744 } 2745 2746 /// FPR - The set of FP registers that should be allocated for arguments 2747 /// on Darwin. 2748 static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, 2749 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10, 2750 PPC::F11, PPC::F12, PPC::F13}; 2751 2752 /// QFPR - The set of QPX registers that should be allocated for arguments. 2753 static const MCPhysReg QFPR[] = { 2754 PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7, 2755 PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13}; 2756 2757 /// CalculateStackSlotSize - Calculates the size reserved for this argument on 2758 /// the stack. 2759 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, 2760 unsigned PtrByteSize) { 2761 unsigned ArgSize = ArgVT.getStoreSize(); 2762 if (Flags.isByVal()) 2763 ArgSize = Flags.getByValSize(); 2764 2765 // Round up to multiples of the pointer size, except for array members, 2766 // which are always packed. 2767 if (!Flags.isInConsecutiveRegs()) 2768 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 2769 2770 return ArgSize; 2771 } 2772 2773 /// CalculateStackSlotAlignment - Calculates the alignment of this argument 2774 /// on the stack.
2775 static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, 2776 ISD::ArgFlagsTy Flags, 2777 unsigned PtrByteSize) { 2778 unsigned Align = PtrByteSize; 2779 2780 // Altivec parameters are padded to a 16 byte boundary. 2781 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || 2782 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || 2783 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 || 2784 ArgVT == MVT::v1i128) 2785 Align = 16; 2786 // QPX vector types stored in double-precision are padded to a 32 byte 2787 // boundary. 2788 else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1) 2789 Align = 32; 2790 2791 // ByVal parameters are aligned as requested. 2792 if (Flags.isByVal()) { 2793 unsigned BVAlign = Flags.getByValAlign(); 2794 if (BVAlign > PtrByteSize) { 2795 if (BVAlign % PtrByteSize != 0) 2796 llvm_unreachable( 2797 "ByVal alignment is not a multiple of the pointer size"); 2798 2799 Align = BVAlign; 2800 } 2801 } 2802 2803 // Array members are always packed to their original alignment. 2804 if (Flags.isInConsecutiveRegs()) { 2805 // If the array member was split into multiple registers, the first 2806 // needs to be aligned to the size of the full type. (Except for 2807 // ppcf128, which is only aligned as its f64 components.) 2808 if (Flags.isSplit() && OrigVT != MVT::ppcf128) 2809 Align = OrigVT.getStoreSize(); 2810 else 2811 Align = ArgVT.getStoreSize(); 2812 } 2813 2814 return Align; 2815 } 2816 2817 /// CalculateStackSlotUsed - Return whether this argument will use its 2818 /// stack slot (instead of being passed in registers). ArgOffset, 2819 /// AvailableFPRs, and AvailableVRs must hold the current argument 2820 /// position, and will be updated to account for this argument. 2821 static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, 2822 ISD::ArgFlagsTy Flags, 2823 unsigned PtrByteSize, 2824 unsigned LinkageSize, 2825 unsigned ParamAreaSize, 2826 unsigned &ArgOffset, 2827 unsigned &AvailableFPRs, 2828 unsigned &AvailableVRs, bool HasQPX) { 2829 bool UseMemory = false; 2830 2831 // Respect alignment of argument on the stack. 2832 unsigned Align = 2833 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); 2834 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; 2835 // If there's no space left in the argument save area, we must 2836 // use memory (this check also catches zero-sized arguments). 2837 if (ArgOffset >= LinkageSize + ParamAreaSize) 2838 UseMemory = true; 2839 2840 // Allocate argument on the stack. 2841 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); 2842 if (Flags.isInConsecutiveRegsLast()) 2843 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 2844 // If we overran the argument save area, we must use memory 2845 // (this check catches arguments passed partially in memory) 2846 if (ArgOffset > LinkageSize + ParamAreaSize) 2847 UseMemory = true; 2848 2849 // However, if the argument is actually passed in an FPR or a VR, 2850 // we don't use memory after all. 2851 if (!Flags.isByVal()) { 2852 if (ArgVT == MVT::f32 || ArgVT == MVT::f64 || 2853 // QPX registers overlap with the scalar FP registers. 
2854 (HasQPX && (ArgVT == MVT::v4f32 || 2855 ArgVT == MVT::v4f64 || 2856 ArgVT == MVT::v4i1))) 2857 if (AvailableFPRs > 0) { 2858 --AvailableFPRs; 2859 return false; 2860 } 2861 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || 2862 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || 2863 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 || 2864 ArgVT == MVT::v1i128) 2865 if (AvailableVRs > 0) { 2866 --AvailableVRs; 2867 return false; 2868 } 2869 } 2870 2871 return UseMemory; 2872 } 2873 2874 /// EnsureStackAlignment - Round stack frame size up from NumBytes to 2875 /// ensure minimum alignment required for target. 2876 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, 2877 unsigned NumBytes) { 2878 unsigned TargetAlign = Lowering->getStackAlignment(); 2879 unsigned AlignMask = TargetAlign - 1; 2880 NumBytes = (NumBytes + AlignMask) & ~AlignMask; 2881 return NumBytes; 2882 } 2883 2884 SDValue PPCTargetLowering::LowerFormalArguments( 2885 SDValue Chain, CallingConv::ID CallConv, bool isVarArg, 2886 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 2887 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 2888 if (Subtarget.isSVR4ABI()) { 2889 if (Subtarget.isPPC64()) 2890 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, 2891 dl, DAG, InVals); 2892 else 2893 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, 2894 dl, DAG, InVals); 2895 } else { 2896 return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, 2897 dl, DAG, InVals); 2898 } 2899 } 2900 2901 SDValue PPCTargetLowering::LowerFormalArguments_32SVR4( 2902 SDValue Chain, CallingConv::ID CallConv, bool isVarArg, 2903 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 2904 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 2905 2906 // 32-bit SVR4 ABI Stack Frame Layout: 2907 // +-----------------------------------+ 2908 // +--> | Back chain | 2909 // | +-----------------------------------+ 2910 // | | Floating-point register save area | 2911 // | +-----------------------------------+ 2912 // | | General register save area | 2913 // | +-----------------------------------+ 2914 // | | CR save word | 2915 // | +-----------------------------------+ 2916 // | | VRSAVE save word | 2917 // | +-----------------------------------+ 2918 // | | Alignment padding | 2919 // | +-----------------------------------+ 2920 // | | Vector register save area | 2921 // | +-----------------------------------+ 2922 // | | Local variable space | 2923 // | +-----------------------------------+ 2924 // | | Parameter list area | 2925 // | +-----------------------------------+ 2926 // | | LR save word | 2927 // | +-----------------------------------+ 2928 // SP--> +--- | Back chain | 2929 // +-----------------------------------+ 2930 // 2931 // Specifications: 2932 // System V Application Binary Interface PowerPC Processor Supplement 2933 // AltiVec Technology Programming Interface Manual 2934 2935 MachineFunction &MF = DAG.getMachineFunction(); 2936 MachineFrameInfo &MFI = MF.getFrameInfo(); 2937 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 2938 2939 EVT PtrVT = getPointerTy(MF.getDataLayout()); 2940 // Potential tail calls could cause overwriting of argument stack slots. 2941 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && 2942 (CallConv == CallingConv::Fast)); 2943 unsigned PtrByteSize = 4; 2944 2945 // Assign locations to all of the incoming arguments. 
2946 SmallVector<CCValAssign, 16> ArgLocs; 2947 PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, 2948 *DAG.getContext()); 2949 2950 // Reserve space for the linkage area on the stack. 2951 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); 2952 CCInfo.AllocateStack(LinkageSize, PtrByteSize); 2953 if (useSoftFloat()) 2954 CCInfo.PreAnalyzeFormalArguments(Ins); 2955 2956 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4); 2957 CCInfo.clearWasPPCF128(); 2958 2959 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 2960 CCValAssign &VA = ArgLocs[i]; 2961 2962 // Arguments stored in registers. 2963 if (VA.isRegLoc()) { 2964 const TargetRegisterClass *RC; 2965 EVT ValVT = VA.getValVT(); 2966 2967 switch (ValVT.getSimpleVT().SimpleTy) { 2968 default: 2969 llvm_unreachable("ValVT not supported by formal arguments Lowering"); 2970 case MVT::i1: 2971 case MVT::i32: 2972 RC = &PPC::GPRCRegClass; 2973 break; 2974 case MVT::f32: 2975 if (Subtarget.hasP8Vector()) 2976 RC = &PPC::VSSRCRegClass; 2977 else 2978 RC = &PPC::F4RCRegClass; 2979 break; 2980 case MVT::f64: 2981 if (Subtarget.hasVSX()) 2982 RC = &PPC::VSFRCRegClass; 2983 else 2984 RC = &PPC::F8RCRegClass; 2985 break; 2986 case MVT::v16i8: 2987 case MVT::v8i16: 2988 case MVT::v4i32: 2989 RC = &PPC::VRRCRegClass; 2990 break; 2991 case MVT::v4f32: 2992 RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass; 2993 break; 2994 case MVT::v2f64: 2995 case MVT::v2i64: 2996 RC = &PPC::VRRCRegClass; 2997 break; 2998 case MVT::v4f64: 2999 RC = &PPC::QFRCRegClass; 3000 break; 3001 case MVT::v4i1: 3002 RC = &PPC::QBRCRegClass; 3003 break; 3004 } 3005 3006 // Transform the arguments stored in physical registers into virtual ones. 3007 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 3008 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, 3009 ValVT == MVT::i1 ? MVT::i32 : ValVT); 3010 3011 if (ValVT == MVT::i1) 3012 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue); 3013 3014 InVals.push_back(ArgValue); 3015 } else { 3016 // Argument stored in memory. 3017 assert(VA.isMemLoc()); 3018 3019 unsigned ArgSize = VA.getLocVT().getStoreSize(); 3020 int FI = MFI.CreateFixedObject(ArgSize, VA.getLocMemOffset(), 3021 isImmutable); 3022 3023 // Create load nodes to retrieve arguments from the stack. 3024 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3025 InVals.push_back( 3026 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo())); 3027 } 3028 } 3029 3030 // Assign locations to all of the incoming aggregate by value arguments. 3031 // Aggregates passed by value are stored in the local variable space of the 3032 // caller's stack frame, right above the parameter list area. 3033 SmallVector<CCValAssign, 16> ByValArgLocs; 3034 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(), 3035 ByValArgLocs, *DAG.getContext()); 3036 3037 // Reserve stack space for the allocations in CCInfo. 3038 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); 3039 3040 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal); 3041 3042 // Area that is at least reserved in the caller of this function. 3043 unsigned MinReservedArea = CCByValInfo.getNextStackOffset(); 3044 MinReservedArea = std::max(MinReservedArea, LinkageSize); 3045 3046 // Set the size that is at least reserved in caller of this function. Tail 3047 // call optimized function's reserved stack space needs to be aligned so that 3048 // taking the difference between two stack areas will result in an aligned 3049 // stack. 
3050 MinReservedArea = 3051 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea); 3052 FuncInfo->setMinReservedArea(MinReservedArea); 3053 3054 SmallVector<SDValue, 8> MemOps; 3055 3056 // If the function takes variable number of arguments, make a frame index for 3057 // the start of the first vararg value... for expansion of llvm.va_start. 3058 if (isVarArg) { 3059 static const MCPhysReg GPArgRegs[] = { 3060 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 3061 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 3062 }; 3063 const unsigned NumGPArgRegs = array_lengthof(GPArgRegs); 3064 3065 static const MCPhysReg FPArgRegs[] = { 3066 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, 3067 PPC::F8 3068 }; 3069 unsigned NumFPArgRegs = array_lengthof(FPArgRegs); 3070 3071 if (useSoftFloat()) 3072 NumFPArgRegs = 0; 3073 3074 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs)); 3075 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs)); 3076 3077 // Make room for NumGPArgRegs and NumFPArgRegs. 3078 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 + 3079 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8; 3080 3081 FuncInfo->setVarArgsStackOffset( 3082 MFI.CreateFixedObject(PtrVT.getSizeInBits()/8, 3083 CCInfo.getNextStackOffset(), true)); 3084 3085 FuncInfo->setVarArgsFrameIndex(MFI.CreateStackObject(Depth, 8, false)); 3086 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 3087 3088 // The fixed integer arguments of a variadic function are stored to the 3089 // VarArgsFrameIndex on the stack so that they may be loaded by 3090 // dereferencing the result of va_next. 3091 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) { 3092 // Get an existing live-in vreg, or add a new one. 3093 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]); 3094 if (!VReg) 3095 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass); 3096 3097 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 3098 SDValue Store = 3099 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo()); 3100 MemOps.push_back(Store); 3101 // Increment the address by four for the next argument to store 3102 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT); 3103 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 3104 } 3105 3106 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6 3107 // is set. 3108 // The double arguments are stored to the VarArgsFrameIndex 3109 // on the stack. 3110 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) { 3111 // Get an existing live-in vreg, or add a new one. 3112 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]); 3113 if (!VReg) 3114 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass); 3115 3116 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64); 3117 SDValue Store = 3118 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo()); 3119 MemOps.push_back(Store); 3120 // Increment the address by eight for the next argument to store 3121 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl, 3122 PtrVT); 3123 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 3124 } 3125 } 3126 3127 if (!MemOps.empty()) 3128 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); 3129 3130 return Chain; 3131 } 3132 3133 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote 3134 // value to MVT::i64 and then truncate to the correct register size. 
3135 SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, 3136 EVT ObjectVT, SelectionDAG &DAG, 3137 SDValue ArgVal, 3138 const SDLoc &dl) const { 3139 if (Flags.isSExt()) 3140 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal, 3141 DAG.getValueType(ObjectVT)); 3142 else if (Flags.isZExt()) 3143 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal, 3144 DAG.getValueType(ObjectVT)); 3145 3146 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal); 3147 } 3148 3149 SDValue PPCTargetLowering::LowerFormalArguments_64SVR4( 3150 SDValue Chain, CallingConv::ID CallConv, bool isVarArg, 3151 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 3152 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 3153 // TODO: add description of PPC stack frame format, or at least some docs. 3154 // 3155 bool isELFv2ABI = Subtarget.isELFv2ABI(); 3156 bool isLittleEndian = Subtarget.isLittleEndian(); 3157 MachineFunction &MF = DAG.getMachineFunction(); 3158 MachineFrameInfo &MFI = MF.getFrameInfo(); 3159 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 3160 3161 assert(!(CallConv == CallingConv::Fast && isVarArg) && 3162 "fastcc not supported on varargs functions"); 3163 3164 EVT PtrVT = getPointerTy(MF.getDataLayout()); 3165 // Potential tail calls could cause overwriting of argument stack slots. 3166 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && 3167 (CallConv == CallingConv::Fast)); 3168 unsigned PtrByteSize = 8; 3169 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); 3170 3171 static const MCPhysReg GPR[] = { 3172 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 3173 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 3174 }; 3175 static const MCPhysReg VR[] = { 3176 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 3177 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 3178 }; 3179 3180 const unsigned Num_GPR_Regs = array_lengthof(GPR); 3181 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13; 3182 const unsigned Num_VR_Regs = array_lengthof(VR); 3183 const unsigned Num_QFPR_Regs = Num_FPR_Regs; 3184 3185 // Do a first pass over the arguments to determine whether the ABI 3186 // guarantees that our caller has allocated the parameter save area 3187 // on its stack frame. In the ELFv1 ABI, this is always the case; 3188 // in the ELFv2 ABI, it is true if this is a vararg function or if 3189 // any parameter is located in a stack slot. 3190 3191 bool HasParameterArea = !isELFv2ABI || isVarArg; 3192 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize; 3193 unsigned NumBytes = LinkageSize; 3194 unsigned AvailableFPRs = Num_FPR_Regs; 3195 unsigned AvailableVRs = Num_VR_Regs; 3196 for (unsigned i = 0, e = Ins.size(); i != e; ++i) { 3197 if (Ins[i].Flags.isNest()) 3198 continue; 3199 3200 if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags, 3201 PtrByteSize, LinkageSize, ParamAreaSize, 3202 NumBytes, AvailableFPRs, AvailableVRs, 3203 Subtarget.hasQPX())) 3204 HasParameterArea = true; 3205 } 3206 3207 // Add DAG nodes to load the arguments or copy them out of registers. On 3208 // entry to a function on PPC, the arguments start after the linkage area, 3209 // although the first ones are often in registers. 
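  // ArgOffset is the running offset of the next argument's stack slot,
  // starting just past the linkage area. GPR_idx, FPR_idx and VR_idx index the
  // next unused argument register of each class (for the non-fast calling
  // conventions, GPR_idx is recomputed from ArgOffset for each argument).
  // QFPR_idx aliases FPR_idx because the QPX registers overlap the scalar FP
  // registers.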
3210 3211 unsigned ArgOffset = LinkageSize; 3212 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 3213 unsigned &QFPR_idx = FPR_idx; 3214 SmallVector<SDValue, 8> MemOps; 3215 Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); 3216 unsigned CurArgIdx = 0; 3217 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { 3218 SDValue ArgVal; 3219 bool needsLoad = false; 3220 EVT ObjectVT = Ins[ArgNo].VT; 3221 EVT OrigVT = Ins[ArgNo].ArgVT; 3222 unsigned ObjSize = ObjectVT.getStoreSize(); 3223 unsigned ArgSize = ObjSize; 3224 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; 3225 if (Ins[ArgNo].isOrigArg()) { 3226 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx); 3227 CurArgIdx = Ins[ArgNo].getOrigArgIndex(); 3228 } 3229 // We re-align the argument offset for each argument, except when using the 3230 // fast calling convention, when we need to make sure we do that only when 3231 // we'll actually use a stack slot. 3232 unsigned CurArgOffset, Align; 3233 auto ComputeArgOffset = [&]() { 3234 /* Respect alignment of argument on the stack. */ 3235 Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize); 3236 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; 3237 CurArgOffset = ArgOffset; 3238 }; 3239 3240 if (CallConv != CallingConv::Fast) { 3241 ComputeArgOffset(); 3242 3243 /* Compute GPR index associated with argument offset. */ 3244 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; 3245 GPR_idx = std::min(GPR_idx, Num_GPR_Regs); 3246 } 3247 3248 // FIXME the codegen can be much improved in some cases. 3249 // We do not have to keep everything in memory. 3250 if (Flags.isByVal()) { 3251 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit"); 3252 3253 if (CallConv == CallingConv::Fast) 3254 ComputeArgOffset(); 3255 3256 // ObjSize is the true size, ArgSize rounded up to multiple of registers. 3257 ObjSize = Flags.getByValSize(); 3258 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 3259 // Empty aggregate parameters do not take up registers. Examples: 3260 // struct { } a; 3261 // union { } b; 3262 // int c[0]; 3263 // etc. However, we have to provide a place-holder in InVals, so 3264 // pretend we have an 8-byte item at the current address for that 3265 // purpose. 3266 if (!ObjSize) { 3267 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true); 3268 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3269 InVals.push_back(FIN); 3270 continue; 3271 } 3272 3273 // Create a stack object covering all stack doublewords occupied 3274 // by the argument. If the argument is (fully or partially) on 3275 // the stack, or if the argument is fully in registers but the 3276 // caller has allocated the parameter save anyway, we can refer 3277 // directly to the caller's stack frame. Otherwise, create a 3278 // local copy in our own frame. 3279 int FI; 3280 if (HasParameterArea || 3281 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize) 3282 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true); 3283 else 3284 FI = MFI.CreateStackObject(ArgSize, Align, false); 3285 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3286 3287 // Handle aggregates smaller than 8 bytes. 3288 if (ObjSize < PtrByteSize) { 3289 // The value of the object is its address, which differs from the 3290 // address of the enclosing doubleword on big-endian systems. 
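        // For example, a 3-byte aggregate occupies the last three bytes of its
        // 8-byte slot on a big-endian target, so its address is the slot
        // address plus (PtrByteSize - ObjSize).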
3291 SDValue Arg = FIN; 3292 if (!isLittleEndian) { 3293 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT); 3294 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff); 3295 } 3296 InVals.push_back(Arg); 3297 3298 if (GPR_idx != Num_GPR_Regs) { 3299 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); 3300 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 3301 SDValue Store; 3302 3303 if (ObjSize==1 || ObjSize==2 || ObjSize==4) { 3304 EVT ObjType = (ObjSize == 1 ? MVT::i8 : 3305 (ObjSize == 2 ? MVT::i16 : MVT::i32)); 3306 Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg, 3307 MachinePointerInfo(&*FuncArg), ObjType); 3308 } else { 3309 // For sizes that don't fit a truncating store (3, 5, 6, 7), 3310 // store the whole register as-is to the parameter save area 3311 // slot. 3312 Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 3313 MachinePointerInfo(&*FuncArg)); 3314 } 3315 3316 MemOps.push_back(Store); 3317 } 3318 // Whether we copied from a register or not, advance the offset 3319 // into the parameter save area by a full doubleword. 3320 ArgOffset += PtrByteSize; 3321 continue; 3322 } 3323 3324 // The value of the object is its address, which is the address of 3325 // its first stack doubleword. 3326 InVals.push_back(FIN); 3327 3328 // Store whatever pieces of the object are in registers to memory. 3329 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { 3330 if (GPR_idx == Num_GPR_Regs) 3331 break; 3332 3333 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 3334 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 3335 SDValue Addr = FIN; 3336 if (j) { 3337 SDValue Off = DAG.getConstant(j, dl, PtrVT); 3338 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off); 3339 } 3340 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr, 3341 MachinePointerInfo(&*FuncArg, j)); 3342 MemOps.push_back(Store); 3343 ++GPR_idx; 3344 } 3345 ArgOffset += ArgSize; 3346 continue; 3347 } 3348 3349 switch (ObjectVT.getSimpleVT().SimpleTy) { 3350 default: llvm_unreachable("Unhandled argument type!"); 3351 case MVT::i1: 3352 case MVT::i32: 3353 case MVT::i64: 3354 if (Flags.isNest()) { 3355 // The 'nest' parameter, if any, is passed in R11. 3356 unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass); 3357 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); 3358 3359 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) 3360 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); 3361 3362 break; 3363 } 3364 3365 // These can be scalar arguments or elements of an integer array type 3366 // passed directly. Clang may use those instead of "byval" aggregate 3367 // types to avoid forcing arguments to memory unnecessarily. 3368 if (GPR_idx != Num_GPR_Regs) { 3369 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); 3370 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); 3371 3372 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) 3373 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote 3374 // value to MVT::i64 and then truncate to the correct register size. 
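          // (extendArgForPPC64 attaches an AssertSext/AssertZext node based on
          // the argument's extension flags before truncating to ObjectVT.)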
3375 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); 3376 } else { 3377 if (CallConv == CallingConv::Fast) 3378 ComputeArgOffset(); 3379 3380 needsLoad = true; 3381 ArgSize = PtrByteSize; 3382 } 3383 if (CallConv != CallingConv::Fast || needsLoad) 3384 ArgOffset += 8; 3385 break; 3386 3387 case MVT::f32: 3388 case MVT::f64: 3389 // These can be scalar arguments or elements of a float array type 3390 // passed directly. The latter are used to implement ELFv2 homogenous 3391 // float aggregates. 3392 if (FPR_idx != Num_FPR_Regs) { 3393 unsigned VReg; 3394 3395 if (ObjectVT == MVT::f32) 3396 VReg = MF.addLiveIn(FPR[FPR_idx], 3397 Subtarget.hasP8Vector() 3398 ? &PPC::VSSRCRegClass 3399 : &PPC::F4RCRegClass); 3400 else 3401 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX() 3402 ? &PPC::VSFRCRegClass 3403 : &PPC::F8RCRegClass); 3404 3405 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 3406 ++FPR_idx; 3407 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) { 3408 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8 3409 // once we support fp <-> gpr moves. 3410 3411 // This can only ever happen in the presence of f32 array types, 3412 // since otherwise we never run out of FPRs before running out 3413 // of GPRs. 3414 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); 3415 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); 3416 3417 if (ObjectVT == MVT::f32) { 3418 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0)) 3419 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal, 3420 DAG.getConstant(32, dl, MVT::i32)); 3421 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal); 3422 } 3423 3424 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal); 3425 } else { 3426 if (CallConv == CallingConv::Fast) 3427 ComputeArgOffset(); 3428 3429 needsLoad = true; 3430 } 3431 3432 // When passing an array of floats, the array occupies consecutive 3433 // space in the argument area; only round up to the next doubleword 3434 // at the end of the array. Otherwise, each float takes 8 bytes. 3435 if (CallConv != CallingConv::Fast || needsLoad) { 3436 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize; 3437 ArgOffset += ArgSize; 3438 if (Flags.isInConsecutiveRegsLast()) 3439 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 3440 } 3441 break; 3442 case MVT::v4f32: 3443 case MVT::v4i32: 3444 case MVT::v8i16: 3445 case MVT::v16i8: 3446 case MVT::v2f64: 3447 case MVT::v2i64: 3448 case MVT::v1i128: 3449 if (!Subtarget.hasQPX()) { 3450 // These can be scalar arguments or elements of a vector array type 3451 // passed directly. The latter are used to implement ELFv2 homogenous 3452 // vector aggregates. 3453 if (VR_idx != Num_VR_Regs) { 3454 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); 3455 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 3456 ++VR_idx; 3457 } else { 3458 if (CallConv == CallingConv::Fast) 3459 ComputeArgOffset(); 3460 3461 needsLoad = true; 3462 } 3463 if (CallConv != CallingConv::Fast || needsLoad) 3464 ArgOffset += 16; 3465 break; 3466 } // not QPX 3467 3468 assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 && 3469 "Invalid QPX parameter type"); 3470 /* fall through */ 3471 3472 case MVT::v4f64: 3473 case MVT::v4i1: 3474 // QPX vectors are treated like their scalar floating-point subregisters 3475 // (except that they're larger). 3476 unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 
16 : 32; 3477 if (QFPR_idx != Num_QFPR_Regs) { 3478 const TargetRegisterClass *RC; 3479 switch (ObjectVT.getSimpleVT().SimpleTy) { 3480 case MVT::v4f64: RC = &PPC::QFRCRegClass; break; 3481 case MVT::v4f32: RC = &PPC::QSRCRegClass; break; 3482 default: RC = &PPC::QBRCRegClass; break; 3483 } 3484 3485 unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC); 3486 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 3487 ++QFPR_idx; 3488 } else { 3489 if (CallConv == CallingConv::Fast) 3490 ComputeArgOffset(); 3491 needsLoad = true; 3492 } 3493 if (CallConv != CallingConv::Fast || needsLoad) 3494 ArgOffset += Sz; 3495 break; 3496 } 3497 3498 // We need to load the argument to a virtual register if we determined 3499 // above that we ran out of physical registers of the appropriate type. 3500 if (needsLoad) { 3501 if (ObjSize < ArgSize && !isLittleEndian) 3502 CurArgOffset += ArgSize - ObjSize; 3503 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable); 3504 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3505 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo()); 3506 } 3507 3508 InVals.push_back(ArgVal); 3509 } 3510 3511 // Area that is at least reserved in the caller of this function. 3512 unsigned MinReservedArea; 3513 if (HasParameterArea) 3514 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize); 3515 else 3516 MinReservedArea = LinkageSize; 3517 3518 // Set the size that is at least reserved in caller of this function. Tail 3519 // call optimized functions' reserved stack space needs to be aligned so that 3520 // taking the difference between two stack areas will result in an aligned 3521 // stack. 3522 MinReservedArea = 3523 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea); 3524 FuncInfo->setMinReservedArea(MinReservedArea); 3525 3526 // If the function takes variable number of arguments, make a frame index for 3527 // the start of the first vararg value... for expansion of llvm.va_start. 3528 if (isVarArg) { 3529 int Depth = ArgOffset; 3530 3531 FuncInfo->setVarArgsFrameIndex( 3532 MFI.CreateFixedObject(PtrByteSize, Depth, true)); 3533 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 3534 3535 // If this function is vararg, store any remaining integer argument regs 3536 // to their spots on the stack so that they may be loaded by dereferencing 3537 // the result of va_next. 3538 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; 3539 GPR_idx < Num_GPR_Regs; ++GPR_idx) { 3540 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 3541 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 3542 SDValue Store = 3543 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo()); 3544 MemOps.push_back(Store); 3545 // Increment the address by four for the next argument to store 3546 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT); 3547 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 3548 } 3549 } 3550 3551 if (!MemOps.empty()) 3552 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); 3553 3554 return Chain; 3555 } 3556 3557 SDValue PPCTargetLowering::LowerFormalArguments_Darwin( 3558 SDValue Chain, CallingConv::ID CallConv, bool isVarArg, 3559 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 3560 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 3561 // TODO: add description of PPC stack frame format, or at least some docs. 
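  // Note: under the Darwin ABI every integer and floating-point argument
  // reserves space in the parameter area even when it is passed in a register,
  // while vector arguments passed in registers reserve parameter space only in
  // varargs functions (see the per-argument handling below).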
3562 // 3563 MachineFunction &MF = DAG.getMachineFunction(); 3564 MachineFrameInfo &MFI = MF.getFrameInfo(); 3565 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 3566 3567 EVT PtrVT = getPointerTy(MF.getDataLayout()); 3568 bool isPPC64 = PtrVT == MVT::i64; 3569 // Potential tail calls could cause overwriting of argument stack slots. 3570 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && 3571 (CallConv == CallingConv::Fast)); 3572 unsigned PtrByteSize = isPPC64 ? 8 : 4; 3573 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); 3574 unsigned ArgOffset = LinkageSize; 3575 // Area that is at least reserved in caller of this function. 3576 unsigned MinReservedArea = ArgOffset; 3577 3578 static const MCPhysReg GPR_32[] = { // 32-bit registers. 3579 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 3580 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 3581 }; 3582 static const MCPhysReg GPR_64[] = { // 64-bit registers. 3583 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 3584 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 3585 }; 3586 static const MCPhysReg VR[] = { 3587 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 3588 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 3589 }; 3590 3591 const unsigned Num_GPR_Regs = array_lengthof(GPR_32); 3592 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13; 3593 const unsigned Num_VR_Regs = array_lengthof( VR); 3594 3595 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 3596 3597 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32; 3598 3599 // In 32-bit non-varargs functions, the stack space for vectors is after the 3600 // stack space for non-vectors. We do not use this space unless we have 3601 // too many vectors to fit in registers, something that only occurs in 3602 // constructed examples:), but we have to walk the arglist to figure 3603 // that out...for the pathological case, compute VecArgOffset as the 3604 // start of the vector parameter area. Computing VecArgOffset is the 3605 // entire point of the following loop. 3606 unsigned VecArgOffset = ArgOffset; 3607 if (!isVarArg && !isPPC64) { 3608 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; 3609 ++ArgNo) { 3610 EVT ObjectVT = Ins[ArgNo].VT; 3611 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; 3612 3613 if (Flags.isByVal()) { 3614 // ObjSize is the true size, ArgSize rounded up to multiple of regs. 3615 unsigned ObjSize = Flags.getByValSize(); 3616 unsigned ArgSize = 3617 ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 3618 VecArgOffset += ArgSize; 3619 continue; 3620 } 3621 3622 switch(ObjectVT.getSimpleVT().SimpleTy) { 3623 default: llvm_unreachable("Unhandled argument type!"); 3624 case MVT::i1: 3625 case MVT::i32: 3626 case MVT::f32: 3627 VecArgOffset += 4; 3628 break; 3629 case MVT::i64: // PPC64 3630 case MVT::f64: 3631 // FIXME: We are guaranteed to be !isPPC64 at this point. 3632 // Does MVT::i64 apply? 3633 VecArgOffset += 8; 3634 break; 3635 case MVT::v4f32: 3636 case MVT::v4i32: 3637 case MVT::v8i16: 3638 case MVT::v16i8: 3639 // Nothing to do, we're only looking at Nonvector args here. 3640 break; 3641 } 3642 } 3643 } 3644 // We've found where the vector parameter area in memory is. Skip the 3645 // first 12 parameters; these don't use that memory. 3646 VecArgOffset = ((VecArgOffset+15)/16)*16; 3647 VecArgOffset += 12*16; 3648 3649 // Add DAG nodes to load the arguments or copy them out of registers. On 3650 // entry to a function on PPC, the arguments start after the linkage area, 3651 // although the first ones are often in registers. 
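  // nAltivecParamsAtEnd counts Altivec vector arguments of 32-bit non-varargs
  // functions; their stack space lives in the vector parameter area computed
  // above (VecArgOffset) rather than being interleaved with the other
  // arguments.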
3652 3653 SmallVector<SDValue, 8> MemOps; 3654 unsigned nAltivecParamsAtEnd = 0; 3655 Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); 3656 unsigned CurArgIdx = 0; 3657 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { 3658 SDValue ArgVal; 3659 bool needsLoad = false; 3660 EVT ObjectVT = Ins[ArgNo].VT; 3661 unsigned ObjSize = ObjectVT.getSizeInBits()/8; 3662 unsigned ArgSize = ObjSize; 3663 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; 3664 if (Ins[ArgNo].isOrigArg()) { 3665 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx); 3666 CurArgIdx = Ins[ArgNo].getOrigArgIndex(); 3667 } 3668 unsigned CurArgOffset = ArgOffset; 3669 3670 // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary. 3671 if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 || 3672 ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) { 3673 if (isVarArg || isPPC64) { 3674 MinReservedArea = ((MinReservedArea+15)/16)*16; 3675 MinReservedArea += CalculateStackSlotSize(ObjectVT, 3676 Flags, 3677 PtrByteSize); 3678 } else nAltivecParamsAtEnd++; 3679 } else 3680 // Calculate min reserved area. 3681 MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT, 3682 Flags, 3683 PtrByteSize); 3684 3685 // FIXME the codegen can be much improved in some cases. 3686 // We do not have to keep everything in memory. 3687 if (Flags.isByVal()) { 3688 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit"); 3689 3690 // ObjSize is the true size, ArgSize rounded up to multiple of registers. 3691 ObjSize = Flags.getByValSize(); 3692 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 3693 // Objects of size 1 and 2 are right justified, everything else is 3694 // left justified. This means the memory address is adjusted forwards. 3695 if (ObjSize==1 || ObjSize==2) { 3696 CurArgOffset = CurArgOffset + (4 - ObjSize); 3697 } 3698 // The value of the object is its address. 3699 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true); 3700 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3701 InVals.push_back(FIN); 3702 if (ObjSize==1 || ObjSize==2) { 3703 if (GPR_idx != Num_GPR_Regs) { 3704 unsigned VReg; 3705 if (isPPC64) 3706 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 3707 else 3708 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 3709 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 3710 EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16; 3711 SDValue Store = 3712 DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, 3713 MachinePointerInfo(&*FuncArg), ObjType); 3714 MemOps.push_back(Store); 3715 ++GPR_idx; 3716 } 3717 3718 ArgOffset += PtrByteSize; 3719 3720 continue; 3721 } 3722 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { 3723 // Store whatever pieces of the object are in registers 3724 // to memory. ArgOffset will be the address of the beginning 3725 // of the object. 
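        // Pieces that no longer fit in GPRs stay in their caller-allocated
        // stack slots; the else branch below simply advances ArgOffset past
        // the rest of the object and stops.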
3726 if (GPR_idx != Num_GPR_Regs) { 3727 unsigned VReg; 3728 if (isPPC64) 3729 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 3730 else 3731 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 3732 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true); 3733 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3734 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 3735 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 3736 MachinePointerInfo(&*FuncArg, j)); 3737 MemOps.push_back(Store); 3738 ++GPR_idx; 3739 ArgOffset += PtrByteSize; 3740 } else { 3741 ArgOffset += ArgSize - (ArgOffset-CurArgOffset); 3742 break; 3743 } 3744 } 3745 continue; 3746 } 3747 3748 switch (ObjectVT.getSimpleVT().SimpleTy) { 3749 default: llvm_unreachable("Unhandled argument type!"); 3750 case MVT::i1: 3751 case MVT::i32: 3752 if (!isPPC64) { 3753 if (GPR_idx != Num_GPR_Regs) { 3754 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 3755 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); 3756 3757 if (ObjectVT == MVT::i1) 3758 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal); 3759 3760 ++GPR_idx; 3761 } else { 3762 needsLoad = true; 3763 ArgSize = PtrByteSize; 3764 } 3765 // All int arguments reserve stack space in the Darwin ABI. 3766 ArgOffset += PtrByteSize; 3767 break; 3768 } 3769 LLVM_FALLTHROUGH; 3770 case MVT::i64: // PPC64 3771 if (GPR_idx != Num_GPR_Regs) { 3772 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 3773 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); 3774 3775 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) 3776 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote 3777 // value to MVT::i64 and then truncate to the correct register size. 3778 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); 3779 3780 ++GPR_idx; 3781 } else { 3782 needsLoad = true; 3783 ArgSize = PtrByteSize; 3784 } 3785 // All int arguments reserve stack space in the Darwin ABI. 3786 ArgOffset += 8; 3787 break; 3788 3789 case MVT::f32: 3790 case MVT::f64: 3791 // Every 4 bytes of argument space consumes one of the GPRs available for 3792 // argument passing. 3793 if (GPR_idx != Num_GPR_Regs) { 3794 ++GPR_idx; 3795 if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64) 3796 ++GPR_idx; 3797 } 3798 if (FPR_idx != Num_FPR_Regs) { 3799 unsigned VReg; 3800 3801 if (ObjectVT == MVT::f32) 3802 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass); 3803 else 3804 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass); 3805 3806 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 3807 ++FPR_idx; 3808 } else { 3809 needsLoad = true; 3810 } 3811 3812 // All FP arguments reserve stack space in the Darwin ABI. 3813 ArgOffset += isPPC64 ? 8 : ObjSize; 3814 break; 3815 case MVT::v4f32: 3816 case MVT::v4i32: 3817 case MVT::v8i16: 3818 case MVT::v16i8: 3819 // Note that vector arguments in registers don't reserve stack space, 3820 // except in varargs functions. 3821 if (VR_idx != Num_VR_Regs) { 3822 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); 3823 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 3824 if (isVarArg) { 3825 while ((ArgOffset % 16) != 0) { 3826 ArgOffset += PtrByteSize; 3827 if (GPR_idx != Num_GPR_Regs) 3828 GPR_idx++; 3829 } 3830 ArgOffset += 16; 3831 GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64? 3832 } 3833 ++VR_idx; 3834 } else { 3835 if (!isVarArg && !isPPC64) { 3836 // Vectors go after all the nonvectors. 
          CurArgOffset = VecArgOffset;
          VecArgOffset += 16;
        } else {
          // Vectors are aligned.
          ArgOffset = ((ArgOffset+15)/16)*16;
          CurArgOffset = ArgOffset;
          ArgOffset += 16;
        }
        needsLoad = true;
      }
      break;
    }

    // We need to load the argument to a virtual register if we determined above
    // that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      int FI = MFI.CreateFixedObject(ObjSize,
                                     CurArgOffset + (ArgSize - ObjSize),
                                     isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
    }

    InVals.push_back(ArgVal);
  }

  // Allow for Altivec parameters at the end, if needed.
  if (nAltivecParamsAtEnd) {
    MinReservedArea = ((MinReservedArea+15)/16)*16;
    MinReservedArea += 16*nAltivecParamsAtEnd;
  }

  // Area that is at least reserved in the caller of this function.
  MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);

  // Set the size that is at least reserved in the caller of this function. Tail
  // call optimized functions' reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    int Depth = ArgOffset;

    FuncInfo->setVarArgsFrameIndex(
      MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
                            Depth, true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
      unsigned VReg;

      if (isPPC64)
        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
      else
        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by the pointer size for the next argument to
      // store.
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}

/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
/// adjusted to accommodate the arguments for the tail call.
static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
                                   unsigned ParamSize) {

  if (!isTailCall) return 0;

  PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
  unsigned CallerMinReservedArea = FI->getMinReservedArea();
  int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
  // Remember only if the new adjustment is bigger.
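  // (SPDiff is negative when the callee needs a larger parameter area than
  // the caller reserved, so a "bigger" adjustment corresponds to a smaller,
  // i.e. more negative, delta.)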
  if (SPDiff < FI->getTailCallSPDelta())
    FI->setTailCallSPDelta(SPDiff);

  return SPDiff;
}

static bool isFunctionGlobalAddress(SDValue Callee);

static bool
resideInSameModule(SDValue Callee, Reloc::Model RelMod) {
  // If !G, Callee can be an external symbol.
  GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
  if (!G) return false;

  const GlobalValue *GV = G->getGlobal();

  if (GV->isDeclaration()) return false;

  switch(GV->getLinkage()) {
  default: llvm_unreachable("unknown linkage type");
  case GlobalValue::AvailableExternallyLinkage:
  case GlobalValue::ExternalWeakLinkage:
    return false;

  // A callee with weak linkage is allowed if it has hidden or protected
  // visibility.
  case GlobalValue::LinkOnceAnyLinkage:
  case GlobalValue::LinkOnceODRLinkage: // e.g. c++ inline functions
  case GlobalValue::WeakAnyLinkage:
  case GlobalValue::WeakODRLinkage:     // e.g. c++ template instantiation
    if (GV->hasDefaultVisibility())
      return false;

  case GlobalValue::ExternalLinkage:
  case GlobalValue::InternalLinkage:
  case GlobalValue::PrivateLinkage:
    break;
  }

  // With '-fPIC', calling a default-visibility function requires a 'nop' to be
  // inserted after the call, whether or not the callee resides in the same
  // module, so we treat it as residing in a different module.
  if (RelMod == Reloc::PIC_ && GV->hasDefaultVisibility())
    return false;

  return true;
}

static bool
needStackSlotPassParameters(const PPCSubtarget &Subtarget,
                            const SmallVectorImpl<ISD::OutputArg> &Outs) {
  assert(Subtarget.isSVR4ABI() && Subtarget.isPPC64());

  const unsigned PtrByteSize = 8;
  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned NumGPRs = array_lengthof(GPR);
  const unsigned NumFPRs = 13;
  const unsigned NumVRs = array_lengthof(VR);
  const unsigned ParamAreaSize = NumGPRs * PtrByteSize;

  unsigned NumBytes = LinkageSize;
  unsigned AvailableFPRs = NumFPRs;
  unsigned AvailableVRs = NumVRs;

  for (const ISD::OutputArg& Param : Outs) {
    if (Param.Flags.isNest()) continue;

    if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags,
                               PtrByteSize, LinkageSize, ParamAreaSize,
                               NumBytes, AvailableFPRs, AvailableVRs,
                               Subtarget.hasQPX()))
      return true;
  }
  return false;
}

static bool
hasSameArgumentList(const Function *CallerFn, ImmutableCallSite *CS) {
  if (CS->arg_size() != CallerFn->getArgumentList().size())
    return false;

  ImmutableCallSite::arg_iterator CalleeArgIter = CS->arg_begin();
  ImmutableCallSite::arg_iterator CalleeArgEnd = CS->arg_end();
  Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();

  for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
    const Value* CalleeArg = *CalleeArgIter;
    const Value* CallerArg = &(*CallerArgIter);
    if (CalleeArg == CallerArg)
      continue;

    // e.g.
    // @caller([4 x i64] %a, [4 x i64] %b) {
    //   tail call @callee([4 x i64] undef, [4 x i64] %b)
    // }
    // The first argument of the callee is undef and has the same type as the
    // caller's corresponding argument.
    if (CalleeArg->getType() == CallerArg->getType() &&
        isa<UndefValue>(CalleeArg))
      continue;

    return false;
  }

  return true;
}

bool
PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
                                    SDValue Callee,
                                    CallingConv::ID CalleeCC,
                                    ImmutableCallSite *CS,
                                    bool isVarArg,
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
                                    SelectionDAG& DAG) const {
  bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;

  if (DisableSCO && !TailCallOpt) return false;

  // Variadic argument functions are not supported.
  if (isVarArg) return false;

  MachineFunction &MF = DAG.getMachineFunction();
  CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();

  // Tail-call or sibling-call optimization (TCO/SCO) requires the callee and
  // the caller to use the same calling convention.
  if (CallerCC != CalleeCC) return false;

  // SCO supports the C calling convention.
  if (CalleeCC != CallingConv::Fast && CalleeCC != CallingConv::C)
    return false;

  // A caller with any byval parameter is not supported.
  if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
    return false;

  // A callee with any byval parameter is not supported either.
  // Note: This is a quick workaround, because in some cases, e.g. when the
  // caller's stack size > the callee's stack size, we are still able to apply
  // sibling call optimization. See: https://reviews.llvm.org/D23441#513574
  if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
    return false;

  // No TCO/SCO on indirect calls because the caller has to restore its TOC.
  if (!isFunctionGlobalAddress(Callee) &&
      !isa<ExternalSymbolSDNode>(Callee))
    return false;

  // Check whether the callee resides in the same module, because for now the
  // PPC64 SVR4 ABI (ELFv1/ELFv2) doesn't allow tail calls to a symbol that
  // resides in another module.
  // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
  if (!resideInSameModule(Callee, getTargetMachine().getRelocationModel()))
    return false;

  // TCO allows altering callee ABI, so we don't have to check further.
  if (CalleeCC == CallingConv::Fast && TailCallOpt)
    return true;

  if (DisableSCO) return false;

  // If the callee uses the same argument list as the caller, we can apply SCO
  // in this case. If not, we need to check whether the callee needs stack
  // slots for passing arguments.
  if (!hasSameArgumentList(MF.getFunction(), CS) &&
      needStackSlotPassParameters(Subtarget, Outs)) {
    return false;
  }

  return true;
}

/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
bool
PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
                                                     CallingConv::ID CalleeCC,
                                                     bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                                     SelectionDAG& DAG) const {
  if (!getTargetMachine().Options.GuaranteedTailCallOpt)
    return false;

  // Variable argument functions are not supported.
4123 if (isVarArg) 4124 return false; 4125 4126 MachineFunction &MF = DAG.getMachineFunction(); 4127 CallingConv::ID CallerCC = MF.getFunction()->getCallingConv(); 4128 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) { 4129 // Functions containing by val parameters are not supported. 4130 for (unsigned i = 0; i != Ins.size(); i++) { 4131 ISD::ArgFlagsTy Flags = Ins[i].Flags; 4132 if (Flags.isByVal()) return false; 4133 } 4134 4135 // Non-PIC/GOT tail calls are supported. 4136 if (getTargetMachine().getRelocationModel() != Reloc::PIC_) 4137 return true; 4138 4139 // At the moment we can only do local tail calls (in same module, hidden 4140 // or protected) if we are generating PIC. 4141 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 4142 return G->getGlobal()->hasHiddenVisibility() 4143 || G->getGlobal()->hasProtectedVisibility(); 4144 } 4145 4146 return false; 4147 } 4148 4149 /// isCallCompatibleAddress - Return the immediate to use if the specified 4150 /// 32-bit value is representable in the immediate field of a BxA instruction. 4151 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) { 4152 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); 4153 if (!C) return nullptr; 4154 4155 int Addr = C->getZExtValue(); 4156 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero. 4157 SignExtend32<26>(Addr) != Addr) 4158 return nullptr; // Top 6 bits have to be sext of immediate. 4159 4160 return DAG 4161 .getConstant( 4162 (int)C->getZExtValue() >> 2, SDLoc(Op), 4163 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout())) 4164 .getNode(); 4165 } 4166 4167 namespace { 4168 4169 struct TailCallArgumentInfo { 4170 SDValue Arg; 4171 SDValue FrameIdxOp; 4172 int FrameIdx; 4173 4174 TailCallArgumentInfo() : FrameIdx(0) {} 4175 }; 4176 } 4177 4178 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot. 4179 static void StoreTailCallArgumentsToStackSlot( 4180 SelectionDAG &DAG, SDValue Chain, 4181 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs, 4182 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) { 4183 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) { 4184 SDValue Arg = TailCallArgs[i].Arg; 4185 SDValue FIN = TailCallArgs[i].FrameIdxOp; 4186 int FI = TailCallArgs[i].FrameIdx; 4187 // Store relative to framepointer. 4188 MemOpChains.push_back(DAG.getStore( 4189 Chain, dl, Arg, FIN, 4190 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI))); 4191 } 4192 } 4193 4194 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to 4195 /// the appropriate stack slot for the tail call optimized function call. 4196 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, 4197 SDValue OldRetAddr, SDValue OldFP, 4198 int SPDiff, const SDLoc &dl) { 4199 if (SPDiff) { 4200 // Calculate the new stack slot for the return address. 4201 MachineFunction &MF = DAG.getMachineFunction(); 4202 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); 4203 const PPCFrameLowering *FL = Subtarget.getFrameLowering(); 4204 bool isPPC64 = Subtarget.isPPC64(); 4205 int SlotSize = isPPC64 ? 8 : 4; 4206 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset(); 4207 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize, 4208 NewRetAddrLoc, true); 4209 EVT VT = isPPC64 ? 
MVT::i64 : MVT::i32; 4210 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); 4211 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx, 4212 MachinePointerInfo::getFixedStack(MF, NewRetAddr)); 4213 4214 // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack 4215 // slot as the FP is never overwritten. 4216 if (Subtarget.isDarwinABI()) { 4217 int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset(); 4218 int NewFPIdx = MF.getFrameInfo().CreateFixedObject(SlotSize, NewFPLoc, 4219 true); 4220 SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); 4221 Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx, 4222 MachinePointerInfo::getFixedStack( 4223 DAG.getMachineFunction(), NewFPIdx)); 4224 } 4225 } 4226 return Chain; 4227 } 4228 4229 /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate 4230 /// the position of the argument. 4231 static void 4232 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, 4233 SDValue Arg, int SPDiff, unsigned ArgOffset, 4234 SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) { 4235 int Offset = ArgOffset + SPDiff; 4236 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8; 4237 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true); 4238 EVT VT = isPPC64 ? MVT::i64 : MVT::i32; 4239 SDValue FIN = DAG.getFrameIndex(FI, VT); 4240 TailCallArgumentInfo Info; 4241 Info.Arg = Arg; 4242 Info.FrameIdxOp = FIN; 4243 Info.FrameIdx = FI; 4244 TailCallArguments.push_back(Info); 4245 } 4246 4247 /// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address 4248 /// stack slot. Returns the chain as result and the loaded frame pointers in 4249 /// LROpOut/FPOpout. Used when tail calling. 4250 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr( 4251 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut, 4252 SDValue &FPOpOut, const SDLoc &dl) const { 4253 if (SPDiff) { 4254 // Load the LR and FP stack slot for later adjusting. 4255 EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32; 4256 LROpOut = getReturnAddrFrameIndex(DAG); 4257 LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo()); 4258 Chain = SDValue(LROpOut.getNode(), 1); 4259 4260 // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack 4261 // slot as the FP is never overwritten. 4262 if (Subtarget.isDarwinABI()) { 4263 FPOpOut = getFramePointerFrameIndex(DAG); 4264 FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo()); 4265 Chain = SDValue(FPOpOut.getNode(), 1); 4266 } 4267 } 4268 return Chain; 4269 } 4270 4271 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified 4272 /// by "Src" to address "Dst" of size "Size". Alignment information is 4273 /// specified by the specific parameter attribute. The copy will be passed as 4274 /// a byval function parameter. 4275 /// Sometimes what we are copying is the end of a larger object, the part that 4276 /// does not fit in registers. 4277 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, 4278 SDValue Chain, ISD::ArgFlagsTy Flags, 4279 SelectionDAG &DAG, const SDLoc &dl) { 4280 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32); 4281 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), 4282 false, false, false, MachinePointerInfo(), 4283 MachinePointerInfo()); 4284 } 4285 4286 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of 4287 /// tail calls. 
4288 static void LowerMemOpCallTo( 4289 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, 4290 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, 4291 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains, 4292 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) { 4293 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); 4294 if (!isTailCall) { 4295 if (isVector) { 4296 SDValue StackPtr; 4297 if (isPPC64) 4298 StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 4299 else 4300 StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 4301 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, 4302 DAG.getConstant(ArgOffset, dl, PtrVT)); 4303 } 4304 MemOpChains.push_back( 4305 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo())); 4306 // Calculate and remember argument location. 4307 } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset, 4308 TailCallArguments); 4309 } 4310 4311 static void 4312 PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, 4313 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, 4314 SDValue FPOp, 4315 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) { 4316 // Emit a sequence of copyto/copyfrom virtual registers for arguments that 4317 // might overwrite each other in case of tail call optimization. 4318 SmallVector<SDValue, 8> MemOpChains2; 4319 // Do not flag preceding copytoreg stuff together with the following stuff. 4320 InFlag = SDValue(); 4321 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments, 4322 MemOpChains2, dl); 4323 if (!MemOpChains2.empty()) 4324 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2); 4325 4326 // Store the return address to the appropriate stack slot. 4327 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl); 4328 4329 // Emit callseq_end just before tailcall node. 4330 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), 4331 DAG.getIntPtrConstant(0, dl, true), InFlag, dl); 4332 InFlag = Chain.getValue(1); 4333 } 4334 4335 // Is this global address that of a function that can be called by name? (as 4336 // opposed to something that must hold a descriptor for an indirect call). 4337 static bool isFunctionGlobalAddress(SDValue Callee) { 4338 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 4339 if (Callee.getOpcode() == ISD::GlobalTLSAddress || 4340 Callee.getOpcode() == ISD::TargetGlobalTLSAddress) 4341 return false; 4342 4343 return G->getGlobal()->getValueType()->isFunctionTy(); 4344 } 4345 4346 return false; 4347 } 4348 4349 static unsigned 4350 PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, 4351 SDValue CallSeqStart, const SDLoc &dl, int SPDiff, bool isTailCall, 4352 bool isPatchPoint, bool hasNest, 4353 SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass, 4354 SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys, 4355 ImmutableCallSite *CS, const PPCSubtarget &Subtarget) { 4356 4357 bool isPPC64 = Subtarget.isPPC64(); 4358 bool isSVR4ABI = Subtarget.isSVR4ABI(); 4359 bool isELFv2ABI = Subtarget.isELFv2ABI(); 4360 4361 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); 4362 NodeTys.push_back(MVT::Other); // Returns a chain 4363 NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use. 
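  // Assume a direct call (PPCISD::CALL) to begin with; this is switched to
  // PPCISD::BCTRL below if the callee turns out to require an indirect
  // MTCTR/BCTRL sequence.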
4364 4365 unsigned CallOpc = PPCISD::CALL; 4366 4367 bool needIndirectCall = true; 4368 if (!isSVR4ABI || !isPPC64) 4369 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) { 4370 // If this is an absolute destination address, use the munged value. 4371 Callee = SDValue(Dest, 0); 4372 needIndirectCall = false; 4373 } 4374 4375 // PC-relative references to external symbols should go through $stub, unless 4376 // we're building with the leopard linker or later, which automatically 4377 // synthesizes these stubs. 4378 const TargetMachine &TM = DAG.getTarget(); 4379 const Module *Mod = DAG.getMachineFunction().getFunction()->getParent(); 4380 const GlobalValue *GV = nullptr; 4381 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) 4382 GV = G->getGlobal(); 4383 bool Local = TM.shouldAssumeDSOLocal(*Mod, GV); 4384 bool UsePlt = !Local && Subtarget.isTargetELF() && !isPPC64; 4385 4386 if (isFunctionGlobalAddress(Callee)) { 4387 GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee); 4388 // A call to a TLS address is actually an indirect call to a 4389 // thread-specific pointer. 4390 unsigned OpFlags = 0; 4391 if (UsePlt) 4392 OpFlags = PPCII::MO_PLT; 4393 4394 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, 4395 // every direct call is) turn it into a TargetGlobalAddress / 4396 // TargetExternalSymbol node so that legalize doesn't hack it. 4397 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, 4398 Callee.getValueType(), 0, OpFlags); 4399 needIndirectCall = false; 4400 } 4401 4402 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 4403 unsigned char OpFlags = 0; 4404 4405 if (UsePlt) 4406 OpFlags = PPCII::MO_PLT; 4407 4408 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(), 4409 OpFlags); 4410 needIndirectCall = false; 4411 } 4412 4413 if (isPatchPoint) { 4414 // We'll form an invalid direct call when lowering a patchpoint; the full 4415 // sequence for an indirect call is complicated, and many of the 4416 // instructions introduced might have side effects (and, thus, can't be 4417 // removed later). The call itself will be removed as soon as the 4418 // argument/return lowering is complete, so the fact that it has the wrong 4419 // kind of operands should not really matter. 4420 needIndirectCall = false; 4421 } 4422 4423 if (needIndirectCall) { 4424 // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair 4425 // to do the call, we can't use PPCISD::CALL. 4426 SDValue MTCTROps[] = {Chain, Callee, InFlag}; 4427 4428 if (isSVR4ABI && isPPC64 && !isELFv2ABI) { 4429 // Function pointers in the 64-bit SVR4 ABI do not point to the function 4430 // entry point, but to the function descriptor (the function entry point 4431 // address is part of the function descriptor though). 4432 // The function descriptor is a three doubleword structure with the 4433 // following fields: function entry point, TOC base address and 4434 // environment pointer. 4435 // Thus for a call through a function pointer, the following actions need 4436 // to be performed: 4437 // 1. Save the TOC of the caller in the TOC save area of its stack 4438 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()). 4439 // 2. Load the address of the function entry point from the function 4440 // descriptor. 4441 // 3. Load the TOC of the callee from the function descriptor into r2. 4442 // 4. Load the environment pointer from the function descriptor into 4443 // r11. 4444 // 5. Branch to the function entry point address. 
4445 // 6. On return of the callee, the TOC of the caller needs to be 4446 // restored (this is done in FinishCall()). 4447 // 4448 // The loads are scheduled at the beginning of the call sequence, and the 4449 // register copies are flagged together to ensure that no other 4450 // operations can be scheduled in between. E.g. without flagging the 4451 // copies together, a TOC access in the caller could be scheduled between 4452 // the assignment of the callee TOC and the branch to the callee, which 4453 // results in the TOC access going through the TOC of the callee instead 4454 // of going through the TOC of the caller, which leads to incorrect code. 4455 4456 // Load the address of the function entry point from the function 4457 // descriptor. 4458 SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1); 4459 if (LDChain.getValueType() == MVT::Glue) 4460 LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2); 4461 4462 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors() 4463 ? (MachineMemOperand::MODereferenceable | 4464 MachineMemOperand::MOInvariant) 4465 : MachineMemOperand::MONone; 4466 4467 MachinePointerInfo MPI(CS ? CS->getCalledValue() : nullptr); 4468 SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI, 4469 /* Alignment = */ 8, MMOFlags); 4470 4471 // Load environment pointer into r11. 4472 SDValue PtrOff = DAG.getIntPtrConstant(16, dl); 4473 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff); 4474 SDValue LoadEnvPtr = 4475 DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, MPI.getWithOffset(16), 4476 /* Alignment = */ 8, MMOFlags); 4477 4478 SDValue TOCOff = DAG.getIntPtrConstant(8, dl); 4479 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff); 4480 SDValue TOCPtr = 4481 DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, MPI.getWithOffset(8), 4482 /* Alignment = */ 8, MMOFlags); 4483 4484 setUsesTOCBasePtr(DAG); 4485 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr, 4486 InFlag); 4487 Chain = TOCVal.getValue(0); 4488 InFlag = TOCVal.getValue(1); 4489 4490 // If the function call has an explicit 'nest' parameter, it takes the 4491 // place of the environment pointer. 4492 if (!hasNest) { 4493 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr, 4494 InFlag); 4495 4496 Chain = EnvVal.getValue(0); 4497 InFlag = EnvVal.getValue(1); 4498 } 4499 4500 MTCTROps[0] = Chain; 4501 MTCTROps[1] = LoadFuncPtr; 4502 MTCTROps[2] = InFlag; 4503 } 4504 4505 Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, 4506 makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2)); 4507 InFlag = Chain.getValue(1); 4508 4509 NodeTys.clear(); 4510 NodeTys.push_back(MVT::Other); 4511 NodeTys.push_back(MVT::Glue); 4512 Ops.push_back(Chain); 4513 CallOpc = PPCISD::BCTRL; 4514 Callee.setNode(nullptr); 4515 // Add use of X11 (holding environment pointer) 4516 if (isSVR4ABI && isPPC64 && !isELFv2ABI && !hasNest) 4517 Ops.push_back(DAG.getRegister(PPC::X11, PtrVT)); 4518 // Add CTR register as callee so a bctr can be emitted later. 4519 if (isTailCall) 4520 Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT)); 4521 } 4522 4523 // If this is a direct call, pass the chain and the callee. 4524 if (Callee.getNode()) { 4525 Ops.push_back(Chain); 4526 Ops.push_back(Callee); 4527 } 4528 // If this is a tail call add stack pointer delta. 
4529 if (isTailCall) 4530 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32)); 4531 4532 // Add argument registers to the end of the list so that they are known live 4533 // into the call. 4534 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 4535 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 4536 RegsToPass[i].second.getValueType())); 4537 4538 // All calls, in both the ELF V1 and V2 ABIs, need the TOC register live 4539 // into the call. 4540 if (isSVR4ABI && isPPC64 && !isPatchPoint) { 4541 setUsesTOCBasePtr(DAG); 4542 Ops.push_back(DAG.getRegister(PPC::X2, PtrVT)); 4543 } 4544 4545 return CallOpc; 4546 } 4547 4548 static 4549 bool isLocalCall(const SDValue &Callee) 4550 { 4551 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 4552 return G->getGlobal()->isStrongDefinitionForLinker(); 4553 return false; 4554 } 4555 4556 SDValue PPCTargetLowering::LowerCallResult( 4557 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, 4558 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 4559 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 4560 4561 SmallVector<CCValAssign, 16> RVLocs; 4562 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, 4563 *DAG.getContext()); 4564 CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC); 4565 4566 // Copy all of the result registers out of their specified physreg. 4567 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { 4568 CCValAssign &VA = RVLocs[i]; 4569 assert(VA.isRegLoc() && "Can only return in registers!"); 4570 4571 SDValue Val = DAG.getCopyFromReg(Chain, dl, 4572 VA.getLocReg(), VA.getLocVT(), InFlag); 4573 Chain = Val.getValue(1); 4574 InFlag = Val.getValue(2); 4575 4576 switch (VA.getLocInfo()) { 4577 default: llvm_unreachable("Unknown loc info!"); 4578 case CCValAssign::Full: break; 4579 case CCValAssign::AExt: 4580 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 4581 break; 4582 case CCValAssign::ZExt: 4583 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val, 4584 DAG.getValueType(VA.getValVT())); 4585 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 4586 break; 4587 case CCValAssign::SExt: 4588 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val, 4589 DAG.getValueType(VA.getValVT())); 4590 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 4591 break; 4592 } 4593 4594 InVals.push_back(Val); 4595 } 4596 4597 return Chain; 4598 } 4599 4600 SDValue PPCTargetLowering::FinishCall( 4601 CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg, 4602 bool isPatchPoint, bool hasNest, SelectionDAG &DAG, 4603 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue InFlag, 4604 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff, 4605 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins, 4606 SmallVectorImpl<SDValue> &InVals, ImmutableCallSite *CS) const { 4607 4608 std::vector<EVT> NodeTys; 4609 SmallVector<SDValue, 8> Ops; 4610 unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl, 4611 SPDiff, isTailCall, isPatchPoint, hasNest, 4612 RegsToPass, Ops, NodeTys, CS, Subtarget); 4613 4614 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls 4615 if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64()) 4616 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32)); 4617 4618 // When performing tail call optimization the callee pops its arguments off 4619 // the stack. 
Account for this here so these bytes can be pushed back on in
4620 // PPCFrameLowering::eliminateCallFramePseudoInstr.
4621 int BytesCalleePops =
4622 (CallConv == CallingConv::Fast &&
4623 getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
4624
4625 // Add a register mask operand representing the call-preserved registers.
4626 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4627 const uint32_t *Mask =
4628 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
4629 assert(Mask && "Missing call preserved mask for calling convention");
4630 Ops.push_back(DAG.getRegisterMask(Mask));
4631
4632 if (InFlag.getNode())
4633 Ops.push_back(InFlag);
4634
4635 // Emit tail call.
4636 if (isTailCall) {
4637 assert(((Callee.getOpcode() == ISD::Register &&
4638 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
4639 Callee.getOpcode() == ISD::TargetExternalSymbol ||
4640 Callee.getOpcode() == ISD::TargetGlobalAddress ||
4641 isa<ConstantSDNode>(Callee)) &&
4642 "Expecting a global address, external symbol, absolute value or register");
4643
4644 DAG.getMachineFunction().getFrameInfo().setHasTailCall();
4645 return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
4646 }
4647
4648 // Add a NOP immediately after the branch instruction when using the 64-bit
4649 // SVR4 ABI. At link time, if caller and callee are in a different module and
4650 // thus have a different TOC, the call will be replaced with a call to a stub
4651 // function which saves the current TOC, loads the TOC of the callee and
4652 // branches to the callee. The NOP will be replaced with a load instruction
4653 // which restores the TOC of the caller from the TOC save slot of the current
4654 // stack frame. If caller and callee belong to the same module (and have the
4655 // same TOC), the NOP will remain unchanged.
4656
4657 if (!isTailCall && Subtarget.isSVR4ABI() && Subtarget.isPPC64() &&
4658 !isPatchPoint) {
4659 if (CallOpc == PPCISD::BCTRL) {
4660 // This is a call through a function pointer.
4661 // Restore the caller TOC from the save area into R2.
4662 // See PrepareCall() for more information about calls through function
4663 // pointers in the 64-bit SVR4 ABI.
4664 // We are using a target-specific load with r2 hard coded, because the
4665 // result of a target-independent load would never go directly into r2,
4666 // since r2 is a reserved register (which prevents the register allocator
4667 // from allocating it), resulting in an additional register being
4668 // allocated and an unnecessary move instruction being generated.
4669 CallOpc = PPCISD::BCTRL_LOAD_TOC;
4670
4671 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4672 SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
4673 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
4674 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
4675 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
4676
4677 // The address needs to go after the chain input but before the flag (or
4678 // any other variadic arguments).
4679 Ops.insert(std::next(Ops.begin()), AddTOC);
4680 } else if ((CallOpc == PPCISD::CALL) &&
4681 (!isLocalCall(Callee) ||
4682 DAG.getTarget().getRelocationModel() == Reloc::PIC_))
4683 // Otherwise insert NOP for non-local calls.
4684 CallOpc = PPCISD::CALL_NOP; 4685 } 4686 4687 Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); 4688 InFlag = Chain.getValue(1); 4689 4690 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), 4691 DAG.getIntPtrConstant(BytesCalleePops, dl, true), 4692 InFlag, dl); 4693 if (!Ins.empty()) 4694 InFlag = Chain.getValue(1); 4695 4696 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, 4697 Ins, dl, DAG, InVals); 4698 } 4699 4700 SDValue 4701 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, 4702 SmallVectorImpl<SDValue> &InVals) const { 4703 SelectionDAG &DAG = CLI.DAG; 4704 SDLoc &dl = CLI.DL; 4705 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 4706 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 4707 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 4708 SDValue Chain = CLI.Chain; 4709 SDValue Callee = CLI.Callee; 4710 bool &isTailCall = CLI.IsTailCall; 4711 CallingConv::ID CallConv = CLI.CallConv; 4712 bool isVarArg = CLI.IsVarArg; 4713 bool isPatchPoint = CLI.IsPatchPoint; 4714 ImmutableCallSite *CS = CLI.CS; 4715 4716 if (isTailCall) { 4717 if (Subtarget.useLongCalls() && !(CS && CS->isMustTailCall())) 4718 isTailCall = false; 4719 else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) 4720 isTailCall = 4721 IsEligibleForTailCallOptimization_64SVR4(Callee, CallConv, CS, 4722 isVarArg, Outs, Ins, DAG); 4723 else 4724 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, 4725 Ins, DAG); 4726 if (isTailCall) { 4727 ++NumTailCalls; 4728 if (!getTargetMachine().Options.GuaranteedTailCallOpt) 4729 ++NumSiblingCalls; 4730 4731 assert(isa<GlobalAddressSDNode>(Callee) && 4732 "Callee should be an llvm::Function object."); 4733 DEBUG( 4734 const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal(); 4735 const unsigned Width = 80 - strlen("TCO caller: ") 4736 - strlen(", callee linkage: 0, 0"); 4737 dbgs() << "TCO caller: " 4738 << left_justify(DAG.getMachineFunction().getName(), Width) 4739 << ", callee linkage: " 4740 << GV->getVisibility() << ", " << GV->getLinkage() << "\n" 4741 ); 4742 } 4743 } 4744 4745 if (!isTailCall && CS && CS->isMustTailCall()) 4746 report_fatal_error("failed to perform tail call elimination on a call " 4747 "site marked musttail"); 4748 4749 // When long calls (i.e. indirect calls) are always used, calls are always 4750 // made via function pointer. If we have a function name, first translate it 4751 // into a pointer. 
4752 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) && 4753 !isTailCall) 4754 Callee = LowerGlobalAddress(Callee, DAG); 4755 4756 if (Subtarget.isSVR4ABI()) { 4757 if (Subtarget.isPPC64()) 4758 return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg, 4759 isTailCall, isPatchPoint, Outs, OutVals, Ins, 4760 dl, DAG, InVals, CS); 4761 else 4762 return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg, 4763 isTailCall, isPatchPoint, Outs, OutVals, Ins, 4764 dl, DAG, InVals, CS); 4765 } 4766 4767 return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg, 4768 isTailCall, isPatchPoint, Outs, OutVals, Ins, 4769 dl, DAG, InVals, CS); 4770 } 4771 4772 SDValue PPCTargetLowering::LowerCall_32SVR4( 4773 SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, 4774 bool isTailCall, bool isPatchPoint, 4775 const SmallVectorImpl<ISD::OutputArg> &Outs, 4776 const SmallVectorImpl<SDValue> &OutVals, 4777 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 4778 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, 4779 ImmutableCallSite *CS) const { 4780 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description 4781 // of the 32-bit SVR4 ABI stack frame layout. 4782 4783 assert((CallConv == CallingConv::C || 4784 CallConv == CallingConv::Fast) && "Unknown calling convention!"); 4785 4786 unsigned PtrByteSize = 4; 4787 4788 MachineFunction &MF = DAG.getMachineFunction(); 4789 4790 // Mark this function as potentially containing a function that contains a 4791 // tail call. As a consequence the frame pointer will be used for dynamicalloc 4792 // and restoring the callers stack pointer in this functions epilog. This is 4793 // done because by tail calling the called function might overwrite the value 4794 // in this function's (MF) stack pointer stack slot 0(SP). 4795 if (getTargetMachine().Options.GuaranteedTailCallOpt && 4796 CallConv == CallingConv::Fast) 4797 MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); 4798 4799 // Count how many bytes are to be pushed on the stack, including the linkage 4800 // area, parameter list area and the part of the local variable space which 4801 // contains copies of aggregates which are passed by value. 4802 4803 // Assign locations to all of the outgoing arguments. 4804 SmallVector<CCValAssign, 16> ArgLocs; 4805 PPCCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext()); 4806 4807 // Reserve space for the linkage area on the stack. 4808 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(), 4809 PtrByteSize); 4810 if (useSoftFloat()) 4811 CCInfo.PreAnalyzeCallOperands(Outs); 4812 4813 if (isVarArg) { 4814 // Handle fixed and variable vector arguments differently. 4815 // Fixed vector arguments go into registers as long as registers are 4816 // available. Variable vector arguments always go into memory. 4817 unsigned NumArgs = Outs.size(); 4818 4819 for (unsigned i = 0; i != NumArgs; ++i) { 4820 MVT ArgVT = Outs[i].VT; 4821 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 4822 bool Result; 4823 4824 if (Outs[i].IsFixed) { 4825 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, 4826 CCInfo); 4827 } else { 4828 Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full, 4829 ArgFlags, CCInfo); 4830 } 4831 4832 if (Result) { 4833 #ifndef NDEBUG 4834 errs() << "Call operand #" << i << " has unhandled type " 4835 << EVT(ArgVT).getEVTString() << "\n"; 4836 #endif 4837 llvm_unreachable(nullptr); 4838 } 4839 } 4840 } else { 4841 // All arguments are treated the same. 
4842 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4); 4843 } 4844 CCInfo.clearWasPPCF128(); 4845 4846 // Assign locations to all of the outgoing aggregate by value arguments. 4847 SmallVector<CCValAssign, 16> ByValArgLocs; 4848 CCState CCByValInfo(CallConv, isVarArg, MF, ByValArgLocs, *DAG.getContext()); 4849 4850 // Reserve stack space for the allocations in CCInfo. 4851 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); 4852 4853 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal); 4854 4855 // Size of the linkage area, parameter list area and the part of the local 4856 // space variable where copies of aggregates which are passed by value are 4857 // stored. 4858 unsigned NumBytes = CCByValInfo.getNextStackOffset(); 4859 4860 // Calculate by how many bytes the stack has to be adjusted in case of tail 4861 // call optimization. 4862 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); 4863 4864 // Adjust the stack pointer for the new arguments... 4865 // These operations are automatically eliminated by the prolog/epilog pass 4866 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), 4867 dl); 4868 SDValue CallSeqStart = Chain; 4869 4870 // Load the return address and frame pointer so it can be moved somewhere else 4871 // later. 4872 SDValue LROp, FPOp; 4873 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl); 4874 4875 // Set up a copy of the stack pointer for use loading and storing any 4876 // arguments that may not fit in the registers available for argument 4877 // passing. 4878 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 4879 4880 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 4881 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 4882 SmallVector<SDValue, 8> MemOpChains; 4883 4884 bool seenFloatArg = false; 4885 // Walk the register/memloc assignments, inserting copies/loads. 4886 for (unsigned i = 0, j = 0, e = ArgLocs.size(); 4887 i != e; 4888 ++i) { 4889 CCValAssign &VA = ArgLocs[i]; 4890 SDValue Arg = OutVals[i]; 4891 ISD::ArgFlagsTy Flags = Outs[i].Flags; 4892 4893 if (Flags.isByVal()) { 4894 // Argument is an aggregate which is passed by value, thus we need to 4895 // create a copy of it in the local variable space of the current stack 4896 // frame (which is the stack frame of the caller) and pass the address of 4897 // this copy to the callee. 4898 assert((j < ByValArgLocs.size()) && "Index out of bounds!"); 4899 CCValAssign &ByValVA = ByValArgLocs[j++]; 4900 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!"); 4901 4902 // Memory reserved in the local variable space of the callers stack frame. 4903 unsigned LocMemOffset = ByValVA.getLocMemOffset(); 4904 4905 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); 4906 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()), 4907 StackPtr, PtrOff); 4908 4909 // Create a copy of the argument in the local area of the current 4910 // stack frame. 4911 SDValue MemcpyCall = 4912 CreateCopyOfByValArgument(Arg, PtrOff, 4913 CallSeqStart.getNode()->getOperand(0), 4914 Flags, DAG, dl); 4915 4916 // This must go outside the CALLSEQ_START..END. 
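// A new CALLSEQ_START is therefore created whose chain includes the memcpy,
// and every use of the old node is redirected to it below.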
4917 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, 4918 CallSeqStart.getNode()->getOperand(1), 4919 SDLoc(MemcpyCall)); 4920 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), 4921 NewCallSeqStart.getNode()); 4922 Chain = CallSeqStart = NewCallSeqStart; 4923 4924 // Pass the address of the aggregate copy on the stack either in a 4925 // physical register or in the parameter list area of the current stack 4926 // frame to the callee. 4927 Arg = PtrOff; 4928 } 4929 4930 if (VA.isRegLoc()) { 4931 if (Arg.getValueType() == MVT::i1) 4932 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg); 4933 4934 seenFloatArg |= VA.getLocVT().isFloatingPoint(); 4935 // Put argument in a physical register. 4936 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); 4937 } else { 4938 // Put argument in the parameter list area of the current stack frame. 4939 assert(VA.isMemLoc()); 4940 unsigned LocMemOffset = VA.getLocMemOffset(); 4941 4942 if (!isTailCall) { 4943 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); 4944 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()), 4945 StackPtr, PtrOff); 4946 4947 MemOpChains.push_back( 4948 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo())); 4949 } else { 4950 // Calculate and remember argument location. 4951 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset, 4952 TailCallArguments); 4953 } 4954 } 4955 } 4956 4957 if (!MemOpChains.empty()) 4958 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); 4959 4960 // Build a sequence of copy-to-reg nodes chained together with token chain 4961 // and flag operands which copy the outgoing args into the appropriate regs. 4962 SDValue InFlag; 4963 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 4964 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 4965 RegsToPass[i].second, InFlag); 4966 InFlag = Chain.getValue(1); 4967 } 4968 4969 // Set CR bit 6 to true if this is a vararg call with floating args passed in 4970 // registers. 4971 if (isVarArg) { 4972 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); 4973 SDValue Ops[] = { Chain, InFlag }; 4974 4975 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, 4976 dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1)); 4977 4978 InFlag = Chain.getValue(1); 4979 } 4980 4981 if (isTailCall) 4982 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp, 4983 TailCallArguments); 4984 4985 return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, 4986 /* unused except on PPC64 ELFv1 */ false, DAG, 4987 RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, 4988 NumBytes, Ins, InVals, CS); 4989 } 4990 4991 // Copy an argument into memory, being careful to do this outside the 4992 // call sequence for the call to which the argument belongs. 4993 SDValue PPCTargetLowering::createMemcpyOutsideCallSeq( 4994 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags, 4995 SelectionDAG &DAG, const SDLoc &dl) const { 4996 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff, 4997 CallSeqStart.getNode()->getOperand(0), 4998 Flags, DAG, dl); 4999 // The MEMCPY must go outside the CALLSEQ_START..END. 
5000 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, 5001 CallSeqStart.getNode()->getOperand(1), 5002 SDLoc(MemcpyCall)); 5003 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), 5004 NewCallSeqStart.getNode()); 5005 return NewCallSeqStart; 5006 } 5007 5008 SDValue PPCTargetLowering::LowerCall_64SVR4( 5009 SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, 5010 bool isTailCall, bool isPatchPoint, 5011 const SmallVectorImpl<ISD::OutputArg> &Outs, 5012 const SmallVectorImpl<SDValue> &OutVals, 5013 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 5014 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, 5015 ImmutableCallSite *CS) const { 5016 5017 bool isELFv2ABI = Subtarget.isELFv2ABI(); 5018 bool isLittleEndian = Subtarget.isLittleEndian(); 5019 unsigned NumOps = Outs.size(); 5020 bool hasNest = false; 5021 bool IsSibCall = false; 5022 5023 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 5024 unsigned PtrByteSize = 8; 5025 5026 MachineFunction &MF = DAG.getMachineFunction(); 5027 5028 if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt) 5029 IsSibCall = true; 5030 5031 // Mark this function as potentially containing a function that contains a 5032 // tail call. As a consequence the frame pointer will be used for dynamicalloc 5033 // and restoring the callers stack pointer in this functions epilog. This is 5034 // done because by tail calling the called function might overwrite the value 5035 // in this function's (MF) stack pointer stack slot 0(SP). 5036 if (getTargetMachine().Options.GuaranteedTailCallOpt && 5037 CallConv == CallingConv::Fast) 5038 MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); 5039 5040 assert(!(CallConv == CallingConv::Fast && isVarArg) && 5041 "fastcc not supported on varargs functions"); 5042 5043 // Count how many bytes are to be pushed on the stack, including the linkage 5044 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes 5045 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage 5046 // area is 32 bytes reserved space for [SP][CR][LR][TOC]. 5047 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); 5048 unsigned NumBytes = LinkageSize; 5049 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 5050 unsigned &QFPR_idx = FPR_idx; 5051 5052 static const MCPhysReg GPR[] = { 5053 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 5054 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 5055 }; 5056 static const MCPhysReg VR[] = { 5057 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 5058 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 5059 }; 5060 5061 const unsigned NumGPRs = array_lengthof(GPR); 5062 const unsigned NumFPRs = 13; 5063 const unsigned NumVRs = array_lengthof(VR); 5064 const unsigned NumQFPRs = NumFPRs; 5065 5066 // When using the fast calling convention, we don't provide backing for 5067 // arguments that will be in registers. 5068 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0; 5069 5070 // Add up all the space actually used. 
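// Under the fast calling convention, arguments that will live entirely in
// registers get no backing store in the parameter save area and are skipped
// below; everything else is aligned via CalculateStackSlotAlignment and its
// slot size added to NumBytes (so, e.g., with the default convention even an
// i32 still occupies a full 8-byte doubleword of the parameter area).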
5071 for (unsigned i = 0; i != NumOps; ++i) { 5072 ISD::ArgFlagsTy Flags = Outs[i].Flags; 5073 EVT ArgVT = Outs[i].VT; 5074 EVT OrigVT = Outs[i].ArgVT; 5075 5076 if (Flags.isNest()) 5077 continue; 5078 5079 if (CallConv == CallingConv::Fast) { 5080 if (Flags.isByVal()) 5081 NumGPRsUsed += (Flags.getByValSize()+7)/8; 5082 else 5083 switch (ArgVT.getSimpleVT().SimpleTy) { 5084 default: llvm_unreachable("Unexpected ValueType for argument!"); 5085 case MVT::i1: 5086 case MVT::i32: 5087 case MVT::i64: 5088 if (++NumGPRsUsed <= NumGPRs) 5089 continue; 5090 break; 5091 case MVT::v4i32: 5092 case MVT::v8i16: 5093 case MVT::v16i8: 5094 case MVT::v2f64: 5095 case MVT::v2i64: 5096 case MVT::v1i128: 5097 if (++NumVRsUsed <= NumVRs) 5098 continue; 5099 break; 5100 case MVT::v4f32: 5101 // When using QPX, this is handled like a FP register, otherwise, it 5102 // is an Altivec register. 5103 if (Subtarget.hasQPX()) { 5104 if (++NumFPRsUsed <= NumFPRs) 5105 continue; 5106 } else { 5107 if (++NumVRsUsed <= NumVRs) 5108 continue; 5109 } 5110 break; 5111 case MVT::f32: 5112 case MVT::f64: 5113 case MVT::v4f64: // QPX 5114 case MVT::v4i1: // QPX 5115 if (++NumFPRsUsed <= NumFPRs) 5116 continue; 5117 break; 5118 } 5119 } 5120 5121 /* Respect alignment of argument on the stack. */ 5122 unsigned Align = 5123 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); 5124 NumBytes = ((NumBytes + Align - 1) / Align) * Align; 5125 5126 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); 5127 if (Flags.isInConsecutiveRegsLast()) 5128 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 5129 } 5130 5131 unsigned NumBytesActuallyUsed = NumBytes; 5132 5133 // The prolog code of the callee may store up to 8 GPR argument registers to 5134 // the stack, allowing va_start to index over them in memory if its varargs. 5135 // Because we cannot tell if this is needed on the caller side, we have to 5136 // conservatively assume that it is needed. As such, make sure we have at 5137 // least enough stack space for the caller to store the 8 GPRs. 5138 // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area. 5139 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize); 5140 5141 // Tail call needs the stack to be aligned. 5142 if (getTargetMachine().Options.GuaranteedTailCallOpt && 5143 CallConv == CallingConv::Fast) 5144 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes); 5145 5146 int SPDiff = 0; 5147 5148 // Calculate by how many bytes the stack has to be adjusted in case of tail 5149 // call optimization. 5150 if (!IsSibCall) 5151 SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); 5152 5153 // To protect arguments on the stack from being clobbered in a tail call, 5154 // force all the loads to happen before doing any other lowering. 5155 if (isTailCall) 5156 Chain = DAG.getStackArgumentTokenFactor(Chain); 5157 5158 // Adjust the stack pointer for the new arguments... 5159 // These operations are automatically eliminated by the prolog/epilog pass 5160 if (!IsSibCall) 5161 Chain = DAG.getCALLSEQ_START(Chain, 5162 DAG.getIntPtrConstant(NumBytes, dl, true), dl); 5163 SDValue CallSeqStart = Chain; 5164 5165 // Load the return address and frame pointer so it can be move somewhere else 5166 // later. 
5167 SDValue LROp, FPOp; 5168 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl); 5169 5170 // Set up a copy of the stack pointer for use loading and storing any 5171 // arguments that may not fit in the registers available for argument 5172 // passing. 5173 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 5174 5175 // Figure out which arguments are going to go in registers, and which in 5176 // memory. Also, if this is a vararg function, floating point operations 5177 // must be stored to our stack, and loaded into integer regs as well, if 5178 // any integer regs are available for argument passing. 5179 unsigned ArgOffset = LinkageSize; 5180 5181 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 5182 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 5183 5184 SmallVector<SDValue, 8> MemOpChains; 5185 for (unsigned i = 0; i != NumOps; ++i) { 5186 SDValue Arg = OutVals[i]; 5187 ISD::ArgFlagsTy Flags = Outs[i].Flags; 5188 EVT ArgVT = Outs[i].VT; 5189 EVT OrigVT = Outs[i].ArgVT; 5190 5191 // PtrOff will be used to store the current argument to the stack if a 5192 // register cannot be found for it. 5193 SDValue PtrOff; 5194 5195 // We re-align the argument offset for each argument, except when using the 5196 // fast calling convention, when we need to make sure we do that only when 5197 // we'll actually use a stack slot. 5198 auto ComputePtrOff = [&]() { 5199 /* Respect alignment of argument on the stack. */ 5200 unsigned Align = 5201 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); 5202 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; 5203 5204 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType()); 5205 5206 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 5207 }; 5208 5209 if (CallConv != CallingConv::Fast) { 5210 ComputePtrOff(); 5211 5212 /* Compute GPR index associated with argument offset. */ 5213 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; 5214 GPR_idx = std::min(GPR_idx, NumGPRs); 5215 } 5216 5217 // Promote integers to 64-bit values. 5218 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) { 5219 // FIXME: Should this use ANY_EXTEND if neither sext nor zext? 5220 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 5221 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); 5222 } 5223 5224 // FIXME memcpy is used way more than necessary. Correctness first. 5225 // Note: "by value" is code for passing a structure by value, not 5226 // basic types. 5227 if (Flags.isByVal()) { 5228 // Note: Size includes alignment padding, so 5229 // struct x { short a; char b; } 5230 // will have Size = 4. With #pragma pack(1), it will have Size = 3. 5231 // These are the proper values we need for right-justifying the 5232 // aggregate in a parameter register. 5233 unsigned Size = Flags.getByValSize(); 5234 5235 // An empty aggregate parameter takes up no storage and no 5236 // registers. 5237 if (Size == 0) 5238 continue; 5239 5240 if (CallConv == CallingConv::Fast) 5241 ComputePtrOff(); 5242 5243 // All aggregates smaller than 8 bytes must be passed right-justified. 5244 if (Size==1 || Size==2 || Size==4) { 5245 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? 
MVT::i16 : MVT::i32); 5246 if (GPR_idx != NumGPRs) { 5247 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, 5248 MachinePointerInfo(), VT); 5249 MemOpChains.push_back(Load.getValue(1)); 5250 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5251 5252 ArgOffset += PtrByteSize; 5253 continue; 5254 } 5255 } 5256 5257 if (GPR_idx == NumGPRs && Size < 8) { 5258 SDValue AddPtr = PtrOff; 5259 if (!isLittleEndian) { 5260 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl, 5261 PtrOff.getValueType()); 5262 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 5263 } 5264 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 5265 CallSeqStart, 5266 Flags, DAG, dl); 5267 ArgOffset += PtrByteSize; 5268 continue; 5269 } 5270 // Copy entire object into memory. There are cases where gcc-generated 5271 // code assumes it is there, even if it could be put entirely into 5272 // registers. (This is not what the doc says.) 5273 5274 // FIXME: The above statement is likely due to a misunderstanding of the 5275 // documents. All arguments must be copied into the parameter area BY 5276 // THE CALLEE in the event that the callee takes the address of any 5277 // formal argument. That has not yet been implemented. However, it is 5278 // reasonable to use the stack area as a staging area for the register 5279 // load. 5280 5281 // Skip this for small aggregates, as we will use the same slot for a 5282 // right-justified copy, below. 5283 if (Size >= 8) 5284 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, 5285 CallSeqStart, 5286 Flags, DAG, dl); 5287 5288 // When a register is available, pass a small aggregate right-justified. 5289 if (Size < 8 && GPR_idx != NumGPRs) { 5290 // The easiest way to get this right-justified in a register 5291 // is to copy the structure into the rightmost portion of a 5292 // local variable slot, then load the whole slot into the 5293 // register. 5294 // FIXME: The memcpy seems to produce pretty awful code for 5295 // small aggregates, particularly for packed ones. 5296 // FIXME: It would be preferable to use the slot in the 5297 // parameter save area instead of a new local variable. 5298 SDValue AddPtr = PtrOff; 5299 if (!isLittleEndian) { 5300 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType()); 5301 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 5302 } 5303 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 5304 CallSeqStart, 5305 Flags, DAG, dl); 5306 5307 // Load the slot into the register. 5308 SDValue Load = 5309 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo()); 5310 MemOpChains.push_back(Load.getValue(1)); 5311 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5312 5313 // Done with this argument. 5314 ArgOffset += PtrByteSize; 5315 continue; 5316 } 5317 5318 // For aggregates larger than PtrByteSize, copy the pieces of the 5319 // object that fit into registers from the parameter save area. 
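// (Any tail of the aggregate that does not fit in the remaining GPRs stays in
// the in-memory copy created above; in that case ArgOffset is simply advanced
// past the rest of the object.)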
5320 for (unsigned j=0; j<Size; j+=PtrByteSize) { 5321 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType()); 5322 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); 5323 if (GPR_idx != NumGPRs) { 5324 SDValue Load = 5325 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo()); 5326 MemOpChains.push_back(Load.getValue(1)); 5327 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5328 ArgOffset += PtrByteSize; 5329 } else { 5330 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize; 5331 break; 5332 } 5333 } 5334 continue; 5335 } 5336 5337 switch (Arg.getSimpleValueType().SimpleTy) { 5338 default: llvm_unreachable("Unexpected ValueType for argument!"); 5339 case MVT::i1: 5340 case MVT::i32: 5341 case MVT::i64: 5342 if (Flags.isNest()) { 5343 // The 'nest' parameter, if any, is passed in R11. 5344 RegsToPass.push_back(std::make_pair(PPC::X11, Arg)); 5345 hasNest = true; 5346 break; 5347 } 5348 5349 // These can be scalar arguments or elements of an integer array type 5350 // passed directly. Clang may use those instead of "byval" aggregate 5351 // types to avoid forcing arguments to memory unnecessarily. 5352 if (GPR_idx != NumGPRs) { 5353 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); 5354 } else { 5355 if (CallConv == CallingConv::Fast) 5356 ComputePtrOff(); 5357 5358 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5359 true, isTailCall, false, MemOpChains, 5360 TailCallArguments, dl); 5361 if (CallConv == CallingConv::Fast) 5362 ArgOffset += PtrByteSize; 5363 } 5364 if (CallConv != CallingConv::Fast) 5365 ArgOffset += PtrByteSize; 5366 break; 5367 case MVT::f32: 5368 case MVT::f64: { 5369 // These can be scalar arguments or elements of a float array type 5370 // passed directly. The latter are used to implement ELFv2 homogenous 5371 // float aggregates. 5372 5373 // Named arguments go into FPRs first, and once they overflow, the 5374 // remaining arguments go into GPRs and then the parameter save area. 5375 // Unnamed arguments for vararg functions always go to GPRs and 5376 // then the parameter save area. For now, put all arguments to vararg 5377 // routines always in both locations (FPR *and* GPR or stack slot). 5378 bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs; 5379 bool NeededLoad = false; 5380 5381 // First load the argument into the next available FPR. 5382 if (FPR_idx != NumFPRs) 5383 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); 5384 5385 // Next, load the argument into GPR or stack slot if needed. 5386 if (!NeedGPROrStack) 5387 ; 5388 else if (GPR_idx != NumGPRs && CallConv != CallingConv::Fast) { 5389 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8 5390 // once we support fp <-> gpr moves. 5391 5392 // In the non-vararg case, this can only ever happen in the 5393 // presence of f32 array types, since otherwise we never run 5394 // out of FPRs before running out of GPRs. 5395 SDValue ArgVal; 5396 5397 // Double values are always passed in a single GPR. 5398 if (Arg.getValueType() != MVT::f32) { 5399 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg); 5400 5401 // Non-array float values are extended and passed in a GPR. 5402 } else if (!Flags.isInConsecutiveRegs()) { 5403 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); 5404 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal); 5405 5406 // If we have an array of floats, we collect every odd element 5407 // together with its predecessor into one GPR. 
5408 } else if (ArgOffset % PtrByteSize != 0) { 5409 SDValue Lo, Hi; 5410 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]); 5411 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); 5412 if (!isLittleEndian) 5413 std::swap(Lo, Hi); 5414 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); 5415 5416 // The final element, if even, goes into the first half of a GPR. 5417 } else if (Flags.isInConsecutiveRegsLast()) { 5418 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); 5419 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal); 5420 if (!isLittleEndian) 5421 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal, 5422 DAG.getConstant(32, dl, MVT::i32)); 5423 5424 // Non-final even elements are skipped; they will be handled 5425 // together the with subsequent argument on the next go-around. 5426 } else 5427 ArgVal = SDValue(); 5428 5429 if (ArgVal.getNode()) 5430 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal)); 5431 } else { 5432 if (CallConv == CallingConv::Fast) 5433 ComputePtrOff(); 5434 5435 // Single-precision floating-point values are mapped to the 5436 // second (rightmost) word of the stack doubleword. 5437 if (Arg.getValueType() == MVT::f32 && 5438 !isLittleEndian && !Flags.isInConsecutiveRegs()) { 5439 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType()); 5440 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); 5441 } 5442 5443 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5444 true, isTailCall, false, MemOpChains, 5445 TailCallArguments, dl); 5446 5447 NeededLoad = true; 5448 } 5449 // When passing an array of floats, the array occupies consecutive 5450 // space in the argument area; only round up to the next doubleword 5451 // at the end of the array. Otherwise, each float takes 8 bytes. 5452 if (CallConv != CallingConv::Fast || NeededLoad) { 5453 ArgOffset += (Arg.getValueType() == MVT::f32 && 5454 Flags.isInConsecutiveRegs()) ? 4 : 8; 5455 if (Flags.isInConsecutiveRegsLast()) 5456 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 5457 } 5458 break; 5459 } 5460 case MVT::v4f32: 5461 case MVT::v4i32: 5462 case MVT::v8i16: 5463 case MVT::v16i8: 5464 case MVT::v2f64: 5465 case MVT::v2i64: 5466 case MVT::v1i128: 5467 if (!Subtarget.hasQPX()) { 5468 // These can be scalar arguments or elements of a vector array type 5469 // passed directly. The latter are used to implement ELFv2 homogenous 5470 // vector aggregates. 5471 5472 // For a varargs call, named arguments go into VRs or on the stack as 5473 // usual; unnamed arguments always go to the stack or the corresponding 5474 // GPRs when within range. For now, we always put the value in both 5475 // locations (or even all three). 5476 if (isVarArg) { 5477 // We could elide this store in the case where the object fits 5478 // entirely in R registers. Maybe later. 
5479 SDValue Store = 5480 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()); 5481 MemOpChains.push_back(Store); 5482 if (VR_idx != NumVRs) { 5483 SDValue Load = 5484 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo()); 5485 MemOpChains.push_back(Load.getValue(1)); 5486 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); 5487 } 5488 ArgOffset += 16; 5489 for (unsigned i=0; i<16; i+=PtrByteSize) { 5490 if (GPR_idx == NumGPRs) 5491 break; 5492 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, 5493 DAG.getConstant(i, dl, PtrVT)); 5494 SDValue Load = 5495 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo()); 5496 MemOpChains.push_back(Load.getValue(1)); 5497 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5498 } 5499 break; 5500 } 5501 5502 // Non-varargs Altivec params go into VRs or on the stack. 5503 if (VR_idx != NumVRs) { 5504 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg)); 5505 } else { 5506 if (CallConv == CallingConv::Fast) 5507 ComputePtrOff(); 5508 5509 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5510 true, isTailCall, true, MemOpChains, 5511 TailCallArguments, dl); 5512 if (CallConv == CallingConv::Fast) 5513 ArgOffset += 16; 5514 } 5515 5516 if (CallConv != CallingConv::Fast) 5517 ArgOffset += 16; 5518 break; 5519 } // not QPX 5520 5521 assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 && 5522 "Invalid QPX parameter type"); 5523 5524 /* fall through */ 5525 case MVT::v4f64: 5526 case MVT::v4i1: { 5527 bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32; 5528 if (isVarArg) { 5529 // We could elide this store in the case where the object fits 5530 // entirely in R registers. Maybe later. 5531 SDValue Store = 5532 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()); 5533 MemOpChains.push_back(Store); 5534 if (QFPR_idx != NumQFPRs) { 5535 SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl, Store, 5536 PtrOff, MachinePointerInfo()); 5537 MemOpChains.push_back(Load.getValue(1)); 5538 RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load)); 5539 } 5540 ArgOffset += (IsF32 ? 16 : 32); 5541 for (unsigned i = 0; i < (IsF32 ? 16U : 32U); i += PtrByteSize) { 5542 if (GPR_idx == NumGPRs) 5543 break; 5544 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, 5545 DAG.getConstant(i, dl, PtrVT)); 5546 SDValue Load = 5547 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo()); 5548 MemOpChains.push_back(Load.getValue(1)); 5549 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5550 } 5551 break; 5552 } 5553 5554 // Non-varargs QPX params go into registers or on the stack. 5555 if (QFPR_idx != NumQFPRs) { 5556 RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg)); 5557 } else { 5558 if (CallConv == CallingConv::Fast) 5559 ComputePtrOff(); 5560 5561 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5562 true, isTailCall, true, MemOpChains, 5563 TailCallArguments, dl); 5564 if (CallConv == CallingConv::Fast) 5565 ArgOffset += (IsF32 ? 16 : 32); 5566 } 5567 5568 if (CallConv != CallingConv::Fast) 5569 ArgOffset += (IsF32 ? 16 : 32); 5570 break; 5571 } 5572 } 5573 } 5574 5575 assert(NumBytesActuallyUsed == ArgOffset); 5576 (void)NumBytesActuallyUsed; 5577 5578 if (!MemOpChains.empty()) 5579 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); 5580 5581 // Check if this is an indirect call (MTCTR/BCTRL). 5582 // See PrepareCall() for more information about calls through function 5583 // pointers in the 64-bit SVR4 ABI. 
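// For such calls the caller's TOC pointer (r2) is stored to the TOC save slot
// in the linkage area here, so that FinishCall() can emit BCTRL_LOAD_TOC to
// restore it once the callee returns.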
5584 if (!isTailCall && !isPatchPoint && 5585 !isFunctionGlobalAddress(Callee) && 5586 !isa<ExternalSymbolSDNode>(Callee)) { 5587 // Load r2 into a virtual register and store it to the TOC save area. 5588 setUsesTOCBasePtr(DAG); 5589 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64); 5590 // TOC save area offset. 5591 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); 5592 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl); 5593 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 5594 Chain = DAG.getStore( 5595 Val.getValue(1), dl, Val, AddPtr, 5596 MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset)); 5597 // In the ELFv2 ABI, R12 must contain the address of an indirect callee. 5598 // This does not mean the MTCTR instruction must use R12; it's easier 5599 // to model this as an extra parameter, so do that. 5600 if (isELFv2ABI && !isPatchPoint) 5601 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee)); 5602 } 5603 5604 // Build a sequence of copy-to-reg nodes chained together with token chain 5605 // and flag operands which copy the outgoing args into the appropriate regs. 5606 SDValue InFlag; 5607 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 5608 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 5609 RegsToPass[i].second, InFlag); 5610 InFlag = Chain.getValue(1); 5611 } 5612 5613 if (isTailCall && !IsSibCall) 5614 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp, 5615 TailCallArguments); 5616 5617 return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, hasNest, 5618 DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee, 5619 SPDiff, NumBytes, Ins, InVals, CS); 5620 } 5621 5622 SDValue PPCTargetLowering::LowerCall_Darwin( 5623 SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, 5624 bool isTailCall, bool isPatchPoint, 5625 const SmallVectorImpl<ISD::OutputArg> &Outs, 5626 const SmallVectorImpl<SDValue> &OutVals, 5627 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 5628 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, 5629 ImmutableCallSite *CS) const { 5630 5631 unsigned NumOps = Outs.size(); 5632 5633 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 5634 bool isPPC64 = PtrVT == MVT::i64; 5635 unsigned PtrByteSize = isPPC64 ? 8 : 4; 5636 5637 MachineFunction &MF = DAG.getMachineFunction(); 5638 5639 // Mark this function as potentially containing a function that contains a 5640 // tail call. As a consequence the frame pointer will be used for dynamicalloc 5641 // and restoring the callers stack pointer in this functions epilog. This is 5642 // done because by tail calling the called function might overwrite the value 5643 // in this function's (MF) stack pointer stack slot 0(SP). 5644 if (getTargetMachine().Options.GuaranteedTailCallOpt && 5645 CallConv == CallingConv::Fast) 5646 MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); 5647 5648 // Count how many bytes are to be pushed on the stack, including the linkage 5649 // area, and parameter passing area. We start with 24/48 bytes, which is 5650 // prereserved space for [SP][CR][LR][3 x unused]. 5651 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); 5652 unsigned NumBytes = LinkageSize; 5653 5654 // Add up all the space actually used. 5655 // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually 5656 // they all go in registers, but we must reserve stack space for them for 5657 // possible use by the caller. 
In varargs or 64-bit calls, parameters are 5658 // assigned stack space in order, with padding so Altivec parameters are 5659 // 16-byte aligned. 5660 unsigned nAltivecParamsAtEnd = 0; 5661 for (unsigned i = 0; i != NumOps; ++i) { 5662 ISD::ArgFlagsTy Flags = Outs[i].Flags; 5663 EVT ArgVT = Outs[i].VT; 5664 // Varargs Altivec parameters are padded to a 16 byte boundary. 5665 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || 5666 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || 5667 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) { 5668 if (!isVarArg && !isPPC64) { 5669 // Non-varargs Altivec parameters go after all the non-Altivec 5670 // parameters; handle those later so we know how much padding we need. 5671 nAltivecParamsAtEnd++; 5672 continue; 5673 } 5674 // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary. 5675 NumBytes = ((NumBytes+15)/16)*16; 5676 } 5677 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); 5678 } 5679 5680 // Allow for Altivec parameters at the end, if needed. 5681 if (nAltivecParamsAtEnd) { 5682 NumBytes = ((NumBytes+15)/16)*16; 5683 NumBytes += 16*nAltivecParamsAtEnd; 5684 } 5685 5686 // The prolog code of the callee may store up to 8 GPR argument registers to 5687 // the stack, allowing va_start to index over them in memory if its varargs. 5688 // Because we cannot tell if this is needed on the caller side, we have to 5689 // conservatively assume that it is needed. As such, make sure we have at 5690 // least enough stack space for the caller to store the 8 GPRs. 5691 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize); 5692 5693 // Tail call needs the stack to be aligned. 5694 if (getTargetMachine().Options.GuaranteedTailCallOpt && 5695 CallConv == CallingConv::Fast) 5696 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes); 5697 5698 // Calculate by how many bytes the stack has to be adjusted in case of tail 5699 // call optimization. 5700 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); 5701 5702 // To protect arguments on the stack from being clobbered in a tail call, 5703 // force all the loads to happen before doing any other lowering. 5704 if (isTailCall) 5705 Chain = DAG.getStackArgumentTokenFactor(Chain); 5706 5707 // Adjust the stack pointer for the new arguments... 5708 // These operations are automatically eliminated by the prolog/epilog pass 5709 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), 5710 dl); 5711 SDValue CallSeqStart = Chain; 5712 5713 // Load the return address and frame pointer so it can be move somewhere else 5714 // later. 5715 SDValue LROp, FPOp; 5716 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl); 5717 5718 // Set up a copy of the stack pointer for use loading and storing any 5719 // arguments that may not fit in the registers available for argument 5720 // passing. 5721 SDValue StackPtr; 5722 if (isPPC64) 5723 StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 5724 else 5725 StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 5726 5727 // Figure out which arguments are going to go in registers, and which in 5728 // memory. Also, if this is a vararg function, floating point operations 5729 // must be stored to our stack, and loaded into integer regs as well, if 5730 // any integer regs are available for argument passing. 5731 unsigned ArgOffset = LinkageSize; 5732 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 5733 5734 static const MCPhysReg GPR_32[] = { // 32-bit registers. 
5735 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 5736 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 5737 }; 5738 static const MCPhysReg GPR_64[] = { // 64-bit registers. 5739 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 5740 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 5741 }; 5742 static const MCPhysReg VR[] = { 5743 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 5744 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 5745 }; 5746 const unsigned NumGPRs = array_lengthof(GPR_32); 5747 const unsigned NumFPRs = 13; 5748 const unsigned NumVRs = array_lengthof(VR); 5749 5750 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32; 5751 5752 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 5753 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 5754 5755 SmallVector<SDValue, 8> MemOpChains; 5756 for (unsigned i = 0; i != NumOps; ++i) { 5757 SDValue Arg = OutVals[i]; 5758 ISD::ArgFlagsTy Flags = Outs[i].Flags; 5759 5760 // PtrOff will be used to store the current argument to the stack if a 5761 // register cannot be found for it. 5762 SDValue PtrOff; 5763 5764 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType()); 5765 5766 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 5767 5768 // On PPC64, promote integers to 64-bit values. 5769 if (isPPC64 && Arg.getValueType() == MVT::i32) { 5770 // FIXME: Should this use ANY_EXTEND if neither sext nor zext? 5771 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 5772 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); 5773 } 5774 5775 // FIXME memcpy is used way more than necessary. Correctness first. 5776 // Note: "by value" is code for passing a structure by value, not 5777 // basic types. 5778 if (Flags.isByVal()) { 5779 unsigned Size = Flags.getByValSize(); 5780 // Very small objects are passed right-justified. Everything else is 5781 // passed left-justified. 5782 if (Size==1 || Size==2) { 5783 EVT VT = (Size==1) ? MVT::i8 : MVT::i16; 5784 if (GPR_idx != NumGPRs) { 5785 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, 5786 MachinePointerInfo(), VT); 5787 MemOpChains.push_back(Load.getValue(1)); 5788 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5789 5790 ArgOffset += PtrByteSize; 5791 } else { 5792 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl, 5793 PtrOff.getValueType()); 5794 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 5795 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 5796 CallSeqStart, 5797 Flags, DAG, dl); 5798 ArgOffset += PtrByteSize; 5799 } 5800 continue; 5801 } 5802 // Copy entire object into memory. There are cases where gcc-generated 5803 // code assumes it is there, even if it could be put entirely into 5804 // registers. (This is not what the doc says.) 5805 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, 5806 CallSeqStart, 5807 Flags, DAG, dl); 5808 5809 // For small aggregates (Darwin only) and aggregates >= PtrByteSize, 5810 // copy the pieces of the object that fit into registers from the 5811 // parameter save area. 
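// (As in the 64-bit SVR4 path above, any part of the aggregate that does not
// fit in the remaining GPRs is left in the memory copy and ArgOffset skips
// past it.)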
5812 for (unsigned j=0; j<Size; j+=PtrByteSize) { 5813 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType()); 5814 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); 5815 if (GPR_idx != NumGPRs) { 5816 SDValue Load = 5817 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo()); 5818 MemOpChains.push_back(Load.getValue(1)); 5819 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5820 ArgOffset += PtrByteSize; 5821 } else { 5822 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize; 5823 break; 5824 } 5825 } 5826 continue; 5827 } 5828 5829 switch (Arg.getSimpleValueType().SimpleTy) { 5830 default: llvm_unreachable("Unexpected ValueType for argument!"); 5831 case MVT::i1: 5832 case MVT::i32: 5833 case MVT::i64: 5834 if (GPR_idx != NumGPRs) { 5835 if (Arg.getValueType() == MVT::i1) 5836 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg); 5837 5838 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); 5839 } else { 5840 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5841 isPPC64, isTailCall, false, MemOpChains, 5842 TailCallArguments, dl); 5843 } 5844 ArgOffset += PtrByteSize; 5845 break; 5846 case MVT::f32: 5847 case MVT::f64: 5848 if (FPR_idx != NumFPRs) { 5849 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); 5850 5851 if (isVarArg) { 5852 SDValue Store = 5853 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()); 5854 MemOpChains.push_back(Store); 5855 5856 // Float varargs are always shadowed in available integer registers 5857 if (GPR_idx != NumGPRs) { 5858 SDValue Load = 5859 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo()); 5860 MemOpChains.push_back(Load.getValue(1)); 5861 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5862 } 5863 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){ 5864 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType()); 5865 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); 5866 SDValue Load = 5867 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo()); 5868 MemOpChains.push_back(Load.getValue(1)); 5869 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5870 } 5871 } else { 5872 // If we have any FPRs remaining, we may also have GPRs remaining. 5873 // Args passed in FPRs consume either 1 (f32) or 2 (f64) available 5874 // GPRs. 5875 if (GPR_idx != NumGPRs) 5876 ++GPR_idx; 5877 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && 5878 !isPPC64) // PPC64 has 64-bit GPR's obviously :) 5879 ++GPR_idx; 5880 } 5881 } else 5882 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5883 isPPC64, isTailCall, false, MemOpChains, 5884 TailCallArguments, dl); 5885 if (isPPC64) 5886 ArgOffset += 8; 5887 else 5888 ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8; 5889 break; 5890 case MVT::v4f32: 5891 case MVT::v4i32: 5892 case MVT::v8i16: 5893 case MVT::v16i8: 5894 if (isVarArg) { 5895 // These go aligned on the stack, or in the corresponding R registers 5896 // when within range. The Darwin PPC ABI doc claims they also go in 5897 // V registers; in fact gcc does this only for arguments that are 5898 // prototyped, not for those that match the ... We do it for all 5899 // arguments, seems to work. 5900 while (ArgOffset % 16 !=0) { 5901 ArgOffset += PtrByteSize; 5902 if (GPR_idx != NumGPRs) 5903 GPR_idx++; 5904 } 5905 // We could elide this store in the case where the object fits 5906 // entirely in R registers. Maybe later. 
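// (The vector is stored to its 16-byte aligned slot below, then reloaded into a VR when one is free and word-by-word into any remaining GPRs, so a varargs callee can pick the value up from either register file or from memory.)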
5907 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, 5908 DAG.getConstant(ArgOffset, dl, PtrVT)); 5909 SDValue Store = 5910 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()); 5911 MemOpChains.push_back(Store); 5912 if (VR_idx != NumVRs) { 5913 SDValue Load = 5914 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo()); 5915 MemOpChains.push_back(Load.getValue(1)); 5916 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); 5917 } 5918 ArgOffset += 16; 5919 for (unsigned i=0; i<16; i+=PtrByteSize) { 5920 if (GPR_idx == NumGPRs) 5921 break; 5922 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, 5923 DAG.getConstant(i, dl, PtrVT)); 5924 SDValue Load = 5925 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo()); 5926 MemOpChains.push_back(Load.getValue(1)); 5927 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5928 } 5929 break; 5930 } 5931 5932 // Non-varargs Altivec params generally go in registers, but have 5933 // stack space allocated at the end. 5934 if (VR_idx != NumVRs) { 5935 // Doesn't have GPR space allocated. 5936 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg)); 5937 } else if (nAltivecParamsAtEnd==0) { 5938 // We are emitting Altivec params in order. 5939 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5940 isPPC64, isTailCall, true, MemOpChains, 5941 TailCallArguments, dl); 5942 ArgOffset += 16; 5943 } 5944 break; 5945 } 5946 } 5947 // If all Altivec parameters fit in registers, as they usually do, 5948 // they get stack space following the non-Altivec parameters. We 5949 // don't track this here because nobody below needs it. 5950 // If there are more Altivec parameters than fit in registers emit 5951 // the stores here. 5952 if (!isVarArg && nAltivecParamsAtEnd > NumVRs) { 5953 unsigned j = 0; 5954 // Offset is aligned; skip 1st 12 params which go in V registers. 5955 ArgOffset = ((ArgOffset+15)/16)*16; 5956 ArgOffset += 12*16; 5957 for (unsigned i = 0; i != NumOps; ++i) { 5958 SDValue Arg = OutVals[i]; 5959 EVT ArgType = Outs[i].VT; 5960 if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 || 5961 ArgType==MVT::v8i16 || ArgType==MVT::v16i8) { 5962 if (++j > NumVRs) { 5963 SDValue PtrOff; 5964 // We are emitting Altivec params in order. 5965 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5966 isPPC64, isTailCall, true, MemOpChains, 5967 TailCallArguments, dl); 5968 ArgOffset += 16; 5969 } 5970 } 5971 } 5972 } 5973 5974 if (!MemOpChains.empty()) 5975 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); 5976 5977 // On Darwin, R12 must contain the address of an indirect callee. This does 5978 // not mean the MTCTR instruction must use R12; it's easier to model this as 5979 // an extra parameter, so do that. 5980 if (!isTailCall && 5981 !isFunctionGlobalAddress(Callee) && 5982 !isa<ExternalSymbolSDNode>(Callee) && 5983 !isBLACompatibleAddress(Callee, DAG)) 5984 RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 : 5985 PPC::R12), Callee)); 5986 5987 // Build a sequence of copy-to-reg nodes chained together with token chain 5988 // and flag operands which copy the outgoing args into the appropriate regs. 
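// (Each CopyToReg consumes the glue produced by the previous one, which keeps the sequence, and the call that ultimately consumes it, contiguous so nothing can be scheduled in between that might clobber the argument registers.)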
5989 SDValue InFlag; 5990 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 5991 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 5992 RegsToPass[i].second, InFlag); 5993 InFlag = Chain.getValue(1); 5994 } 5995 5996 if (isTailCall) 5997 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp, 5998 TailCallArguments); 5999 6000 return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, 6001 /* unused except on PPC64 ELFv1 */ false, DAG, 6002 RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, 6003 NumBytes, Ins, InVals, CS); 6004 } 6005 6006 bool 6007 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv, 6008 MachineFunction &MF, bool isVarArg, 6009 const SmallVectorImpl<ISD::OutputArg> &Outs, 6010 LLVMContext &Context) const { 6011 SmallVector<CCValAssign, 16> RVLocs; 6012 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); 6013 return CCInfo.CheckReturn(Outs, RetCC_PPC); 6014 } 6015 6016 SDValue 6017 PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, 6018 bool isVarArg, 6019 const SmallVectorImpl<ISD::OutputArg> &Outs, 6020 const SmallVectorImpl<SDValue> &OutVals, 6021 const SDLoc &dl, SelectionDAG &DAG) const { 6022 6023 SmallVector<CCValAssign, 16> RVLocs; 6024 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, 6025 *DAG.getContext()); 6026 CCInfo.AnalyzeReturn(Outs, RetCC_PPC); 6027 6028 SDValue Flag; 6029 SmallVector<SDValue, 4> RetOps(1, Chain); 6030 6031 // Copy the result values into the output registers. 6032 for (unsigned i = 0; i != RVLocs.size(); ++i) { 6033 CCValAssign &VA = RVLocs[i]; 6034 assert(VA.isRegLoc() && "Can only return in registers!"); 6035 6036 SDValue Arg = OutVals[i]; 6037 6038 switch (VA.getLocInfo()) { 6039 default: llvm_unreachable("Unknown loc info!"); 6040 case CCValAssign::Full: break; 6041 case CCValAssign::AExt: 6042 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); 6043 break; 6044 case CCValAssign::ZExt: 6045 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); 6046 break; 6047 case CCValAssign::SExt: 6048 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); 6049 break; 6050 } 6051 6052 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); 6053 Flag = Chain.getValue(1); 6054 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 6055 } 6056 6057 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo(); 6058 const MCPhysReg *I = 6059 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction()); 6060 if (I) { 6061 for (; *I; ++I) { 6062 6063 if (PPC::G8RCRegClass.contains(*I)) 6064 RetOps.push_back(DAG.getRegister(*I, MVT::i64)); 6065 else if (PPC::F8RCRegClass.contains(*I)) 6066 RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64))); 6067 else if (PPC::CRRCRegClass.contains(*I)) 6068 RetOps.push_back(DAG.getRegister(*I, MVT::i1)); 6069 else if (PPC::VRRCRegClass.contains(*I)) 6070 RetOps.push_back(DAG.getRegister(*I, MVT::Other)); 6071 else 6072 llvm_unreachable("Unexpected register class in CSRsViaCopy!"); 6073 } 6074 } 6075 6076 RetOps[0] = Chain; // Update chain. 6077 6078 // Add the flag if we have it. 6079 if (Flag.getNode()) 6080 RetOps.push_back(Flag); 6081 6082 return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps); 6083 } 6084 6085 SDValue 6086 PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op, 6087 SelectionDAG &DAG) const { 6088 SDLoc dl(Op); 6089 6090 // Get the correct type for integers. 6091 EVT IntVT = Op.getValueType(); 6092 6093 // Get the inputs.
6094 SDValue Chain = Op.getOperand(0); 6095 SDValue FPSIdx = getFramePointerFrameIndex(DAG); 6096 // Build a DYNAREAOFFSET node. 6097 SDValue Ops[2] = {Chain, FPSIdx}; 6098 SDVTList VTs = DAG.getVTList(IntVT); 6099 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops); 6100 } 6101 6102 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, 6103 SelectionDAG &DAG) const { 6104 // When we pop the dynamic allocation we need to restore the SP link. 6105 SDLoc dl(Op); 6106 6107 // Get the correct type for pointers. 6108 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 6109 6110 // Construct the stack pointer operand. 6111 bool isPPC64 = Subtarget.isPPC64(); 6112 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1; 6113 SDValue StackPtr = DAG.getRegister(SP, PtrVT); 6114 6115 // Get the operands for the STACKRESTORE. 6116 SDValue Chain = Op.getOperand(0); 6117 SDValue SaveSP = Op.getOperand(1); 6118 6119 // Load the old link SP. 6120 SDValue LoadLinkSP = 6121 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo()); 6122 6123 // Restore the stack pointer. 6124 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP); 6125 6126 // Store the old link SP. 6127 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo()); 6128 } 6129 6130 SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const { 6131 MachineFunction &MF = DAG.getMachineFunction(); 6132 bool isPPC64 = Subtarget.isPPC64(); 6133 EVT PtrVT = getPointerTy(MF.getDataLayout()); 6134 6135 // Get the current return address save index. The users of this index will be 6136 // primarily the tail call lowering code. 6137 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 6138 int RASI = FI->getReturnAddrSaveIndex(); 6139 6140 // If the return address save index hasn't been defined yet. 6141 if (!RASI) { 6142 // Find out the fixed offset of the return address save area. 6143 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset(); 6144 // Allocate the frame index for the return address save area. 6145 RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false); 6146 // Save the result. 6147 FI->setReturnAddrSaveIndex(RASI); 6148 } 6149 return DAG.getFrameIndex(RASI, PtrVT); 6150 } 6151 6152 SDValue 6153 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { 6154 MachineFunction &MF = DAG.getMachineFunction(); 6155 bool isPPC64 = Subtarget.isPPC64(); 6156 EVT PtrVT = getPointerTy(MF.getDataLayout()); 6157 6158 // Get the current frame pointer save index. The users of this index will be 6159 // primarily DYNALLOC instructions. 6160 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 6161 int FPSI = FI->getFramePointerSaveIndex(); 6162 6163 // If the frame pointer save index hasn't been defined yet. 6164 if (!FPSI) { 6165 // Find out the fixed offset of the frame pointer save area. 6166 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset(); 6167 // Allocate the frame index for the frame pointer save area. 6168 FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true); 6169 // Save the result. 6170 FI->setFramePointerSaveIndex(FPSI); 6171 } 6172 return DAG.getFrameIndex(FPSI, PtrVT); 6173 } 6174 6175 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, 6176 SelectionDAG &DAG) const { 6177 // Get the inputs. 6178 SDValue Chain = Op.getOperand(0); 6179 SDValue Size = Op.getOperand(1); 6180 SDLoc dl(Op); 6181 6182 // Get the correct type for pointers. 6183 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 6184 // Negate the size.
6185 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT, 6186 DAG.getConstant(0, dl, PtrVT), Size); 6187 // Construct a node for the frame pointer save index. 6188 SDValue FPSIdx = getFramePointerFrameIndex(DAG); 6189 // Build a DYNALLOC node. 6190 SDValue Ops[3] = { Chain, NegSize, FPSIdx }; 6191 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other); 6192 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops); 6193 } 6194 6195 SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op, 6196 SelectionDAG &DAG) const { 6197 MachineFunction &MF = DAG.getMachineFunction(); 6198 6199 bool isPPC64 = Subtarget.isPPC64(); 6200 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 6201 6202 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false); 6203 return DAG.getFrameIndex(FI, PtrVT); 6204 } 6205 6206 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op, 6207 SelectionDAG &DAG) const { 6208 SDLoc DL(Op); 6209 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL, 6210 DAG.getVTList(MVT::i32, MVT::Other), 6211 Op.getOperand(0), Op.getOperand(1)); 6212 } 6213 6214 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op, 6215 SelectionDAG &DAG) const { 6216 SDLoc DL(Op); 6217 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other, 6218 Op.getOperand(0), Op.getOperand(1)); 6219 } 6220 6221 SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { 6222 if (Op.getValueType().isVector()) 6223 return LowerVectorLoad(Op, DAG); 6224 6225 assert(Op.getValueType() == MVT::i1 && 6226 "Custom lowering only for i1 loads"); 6227 6228 // First, load 8 bits into 32 bits, then truncate to 1 bit. 6229 6230 SDLoc dl(Op); 6231 LoadSDNode *LD = cast<LoadSDNode>(Op); 6232 6233 SDValue Chain = LD->getChain(); 6234 SDValue BasePtr = LD->getBasePtr(); 6235 MachineMemOperand *MMO = LD->getMemOperand(); 6236 6237 SDValue NewLD = 6238 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain, 6239 BasePtr, MVT::i8, MMO); 6240 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD); 6241 6242 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) }; 6243 return DAG.getMergeValues(Ops, dl); 6244 } 6245 6246 SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { 6247 if (Op.getOperand(1).getValueType().isVector()) 6248 return LowerVectorStore(Op, DAG); 6249 6250 assert(Op.getOperand(1).getValueType() == MVT::i1 && 6251 "Custom lowering only for i1 stores"); 6252 6253 // First, zero extend to 32 bits, then use a truncating store to 8 bits. 6254 6255 SDLoc dl(Op); 6256 StoreSDNode *ST = cast<StoreSDNode>(Op); 6257 6258 SDValue Chain = ST->getChain(); 6259 SDValue BasePtr = ST->getBasePtr(); 6260 SDValue Value = ST->getValue(); 6261 MachineMemOperand *MMO = ST->getMemOperand(); 6262 6263 Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()), 6264 Value); 6265 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO); 6266 } 6267 6268 // FIXME: Remove this once the ANDI glue bug is fixed: 6269 SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { 6270 assert(Op.getValueType() == MVT::i1 && 6271 "Custom lowering only for i1 results"); 6272 6273 SDLoc DL(Op); 6274 return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1, 6275 Op.getOperand(0)); 6276 } 6277 6278 /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when 6279 /// possible. 6280 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { 6281 // Not FP? Not a fsel. 
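// (fsel selects one of two FP values based on whether a third value compares >= 0.0, which is why only FP-typed select_cc's are handled here; every ordering below is reduced to that single >= 0.0 test by swapping the true/false operands or negating the compared quantity.)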
6282 if (!Op.getOperand(0).getValueType().isFloatingPoint() || 6283 !Op.getOperand(2).getValueType().isFloatingPoint()) 6284 return Op; 6285 6286 // We might be able to do better than this under some circumstances, but in 6287 // general, fsel-based lowering of select is a finite-math-only optimization. 6288 // For more information, see section F.3 of the 2.06 ISA specification. 6289 if (!DAG.getTarget().Options.NoInfsFPMath || 6290 !DAG.getTarget().Options.NoNaNsFPMath) 6291 return Op; 6292 // TODO: Propagate flags from the select rather than global settings. 6293 SDNodeFlags Flags; 6294 Flags.setNoInfs(true); 6295 Flags.setNoNaNs(true); 6296 6297 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); 6298 6299 EVT ResVT = Op.getValueType(); 6300 EVT CmpVT = Op.getOperand(0).getValueType(); 6301 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 6302 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3); 6303 SDLoc dl(Op); 6304 6305 // If the RHS of the comparison is a 0.0, we don't need to do the 6306 // subtraction at all. 6307 SDValue Sel1; 6308 if (isFloatingPointZero(RHS)) 6309 switch (CC) { 6310 default: break; // SETUO etc aren't handled by fsel. 6311 case ISD::SETNE: 6312 std::swap(TV, FV); 6313 case ISD::SETEQ: 6314 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 6315 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); 6316 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV); 6317 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits 6318 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1); 6319 return DAG.getNode(PPCISD::FSEL, dl, ResVT, 6320 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV); 6321 case ISD::SETULT: 6322 case ISD::SETLT: 6323 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt 6324 case ISD::SETOGE: 6325 case ISD::SETGE: 6326 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 6327 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); 6328 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV); 6329 case ISD::SETUGT: 6330 case ISD::SETGT: 6331 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt 6332 case ISD::SETOLE: 6333 case ISD::SETLE: 6334 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 6335 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); 6336 return DAG.getNode(PPCISD::FSEL, dl, ResVT, 6337 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV); 6338 } 6339 6340 SDValue Cmp; 6341 switch (CC) { 6342 default: break; // SETUO etc aren't handled by fsel. 
6343 case ISD::SETNE: 6344 std::swap(TV, FV); 6345 case ISD::SETEQ: 6346 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags); 6347 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 6348 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 6349 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 6350 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits 6351 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1); 6352 return DAG.getNode(PPCISD::FSEL, dl, ResVT, 6353 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV); 6354 case ISD::SETULT: 6355 case ISD::SETLT: 6356 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags); 6357 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 6358 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 6359 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); 6360 case ISD::SETOGE: 6361 case ISD::SETGE: 6362 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags); 6363 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 6364 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 6365 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 6366 case ISD::SETUGT: 6367 case ISD::SETGT: 6368 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags); 6369 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 6370 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 6371 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); 6372 case ISD::SETOLE: 6373 case ISD::SETLE: 6374 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags); 6375 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 6376 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 6377 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 6378 } 6379 return Op; 6380 } 6381 6382 void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, 6383 SelectionDAG &DAG, 6384 const SDLoc &dl) const { 6385 assert(Op.getOperand(0).getValueType().isFloatingPoint()); 6386 SDValue Src = Op.getOperand(0); 6387 if (Src.getValueType() == MVT::f32) 6388 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); 6389 6390 SDValue Tmp; 6391 switch (Op.getSimpleValueType().SimpleTy) { 6392 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); 6393 case MVT::i32: 6394 Tmp = DAG.getNode( 6395 Op.getOpcode() == ISD::FP_TO_SINT 6396 ? PPCISD::FCTIWZ 6397 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ), 6398 dl, MVT::f64, Src); 6399 break; 6400 case MVT::i64: 6401 assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) && 6402 "i64 FP_TO_UINT is supported only with FPCVT"); 6403 Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ : 6404 PPCISD::FCTIDUZ, 6405 dl, MVT::f64, Src); 6406 break; 6407 } 6408 6409 // Convert the FP value to an int value through memory. 6410 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() && 6411 (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()); 6412 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64); 6413 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex(); 6414 MachinePointerInfo MPI = 6415 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); 6416 6417 // Emit a store to the stack slot. 
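// (With STFIWX available the 32-bit integer result is stored directly; otherwise the whole f64 is spilled and the relevant word is picked out by the biased load below.)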
6418 SDValue Chain; 6419 if (i32Stack) { 6420 MachineFunction &MF = DAG.getMachineFunction(); 6421 MachineMemOperand *MMO = 6422 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4); 6423 SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr }; 6424 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, 6425 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO); 6426 } else 6427 Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, MPI); 6428 6429 // Result is a load from the stack slot. If loading 4 bytes, make sure to 6430 // add in a bias on big endian. 6431 if (Op.getValueType() == MVT::i32 && !i32Stack) { 6432 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, 6433 DAG.getConstant(4, dl, FIPtr.getValueType())); 6434 MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4); 6435 } 6436 6437 RLI.Chain = Chain; 6438 RLI.Ptr = FIPtr; 6439 RLI.MPI = MPI; 6440 } 6441 6442 /// \brief Custom lowers floating point to integer conversions to use 6443 /// the direct move instructions available in ISA 2.07 to avoid the 6444 /// need for load/store combinations. 6445 SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op, 6446 SelectionDAG &DAG, 6447 const SDLoc &dl) const { 6448 assert(Op.getOperand(0).getValueType().isFloatingPoint()); 6449 SDValue Src = Op.getOperand(0); 6450 6451 if (Src.getValueType() == MVT::f32) 6452 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); 6453 6454 SDValue Tmp; 6455 switch (Op.getSimpleValueType().SimpleTy) { 6456 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); 6457 case MVT::i32: 6458 Tmp = DAG.getNode( 6459 Op.getOpcode() == ISD::FP_TO_SINT 6460 ? PPCISD::FCTIWZ 6461 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ), 6462 dl, MVT::f64, Src); 6463 Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp); 6464 break; 6465 case MVT::i64: 6466 assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) && 6467 "i64 FP_TO_UINT is supported only with FPCVT"); 6468 Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ : 6469 PPCISD::FCTIDUZ, 6470 dl, MVT::f64, Src); 6471 Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp); 6472 break; 6473 } 6474 return Tmp; 6475 } 6476 6477 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, 6478 const SDLoc &dl) const { 6479 if (Subtarget.hasDirectMove() && Subtarget.isPPC64()) 6480 return LowerFP_TO_INTDirectMove(Op, DAG, dl); 6481 6482 ReuseLoadInfo RLI; 6483 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl); 6484 6485 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI, 6486 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges); 6487 } 6488 6489 // We're trying to insert a regular store, S, and then a load, L. If the 6490 // incoming value, O, is a load, we might just be able to have our load use the 6491 // address used by O. However, we don't know if anything else will store to 6492 // that address before we can load from it. To prevent this situation, we need 6493 // to insert our load, L, into the chain as a peer of O. To do this, we give L 6494 // the same chain operand as O, we create a token factor from the chain results 6495 // of O and L, and we replace all uses of O's chain result with that token 6496 // factor (see spliceIntoChain below for this last part). 
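// Illustratively: if O is "t1 = load A, ch0", the new load L is also given chain ch0, and a TokenFactor of the two chain results replaces every former use of t1's chain, so any later store to A is ordered after both loads.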
6497 bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT, 6498 ReuseLoadInfo &RLI, 6499 SelectionDAG &DAG, 6500 ISD::LoadExtType ET) const { 6501 SDLoc dl(Op); 6502 if (ET == ISD::NON_EXTLOAD && 6503 (Op.getOpcode() == ISD::FP_TO_UINT || 6504 Op.getOpcode() == ISD::FP_TO_SINT) && 6505 isOperationLegalOrCustom(Op.getOpcode(), 6506 Op.getOperand(0).getValueType())) { 6507 6508 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl); 6509 return true; 6510 } 6511 6512 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op); 6513 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() || 6514 LD->isNonTemporal()) 6515 return false; 6516 if (LD->getMemoryVT() != MemVT) 6517 return false; 6518 6519 RLI.Ptr = LD->getBasePtr(); 6520 if (LD->isIndexed() && !LD->getOffset().isUndef()) { 6521 assert(LD->getAddressingMode() == ISD::PRE_INC && 6522 "Non-pre-inc AM on PPC?"); 6523 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr, 6524 LD->getOffset()); 6525 } 6526 6527 RLI.Chain = LD->getChain(); 6528 RLI.MPI = LD->getPointerInfo(); 6529 RLI.IsDereferenceable = LD->isDereferenceable(); 6530 RLI.IsInvariant = LD->isInvariant(); 6531 RLI.Alignment = LD->getAlignment(); 6532 RLI.AAInfo = LD->getAAInfo(); 6533 RLI.Ranges = LD->getRanges(); 6534 6535 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1); 6536 return true; 6537 } 6538 6539 // Given the head of the old chain, ResChain, insert a token factor containing 6540 // it and NewResChain, and make users of ResChain now be users of that token 6541 // factor. 6542 void PPCTargetLowering::spliceIntoChain(SDValue ResChain, 6543 SDValue NewResChain, 6544 SelectionDAG &DAG) const { 6545 if (!ResChain) 6546 return; 6547 6548 SDLoc dl(NewResChain); 6549 6550 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 6551 NewResChain, DAG.getUNDEF(MVT::Other)); 6552 assert(TF.getNode() != NewResChain.getNode() && 6553 "A new TF really is required here"); 6554 6555 DAG.ReplaceAllUsesOfValueWith(ResChain, TF); 6556 DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain); 6557 } 6558 6559 /// \brief Analyze profitability of direct move 6560 /// prefer float load to int load plus direct move 6561 /// when there is no integer use of int load 6562 static bool directMoveIsProfitable(const SDValue &Op) { 6563 SDNode *Origin = Op.getOperand(0).getNode(); 6564 if (Origin->getOpcode() != ISD::LOAD) 6565 return true; 6566 6567 for (SDNode::use_iterator UI = Origin->use_begin(), 6568 UE = Origin->use_end(); 6569 UI != UE; ++UI) { 6570 6571 // Only look at the users of the loaded value. 6572 if (UI.getUse().get().getResNo() != 0) 6573 continue; 6574 6575 if (UI->getOpcode() != ISD::SINT_TO_FP && 6576 UI->getOpcode() != ISD::UINT_TO_FP) 6577 return true; 6578 } 6579 6580 return false; 6581 } 6582 6583 /// \brief Custom lowers integer to floating point conversions to use 6584 /// the direct move instructions available in ISA 2.07 to avoid the 6585 /// need for load/store combinations. 
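/// Roughly: the GPR value is moved into a VSR with a direct-move (mtvsr*) instruction and then converted in place with one of fcfid/fcfids/fcfidu/fcfidus, so no store/load round trip through the stack is needed.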
6586 SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op, 6587 SelectionDAG &DAG, 6588 const SDLoc &dl) const { 6589 assert((Op.getValueType() == MVT::f32 || 6590 Op.getValueType() == MVT::f64) && 6591 "Invalid floating point type as target of conversion"); 6592 assert(Subtarget.hasFPCVT() && 6593 "Int to FP conversions with direct moves require FPCVT"); 6594 SDValue FP; 6595 SDValue Src = Op.getOperand(0); 6596 bool SinglePrec = Op.getValueType() == MVT::f32; 6597 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32; 6598 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP; 6599 unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) : 6600 (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU); 6601 6602 if (WordInt) { 6603 FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ, 6604 dl, MVT::f64, Src); 6605 FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP); 6606 } 6607 else { 6608 FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src); 6609 FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP); 6610 } 6611 6612 return FP; 6613 } 6614 6615 SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, 6616 SelectionDAG &DAG) const { 6617 SDLoc dl(Op); 6618 6619 if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) { 6620 if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64) 6621 return SDValue(); 6622 6623 SDValue Value = Op.getOperand(0); 6624 // The values are now known to be -1 (false) or 1 (true). To convert this 6625 // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5). 6626 // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5 6627 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value); 6628 6629 SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64); 6630 6631 Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); 6632 6633 if (Op.getValueType() != MVT::v4f64) 6634 Value = DAG.getNode(ISD::FP_ROUND, dl, 6635 Op.getValueType(), Value, 6636 DAG.getIntPtrConstant(1, dl)); 6637 return Value; 6638 } 6639 6640 // Don't handle ppc_fp128 here; let it be lowered to a libcall. 6641 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) 6642 return SDValue(); 6643 6644 if (Op.getOperand(0).getValueType() == MVT::i1) 6645 return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0), 6646 DAG.getConstantFP(1.0, dl, Op.getValueType()), 6647 DAG.getConstantFP(0.0, dl, Op.getValueType())); 6648 6649 // If we have direct moves, we can do all the conversion, skip the store/load 6650 // however, without FPCVT we can't do most conversions. 6651 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) && 6652 Subtarget.isPPC64() && Subtarget.hasFPCVT()) 6653 return LowerINT_TO_FPDirectMove(Op, DAG, dl); 6654 6655 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) && 6656 "UINT_TO_FP is supported only with FPCVT"); 6657 6658 // If we have FCFIDS, then use it when converting to single-precision. 6659 // Otherwise, convert to double-precision and then round. 6660 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) 6661 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS 6662 : PPCISD::FCFIDS) 6663 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU 6664 : PPCISD::FCFID); 6665 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) 6666 ? 
MVT::f32 6667 : MVT::f64; 6668 6669 if (Op.getOperand(0).getValueType() == MVT::i64) { 6670 SDValue SINT = Op.getOperand(0); 6671 // When converting to single-precision, we actually need to convert 6672 // to double-precision first and then round to single-precision. 6673 // To avoid double-rounding effects during that operation, we have 6674 // to prepare the input operand. Bits that might be truncated when 6675 // converting to double-precision are replaced by a bit that won't 6676 // be lost at this stage, but is below the single-precision rounding 6677 // position. 6678 // 6679 // However, if -enable-unsafe-fp-math is in effect, accept double 6680 // rounding to avoid the extra overhead. 6681 if (Op.getValueType() == MVT::f32 && 6682 !Subtarget.hasFPCVT() && 6683 !DAG.getTarget().Options.UnsafeFPMath) { 6684 6685 // Twiddle input to make sure the low 11 bits are zero. (If this 6686 // is the case, we are guaranteed the value will fit into the 53 bit 6687 // mantissa of an IEEE double-precision value without rounding.) 6688 // If any of those low 11 bits were not zero originally, make sure 6689 // bit 12 (value 2048) is set instead, so that the final rounding 6690 // to single-precision gets the correct result. 6691 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64, 6692 SINT, DAG.getConstant(2047, dl, MVT::i64)); 6693 Round = DAG.getNode(ISD::ADD, dl, MVT::i64, 6694 Round, DAG.getConstant(2047, dl, MVT::i64)); 6695 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT); 6696 Round = DAG.getNode(ISD::AND, dl, MVT::i64, 6697 Round, DAG.getConstant(-2048, dl, MVT::i64)); 6698 6699 // However, we cannot use that value unconditionally: if the magnitude 6700 // of the input value is small, the bit-twiddling we did above might 6701 // end up visibly changing the output. Fortunately, in that case, we 6702 // don't need to twiddle bits since the original input will convert 6703 // exactly to double-precision floating-point already. Therefore, 6704 // construct a conditional to use the original value if the top 11 6705 // bits are all sign-bit copies, and use the rounded value computed 6706 // above otherwise. 
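// (The check below: after the arithmetic shift right by 53, an input whose top 11 bits are all sign-bit copies becomes 0 or -1; adding 1 maps those two cases to 1 or 0, so the unsigned "greater than 1" compare is true exactly when the rounded value computed above must be used.)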
6707 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64, 6708 SINT, DAG.getConstant(53, dl, MVT::i32)); 6709 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64, 6710 Cond, DAG.getConstant(1, dl, MVT::i64)); 6711 Cond = DAG.getSetCC(dl, MVT::i32, 6712 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT); 6713 6714 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT); 6715 } 6716 6717 ReuseLoadInfo RLI; 6718 SDValue Bits; 6719 6720 MachineFunction &MF = DAG.getMachineFunction(); 6721 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) { 6722 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI, 6723 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges); 6724 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); 6725 } else if (Subtarget.hasLFIWAX() && 6726 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) { 6727 MachineMemOperand *MMO = 6728 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, 6729 RLI.Alignment, RLI.AAInfo, RLI.Ranges); 6730 SDValue Ops[] = { RLI.Chain, RLI.Ptr }; 6731 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl, 6732 DAG.getVTList(MVT::f64, MVT::Other), 6733 Ops, MVT::i32, MMO); 6734 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); 6735 } else if (Subtarget.hasFPCVT() && 6736 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) { 6737 MachineMemOperand *MMO = 6738 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, 6739 RLI.Alignment, RLI.AAInfo, RLI.Ranges); 6740 SDValue Ops[] = { RLI.Chain, RLI.Ptr }; 6741 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl, 6742 DAG.getVTList(MVT::f64, MVT::Other), 6743 Ops, MVT::i32, MMO); 6744 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); 6745 } else if (((Subtarget.hasLFIWAX() && 6746 SINT.getOpcode() == ISD::SIGN_EXTEND) || 6747 (Subtarget.hasFPCVT() && 6748 SINT.getOpcode() == ISD::ZERO_EXTEND)) && 6749 SINT.getOperand(0).getValueType() == MVT::i32) { 6750 MachineFrameInfo &MFI = MF.getFrameInfo(); 6751 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 6752 6753 int FrameIdx = MFI.CreateStackObject(4, 4, false); 6754 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 6755 6756 SDValue Store = 6757 DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx, 6758 MachinePointerInfo::getFixedStack( 6759 DAG.getMachineFunction(), FrameIdx)); 6760 6761 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 && 6762 "Expected an i32 store"); 6763 6764 RLI.Ptr = FIdx; 6765 RLI.Chain = Store; 6766 RLI.MPI = 6767 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); 6768 RLI.Alignment = 4; 6769 6770 MachineMemOperand *MMO = 6771 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, 6772 RLI.Alignment, RLI.AAInfo, RLI.Ranges); 6773 SDValue Ops[] = { RLI.Chain, RLI.Ptr }; 6774 Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ? 6775 PPCISD::LFIWZX : PPCISD::LFIWAX, 6776 dl, DAG.getVTList(MVT::f64, MVT::Other), 6777 Ops, MVT::i32, MMO); 6778 } else 6779 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT); 6780 6781 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits); 6782 6783 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) 6784 FP = DAG.getNode(ISD::FP_ROUND, dl, 6785 MVT::f32, FP, DAG.getIntPtrConstant(0, dl)); 6786 return FP; 6787 } 6788 6789 assert(Op.getOperand(0).getValueType() == MVT::i32 && 6790 "Unhandled INT_TO_FP type in custom expander!"); 6791 // Since we only generate this in 64-bit mode, we can take advantage of 6792 // 64-bit registers. 
In particular, sign extend the input value into the 6793 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack 6794 // then lfd it and fcfid it. 6795 MachineFunction &MF = DAG.getMachineFunction(); 6796 MachineFrameInfo &MFI = MF.getFrameInfo(); 6797 EVT PtrVT = getPointerTy(MF.getDataLayout()); 6798 6799 SDValue Ld; 6800 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) { 6801 ReuseLoadInfo RLI; 6802 bool ReusingLoad; 6803 if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI, 6804 DAG))) { 6805 int FrameIdx = MFI.CreateStackObject(4, 4, false); 6806 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 6807 6808 SDValue Store = 6809 DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx, 6810 MachinePointerInfo::getFixedStack( 6811 DAG.getMachineFunction(), FrameIdx)); 6812 6813 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 && 6814 "Expected an i32 store"); 6815 6816 RLI.Ptr = FIdx; 6817 RLI.Chain = Store; 6818 RLI.MPI = 6819 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); 6820 RLI.Alignment = 4; 6821 } 6822 6823 MachineMemOperand *MMO = 6824 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, 6825 RLI.Alignment, RLI.AAInfo, RLI.Ranges); 6826 SDValue Ops[] = { RLI.Chain, RLI.Ptr }; 6827 Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ? 6828 PPCISD::LFIWZX : PPCISD::LFIWAX, 6829 dl, DAG.getVTList(MVT::f64, MVT::Other), 6830 Ops, MVT::i32, MMO); 6831 if (ReusingLoad) 6832 spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG); 6833 } else { 6834 assert(Subtarget.isPPC64() && 6835 "i32->FP without LFIWAX supported only on PPC64"); 6836 6837 int FrameIdx = MFI.CreateStackObject(8, 8, false); 6838 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 6839 6840 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, 6841 Op.getOperand(0)); 6842 6843 // STD the extended value into the stack slot. 6844 SDValue Store = DAG.getStore( 6845 DAG.getEntryNode(), dl, Ext64, FIdx, 6846 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx)); 6847 6848 // Load the value as a double. 6849 Ld = DAG.getLoad( 6850 MVT::f64, dl, Store, FIdx, 6851 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx)); 6852 } 6853 6854 // FCFID it and return it. 
6855 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld); 6856 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) 6857 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, 6858 DAG.getIntPtrConstant(0, dl)); 6859 return FP; 6860 } 6861 6862 SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, 6863 SelectionDAG &DAG) const { 6864 SDLoc dl(Op); 6865 /* 6866 The rounding mode is in bits 30:31 of FPSCR, and has the following 6867 settings: 6868 00 Round to nearest 6869 01 Round to 0 6870 10 Round to +inf 6871 11 Round to -inf 6872 6873 FLT_ROUNDS, on the other hand, expects the following: 6874 -1 Undefined 6875 0 Round to 0 6876 1 Round to nearest 6877 2 Round to +inf 6878 3 Round to -inf 6879 6880 To perform the conversion, we do: 6881 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1)) For example, FPSCR mode 00 (nearest) maps to 1, 01 (to zero) to 0, 10 (+inf) to 2, and 11 (-inf) to 3, as required. 6882 */ 6883 6884 MachineFunction &MF = DAG.getMachineFunction(); 6885 EVT VT = Op.getValueType(); 6886 EVT PtrVT = getPointerTy(MF.getDataLayout()); 6887 6888 // Save FP Control Word to register 6889 EVT NodeTys[] = { 6890 MVT::f64, // return register 6891 MVT::Glue // unused in this context 6892 }; 6893 SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None); 6894 6895 // Save FP register to stack slot 6896 int SSFI = MF.getFrameInfo().CreateStackObject(8, 8, false); 6897 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); 6898 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, StackSlot, 6899 MachinePointerInfo()); 6900 6901 // Load FP Control Word from low 32 bits of stack slot. 6902 SDValue Four = DAG.getConstant(4, dl, PtrVT); 6903 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four); 6904 SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo()); 6905 6906 // Transform as necessary 6907 SDValue CWD1 = 6908 DAG.getNode(ISD::AND, dl, MVT::i32, 6909 CWD, DAG.getConstant(3, dl, MVT::i32)); 6910 SDValue CWD2 = 6911 DAG.getNode(ISD::SRL, dl, MVT::i32, 6912 DAG.getNode(ISD::AND, dl, MVT::i32, 6913 DAG.getNode(ISD::XOR, dl, MVT::i32, 6914 CWD, DAG.getConstant(3, dl, MVT::i32)), 6915 DAG.getConstant(3, dl, MVT::i32)), 6916 DAG.getConstant(1, dl, MVT::i32)); 6917 6918 SDValue RetVal = 6919 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2); 6920 6921 return DAG.getNode((VT.getSizeInBits() < 16 ? 6922 ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal); 6923 } 6924 6925 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const { 6926 EVT VT = Op.getValueType(); 6927 unsigned BitWidth = VT.getSizeInBits(); 6928 SDLoc dl(Op); 6929 assert(Op.getNumOperands() == 3 && 6930 VT == Op.getOperand(1).getValueType() && 6931 "Unexpected SHL!"); 6932 6933 // Expand into a bunch of logical ops. Note that these ops 6934 // depend on the PPC behavior for oversized shift amounts.
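// (The expansion below computes OutLo = Lo << Amt and OutHi = (Hi << Amt) | (Lo >> (BitWidth - Amt)) | (Lo << (Amt - BitWidth)), relying on PPC shifts yielding zero for out-of-range amounts so the extra Lo term only contributes once Amt reaches BitWidth.)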
6935 SDValue Lo = Op.getOperand(0); 6936 SDValue Hi = Op.getOperand(1); 6937 SDValue Amt = Op.getOperand(2); 6938 EVT AmtVT = Amt.getValueType(); 6939 6940 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 6941 DAG.getConstant(BitWidth, dl, AmtVT), Amt); 6942 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt); 6943 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1); 6944 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3); 6945 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 6946 DAG.getConstant(-BitWidth, dl, AmtVT)); 6947 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5); 6948 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); 6949 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt); 6950 SDValue OutOps[] = { OutLo, OutHi }; 6951 return DAG.getMergeValues(OutOps, dl); 6952 } 6953 6954 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const { 6955 EVT VT = Op.getValueType(); 6956 SDLoc dl(Op); 6957 unsigned BitWidth = VT.getSizeInBits(); 6958 assert(Op.getNumOperands() == 3 && 6959 VT == Op.getOperand(1).getValueType() && 6960 "Unexpected SRL!"); 6961 6962 // Expand into a bunch of logical ops. Note that these ops 6963 // depend on the PPC behavior for oversized shift amounts. 6964 SDValue Lo = Op.getOperand(0); 6965 SDValue Hi = Op.getOperand(1); 6966 SDValue Amt = Op.getOperand(2); 6967 EVT AmtVT = Amt.getValueType(); 6968 6969 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 6970 DAG.getConstant(BitWidth, dl, AmtVT), Amt); 6971 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); 6972 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); 6973 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); 6974 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 6975 DAG.getConstant(-BitWidth, dl, AmtVT)); 6976 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5); 6977 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); 6978 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt); 6979 SDValue OutOps[] = { OutLo, OutHi }; 6980 return DAG.getMergeValues(OutOps, dl); 6981 } 6982 6983 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const { 6984 SDLoc dl(Op); 6985 EVT VT = Op.getValueType(); 6986 unsigned BitWidth = VT.getSizeInBits(); 6987 assert(Op.getNumOperands() == 3 && 6988 VT == Op.getOperand(1).getValueType() && 6989 "Unexpected SRA!"); 6990 6991 // Expand into a bunch of logical ops, followed by a select_cc. 6992 SDValue Lo = Op.getOperand(0); 6993 SDValue Hi = Op.getOperand(1); 6994 SDValue Amt = Op.getOperand(2); 6995 EVT AmtVT = Amt.getValueType(); 6996 6997 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 6998 DAG.getConstant(BitWidth, dl, AmtVT), Amt); 6999 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); 7000 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); 7001 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); 7002 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 7003 DAG.getConstant(-BitWidth, dl, AmtVT)); 7004 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5); 7005 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt); 7006 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT), 7007 Tmp4, Tmp6, ISD::SETLE); 7008 SDValue OutOps[] = { OutLo, OutHi }; 7009 return DAG.getMergeValues(OutOps, dl); 7010 } 7011 7012 //===----------------------------------------------------------------------===// 7013 // Vector related lowering. 
7014 // 7015 7016 /// BuildSplatI - Build a canonical splati of Val with an element size of 7017 /// SplatSize. Cast the result to VT. 7018 static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT, 7019 SelectionDAG &DAG, const SDLoc &dl) { 7020 assert(Val >= -16 && Val <= 15 && "vsplti is out of range!"); 7021 7022 static const MVT VTys[] = { // canonical VT to use for each size. 7023 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32 7024 }; 7025 7026 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1]; 7027 7028 // Force vspltis[hw] -1 to vspltisb -1 to canonicalize. 7029 if (Val == -1) 7030 SplatSize = 1; 7031 7032 EVT CanonicalVT = VTys[SplatSize-1]; 7033 7034 // Build a canonical splat for this value. 7035 return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT)); 7036 } 7037 7038 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the 7039 /// specified intrinsic ID. 7040 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, 7041 const SDLoc &dl, EVT DestVT = MVT::Other) { 7042 if (DestVT == MVT::Other) DestVT = Op.getValueType(); 7043 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 7044 DAG.getConstant(IID, dl, MVT::i32), Op); 7045 } 7046 7047 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the 7048 /// specified intrinsic ID. 7049 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS, 7050 SelectionDAG &DAG, const SDLoc &dl, 7051 EVT DestVT = MVT::Other) { 7052 if (DestVT == MVT::Other) DestVT = LHS.getValueType(); 7053 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 7054 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS); 7055 } 7056 7057 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the 7058 /// specified intrinsic ID. 7059 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1, 7060 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl, 7061 EVT DestVT = MVT::Other) { 7062 if (DestVT == MVT::Other) DestVT = Op0.getValueType(); 7063 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 7064 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2); 7065 } 7066 7067 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified 7068 /// amount. The result has the specified value type. 7069 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, 7070 SelectionDAG &DAG, const SDLoc &dl) { 7071 // Force LHS/RHS to be the right type. 7072 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS); 7073 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS); 7074 7075 int Ops[16]; 7076 for (unsigned i = 0; i != 16; ++i) 7077 Ops[i] = i + Amt; 7078 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops); 7079 return DAG.getNode(ISD::BITCAST, dl, VT, T); 7080 } 7081 7082 static bool isNonConstSplatBV(BuildVectorSDNode *BVN, EVT Type) { 7083 if (BVN->isConstant() || BVN->getValueType(0) != Type) 7084 return false; 7085 auto OpZero = BVN->getOperand(0); 7086 for (int i = 1, e = BVN->getNumOperands(); i < e; i++) 7087 if (BVN->getOperand(i) != OpZero) 7088 return false; 7089 return true; 7090 } 7091 7092 // If this is a case we can't handle, return null and let the default 7093 // expansion code take care of it. If we CAN select this case, and if it 7094 // selects to a single instruction, return Op. Otherwise, if we can codegen 7095 // this case more efficiently than a constant pool load, lower it to the 7096 // sequence of ops that should be used. 
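// For example, assuming a v4i32 build vector: a splat of 3 is a single vspltisw 3, while a splat of 24 falls outside the 5-bit immediate range but is even, so it is emitted as the VADD_SPLAT pseudo below and later expanded into vspltisw 12 followed by vadduwm.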
7097 SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, 7098 SelectionDAG &DAG) const { 7099 SDLoc dl(Op); 7100 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); 7101 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR"); 7102 7103 if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) { 7104 // We first build an i32 vector, load it into a QPX register, 7105 // then convert it to a floating-point vector and compare it 7106 // to a zero vector to get the boolean result. 7107 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); 7108 int FrameIdx = MFI.CreateStackObject(16, 16, false); 7109 MachinePointerInfo PtrInfo = 7110 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); 7111 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 7112 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 7113 7114 assert(BVN->getNumOperands() == 4 && 7115 "BUILD_VECTOR for v4i1 does not have 4 operands"); 7116 7117 bool IsConst = true; 7118 for (unsigned i = 0; i < 4; ++i) { 7119 if (BVN->getOperand(i).isUndef()) continue; 7120 if (!isa<ConstantSDNode>(BVN->getOperand(i))) { 7121 IsConst = false; 7122 break; 7123 } 7124 } 7125 7126 if (IsConst) { 7127 Constant *One = 7128 ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0); 7129 Constant *NegOne = 7130 ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0); 7131 7132 Constant *CV[4]; 7133 for (unsigned i = 0; i < 4; ++i) { 7134 if (BVN->getOperand(i).isUndef()) 7135 CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext())); 7136 else if (isNullConstant(BVN->getOperand(i))) 7137 CV[i] = NegOne; 7138 else 7139 CV[i] = One; 7140 } 7141 7142 Constant *CP = ConstantVector::get(CV); 7143 SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()), 7144 16 /* alignment */); 7145 7146 SDValue Ops[] = {DAG.getEntryNode(), CPIdx}; 7147 SDVTList VTs = DAG.getVTList({MVT::v4i1, /*chain*/ MVT::Other}); 7148 return DAG.getMemIntrinsicNode( 7149 PPCISD::QVLFSb, dl, VTs, Ops, MVT::v4f32, 7150 MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); 7151 } 7152 7153 SmallVector<SDValue, 4> Stores; 7154 for (unsigned i = 0; i < 4; ++i) { 7155 if (BVN->getOperand(i).isUndef()) continue; 7156 7157 unsigned Offset = 4*i; 7158 SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType()); 7159 Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx); 7160 7161 unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize(); 7162 if (StoreSize > 4) { 7163 Stores.push_back( 7164 DAG.getTruncStore(DAG.getEntryNode(), dl, BVN->getOperand(i), Idx, 7165 PtrInfo.getWithOffset(Offset), MVT::i32)); 7166 } else { 7167 SDValue StoreValue = BVN->getOperand(i); 7168 if (StoreSize < 4) 7169 StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue); 7170 7171 Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, StoreValue, Idx, 7172 PtrInfo.getWithOffset(Offset))); 7173 } 7174 } 7175 7176 SDValue StoreChain; 7177 if (!Stores.empty()) 7178 StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); 7179 else 7180 StoreChain = DAG.getEntryNode(); 7181 7182 // Now load from v4i32 into the QPX register; this will extend it to 7183 // v4i64 but not yet convert it to a floating point. Nevertheless, this 7184 // is typed as v4f64 because the QPX register integer states are not 7185 // explicitly represented. 
7186 7187 SDValue Ops[] = {StoreChain, 7188 DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, dl, MVT::i32), 7189 FIdx}; 7190 SDVTList VTs = DAG.getVTList({MVT::v4f64, /*chain*/ MVT::Other}); 7191 7192 SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, 7193 dl, VTs, Ops, MVT::v4i32, PtrInfo); 7194 LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, 7195 DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, dl, MVT::i32), 7196 LoadedVect); 7197 7198 SDValue FPZeros = DAG.getConstantFP(0.0, dl, MVT::v4f64); 7199 7200 return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ); 7201 } 7202 7203 // All other QPX vectors are handled by generic code. 7204 if (Subtarget.hasQPX()) 7205 return SDValue(); 7206 7207 // Check if this is a splat of a constant value. 7208 APInt APSplatBits, APSplatUndef; 7209 unsigned SplatBitSize; 7210 bool HasAnyUndefs; 7211 if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, 7212 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) || 7213 SplatBitSize > 32) { 7214 // We can splat a non-const value on CPU's that implement ISA 3.0 7215 // in two ways: LXVWSX (load and splat) and MTVSRWS(move and splat). 7216 auto OpZero = BVN->getOperand(0); 7217 bool CanLoadAndSplat = OpZero.getOpcode() == ISD::LOAD && 7218 BVN->isOnlyUserOf(OpZero.getNode()); 7219 if (Subtarget.isISA3_0() && !CanLoadAndSplat && 7220 (isNonConstSplatBV(BVN, MVT::v4i32) || 7221 isNonConstSplatBV(BVN, MVT::v2i64))) 7222 return Op; 7223 return SDValue(); 7224 } 7225 7226 unsigned SplatBits = APSplatBits.getZExtValue(); 7227 unsigned SplatUndef = APSplatUndef.getZExtValue(); 7228 unsigned SplatSize = SplatBitSize / 8; 7229 7230 // First, handle single instruction cases. 7231 7232 // All zeros? 7233 if (SplatBits == 0) { 7234 // Canonicalize all zero vectors to be v4i32. 7235 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) { 7236 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32); 7237 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z); 7238 } 7239 return Op; 7240 } 7241 7242 // We have XXSPLTIB for constant splats one byte wide 7243 if (Subtarget.isISA3_0() && Op.getValueType() == MVT::v16i8) 7244 return Op; 7245 7246 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw]. 7247 int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >> 7248 (32-SplatBitSize)); 7249 if (SextVal >= -16 && SextVal <= 15) 7250 return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl); 7251 7252 // Two instruction sequences. 7253 7254 // If this value is in the range [-32,30] and is even, use: 7255 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2) 7256 // If this value is in the range [17,31] and is odd, use: 7257 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16) 7258 // If this value is in the range [-31,-17] and is odd, use: 7259 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16) 7260 // Note the last two are three-instruction sequences. 7261 if (SextVal >= -32 && SextVal <= 31) { 7262 // To avoid having these optimizations undone by constant folding, 7263 // we convert to a pseudo that will be expanded later into one of 7264 // the above forms. 7265 SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32); 7266 EVT VT = (SplatSize == 1 ? MVT::v16i8 : 7267 (SplatSize == 2 ? 
MVT::v8i16 : MVT::v4i32)); 7268 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32); 7269 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize); 7270 if (VT == Op.getValueType()) 7271 return RetVal; 7272 else 7273 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal); 7274 } 7275 7276 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is 7277 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important 7278 // for fneg/fabs. 7279 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) { 7280 // Make -1 and vspltisw -1: 7281 SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl); 7282 7283 // Make the VSLW intrinsic, computing 0x8000_0000. 7284 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV, 7285 OnesV, DAG, dl); 7286 7287 // xor by OnesV to invert it. 7288 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV); 7289 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 7290 } 7291 7292 // Check to see if this is a wide variety of vsplti*, binop self cases. 7293 static const signed char SplatCsts[] = { 7294 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, 7295 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16 7296 }; 7297 7298 for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) { 7299 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for 7300 // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1' 7301 int i = SplatCsts[idx]; 7302 7303 // Figure out what shift amount will be used by altivec if shifted by i in 7304 // this splat size. 7305 unsigned TypeShiftAmt = i & (SplatBitSize-1); 7306 7307 // vsplti + shl self. 7308 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) { 7309 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 7310 static const unsigned IIDs[] = { // Intrinsic to use for each size. 7311 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0, 7312 Intrinsic::ppc_altivec_vslw 7313 }; 7314 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 7315 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 7316 } 7317 7318 // vsplti + srl self. 7319 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { 7320 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 7321 static const unsigned IIDs[] = { // Intrinsic to use for each size. 7322 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0, 7323 Intrinsic::ppc_altivec_vsrw 7324 }; 7325 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 7326 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 7327 } 7328 7329 // vsplti + sra self. 7330 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { 7331 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 7332 static const unsigned IIDs[] = { // Intrinsic to use for each size. 7333 Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0, 7334 Intrinsic::ppc_altivec_vsraw 7335 }; 7336 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 7337 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 7338 } 7339 7340 // vsplti + rol self. 7341 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) | 7342 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) { 7343 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 7344 static const unsigned IIDs[] = { // Intrinsic to use for each size. 
7345 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0, 7346 Intrinsic::ppc_altivec_vrlw 7347 }; 7348 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 7349 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 7350 } 7351 7352 // t = vsplti c, result = vsldoi t, t, 1 7353 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) { 7354 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 7355 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1; 7356 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl); 7357 } 7358 // t = vsplti c, result = vsldoi t, t, 2 7359 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) { 7360 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 7361 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2; 7362 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl); 7363 } 7364 // t = vsplti c, result = vsldoi t, t, 3 7365 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) { 7366 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 7367 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3; 7368 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl); 7369 } 7370 } 7371 7372 return SDValue(); 7373 } 7374 7375 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit 7376 /// the specified operations to build the shuffle. 7377 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, 7378 SDValue RHS, SelectionDAG &DAG, 7379 const SDLoc &dl) { 7380 unsigned OpNum = (PFEntry >> 26) & 0x0F; 7381 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); 7382 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); 7383 7384 enum { 7385 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> 7386 OP_VMRGHW, 7387 OP_VMRGLW, 7388 OP_VSPLTISW0, 7389 OP_VSPLTISW1, 7390 OP_VSPLTISW2, 7391 OP_VSPLTISW3, 7392 OP_VSLDOI4, 7393 OP_VSLDOI8, 7394 OP_VSLDOI12 7395 }; 7396 7397 if (OpNum == OP_COPY) { 7398 if (LHSID == (1*9+2)*9+3) return LHS; 7399 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); 7400 return RHS; 7401 } 7402 7403 SDValue OpLHS, OpRHS; 7404 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); 7405 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); 7406 7407 int ShufIdxs[16]; 7408 switch (OpNum) { 7409 default: llvm_unreachable("Unknown i32 permute!"); 7410 case OP_VMRGHW: 7411 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3; 7412 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19; 7413 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7; 7414 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23; 7415 break; 7416 case OP_VMRGLW: 7417 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11; 7418 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27; 7419 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15; 7420 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31; 7421 break; 7422 case OP_VSPLTISW0: 7423 for (unsigned i = 0; i != 16; ++i) 7424 ShufIdxs[i] = (i&3)+0; 7425 break; 7426 case OP_VSPLTISW1: 7427 for (unsigned i = 0; i != 16; ++i) 7428 ShufIdxs[i] = (i&3)+4; 7429 break; 7430 case OP_VSPLTISW2: 7431 for (unsigned i = 0; i != 16; ++i) 7432 ShufIdxs[i] = (i&3)+8; 7433 break; 7434 case OP_VSPLTISW3: 7435 for (unsigned i = 0; i != 16; ++i) 7436 ShufIdxs[i] = (i&3)+12; 7437 break; 7438 case OP_VSLDOI4: 7439 return 
BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl); 7440 case OP_VSLDOI8: 7441 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl); 7442 case OP_VSLDOI12: 7443 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl); 7444 } 7445 EVT VT = OpLHS.getValueType(); 7446 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS); 7447 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS); 7448 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs); 7449 return DAG.getNode(ISD::BITCAST, dl, VT, T); 7450 } 7451 7452 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this 7453 /// is a shuffle we can handle in a single instruction, return it. Otherwise, 7454 /// return the code it can be lowered into. Worst case, it can always be 7455 /// lowered into a vperm. 7456 SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, 7457 SelectionDAG &DAG) const { 7458 SDLoc dl(Op); 7459 SDValue V1 = Op.getOperand(0); 7460 SDValue V2 = Op.getOperand(1); 7461 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); 7462 EVT VT = Op.getValueType(); 7463 bool isLittleEndian = Subtarget.isLittleEndian(); 7464 7465 unsigned ShiftElts, InsertAtByte; 7466 bool Swap; 7467 if (Subtarget.hasP9Vector() && 7468 PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap, 7469 isLittleEndian)) { 7470 if (Swap) 7471 std::swap(V1, V2); 7472 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); 7473 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2); 7474 if (ShiftElts) { 7475 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2, 7476 DAG.getConstant(ShiftElts, dl, MVT::i32)); 7477 SDValue Ins = DAG.getNode(PPCISD::XXINSERT, dl, MVT::v4i32, Conv1, Shl, 7478 DAG.getConstant(InsertAtByte, dl, MVT::i32)); 7479 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins); 7480 } 7481 SDValue Ins = DAG.getNode(PPCISD::XXINSERT, dl, MVT::v4i32, Conv1, Conv2, 7482 DAG.getConstant(InsertAtByte, dl, MVT::i32)); 7483 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins); 7484 } 7485 7486 if (Subtarget.hasVSX()) { 7487 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) { 7488 int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG); 7489 7490 // If the source for the shuffle is a scalar_to_vector that came from a 7491 // 32-bit load, it will have used LXVWSX so we don't need to splat again. 7492 if (Subtarget.isISA3_0() && 7493 ((isLittleEndian && SplatIdx == 3) || 7494 (!isLittleEndian && SplatIdx == 0))) { 7495 SDValue Src = V1.getOperand(0); 7496 if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR && 7497 Src.getOperand(0).getOpcode() == ISD::LOAD && 7498 Src.getOperand(0).hasOneUse()) 7499 return V1; 7500 } 7501 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); 7502 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv, 7503 DAG.getConstant(SplatIdx, dl, MVT::i32)); 7504 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat); 7505 } 7506 7507 // Left shifts of 8 bytes are actually swaps. Convert accordingly. 
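    // For example, a one-input v16i8 mask of <8,9,...,15,0,1,...,7> is an
    // 8-byte left shift that merely exchanges the two doublewords, so it is
    // handled by bitcasting to v2f64 and emitting a doubleword swap
    // (PPCISD::SWAP_NO_CHAIN) rather than an actual shift.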
7508 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) { 7509 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1); 7510 SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv); 7511 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap); 7512 } 7513 7514 } 7515 7516 if (Subtarget.hasQPX()) { 7517 if (VT.getVectorNumElements() != 4) 7518 return SDValue(); 7519 7520 if (V2.isUndef()) V2 = V1; 7521 7522 int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp); 7523 if (AlignIdx != -1) { 7524 return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2, 7525 DAG.getConstant(AlignIdx, dl, MVT::i32)); 7526 } else if (SVOp->isSplat()) { 7527 int SplatIdx = SVOp->getSplatIndex(); 7528 if (SplatIdx >= 4) { 7529 std::swap(V1, V2); 7530 SplatIdx -= 4; 7531 } 7532 7533 return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1, 7534 DAG.getConstant(SplatIdx, dl, MVT::i32)); 7535 } 7536 7537 // Lower this into a qvgpci/qvfperm pair. 7538 7539 // Compute the qvgpci literal 7540 unsigned idx = 0; 7541 for (unsigned i = 0; i < 4; ++i) { 7542 int m = SVOp->getMaskElt(i); 7543 unsigned mm = m >= 0 ? (unsigned) m : i; 7544 idx |= mm << (3-i)*3; 7545 } 7546 7547 SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64, 7548 DAG.getConstant(idx, dl, MVT::i32)); 7549 return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3); 7550 } 7551 7552 // Cases that are handled by instructions that take permute immediates 7553 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be 7554 // selected by the instruction selector. 7555 if (V2.isUndef()) { 7556 if (PPC::isSplatShuffleMask(SVOp, 1) || 7557 PPC::isSplatShuffleMask(SVOp, 2) || 7558 PPC::isSplatShuffleMask(SVOp, 4) || 7559 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) || 7560 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) || 7561 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 || 7562 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) || 7563 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) || 7564 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) || 7565 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) || 7566 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) || 7567 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) || 7568 (Subtarget.hasP8Altivec() && ( 7569 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) || 7570 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) || 7571 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) { 7572 return Op; 7573 } 7574 } 7575 7576 // Altivec has a variety of "shuffle immediates" that take two vector inputs 7577 // and produce a fixed permutation. If any of these match, do not lower to 7578 // VPERM. 7579 unsigned int ShuffleKind = isLittleEndian ? 2 : 0; 7580 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) || 7581 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) || 7582 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 || 7583 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) || 7584 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) || 7585 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) || 7586 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) || 7587 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) || 7588 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) || 7589 (Subtarget.hasP8Altivec() && ( 7590 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) || 7591 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) || 7592 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG)))) 7593 return Op; 7594 7595 // Check to see if this is a shuffle of 4-byte values. If so, we can use our 7596 // perfect shuffle table to emit an optimal matching sequence. 
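  // Each 32-bit element of the result is summarized below as a single source
  // word number: 0-3 select a word of V1, 4-7 a word of V2, and 8 marks the
  // element as undef. The four numbers are packed base-9 into an index into
  // PerfectShuffleTable (see GeneratePerfectShuffle above); for example, the
  // identity shuffle of V1 gives PFIndexes = {0,1,2,3} and a table index of
  // 0*729 + 1*81 + 2*9 + 3 = 102.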
7597 ArrayRef<int> PermMask = SVOp->getMask(); 7598 7599 unsigned PFIndexes[4]; 7600 bool isFourElementShuffle = true; 7601 for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number 7602 unsigned EltNo = 8; // Start out undef. 7603 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte. 7604 if (PermMask[i*4+j] < 0) 7605 continue; // Undef, ignore it. 7606 7607 unsigned ByteSource = PermMask[i*4+j]; 7608 if ((ByteSource & 3) != j) { 7609 isFourElementShuffle = false; 7610 break; 7611 } 7612 7613 if (EltNo == 8) { 7614 EltNo = ByteSource/4; 7615 } else if (EltNo != ByteSource/4) { 7616 isFourElementShuffle = false; 7617 break; 7618 } 7619 } 7620 PFIndexes[i] = EltNo; 7621 } 7622 7623 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the 7624 // perfect shuffle vector to determine if it is cost effective to do this as 7625 // discrete instructions, or whether we should use a vperm. 7626 // For now, we skip this for little endian until such time as we have a 7627 // little-endian perfect shuffle table. 7628 if (isFourElementShuffle && !isLittleEndian) { 7629 // Compute the index in the perfect shuffle table. 7630 unsigned PFTableIndex = 7631 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 7632 7633 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 7634 unsigned Cost = (PFEntry >> 30); 7635 7636 // Determining when to avoid vperm is tricky. Many things affect the cost 7637 // of vperm, particularly how many times the perm mask needs to be computed. 7638 // For example, if the perm mask can be hoisted out of a loop or is already 7639 // used (perhaps because there are multiple permutes with the same shuffle 7640 // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of 7641 // the loop requires an extra register. 7642 // 7643 // As a compromise, we only emit discrete instructions if the shuffle can be 7644 // generated in 3 or fewer operations. When we have loop information 7645 // available, if this block is within a loop, we should avoid using vperm 7646 // for 3-operation perms and use a constant pool load instead. 7647 if (Cost < 3) 7648 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); 7649 } 7650 7651 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant 7652 // vector that will get spilled to the constant pool. 7653 if (V2.isUndef()) V2 = V1; 7654 7655 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except 7656 // that it is in input element units, not in bytes. Convert now. 7657 7658 // For little endian, the order of the input vectors is reversed, and 7659 // the permutation mask is complemented with respect to 31. This is 7660 // necessary to produce proper semantics with the big-endian-biased vperm 7661 // instruction. 7662 EVT EltVT = V1.getValueType().getVectorElementType(); 7663 unsigned BytesPerElement = EltVT.getSizeInBits()/8; 7664 7665 SmallVector<SDValue, 16> ResultMask; 7666 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) { 7667 unsigned SrcElt = PermMask[i] < 0 ? 
0 : PermMask[i]; 7668 7669 for (unsigned j = 0; j != BytesPerElement; ++j) 7670 if (isLittleEndian) 7671 ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j), 7672 dl, MVT::i32)); 7673 else 7674 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl, 7675 MVT::i32)); 7676 } 7677 7678 SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask); 7679 if (isLittleEndian) 7680 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), 7681 V2, V1, VPermMask); 7682 else 7683 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), 7684 V1, V2, VPermMask); 7685 } 7686 7687 /// getVectorCompareInfo - Given an intrinsic, return false if it is not a 7688 /// vector comparison. If it is, return true and fill in Opc/isDot with 7689 /// information about the intrinsic. 7690 static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, 7691 bool &isDot, const PPCSubtarget &Subtarget) { 7692 unsigned IntrinsicID = 7693 cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue(); 7694 CompareOpc = -1; 7695 isDot = false; 7696 switch (IntrinsicID) { 7697 default: return false; 7698 // Comparison predicates. 7699 case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break; 7700 case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break; 7701 case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break; 7702 case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break; 7703 case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break; 7704 case Intrinsic::ppc_altivec_vcmpequd_p: 7705 if (Subtarget.hasP8Altivec()) { 7706 CompareOpc = 199; 7707 isDot = 1; 7708 } else 7709 return false; 7710 7711 break; 7712 case Intrinsic::ppc_altivec_vcmpneb_p: 7713 case Intrinsic::ppc_altivec_vcmpneh_p: 7714 case Intrinsic::ppc_altivec_vcmpnew_p: 7715 case Intrinsic::ppc_altivec_vcmpnezb_p: 7716 case Intrinsic::ppc_altivec_vcmpnezh_p: 7717 case Intrinsic::ppc_altivec_vcmpnezw_p: 7718 if (Subtarget.hasP9Altivec()) { 7719 switch(IntrinsicID) { 7720 default: llvm_unreachable("Unknown comparison intrinsic."); 7721 case Intrinsic::ppc_altivec_vcmpneb_p: CompareOpc = 7; break; 7722 case Intrinsic::ppc_altivec_vcmpneh_p: CompareOpc = 71; break; 7723 case Intrinsic::ppc_altivec_vcmpnew_p: CompareOpc = 135; break; 7724 case Intrinsic::ppc_altivec_vcmpnezb_p: CompareOpc = 263; break; 7725 case Intrinsic::ppc_altivec_vcmpnezh_p: CompareOpc = 327; break; 7726 case Intrinsic::ppc_altivec_vcmpnezw_p: CompareOpc = 391; break; 7727 } 7728 isDot = 1; 7729 } else 7730 return false; 7731 7732 break; 7733 case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break; 7734 case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break; 7735 case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break; 7736 case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break; 7737 case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break; 7738 case Intrinsic::ppc_altivec_vcmpgtsd_p: 7739 if (Subtarget.hasP8Altivec()) { 7740 CompareOpc = 967; 7741 isDot = 1; 7742 } else 7743 return false; 7744 7745 break; 7746 case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break; 7747 case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break; 7748 case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break; 7749 case Intrinsic::ppc_altivec_vcmpgtud_p: 7750 if (Subtarget.hasP8Altivec()) { 7751 CompareOpc = 711; 7752 isDot = 1; 7753 } else 7754 
return false; 7755 7756 break; 7757 // VSX predicate comparisons use the same infrastructure 7758 case Intrinsic::ppc_vsx_xvcmpeqdp_p: 7759 case Intrinsic::ppc_vsx_xvcmpgedp_p: 7760 case Intrinsic::ppc_vsx_xvcmpgtdp_p: 7761 case Intrinsic::ppc_vsx_xvcmpeqsp_p: 7762 case Intrinsic::ppc_vsx_xvcmpgesp_p: 7763 case Intrinsic::ppc_vsx_xvcmpgtsp_p: 7764 if (Subtarget.hasVSX()) { 7765 switch (IntrinsicID) { 7766 case Intrinsic::ppc_vsx_xvcmpeqdp_p: CompareOpc = 99; break; 7767 case Intrinsic::ppc_vsx_xvcmpgedp_p: CompareOpc = 115; break; 7768 case Intrinsic::ppc_vsx_xvcmpgtdp_p: CompareOpc = 107; break; 7769 case Intrinsic::ppc_vsx_xvcmpeqsp_p: CompareOpc = 67; break; 7770 case Intrinsic::ppc_vsx_xvcmpgesp_p: CompareOpc = 83; break; 7771 case Intrinsic::ppc_vsx_xvcmpgtsp_p: CompareOpc = 75; break; 7772 } 7773 isDot = 1; 7774 } 7775 else 7776 return false; 7777 7778 break; 7779 7780 // Normal Comparisons. 7781 case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break; 7782 case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break; 7783 case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break; 7784 case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break; 7785 case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break; 7786 case Intrinsic::ppc_altivec_vcmpequd: 7787 if (Subtarget.hasP8Altivec()) { 7788 CompareOpc = 199; 7789 isDot = 0; 7790 } else 7791 return false; 7792 7793 break; 7794 case Intrinsic::ppc_altivec_vcmpneb: 7795 case Intrinsic::ppc_altivec_vcmpneh: 7796 case Intrinsic::ppc_altivec_vcmpnew: 7797 case Intrinsic::ppc_altivec_vcmpnezb: 7798 case Intrinsic::ppc_altivec_vcmpnezh: 7799 case Intrinsic::ppc_altivec_vcmpnezw: 7800 if (Subtarget.hasP9Altivec()) { 7801 switch (IntrinsicID) { 7802 default: llvm_unreachable("Unknown comparison intrinsic."); 7803 case Intrinsic::ppc_altivec_vcmpneb: CompareOpc = 7; break; 7804 case Intrinsic::ppc_altivec_vcmpneh: CompareOpc = 71; break; 7805 case Intrinsic::ppc_altivec_vcmpnew: CompareOpc = 135; break; 7806 case Intrinsic::ppc_altivec_vcmpnezb: CompareOpc = 263; break; 7807 case Intrinsic::ppc_altivec_vcmpnezh: CompareOpc = 327; break; 7808 case Intrinsic::ppc_altivec_vcmpnezw: CompareOpc = 391; break; 7809 } 7810 isDot = 0; 7811 } else 7812 return false; 7813 break; 7814 case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break; 7815 case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break; 7816 case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break; 7817 case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break; 7818 case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break; 7819 case Intrinsic::ppc_altivec_vcmpgtsd: 7820 if (Subtarget.hasP8Altivec()) { 7821 CompareOpc = 967; 7822 isDot = 0; 7823 } else 7824 return false; 7825 7826 break; 7827 case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break; 7828 case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break; 7829 case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break; 7830 case Intrinsic::ppc_altivec_vcmpgtud: 7831 if (Subtarget.hasP8Altivec()) { 7832 CompareOpc = 711; 7833 isDot = 0; 7834 } else 7835 return false; 7836 7837 break; 7838 } 7839 return true; 7840 } 7841 7842 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom 7843 /// lower, do it, otherwise return null. 
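/// For the AltiVec predicate ("dot") compare intrinsics, the lowering below
/// builds a VCMPo node, copies CR6 into a GPR with MFOCRF, and then shifts
/// and masks out the requested CR6 bit (EQ or LT, optionally inverted), as
/// selected by the intrinsic's first argument.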
7844 SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
7845 SelectionDAG &DAG) const {
7846 unsigned IntrinsicID =
7847 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
7848
7849 if (IntrinsicID == Intrinsic::thread_pointer) {
7850 // Reads the thread pointer register, used for __builtin_thread_pointer.
7851 bool is64bit = Subtarget.isPPC64();
7852 return DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
7853 is64bit ? MVT::i64 : MVT::i32);
7854 }
7855
7856 // If this is a lowered altivec predicate compare, CompareOpc is set to the
7857 // opcode number of the comparison.
7858 SDLoc dl(Op);
7859 int CompareOpc;
7860 bool isDot;
7861 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
7862 return SDValue(); // Don't custom lower most intrinsics.
7863
7864 // If this is a non-dot comparison, make the VCMP node and we are done.
7865 if (!isDot) {
7866 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
7867 Op.getOperand(1), Op.getOperand(2),
7868 DAG.getConstant(CompareOpc, dl, MVT::i32));
7869 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
7870 }
7871
7872 // Create the PPCISD altivec 'dot' comparison node.
7873 SDValue Ops[] = {
7874 Op.getOperand(2), // LHS
7875 Op.getOperand(3), // RHS
7876 DAG.getConstant(CompareOpc, dl, MVT::i32)
7877 };
7878 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
7879 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
7880
7881 // Now that we have the comparison, emit a copy from the CR to a GPR.
7882 // This is flagged to the above dot comparison.
7883 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
7884 DAG.getRegister(PPC::CR6, MVT::i32),
7885 CompNode.getValue(1));
7886
7887 // Unpack the result based on how the target uses it.
7888 unsigned BitNo; // Bit # of CR6.
7889 bool InvertBit; // Invert result?
7890 switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
7891 default: // Can't happen, don't crash on invalid number though.
7892 case 0: // Return the value of the EQ bit of CR6.
7893 BitNo = 0; InvertBit = false;
7894 break;
7895 case 1: // Return the inverted value of the EQ bit of CR6.
7896 BitNo = 0; InvertBit = true;
7897 break;
7898 case 2: // Return the value of the LT bit of CR6.
7899 BitNo = 2; InvertBit = false;
7900 break;
7901 case 3: // Return the inverted value of the LT bit of CR6.
7902 BitNo = 2; InvertBit = true;
7903 break;
7904 }
7905
7906 // Shift the bit into the low position.
7907 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
7908 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
7909 // Isolate the bit.
7910 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
7911 DAG.getConstant(1, dl, MVT::i32));
7912
7913 // If we are supposed to, toggle the bit.
7914 if (InvertBit)
7915 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
7916 DAG.getConstant(1, dl, MVT::i32));
7917 return Flags;
7918 }
7919
7920 SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
7921 SelectionDAG &DAG) const {
7922 SDLoc dl(Op);
7923 // For v2i64 (VSX), we can pattern match the v2i32 case (using fp <-> int
7924 // instructions), but for smaller types, we need to first extend up to v2i32
7925 // before going further.
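  // For example, a sign_extend_inreg of v2i64 from v2i8 is emitted below as:
  //   bitcast to v4i32
  //   sign_extend_inreg v4i32 from v4i8
  //   bitcast back to v2i64
  //   sign_extend_inreg v2i64 from v2i32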
7926 if (Op.getValueType() == MVT::v2i64) { 7927 EVT ExtVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); 7928 if (ExtVT != MVT::v2i32) { 7929 Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)); 7930 Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op, 7931 DAG.getValueType(EVT::getVectorVT(*DAG.getContext(), 7932 ExtVT.getVectorElementType(), 4))); 7933 Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op); 7934 Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op, 7935 DAG.getValueType(MVT::v2i32)); 7936 } 7937 7938 return Op; 7939 } 7940 7941 return SDValue(); 7942 } 7943 7944 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, 7945 SelectionDAG &DAG) const { 7946 SDLoc dl(Op); 7947 // Create a stack slot that is 16-byte aligned. 7948 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); 7949 int FrameIdx = MFI.CreateStackObject(16, 16, false); 7950 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 7951 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 7952 7953 // Store the input value into Value#0 of the stack slot. 7954 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx, 7955 MachinePointerInfo()); 7956 // Load it out. 7957 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo()); 7958 } 7959 7960 SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, 7961 SelectionDAG &DAG) const { 7962 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && 7963 "Should only be called for ISD::INSERT_VECTOR_ELT"); 7964 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 7965 // We have legal lowering for constant indices but not for variable ones. 7966 if (C) 7967 return Op; 7968 return SDValue(); 7969 } 7970 7971 SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, 7972 SelectionDAG &DAG) const { 7973 SDLoc dl(Op); 7974 SDNode *N = Op.getNode(); 7975 7976 assert(N->getOperand(0).getValueType() == MVT::v4i1 && 7977 "Unknown extract_vector_elt type"); 7978 7979 SDValue Value = N->getOperand(0); 7980 7981 // The first part of this is like the store lowering except that we don't 7982 // need to track the chain. 7983 7984 // The values are now known to be -1 (false) or 1 (true). To convert this 7985 // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5). 7986 // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5 7987 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value); 7988 7989 // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to 7990 // understand how to form the extending load. 7991 SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64); 7992 7993 Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); 7994 7995 // Now convert to an integer and store. 
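  // Roughly: qvfctiwu turns each lane (now 0.0 or 1.0) into an unsigned
  // 32-bit integer, qvstfiw spills all four words to a 16-byte stack slot,
  // and the requested lane is re-loaded as a single i32 at offset
  // 4 * element-index (truncated to i1 when CR bits are in use).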
7996 Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, 7997 DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32), 7998 Value); 7999 8000 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); 8001 int FrameIdx = MFI.CreateStackObject(16, 16, false); 8002 MachinePointerInfo PtrInfo = 8003 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); 8004 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 8005 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 8006 8007 SDValue StoreChain = DAG.getEntryNode(); 8008 SDValue Ops[] = {StoreChain, 8009 DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32), 8010 Value, FIdx}; 8011 SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other); 8012 8013 StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, 8014 dl, VTs, Ops, MVT::v4i32, PtrInfo); 8015 8016 // Extract the value requested. 8017 unsigned Offset = 4*cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 8018 SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType()); 8019 Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx); 8020 8021 SDValue IntVal = 8022 DAG.getLoad(MVT::i32, dl, StoreChain, Idx, PtrInfo.getWithOffset(Offset)); 8023 8024 if (!Subtarget.useCRBits()) 8025 return IntVal; 8026 8027 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal); 8028 } 8029 8030 /// Lowering for QPX v4i1 loads 8031 SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op, 8032 SelectionDAG &DAG) const { 8033 SDLoc dl(Op); 8034 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode()); 8035 SDValue LoadChain = LN->getChain(); 8036 SDValue BasePtr = LN->getBasePtr(); 8037 8038 if (Op.getValueType() == MVT::v4f64 || 8039 Op.getValueType() == MVT::v4f32) { 8040 EVT MemVT = LN->getMemoryVT(); 8041 unsigned Alignment = LN->getAlignment(); 8042 8043 // If this load is properly aligned, then it is legal. 
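    // Otherwise the v4f64/v4f32 load is split below into four scalar loads,
    // one per element, stepping the base pointer by the scalar store size.
    // The individual chains are joined with a TokenFactor and the result is
    // reassembled with BUILD_VECTOR; for pre-increment loads, only the first
    // scalar load carries the index update.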
8044 if (Alignment >= MemVT.getStoreSize()) 8045 return Op; 8046 8047 EVT ScalarVT = Op.getValueType().getScalarType(), 8048 ScalarMemVT = MemVT.getScalarType(); 8049 unsigned Stride = ScalarMemVT.getStoreSize(); 8050 8051 SDValue Vals[4], LoadChains[4]; 8052 for (unsigned Idx = 0; Idx < 4; ++Idx) { 8053 SDValue Load; 8054 if (ScalarVT != ScalarMemVT) 8055 Load = DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain, 8056 BasePtr, 8057 LN->getPointerInfo().getWithOffset(Idx * Stride), 8058 ScalarMemVT, MinAlign(Alignment, Idx * Stride), 8059 LN->getMemOperand()->getFlags(), LN->getAAInfo()); 8060 else 8061 Load = DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr, 8062 LN->getPointerInfo().getWithOffset(Idx * Stride), 8063 MinAlign(Alignment, Idx * Stride), 8064 LN->getMemOperand()->getFlags(), LN->getAAInfo()); 8065 8066 if (Idx == 0 && LN->isIndexed()) { 8067 assert(LN->getAddressingMode() == ISD::PRE_INC && 8068 "Unknown addressing mode on vector load"); 8069 Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(), 8070 LN->getAddressingMode()); 8071 } 8072 8073 Vals[Idx] = Load; 8074 LoadChains[Idx] = Load.getValue(1); 8075 8076 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, 8077 DAG.getConstant(Stride, dl, 8078 BasePtr.getValueType())); 8079 } 8080 8081 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); 8082 SDValue Value = DAG.getBuildVector(Op.getValueType(), dl, Vals); 8083 8084 if (LN->isIndexed()) { 8085 SDValue RetOps[] = { Value, Vals[0].getValue(1), TF }; 8086 return DAG.getMergeValues(RetOps, dl); 8087 } 8088 8089 SDValue RetOps[] = { Value, TF }; 8090 return DAG.getMergeValues(RetOps, dl); 8091 } 8092 8093 assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower"); 8094 assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported"); 8095 8096 // To lower v4i1 from a byte array, we load the byte elements of the 8097 // vector and then reuse the BUILD_VECTOR logic. 8098 8099 SDValue VectElmts[4], VectElmtChains[4]; 8100 for (unsigned i = 0; i < 4; ++i) { 8101 SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType()); 8102 Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx); 8103 8104 VectElmts[i] = DAG.getExtLoad( 8105 ISD::EXTLOAD, dl, MVT::i32, LoadChain, Idx, 8106 LN->getPointerInfo().getWithOffset(i), MVT::i8, 8107 /* Alignment = */ 1, LN->getMemOperand()->getFlags(), LN->getAAInfo()); 8108 VectElmtChains[i] = VectElmts[i].getValue(1); 8109 } 8110 8111 LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains); 8112 SDValue Value = DAG.getBuildVector(MVT::v4i1, dl, VectElmts); 8113 8114 SDValue RVals[] = { Value, LoadChain }; 8115 return DAG.getMergeValues(RVals, dl); 8116 } 8117 8118 /// Lowering for QPX v4i1 stores 8119 SDValue PPCTargetLowering::LowerVectorStore(SDValue Op, 8120 SelectionDAG &DAG) const { 8121 SDLoc dl(Op); 8122 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode()); 8123 SDValue StoreChain = SN->getChain(); 8124 SDValue BasePtr = SN->getBasePtr(); 8125 SDValue Value = SN->getValue(); 8126 8127 if (Value.getValueType() == MVT::v4f64 || 8128 Value.getValueType() == MVT::v4f32) { 8129 EVT MemVT = SN->getMemoryVT(); 8130 unsigned Alignment = SN->getAlignment(); 8131 8132 // If this store is properly aligned, then it is legal. 
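    // Otherwise, mirroring the load case above, the value is split below into
    // four extract_vector_elt + scalar (possibly truncating) stores whose
    // chains are merged with a TokenFactor; for pre-increment stores, only
    // the first scalar store carries the index update.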
8133 if (Alignment >= MemVT.getStoreSize()) 8134 return Op; 8135 8136 EVT ScalarVT = Value.getValueType().getScalarType(), 8137 ScalarMemVT = MemVT.getScalarType(); 8138 unsigned Stride = ScalarMemVT.getStoreSize(); 8139 8140 SDValue Stores[4]; 8141 for (unsigned Idx = 0; Idx < 4; ++Idx) { 8142 SDValue Ex = DAG.getNode( 8143 ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value, 8144 DAG.getConstant(Idx, dl, getVectorIdxTy(DAG.getDataLayout()))); 8145 SDValue Store; 8146 if (ScalarVT != ScalarMemVT) 8147 Store = 8148 DAG.getTruncStore(StoreChain, dl, Ex, BasePtr, 8149 SN->getPointerInfo().getWithOffset(Idx * Stride), 8150 ScalarMemVT, MinAlign(Alignment, Idx * Stride), 8151 SN->getMemOperand()->getFlags(), SN->getAAInfo()); 8152 else 8153 Store = DAG.getStore(StoreChain, dl, Ex, BasePtr, 8154 SN->getPointerInfo().getWithOffset(Idx * Stride), 8155 MinAlign(Alignment, Idx * Stride), 8156 SN->getMemOperand()->getFlags(), SN->getAAInfo()); 8157 8158 if (Idx == 0 && SN->isIndexed()) { 8159 assert(SN->getAddressingMode() == ISD::PRE_INC && 8160 "Unknown addressing mode on vector store"); 8161 Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(), 8162 SN->getAddressingMode()); 8163 } 8164 8165 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, 8166 DAG.getConstant(Stride, dl, 8167 BasePtr.getValueType())); 8168 Stores[Idx] = Store; 8169 } 8170 8171 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); 8172 8173 if (SN->isIndexed()) { 8174 SDValue RetOps[] = { TF, Stores[0].getValue(1) }; 8175 return DAG.getMergeValues(RetOps, dl); 8176 } 8177 8178 return TF; 8179 } 8180 8181 assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported"); 8182 assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower"); 8183 8184 // The values are now known to be -1 (false) or 1 (true). To convert this 8185 // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5). 8186 // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5 8187 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value); 8188 8189 // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to 8190 // understand how to form the extending load. 8191 SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64); 8192 8193 Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); 8194 8195 // Now convert to an integer and store. 8196 Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, 8197 DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32), 8198 Value); 8199 8200 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); 8201 int FrameIdx = MFI.CreateStackObject(16, 16, false); 8202 MachinePointerInfo PtrInfo = 8203 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); 8204 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 8205 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 8206 8207 SDValue Ops[] = {StoreChain, 8208 DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32), 8209 Value, FIdx}; 8210 SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other); 8211 8212 StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, 8213 dl, VTs, Ops, MVT::v4i32, PtrInfo); 8214 8215 // Move data into the byte array. 
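  // The four 32-bit words written by qvstfiw above are re-loaded one at a
  // time and then truncating-stored as individual bytes at the original
  // store address, producing the byte layout expected for a v4i1 in memory.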
8216 SDValue Loads[4], LoadChains[4]; 8217 for (unsigned i = 0; i < 4; ++i) { 8218 unsigned Offset = 4*i; 8219 SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType()); 8220 Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx); 8221 8222 Loads[i] = DAG.getLoad(MVT::i32, dl, StoreChain, Idx, 8223 PtrInfo.getWithOffset(Offset)); 8224 LoadChains[i] = Loads[i].getValue(1); 8225 } 8226 8227 StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); 8228 8229 SDValue Stores[4]; 8230 for (unsigned i = 0; i < 4; ++i) { 8231 SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType()); 8232 Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx); 8233 8234 Stores[i] = DAG.getTruncStore( 8235 StoreChain, dl, Loads[i], Idx, SN->getPointerInfo().getWithOffset(i), 8236 MVT::i8, /* Alignment = */ 1, SN->getMemOperand()->getFlags(), 8237 SN->getAAInfo()); 8238 } 8239 8240 StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); 8241 8242 return StoreChain; 8243 } 8244 8245 SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { 8246 SDLoc dl(Op); 8247 if (Op.getValueType() == MVT::v4i32) { 8248 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 8249 8250 SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl); 8251 SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt. 8252 8253 SDValue RHSSwap = // = vrlw RHS, 16 8254 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl); 8255 8256 // Shrinkify inputs to v8i16. 8257 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS); 8258 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS); 8259 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap); 8260 8261 // Low parts multiplied together, generating 32-bit results (we ignore the 8262 // top parts). 8263 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh, 8264 LHS, RHS, DAG, dl, MVT::v4i32); 8265 8266 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm, 8267 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32); 8268 // Shift the high parts up 16 bits. 8269 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, 8270 Neg16, DAG, dl); 8271 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd); 8272 } else if (Op.getValueType() == MVT::v8i16) { 8273 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 8274 8275 SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl); 8276 8277 return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm, 8278 LHS, RHS, Zero, DAG, dl); 8279 } else if (Op.getValueType() == MVT::v16i8) { 8280 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 8281 bool isLittleEndian = Subtarget.isLittleEndian(); 8282 8283 // Multiply the even 8-bit parts, producing 16-bit sums. 8284 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub, 8285 LHS, RHS, DAG, dl, MVT::v8i16); 8286 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts); 8287 8288 // Multiply the odd 8-bit parts, producing 16-bit sums. 8289 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub, 8290 LHS, RHS, DAG, dl, MVT::v8i16); 8291 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts); 8292 8293 // Merge the results together. Because vmuleub and vmuloub are 8294 // instructions with a big-endian bias, we must reverse the 8295 // element numbering and reverse the meaning of "odd" and "even" 8296 // when generating little endian code. 
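    // For big endian this builds the byte-shuffle mask {1,17,3,19,...}: the
    // low-order byte of each 16-bit product, alternating between EvenParts
    // and OddParts. For little endian the mask is {0,16,2,18,...} and the
    // two inputs are passed to the shuffle in the opposite order.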
8297 int Ops[16]; 8298 for (unsigned i = 0; i != 8; ++i) { 8299 if (isLittleEndian) { 8300 Ops[i*2 ] = 2*i; 8301 Ops[i*2+1] = 2*i+16; 8302 } else { 8303 Ops[i*2 ] = 2*i+1; 8304 Ops[i*2+1] = 2*i+1+16; 8305 } 8306 } 8307 if (isLittleEndian) 8308 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops); 8309 else 8310 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops); 8311 } else { 8312 llvm_unreachable("Unknown mul to lower!"); 8313 } 8314 } 8315 8316 /// LowerOperation - Provide custom lowering hooks for some operations. 8317 /// 8318 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { 8319 switch (Op.getOpcode()) { 8320 default: llvm_unreachable("Wasn't expecting to be able to lower this!"); 8321 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 8322 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); 8323 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); 8324 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 8325 case ISD::JumpTable: return LowerJumpTable(Op, DAG); 8326 case ISD::SETCC: return LowerSETCC(Op, DAG); 8327 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); 8328 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); 8329 case ISD::VASTART: 8330 return LowerVASTART(Op, DAG); 8331 8332 case ISD::VAARG: 8333 return LowerVAARG(Op, DAG); 8334 8335 case ISD::VACOPY: 8336 return LowerVACOPY(Op, DAG); 8337 8338 case ISD::STACKRESTORE: 8339 return LowerSTACKRESTORE(Op, DAG); 8340 8341 case ISD::DYNAMIC_STACKALLOC: 8342 return LowerDYNAMIC_STACKALLOC(Op, DAG); 8343 8344 case ISD::GET_DYNAMIC_AREA_OFFSET: 8345 return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG); 8346 8347 case ISD::EH_DWARF_CFA: 8348 return LowerEH_DWARF_CFA(Op, DAG); 8349 8350 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG); 8351 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG); 8352 8353 case ISD::LOAD: return LowerLOAD(Op, DAG); 8354 case ISD::STORE: return LowerSTORE(Op, DAG); 8355 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG); 8356 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 8357 case ISD::FP_TO_UINT: 8358 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, 8359 SDLoc(Op)); 8360 case ISD::UINT_TO_FP: 8361 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG); 8362 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); 8363 8364 // Lower 64-bit shifts. 8365 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG); 8366 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG); 8367 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG); 8368 8369 // Vector-related lowering. 8370 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 8371 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 8372 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 8373 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); 8374 case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG); 8375 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 8376 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); 8377 case ISD::MUL: return LowerMUL(Op, DAG); 8378 8379 // For counter-based loop handling. 8380 case ISD::INTRINSIC_W_CHAIN: return SDValue(); 8381 8382 // Frame & Return address. 
8383 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 8384 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 8385 } 8386 } 8387 8388 void PPCTargetLowering::ReplaceNodeResults(SDNode *N, 8389 SmallVectorImpl<SDValue>&Results, 8390 SelectionDAG &DAG) const { 8391 SDLoc dl(N); 8392 switch (N->getOpcode()) { 8393 default: 8394 llvm_unreachable("Do not know how to custom type legalize this operation!"); 8395 case ISD::READCYCLECOUNTER: { 8396 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); 8397 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0)); 8398 8399 Results.push_back(RTB); 8400 Results.push_back(RTB.getValue(1)); 8401 Results.push_back(RTB.getValue(2)); 8402 break; 8403 } 8404 case ISD::INTRINSIC_W_CHAIN: { 8405 if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 8406 Intrinsic::ppc_is_decremented_ctr_nonzero) 8407 break; 8408 8409 assert(N->getValueType(0) == MVT::i1 && 8410 "Unexpected result type for CTR decrement intrinsic"); 8411 EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), 8412 N->getValueType(0)); 8413 SDVTList VTs = DAG.getVTList(SVT, MVT::Other); 8414 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0), 8415 N->getOperand(1)); 8416 8417 Results.push_back(NewInt); 8418 Results.push_back(NewInt.getValue(1)); 8419 break; 8420 } 8421 case ISD::VAARG: { 8422 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64()) 8423 return; 8424 8425 EVT VT = N->getValueType(0); 8426 8427 if (VT == MVT::i64) { 8428 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG); 8429 8430 Results.push_back(NewNode); 8431 Results.push_back(NewNode.getValue(1)); 8432 } 8433 return; 8434 } 8435 case ISD::FP_ROUND_INREG: { 8436 assert(N->getValueType(0) == MVT::ppcf128); 8437 assert(N->getOperand(0).getValueType() == MVT::ppcf128); 8438 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, 8439 MVT::f64, N->getOperand(0), 8440 DAG.getIntPtrConstant(0, dl)); 8441 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, 8442 MVT::f64, N->getOperand(0), 8443 DAG.getIntPtrConstant(1, dl)); 8444 8445 // Add the two halves of the long double in round-to-zero mode. 8446 SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi); 8447 8448 // We know the low half is about to be thrown away, so just use something 8449 // convenient. 8450 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128, 8451 FPreg, FPreg)); 8452 return; 8453 } 8454 case ISD::FP_TO_SINT: 8455 case ISD::FP_TO_UINT: 8456 // LowerFP_TO_INT() can only handle f32 and f64. 
8457 if (N->getOperand(0).getValueType() == MVT::ppcf128)
8458 return;
8459 Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
8460 return;
8461 }
8462 }
8463
8464 //===----------------------------------------------------------------------===//
8465 // Other Lowering Code
8466 //===----------------------------------------------------------------------===//
8467
8468 static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
8469 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
8470 Function *Func = Intrinsic::getDeclaration(M, Id);
8471 return Builder.CreateCall(Func, {});
8472 }
8473
8474 // The mappings for emitLeading/TrailingFence are taken from
8475 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
8476 Instruction* PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
8477 AtomicOrdering Ord, bool IsStore,
8478 bool IsLoad) const {
8479 if (Ord == AtomicOrdering::SequentiallyConsistent)
8480 return callIntrinsic(Builder, Intrinsic::ppc_sync);
8481 if (isReleaseOrStronger(Ord))
8482 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
8483 return nullptr;
8484 }
8485
8486 Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
8487 AtomicOrdering Ord, bool IsStore,
8488 bool IsLoad) const {
8489 if (IsLoad && isAcquireOrStronger(Ord))
8490 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
8491 // FIXME: this is too conservative, a dependent branch + isync is enough.
8492 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
8493 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
8494 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
8495 return nullptr;
8496 }
8497
8498 MachineBasicBlock *
8499 PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
8500 unsigned AtomicSize,
8501 unsigned BinOpcode,
8502 unsigned CmpOpcode,
8503 unsigned CmpPred) const {
8504 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
8505 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8506
8507 auto LoadMnemonic = PPC::LDARX;
8508 auto StoreMnemonic = PPC::STDCX;
8509 switch (AtomicSize) {
8510 default:
8511 llvm_unreachable("Unexpected size of atomic entity");
8512 case 1:
8513 LoadMnemonic = PPC::LBARX;
8514 StoreMnemonic = PPC::STBCX;
8515 assert(Subtarget.hasPartwordAtomics() && "Byte atomics require partword-atomic support");
8516 break;
8517 case 2:
8518 LoadMnemonic = PPC::LHARX;
8519 StoreMnemonic = PPC::STHCX;
8520 assert(Subtarget.hasPartwordAtomics() && "Halfword atomics require partword-atomic support");
8521 break;
8522 case 4:
8523 LoadMnemonic = PPC::LWARX;
8524 StoreMnemonic = PPC::STWCX;
8525 break;
8526 case 8:
8527 LoadMnemonic = PPC::LDARX;
8528 StoreMnemonic = PPC::STDCX;
8529 break;
8530 }
8531
8532 const BasicBlock *LLVM_BB = BB->getBasicBlock();
8533 MachineFunction *F = BB->getParent();
8534 MachineFunction::iterator It = ++BB->getIterator();
8535
8536 unsigned dest = MI.getOperand(0).getReg();
8537 unsigned ptrA = MI.getOperand(1).getReg();
8538 unsigned ptrB = MI.getOperand(2).getReg();
8539 unsigned incr = MI.getOperand(3).getReg();
8540 DebugLoc dl = MI.getDebugLoc();
8541
8542 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
8543 MachineBasicBlock *loop2MBB =
8544 CmpOpcode ?
F->CreateMachineBasicBlock(LLVM_BB) : nullptr; 8545 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 8546 F->insert(It, loopMBB); 8547 if (CmpOpcode) 8548 F->insert(It, loop2MBB); 8549 F->insert(It, exitMBB); 8550 exitMBB->splice(exitMBB->begin(), BB, 8551 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 8552 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 8553 8554 MachineRegisterInfo &RegInfo = F->getRegInfo(); 8555 unsigned TmpReg = (!BinOpcode) ? incr : 8556 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass 8557 : &PPC::GPRCRegClass); 8558 8559 // thisMBB: 8560 // ... 8561 // fallthrough --> loopMBB 8562 BB->addSuccessor(loopMBB); 8563 8564 // loopMBB: 8565 // l[wd]arx dest, ptr 8566 // add r0, dest, incr 8567 // st[wd]cx. r0, ptr 8568 // bne- loopMBB 8569 // fallthrough --> exitMBB 8570 8571 // For max/min... 8572 // loopMBB: 8573 // l[wd]arx dest, ptr 8574 // cmpl?[wd] incr, dest 8575 // bgt exitMBB 8576 // loop2MBB: 8577 // st[wd]cx. dest, ptr 8578 // bne- loopMBB 8579 // fallthrough --> exitMBB 8580 8581 BB = loopMBB; 8582 BuildMI(BB, dl, TII->get(LoadMnemonic), dest) 8583 .addReg(ptrA).addReg(ptrB); 8584 if (BinOpcode) 8585 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest); 8586 if (CmpOpcode) { 8587 // Signed comparisons of byte or halfword values must be sign-extended. 8588 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) { 8589 unsigned ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); 8590 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH), 8591 ExtReg).addReg(dest); 8592 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0) 8593 .addReg(incr).addReg(ExtReg); 8594 } else 8595 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0) 8596 .addReg(incr).addReg(dest); 8597 8598 BuildMI(BB, dl, TII->get(PPC::BCC)) 8599 .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB); 8600 BB->addSuccessor(loop2MBB); 8601 BB->addSuccessor(exitMBB); 8602 BB = loop2MBB; 8603 } 8604 BuildMI(BB, dl, TII->get(StoreMnemonic)) 8605 .addReg(TmpReg).addReg(ptrA).addReg(ptrB); 8606 BuildMI(BB, dl, TII->get(PPC::BCC)) 8607 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); 8608 BB->addSuccessor(loopMBB); 8609 BB->addSuccessor(exitMBB); 8610 8611 // exitMBB: 8612 // ... 8613 BB = exitMBB; 8614 return BB; 8615 } 8616 8617 MachineBasicBlock * 8618 PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI, 8619 MachineBasicBlock *BB, 8620 bool is8bit, // operation 8621 unsigned BinOpcode, 8622 unsigned CmpOpcode, 8623 unsigned CmpPred) const { 8624 // If we support part-word atomic mnemonics, just use them 8625 if (Subtarget.hasPartwordAtomics()) 8626 return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, 8627 CmpOpcode, CmpPred); 8628 8629 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 8630 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 8631 // In 64 bit mode we have to use 64 bits for addresses, even though the 8632 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address 8633 // registers without caring whether they're 32 or 64, but here we're 8634 // doing actual arithmetic on the addresses. 8635 bool is64bit = Subtarget.isPPC64(); 8636 bool isLittleEndian = Subtarget.isLittleEndian(); 8637 unsigned ZeroReg = is64bit ? 
PPC::ZERO8 : PPC::ZERO; 8638 8639 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 8640 MachineFunction *F = BB->getParent(); 8641 MachineFunction::iterator It = ++BB->getIterator(); 8642 8643 unsigned dest = MI.getOperand(0).getReg(); 8644 unsigned ptrA = MI.getOperand(1).getReg(); 8645 unsigned ptrB = MI.getOperand(2).getReg(); 8646 unsigned incr = MI.getOperand(3).getReg(); 8647 DebugLoc dl = MI.getDebugLoc(); 8648 8649 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); 8650 MachineBasicBlock *loop2MBB = 8651 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr; 8652 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 8653 F->insert(It, loopMBB); 8654 if (CmpOpcode) 8655 F->insert(It, loop2MBB); 8656 F->insert(It, exitMBB); 8657 exitMBB->splice(exitMBB->begin(), BB, 8658 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 8659 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 8660 8661 MachineRegisterInfo &RegInfo = F->getRegInfo(); 8662 const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass 8663 : &PPC::GPRCRegClass; 8664 unsigned PtrReg = RegInfo.createVirtualRegister(RC); 8665 unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); 8666 unsigned ShiftReg = 8667 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC); 8668 unsigned Incr2Reg = RegInfo.createVirtualRegister(RC); 8669 unsigned MaskReg = RegInfo.createVirtualRegister(RC); 8670 unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); 8671 unsigned Mask3Reg = RegInfo.createVirtualRegister(RC); 8672 unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC); 8673 unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC); 8674 unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC); 8675 unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); 8676 unsigned Ptr1Reg; 8677 unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC); 8678 8679 // thisMBB: 8680 // ... 8681 // fallthrough --> loopMBB 8682 BB->addSuccessor(loopMBB); 8683 8684 // The 4-byte load must be aligned, while a char or short may be 8685 // anywhere in the word. Hence all this nasty bookkeeping code. 8686 // add ptr1, ptrA, ptrB [copy if ptrA==0] 8687 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] 8688 // xori shift, shift1, 24 [16] 8689 // rlwinm ptr, ptr1, 0, 0, 29 8690 // slw incr2, incr, shift 8691 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] 8692 // slw mask, mask2, shift 8693 // loopMBB: 8694 // lwarx tmpDest, ptr 8695 // add tmp, tmpDest, incr2 8696 // andc tmp2, tmpDest, mask 8697 // and tmp3, tmp, mask 8698 // or tmp4, tmp3, tmp2 8699 // stwcx. tmp4, ptr 8700 // bne- loopMBB 8701 // fallthrough --> exitMBB 8702 // srw dest, tmpDest, shift 8703 if (ptrA != ZeroReg) { 8704 Ptr1Reg = RegInfo.createVirtualRegister(RC); 8705 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) 8706 .addReg(ptrA).addReg(ptrB); 8707 } else { 8708 Ptr1Reg = ptrB; 8709 } 8710 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) 8711 .addImm(3).addImm(27).addImm(is8bit ? 28 : 27); 8712 if (!isLittleEndian) 8713 BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) 8714 .addReg(Shift1Reg).addImm(is8bit ? 
24 : 16); 8715 if (is64bit) 8716 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) 8717 .addReg(Ptr1Reg).addImm(0).addImm(61); 8718 else 8719 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) 8720 .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29); 8721 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg) 8722 .addReg(incr).addReg(ShiftReg); 8723 if (is8bit) 8724 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); 8725 else { 8726 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); 8727 BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535); 8728 } 8729 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) 8730 .addReg(Mask2Reg).addReg(ShiftReg); 8731 8732 BB = loopMBB; 8733 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) 8734 .addReg(ZeroReg).addReg(PtrReg); 8735 if (BinOpcode) 8736 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg) 8737 .addReg(Incr2Reg).addReg(TmpDestReg); 8738 BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg) 8739 .addReg(TmpDestReg).addReg(MaskReg); 8740 BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg) 8741 .addReg(TmpReg).addReg(MaskReg); 8742 if (CmpOpcode) { 8743 // For unsigned comparisons, we can directly compare the shifted values. 8744 // For signed comparisons we shift and sign extend. 8745 unsigned SReg = RegInfo.createVirtualRegister(RC); 8746 BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), SReg) 8747 .addReg(TmpDestReg).addReg(MaskReg); 8748 unsigned ValueReg = SReg; 8749 unsigned CmpReg = Incr2Reg; 8750 if (CmpOpcode == PPC::CMPW) { 8751 ValueReg = RegInfo.createVirtualRegister(RC); 8752 BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg) 8753 .addReg(SReg).addReg(ShiftReg); 8754 unsigned ValueSReg = RegInfo.createVirtualRegister(RC); 8755 BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg) 8756 .addReg(ValueReg); 8757 ValueReg = ValueSReg; 8758 CmpReg = incr; 8759 } 8760 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0) 8761 .addReg(CmpReg).addReg(ValueReg); 8762 BuildMI(BB, dl, TII->get(PPC::BCC)) 8763 .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB); 8764 BB->addSuccessor(loop2MBB); 8765 BB->addSuccessor(exitMBB); 8766 BB = loop2MBB; 8767 } 8768 BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg) 8769 .addReg(Tmp3Reg).addReg(Tmp2Reg); 8770 BuildMI(BB, dl, TII->get(PPC::STWCX)) 8771 .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg); 8772 BuildMI(BB, dl, TII->get(PPC::BCC)) 8773 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); 8774 BB->addSuccessor(loopMBB); 8775 BB->addSuccessor(exitMBB); 8776 8777 // exitMBB: 8778 // ... 
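// To recover the result, the SRW emitted at the head of exitMBB (below) shifts
// the updated lane back down into the low-order bits of 'dest'. As a worked
// example for a byte at effective address EA on a big-endian target:
//   shift1 = (EA & 3) * 8,  shift = shift1 ^ 24,  mask = 0xFF << shift,
// so 'mask' isolates the byte's lane within the aligned word and
// 'srw dest, tmpDest, shift' extracts it (on little-endian the xor is skipped).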
8779 BB = exitMBB; 8780 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg) 8781 .addReg(ShiftReg); 8782 return BB; 8783 } 8784 8785 llvm::MachineBasicBlock * 8786 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, 8787 MachineBasicBlock *MBB) const { 8788 DebugLoc DL = MI.getDebugLoc(); 8789 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 8790 8791 MachineFunction *MF = MBB->getParent(); 8792 MachineRegisterInfo &MRI = MF->getRegInfo(); 8793 8794 const BasicBlock *BB = MBB->getBasicBlock(); 8795 MachineFunction::iterator I = ++MBB->getIterator(); 8796 8797 // Memory Reference 8798 MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin(); 8799 MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end(); 8800 8801 unsigned DstReg = MI.getOperand(0).getReg(); 8802 const TargetRegisterClass *RC = MRI.getRegClass(DstReg); 8803 assert(RC->hasType(MVT::i32) && "Invalid destination!"); 8804 unsigned mainDstReg = MRI.createVirtualRegister(RC); 8805 unsigned restoreDstReg = MRI.createVirtualRegister(RC); 8806 8807 MVT PVT = getPointerTy(MF->getDataLayout()); 8808 assert((PVT == MVT::i64 || PVT == MVT::i32) && 8809 "Invalid Pointer Size!"); 8810 // For v = setjmp(buf), we generate 8811 // 8812 // thisMBB: 8813 // SjLjSetup mainMBB 8814 // bl mainMBB 8815 // v_restore = 1 8816 // b sinkMBB 8817 // 8818 // mainMBB: 8819 // buf[LabelOffset] = LR 8820 // v_main = 0 8821 // 8822 // sinkMBB: 8823 // v = phi(main, restore) 8824 // 8825 8826 MachineBasicBlock *thisMBB = MBB; 8827 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); 8828 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); 8829 MF->insert(I, mainMBB); 8830 MF->insert(I, sinkMBB); 8831 8832 MachineInstrBuilder MIB; 8833 8834 // Transfer the remainder of BB and its successor edges to sinkMBB. 8835 sinkMBB->splice(sinkMBB->begin(), MBB, 8836 std::next(MachineBasicBlock::iterator(MI)), MBB->end()); 8837 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); 8838 8839 // Note that the structure of the jmp_buf used here is not compatible 8840 // with that used by libc, and is not designed to be. Specifically, it 8841 // stores only those 'reserved' registers that LLVM does not otherwise 8842 // understand how to spill. Also, by convention, by the time this 8843 // intrinsic is called, Clang has already stored the frame address in the 8844 // first slot of the buffer and stack address in the third. Following the 8845 // X86 target code, we'll store the jump address in the second slot. We also 8846 // need to save the TOC pointer (R2) to handle jumps between shared 8847 // libraries, and that will be stored in the fourth slot. The thread 8848 // identifier (R13) is not affected. 8849 8850 // thisMBB: 8851 const int64_t LabelOffset = 1 * PVT.getStoreSize(); 8852 const int64_t TOCOffset = 3 * PVT.getStoreSize(); 8853 const int64_t BPOffset = 4 * PVT.getStoreSize(); 8854 8855 // Prepare IP either in reg. 8856 const TargetRegisterClass *PtrRC = getRegClassFor(PVT); 8857 unsigned LabelReg = MRI.createVirtualRegister(PtrRC); 8858 unsigned BufReg = MI.getOperand(1).getReg(); 8859 8860 if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) { 8861 setUsesTOCBasePtr(*MBB->getParent()); 8862 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD)) 8863 .addReg(PPC::X2) 8864 .addImm(TOCOffset) 8865 .addReg(BufReg); 8866 MIB.setMemRefs(MMOBegin, MMOEnd); 8867 } 8868 8869 // Naked functions never have a base pointer, and so we use r1. For all 8870 // other functions, this decision must be delayed until during PEI. 
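// Whichever register is chosen here is stored at BPOffset (the fifth
// pointer-sized slot of the buffer) so that emitEHSjLjLongJmp can restore it
// alongside the frame pointer, stack pointer and TOC.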
8871 unsigned BaseReg; 8872 if (MF->getFunction()->hasFnAttribute(Attribute::Naked)) 8873 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1; 8874 else 8875 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP; 8876 8877 MIB = BuildMI(*thisMBB, MI, DL, 8878 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW)) 8879 .addReg(BaseReg) 8880 .addImm(BPOffset) 8881 .addReg(BufReg); 8882 MIB.setMemRefs(MMOBegin, MMOEnd); 8883 8884 // Setup 8885 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB); 8886 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo(); 8887 MIB.addRegMask(TRI->getNoPreservedMask()); 8888 8889 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1); 8890 8891 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup)) 8892 .addMBB(mainMBB); 8893 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB); 8894 8895 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero()); 8896 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne()); 8897 8898 // mainMBB: 8899 // mainDstReg = 0 8900 MIB = 8901 BuildMI(mainMBB, DL, 8902 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg); 8903 8904 // Store IP 8905 if (Subtarget.isPPC64()) { 8906 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD)) 8907 .addReg(LabelReg) 8908 .addImm(LabelOffset) 8909 .addReg(BufReg); 8910 } else { 8911 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW)) 8912 .addReg(LabelReg) 8913 .addImm(LabelOffset) 8914 .addReg(BufReg); 8915 } 8916 8917 MIB.setMemRefs(MMOBegin, MMOEnd); 8918 8919 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0); 8920 mainMBB->addSuccessor(sinkMBB); 8921 8922 // sinkMBB: 8923 BuildMI(*sinkMBB, sinkMBB->begin(), DL, 8924 TII->get(PPC::PHI), DstReg) 8925 .addReg(mainDstReg).addMBB(mainMBB) 8926 .addReg(restoreDstReg).addMBB(thisMBB); 8927 8928 MI.eraseFromParent(); 8929 return sinkMBB; 8930 } 8931 8932 MachineBasicBlock * 8933 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI, 8934 MachineBasicBlock *MBB) const { 8935 DebugLoc DL = MI.getDebugLoc(); 8936 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 8937 8938 MachineFunction *MF = MBB->getParent(); 8939 MachineRegisterInfo &MRI = MF->getRegInfo(); 8940 8941 // Memory Reference 8942 MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin(); 8943 MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end(); 8944 8945 MVT PVT = getPointerTy(MF->getDataLayout()); 8946 assert((PVT == MVT::i64 || PVT == MVT::i32) && 8947 "Invalid Pointer Size!"); 8948 8949 const TargetRegisterClass *RC = 8950 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; 8951 unsigned Tmp = MRI.createVirtualRegister(RC); 8952 // Since FP is only updated here but NOT referenced, it's treated as GPR. 8953 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31; 8954 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1; 8955 unsigned BP = 8956 (PVT == MVT::i64) 8957 ? PPC::X30 8958 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29 8959 : PPC::R30); 8960 8961 MachineInstrBuilder MIB; 8962 8963 const int64_t LabelOffset = 1 * PVT.getStoreSize(); 8964 const int64_t SPOffset = 2 * PVT.getStoreSize(); 8965 const int64_t TOCOffset = 3 * PVT.getStoreSize(); 8966 const int64_t BPOffset = 4 * PVT.getStoreSize(); 8967 8968 unsigned BufReg = MI.getOperand(0).getReg(); 8969 8970 // Reload FP (the jumped-to function may not have had a 8971 // frame pointer, and if so, then its r31 will be restored 8972 // as necessary). 
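// The reloads below walk the same buffer layout populated by emitEHSjLjSetJmp
// (and the front end): slot 0 holds the frame pointer, slot 1 the jump
// address, slot 2 the stack pointer, slot 3 the TOC pointer (R2) and slot 4
// the base pointer; the jump address is then installed in CTR and branched to.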
8973 if (PVT == MVT::i64) { 8974 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP) 8975 .addImm(0) 8976 .addReg(BufReg); 8977 } else { 8978 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP) 8979 .addImm(0) 8980 .addReg(BufReg); 8981 } 8982 MIB.setMemRefs(MMOBegin, MMOEnd); 8983 8984 // Reload IP 8985 if (PVT == MVT::i64) { 8986 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp) 8987 .addImm(LabelOffset) 8988 .addReg(BufReg); 8989 } else { 8990 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp) 8991 .addImm(LabelOffset) 8992 .addReg(BufReg); 8993 } 8994 MIB.setMemRefs(MMOBegin, MMOEnd); 8995 8996 // Reload SP 8997 if (PVT == MVT::i64) { 8998 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP) 8999 .addImm(SPOffset) 9000 .addReg(BufReg); 9001 } else { 9002 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP) 9003 .addImm(SPOffset) 9004 .addReg(BufReg); 9005 } 9006 MIB.setMemRefs(MMOBegin, MMOEnd); 9007 9008 // Reload BP 9009 if (PVT == MVT::i64) { 9010 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP) 9011 .addImm(BPOffset) 9012 .addReg(BufReg); 9013 } else { 9014 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP) 9015 .addImm(BPOffset) 9016 .addReg(BufReg); 9017 } 9018 MIB.setMemRefs(MMOBegin, MMOEnd); 9019 9020 // Reload TOC 9021 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) { 9022 setUsesTOCBasePtr(*MBB->getParent()); 9023 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2) 9024 .addImm(TOCOffset) 9025 .addReg(BufReg); 9026 9027 MIB.setMemRefs(MMOBegin, MMOEnd); 9028 } 9029 9030 // Jump 9031 BuildMI(*MBB, MI, DL, 9032 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp); 9033 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR)); 9034 9035 MI.eraseFromParent(); 9036 return MBB; 9037 } 9038 9039 MachineBasicBlock * 9040 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, 9041 MachineBasicBlock *BB) const { 9042 if (MI.getOpcode() == TargetOpcode::STACKMAP || 9043 MI.getOpcode() == TargetOpcode::PATCHPOINT) { 9044 if (Subtarget.isPPC64() && Subtarget.isSVR4ABI() && 9045 MI.getOpcode() == TargetOpcode::PATCHPOINT) { 9046 // Call lowering should have added an r2 operand to indicate a dependence 9047 // on the TOC base pointer value. It can't however, because there is no 9048 // way to mark the dependence as implicit there, and so the stackmap code 9049 // will confuse it with a regular operand. Instead, add the dependence 9050 // here. 9051 setUsesTOCBasePtr(*BB->getParent()); 9052 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true)); 9053 } 9054 9055 return emitPatchPoint(MI, BB); 9056 } 9057 9058 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 || 9059 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) { 9060 return emitEHSjLjSetJmp(MI, BB); 9061 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 || 9062 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) { 9063 return emitEHSjLjLongJmp(MI, BB); 9064 } 9065 9066 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 9067 9068 // To "insert" these instructions we actually have to insert their 9069 // control-flow patterns. 
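// The pseudos handled below fall into a few groups: the SELECT_* family
// (lowered to a branch-and-phi diamond, or to isel for the integer forms when
// available), ReadTB (a retry loop for reading the 64-bit time base on 32-bit
// targets), the ATOMIC_* and ATOMIC_CMP_SWAP families (larx/stcx. loops), and
// a few straight-line expansions (FADDrtz, the ANDIo bit pseudos, TCHECK_RET).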
9070 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 9071 MachineFunction::iterator It = ++BB->getIterator(); 9072 9073 MachineFunction *F = BB->getParent(); 9074 9075 if (Subtarget.hasISEL() && 9076 (MI.getOpcode() == PPC::SELECT_CC_I4 || 9077 MI.getOpcode() == PPC::SELECT_CC_I8 || 9078 MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) { 9079 SmallVector<MachineOperand, 2> Cond; 9080 if (MI.getOpcode() == PPC::SELECT_CC_I4 || 9081 MI.getOpcode() == PPC::SELECT_CC_I8) 9082 Cond.push_back(MI.getOperand(4)); 9083 else 9084 Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET)); 9085 Cond.push_back(MI.getOperand(1)); 9086 9087 DebugLoc dl = MI.getDebugLoc(); 9088 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond, 9089 MI.getOperand(2).getReg(), MI.getOperand(3).getReg()); 9090 } else if (MI.getOpcode() == PPC::SELECT_CC_I4 || 9091 MI.getOpcode() == PPC::SELECT_CC_I8 || 9092 MI.getOpcode() == PPC::SELECT_CC_F4 || 9093 MI.getOpcode() == PPC::SELECT_CC_F8 || 9094 MI.getOpcode() == PPC::SELECT_CC_QFRC || 9095 MI.getOpcode() == PPC::SELECT_CC_QSRC || 9096 MI.getOpcode() == PPC::SELECT_CC_QBRC || 9097 MI.getOpcode() == PPC::SELECT_CC_VRRC || 9098 MI.getOpcode() == PPC::SELECT_CC_VSFRC || 9099 MI.getOpcode() == PPC::SELECT_CC_VSSRC || 9100 MI.getOpcode() == PPC::SELECT_CC_VSRC || 9101 MI.getOpcode() == PPC::SELECT_I4 || 9102 MI.getOpcode() == PPC::SELECT_I8 || 9103 MI.getOpcode() == PPC::SELECT_F4 || 9104 MI.getOpcode() == PPC::SELECT_F8 || 9105 MI.getOpcode() == PPC::SELECT_QFRC || 9106 MI.getOpcode() == PPC::SELECT_QSRC || 9107 MI.getOpcode() == PPC::SELECT_QBRC || 9108 MI.getOpcode() == PPC::SELECT_VRRC || 9109 MI.getOpcode() == PPC::SELECT_VSFRC || 9110 MI.getOpcode() == PPC::SELECT_VSSRC || 9111 MI.getOpcode() == PPC::SELECT_VSRC) { 9112 // The incoming instruction knows the destination vreg to set, the 9113 // condition code register to branch on, the true/false values to 9114 // select between, and a branch opcode to use. 9115 9116 // thisMBB: 9117 // ... 9118 // TrueVal = ... 9119 // cmpTY ccX, r1, r2 9120 // bCC copy1MBB 9121 // fallthrough --> copy0MBB 9122 MachineBasicBlock *thisMBB = BB; 9123 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); 9124 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 9125 DebugLoc dl = MI.getDebugLoc(); 9126 F->insert(It, copy0MBB); 9127 F->insert(It, sinkMBB); 9128 9129 // Transfer the remainder of BB and its successor edges to sinkMBB. 9130 sinkMBB->splice(sinkMBB->begin(), BB, 9131 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 9132 sinkMBB->transferSuccessorsAndUpdatePHIs(BB); 9133 9134 // Next, add the true and fallthrough blocks as its successors. 
9135 BB->addSuccessor(copy0MBB); 9136 BB->addSuccessor(sinkMBB); 9137 9138 if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 || 9139 MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 || 9140 MI.getOpcode() == PPC::SELECT_QFRC || 9141 MI.getOpcode() == PPC::SELECT_QSRC || 9142 MI.getOpcode() == PPC::SELECT_QBRC || 9143 MI.getOpcode() == PPC::SELECT_VRRC || 9144 MI.getOpcode() == PPC::SELECT_VSFRC || 9145 MI.getOpcode() == PPC::SELECT_VSSRC || 9146 MI.getOpcode() == PPC::SELECT_VSRC) { 9147 BuildMI(BB, dl, TII->get(PPC::BC)) 9148 .addReg(MI.getOperand(1).getReg()) 9149 .addMBB(sinkMBB); 9150 } else { 9151 unsigned SelectPred = MI.getOperand(4).getImm(); 9152 BuildMI(BB, dl, TII->get(PPC::BCC)) 9153 .addImm(SelectPred) 9154 .addReg(MI.getOperand(1).getReg()) 9155 .addMBB(sinkMBB); 9156 } 9157 9158 // copy0MBB: 9159 // %FalseValue = ... 9160 // # fallthrough to sinkMBB 9161 BB = copy0MBB; 9162 9163 // Update machine-CFG edges 9164 BB->addSuccessor(sinkMBB); 9165 9166 // sinkMBB: 9167 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 9168 // ... 9169 BB = sinkMBB; 9170 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg()) 9171 .addReg(MI.getOperand(3).getReg()) 9172 .addMBB(copy0MBB) 9173 .addReg(MI.getOperand(2).getReg()) 9174 .addMBB(thisMBB); 9175 } else if (MI.getOpcode() == PPC::ReadTB) { 9176 // To read the 64-bit time-base register on a 32-bit target, we read the 9177 // two halves. Should the counter have wrapped while it was being read, we 9178 // need to try again. 9179 // ... 9180 // readLoop: 9181 // mfspr Rx,TBU # load from TBU 9182 // mfspr Ry,TB # load from TB 9183 // mfspr Rz,TBU # load from TBU 9184 // cmpw crX,Rx,Rz # check if 'old'='new' 9185 // bne readLoop # branch if they're not equal 9186 // ... 9187 9188 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB); 9189 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 9190 DebugLoc dl = MI.getDebugLoc(); 9191 F->insert(It, readMBB); 9192 F->insert(It, sinkMBB); 9193 9194 // Transfer the remainder of BB and its successor edges to sinkMBB. 
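// (In the MFSPR immediates used below, SPR 269 is TBU, the upper half of the
// time base, and SPR 268 is TB, the lower half.)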
9195 sinkMBB->splice(sinkMBB->begin(), BB, 9196 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 9197 sinkMBB->transferSuccessorsAndUpdatePHIs(BB); 9198 9199 BB->addSuccessor(readMBB); 9200 BB = readMBB; 9201 9202 MachineRegisterInfo &RegInfo = F->getRegInfo(); 9203 unsigned ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); 9204 unsigned LoReg = MI.getOperand(0).getReg(); 9205 unsigned HiReg = MI.getOperand(1).getReg(); 9206 9207 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269); 9208 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268); 9209 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269); 9210 9211 unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); 9212 9213 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg) 9214 .addReg(HiReg).addReg(ReadAgainReg); 9215 BuildMI(BB, dl, TII->get(PPC::BCC)) 9216 .addImm(PPC::PRED_NE).addReg(CmpReg).addMBB(readMBB); 9217 9218 BB->addSuccessor(readMBB); 9219 BB->addSuccessor(sinkMBB); 9220 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8) 9221 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4); 9222 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16) 9223 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4); 9224 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32) 9225 BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4); 9226 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64) 9227 BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8); 9228 9229 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8) 9230 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND); 9231 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16) 9232 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND); 9233 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32) 9234 BB = EmitAtomicBinary(MI, BB, 4, PPC::AND); 9235 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64) 9236 BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8); 9237 9238 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8) 9239 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR); 9240 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16) 9241 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR); 9242 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32) 9243 BB = EmitAtomicBinary(MI, BB, 4, PPC::OR); 9244 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64) 9245 BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8); 9246 9247 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8) 9248 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR); 9249 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16) 9250 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR); 9251 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32) 9252 BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR); 9253 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64) 9254 BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8); 9255 9256 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8) 9257 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND); 9258 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16) 9259 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND); 9260 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32) 9261 BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND); 9262 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64) 9263 BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8); 9264 9265 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8) 9266 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF); 9267 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16) 9268 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF); 9269 else if (MI.getOpcode() == 
PPC::ATOMIC_LOAD_SUB_I32) 9270 BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF); 9271 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64) 9272 BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8); 9273 9274 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8) 9275 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE); 9276 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16) 9277 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE); 9278 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32) 9279 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE); 9280 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64) 9281 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE); 9282 9283 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8) 9284 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE); 9285 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16) 9286 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE); 9287 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32) 9288 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE); 9289 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64) 9290 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE); 9291 9292 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8) 9293 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE); 9294 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16) 9295 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE); 9296 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32) 9297 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE); 9298 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64) 9299 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE); 9300 9301 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8) 9302 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE); 9303 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16) 9304 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE); 9305 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32) 9306 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE); 9307 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64) 9308 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE); 9309 9310 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8) 9311 BB = EmitPartwordAtomicBinary(MI, BB, true, 0); 9312 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16) 9313 BB = EmitPartwordAtomicBinary(MI, BB, false, 0); 9314 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32) 9315 BB = EmitAtomicBinary(MI, BB, 4, 0); 9316 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64) 9317 BB = EmitAtomicBinary(MI, BB, 8, 0); 9318 9319 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 || 9320 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 || 9321 (Subtarget.hasPartwordAtomics() && 9322 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) || 9323 (Subtarget.hasPartwordAtomics() && 9324 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) { 9325 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64; 9326 9327 auto LoadMnemonic = PPC::LDARX; 9328 auto StoreMnemonic = PPC::STDCX; 9329 switch (MI.getOpcode()) { 9330 default: 9331 llvm_unreachable("Compare and swap of unknown size"); 9332 case PPC::ATOMIC_CMP_SWAP_I8: 9333 LoadMnemonic = PPC::LBARX; 9334 StoreMnemonic = PPC::STBCX; 9335 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics."); 9336 break; 9337 case PPC::ATOMIC_CMP_SWAP_I16: 9338 LoadMnemonic = PPC::LHARX; 9339 StoreMnemonic = 
PPC::STHCX; 9340 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics."); 9341 break; 9342 case PPC::ATOMIC_CMP_SWAP_I32: 9343 LoadMnemonic = PPC::LWARX; 9344 StoreMnemonic = PPC::STWCX; 9345 break; 9346 case PPC::ATOMIC_CMP_SWAP_I64: 9347 LoadMnemonic = PPC::LDARX; 9348 StoreMnemonic = PPC::STDCX; 9349 break; 9350 } 9351 unsigned dest = MI.getOperand(0).getReg(); 9352 unsigned ptrA = MI.getOperand(1).getReg(); 9353 unsigned ptrB = MI.getOperand(2).getReg(); 9354 unsigned oldval = MI.getOperand(3).getReg(); 9355 unsigned newval = MI.getOperand(4).getReg(); 9356 DebugLoc dl = MI.getDebugLoc(); 9357 9358 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); 9359 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); 9360 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); 9361 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 9362 F->insert(It, loop1MBB); 9363 F->insert(It, loop2MBB); 9364 F->insert(It, midMBB); 9365 F->insert(It, exitMBB); 9366 exitMBB->splice(exitMBB->begin(), BB, 9367 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 9368 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 9369 9370 // thisMBB: 9371 // ... 9372 // fallthrough --> loopMBB 9373 BB->addSuccessor(loop1MBB); 9374 9375 // loop1MBB: 9376 // l[bhwd]arx dest, ptr 9377 // cmp[wd] dest, oldval 9378 // bne- midMBB 9379 // loop2MBB: 9380 // st[bhwd]cx. newval, ptr 9381 // bne- loopMBB 9382 // b exitBB 9383 // midMBB: 9384 // st[bhwd]cx. dest, ptr 9385 // exitBB: 9386 BB = loop1MBB; 9387 BuildMI(BB, dl, TII->get(LoadMnemonic), dest) 9388 .addReg(ptrA).addReg(ptrB); 9389 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0) 9390 .addReg(oldval).addReg(dest); 9391 BuildMI(BB, dl, TII->get(PPC::BCC)) 9392 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB); 9393 BB->addSuccessor(loop2MBB); 9394 BB->addSuccessor(midMBB); 9395 9396 BB = loop2MBB; 9397 BuildMI(BB, dl, TII->get(StoreMnemonic)) 9398 .addReg(newval).addReg(ptrA).addReg(ptrB); 9399 BuildMI(BB, dl, TII->get(PPC::BCC)) 9400 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); 9401 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); 9402 BB->addSuccessor(loop1MBB); 9403 BB->addSuccessor(exitMBB); 9404 9405 BB = midMBB; 9406 BuildMI(BB, dl, TII->get(StoreMnemonic)) 9407 .addReg(dest).addReg(ptrA).addReg(ptrB); 9408 BB->addSuccessor(exitMBB); 9409 9410 // exitMBB: 9411 // ... 9412 BB = exitMBB; 9413 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 || 9414 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) { 9415 // We must use 64-bit registers for addresses when targeting 64-bit, 9416 // since we're actually doing arithmetic on them. Other registers 9417 // can be 32-bit. 
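// The expansion below reuses the shift/mask technique from
// EmitPartwordAtomicBinary: the aligned word containing the byte/halfword is
// loaded with lwarx, the old and new values are shifted into the matching
// lane, the comparison and merge are done under a mask, and the result is
// stored back with stwcx.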
9418 bool is64bit = Subtarget.isPPC64(); 9419 bool isLittleEndian = Subtarget.isLittleEndian(); 9420 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8; 9421 9422 unsigned dest = MI.getOperand(0).getReg(); 9423 unsigned ptrA = MI.getOperand(1).getReg(); 9424 unsigned ptrB = MI.getOperand(2).getReg(); 9425 unsigned oldval = MI.getOperand(3).getReg(); 9426 unsigned newval = MI.getOperand(4).getReg(); 9427 DebugLoc dl = MI.getDebugLoc(); 9428 9429 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); 9430 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); 9431 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); 9432 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 9433 F->insert(It, loop1MBB); 9434 F->insert(It, loop2MBB); 9435 F->insert(It, midMBB); 9436 F->insert(It, exitMBB); 9437 exitMBB->splice(exitMBB->begin(), BB, 9438 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 9439 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 9440 9441 MachineRegisterInfo &RegInfo = F->getRegInfo(); 9442 const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass 9443 : &PPC::GPRCRegClass; 9444 unsigned PtrReg = RegInfo.createVirtualRegister(RC); 9445 unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); 9446 unsigned ShiftReg = 9447 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC); 9448 unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC); 9449 unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC); 9450 unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC); 9451 unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC); 9452 unsigned MaskReg = RegInfo.createVirtualRegister(RC); 9453 unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); 9454 unsigned Mask3Reg = RegInfo.createVirtualRegister(RC); 9455 unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC); 9456 unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC); 9457 unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); 9458 unsigned Ptr1Reg; 9459 unsigned TmpReg = RegInfo.createVirtualRegister(RC); 9460 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; 9461 // thisMBB: 9462 // ... 9463 // fallthrough --> loopMBB 9464 BB->addSuccessor(loop1MBB); 9465 9466 // The 4-byte load must be aligned, while a char or short may be 9467 // anywhere in the word. Hence all this nasty bookkeeping code. 9468 // add ptr1, ptrA, ptrB [copy if ptrA==0] 9469 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] 9470 // xori shift, shift1, 24 [16] 9471 // rlwinm ptr, ptr1, 0, 0, 29 9472 // slw newval2, newval, shift 9473 // slw oldval2, oldval,shift 9474 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] 9475 // slw mask, mask2, shift 9476 // and newval3, newval2, mask 9477 // and oldval3, oldval2, mask 9478 // loop1MBB: 9479 // lwarx tmpDest, ptr 9480 // and tmp, tmpDest, mask 9481 // cmpw tmp, oldval3 9482 // bne- midMBB 9483 // loop2MBB: 9484 // andc tmp2, tmpDest, mask 9485 // or tmp4, tmp2, newval3 9486 // stwcx. tmp4, ptr 9487 // bne- loop1MBB 9488 // b exitBB 9489 // midMBB: 9490 // stwcx. tmpDest, ptr 9491 // exitBB: 9492 // srw dest, tmpDest, shift 9493 if (ptrA != ZeroReg) { 9494 Ptr1Reg = RegInfo.createVirtualRegister(RC); 9495 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) 9496 .addReg(ptrA).addReg(ptrB); 9497 } else { 9498 Ptr1Reg = ptrB; 9499 } 9500 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) 9501 .addImm(3).addImm(27).addImm(is8bit ? 28 : 27); 9502 if (!isLittleEndian) 9503 BuildMI(BB, dl, TII->get(is64bit ? 
PPC::XORI8 : PPC::XORI), ShiftReg) 9504 .addReg(Shift1Reg).addImm(is8bit ? 24 : 16); 9505 if (is64bit) 9506 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) 9507 .addReg(Ptr1Reg).addImm(0).addImm(61); 9508 else 9509 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) 9510 .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29); 9511 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg) 9512 .addReg(newval).addReg(ShiftReg); 9513 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg) 9514 .addReg(oldval).addReg(ShiftReg); 9515 if (is8bit) 9516 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); 9517 else { 9518 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); 9519 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg) 9520 .addReg(Mask3Reg).addImm(65535); 9521 } 9522 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) 9523 .addReg(Mask2Reg).addReg(ShiftReg); 9524 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg) 9525 .addReg(NewVal2Reg).addReg(MaskReg); 9526 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg) 9527 .addReg(OldVal2Reg).addReg(MaskReg); 9528 9529 BB = loop1MBB; 9530 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) 9531 .addReg(ZeroReg).addReg(PtrReg); 9532 BuildMI(BB, dl, TII->get(PPC::AND),TmpReg) 9533 .addReg(TmpDestReg).addReg(MaskReg); 9534 BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0) 9535 .addReg(TmpReg).addReg(OldVal3Reg); 9536 BuildMI(BB, dl, TII->get(PPC::BCC)) 9537 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB); 9538 BB->addSuccessor(loop2MBB); 9539 BB->addSuccessor(midMBB); 9540 9541 BB = loop2MBB; 9542 BuildMI(BB, dl, TII->get(PPC::ANDC),Tmp2Reg) 9543 .addReg(TmpDestReg).addReg(MaskReg); 9544 BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg) 9545 .addReg(Tmp2Reg).addReg(NewVal3Reg); 9546 BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg) 9547 .addReg(ZeroReg).addReg(PtrReg); 9548 BuildMI(BB, dl, TII->get(PPC::BCC)) 9549 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); 9550 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); 9551 BB->addSuccessor(loop1MBB); 9552 BB->addSuccessor(exitMBB); 9553 9554 BB = midMBB; 9555 BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg) 9556 .addReg(ZeroReg).addReg(PtrReg); 9557 BB->addSuccessor(exitMBB); 9558 9559 // exitMBB: 9560 // ... 9561 BB = exitMBB; 9562 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg) 9563 .addReg(ShiftReg); 9564 } else if (MI.getOpcode() == PPC::FADDrtz) { 9565 // This pseudo performs an FADD with rounding mode temporarily forced 9566 // to round-to-zero. We emit this via custom inserter since the FPSCR 9567 // is not modeled at the SelectionDAG level. 9568 unsigned Dest = MI.getOperand(0).getReg(); 9569 unsigned Src1 = MI.getOperand(1).getReg(); 9570 unsigned Src2 = MI.getOperand(2).getReg(); 9571 DebugLoc dl = MI.getDebugLoc(); 9572 9573 MachineRegisterInfo &RegInfo = F->getRegInfo(); 9574 unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); 9575 9576 // Save FPSCR value. 9577 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg); 9578 9579 // Set rounding mode to round-to-zero. 9580 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31); 9581 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30); 9582 9583 // Perform addition. 9584 BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2); 9585 9586 // Restore FPSCR value. 
9587 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg); 9588 } else if (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT || 9589 MI.getOpcode() == PPC::ANDIo_1_GT_BIT || 9590 MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 || 9591 MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) { 9592 unsigned Opcode = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 || 9593 MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) 9594 ? PPC::ANDIo8 9595 : PPC::ANDIo; 9596 bool isEQ = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT || 9597 MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8); 9598 9599 MachineRegisterInfo &RegInfo = F->getRegInfo(); 9600 unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ? 9601 &PPC::GPRCRegClass : 9602 &PPC::G8RCRegClass); 9603 9604 DebugLoc dl = MI.getDebugLoc(); 9605 BuildMI(*BB, MI, dl, TII->get(Opcode), Dest) 9606 .addReg(MI.getOperand(1).getReg()) 9607 .addImm(1); 9608 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), 9609 MI.getOperand(0).getReg()) 9610 .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT); 9611 } else if (MI.getOpcode() == PPC::TCHECK_RET) { 9612 DebugLoc Dl = MI.getDebugLoc(); 9613 MachineRegisterInfo &RegInfo = F->getRegInfo(); 9614 unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); 9615 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg); 9616 return BB; 9617 } else { 9618 llvm_unreachable("Unexpected instr type to insert"); 9619 } 9620 9621 MI.eraseFromParent(); // The pseudo instruction is gone now. 9622 return BB; 9623 } 9624 9625 //===----------------------------------------------------------------------===// 9626 // Target Optimization Hooks 9627 //===----------------------------------------------------------------------===// 9628 9629 static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) { 9630 // For the estimates, convergence is quadratic, so we essentially double the 9631 // number of digits correct after every iteration. For both FRE and FRSQRTE, 9632 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(), 9633 // this is 2^-14. IEEE float has 23 digits and double has 52 digits. 9634 int RefinementSteps = Subtarget.hasRecipPrec() ? 
1 : 3; 9635 if (VT.getScalarType() == MVT::f64) 9636 RefinementSteps++; 9637 return RefinementSteps; 9638 } 9639 9640 SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand, SelectionDAG &DAG, 9641 int Enabled, int &RefinementSteps, 9642 bool &UseOneConstNR) const { 9643 EVT VT = Operand.getValueType(); 9644 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) || 9645 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) || 9646 (VT == MVT::v4f32 && Subtarget.hasAltivec()) || 9647 (VT == MVT::v2f64 && Subtarget.hasVSX()) || 9648 (VT == MVT::v4f32 && Subtarget.hasQPX()) || 9649 (VT == MVT::v4f64 && Subtarget.hasQPX())) { 9650 if (RefinementSteps == ReciprocalEstimate::Unspecified) 9651 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget); 9652 9653 UseOneConstNR = true; 9654 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand); 9655 } 9656 return SDValue(); 9657 } 9658 9659 SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG, 9660 int Enabled, 9661 int &RefinementSteps) const { 9662 EVT VT = Operand.getValueType(); 9663 if ((VT == MVT::f32 && Subtarget.hasFRES()) || 9664 (VT == MVT::f64 && Subtarget.hasFRE()) || 9665 (VT == MVT::v4f32 && Subtarget.hasAltivec()) || 9666 (VT == MVT::v2f64 && Subtarget.hasVSX()) || 9667 (VT == MVT::v4f32 && Subtarget.hasQPX()) || 9668 (VT == MVT::v4f64 && Subtarget.hasQPX())) { 9669 if (RefinementSteps == ReciprocalEstimate::Unspecified) 9670 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget); 9671 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand); 9672 } 9673 return SDValue(); 9674 } 9675 9676 unsigned PPCTargetLowering::combineRepeatedFPDivisors() const { 9677 // Note: This functionality is used only when unsafe-fp-math is enabled, and 9678 // on cores with reciprocal estimates (which are used when unsafe-fp-math is 9679 // enabled for division), this functionality is redundant with the default 9680 // combiner logic (once the division -> reciprocal/multiply transformation 9681 // has taken place). As a result, this matters more for older cores than for 9682 // newer ones. 9683 9684 // Combine multiple FDIVs with the same divisor into multiple FMULs by the 9685 // reciprocal if there are two or more FDIVs (for embedded cores with only 9686 // one FP pipeline) or three or more FDIVs (for generic OOO cores). 9687 switch (Subtarget.getDarwinDirective()) { 9688 default: 9689 return 3; 9690 case PPC::DIR_440: 9691 case PPC::DIR_A2: 9692 case PPC::DIR_E500mc: 9693 case PPC::DIR_E5500: 9694 return 2; 9695 } 9696 } 9697 9698 // isConsecutiveLSLoc needs to work even if all adds have not yet been 9699 // collapsed, and so we need to look through chains of them. 9700 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base, 9701 int64_t& Offset, SelectionDAG &DAG) { 9702 if (DAG.isBaseWithConstantOffset(Loc)) { 9703 Base = Loc.getOperand(0); 9704 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue(); 9705 9706 // The base might itself be a base plus an offset, and if so, accumulate 9707 // that as well.
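// For example, (add (add X, 8), 4) accumulates to base X with offset 12, even
// though the two adds have not yet been folded into a single addition.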
9708 getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG); 9709 } 9710 } 9711 9712 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, 9713 unsigned Bytes, int Dist, 9714 SelectionDAG &DAG) { 9715 if (VT.getSizeInBits() / 8 != Bytes) 9716 return false; 9717 9718 SDValue BaseLoc = Base->getBasePtr(); 9719 if (Loc.getOpcode() == ISD::FrameIndex) { 9720 if (BaseLoc.getOpcode() != ISD::FrameIndex) 9721 return false; 9722 const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); 9723 int FI = cast<FrameIndexSDNode>(Loc)->getIndex(); 9724 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex(); 9725 int FS = MFI.getObjectSize(FI); 9726 int BFS = MFI.getObjectSize(BFI); 9727 if (FS != BFS || FS != (int)Bytes) return false; 9728 return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes); 9729 } 9730 9731 SDValue Base1 = Loc, Base2 = BaseLoc; 9732 int64_t Offset1 = 0, Offset2 = 0; 9733 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG); 9734 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG); 9735 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes)) 9736 return true; 9737 9738 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 9739 const GlobalValue *GV1 = nullptr; 9740 const GlobalValue *GV2 = nullptr; 9741 Offset1 = 0; 9742 Offset2 = 0; 9743 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1); 9744 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); 9745 if (isGA1 && isGA2 && GV1 == GV2) 9746 return Offset1 == (Offset2 + Dist*Bytes); 9747 return false; 9748 } 9749 9750 // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does 9751 // not enforce equality of the chain operands. 9752 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, 9753 unsigned Bytes, int Dist, 9754 SelectionDAG &DAG) { 9755 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) { 9756 EVT VT = LS->getMemoryVT(); 9757 SDValue Loc = LS->getBasePtr(); 9758 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG); 9759 } 9760 9761 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) { 9762 EVT VT; 9763 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { 9764 default: return false; 9765 case Intrinsic::ppc_qpx_qvlfd: 9766 case Intrinsic::ppc_qpx_qvlfda: 9767 VT = MVT::v4f64; 9768 break; 9769 case Intrinsic::ppc_qpx_qvlfs: 9770 case Intrinsic::ppc_qpx_qvlfsa: 9771 VT = MVT::v4f32; 9772 break; 9773 case Intrinsic::ppc_qpx_qvlfcd: 9774 case Intrinsic::ppc_qpx_qvlfcda: 9775 VT = MVT::v2f64; 9776 break; 9777 case Intrinsic::ppc_qpx_qvlfcs: 9778 case Intrinsic::ppc_qpx_qvlfcsa: 9779 VT = MVT::v2f32; 9780 break; 9781 case Intrinsic::ppc_qpx_qvlfiwa: 9782 case Intrinsic::ppc_qpx_qvlfiwz: 9783 case Intrinsic::ppc_altivec_lvx: 9784 case Intrinsic::ppc_altivec_lvxl: 9785 case Intrinsic::ppc_vsx_lxvw4x: 9786 VT = MVT::v4i32; 9787 break; 9788 case Intrinsic::ppc_vsx_lxvd2x: 9789 VT = MVT::v2f64; 9790 break; 9791 case Intrinsic::ppc_altivec_lvebx: 9792 VT = MVT::i8; 9793 break; 9794 case Intrinsic::ppc_altivec_lvehx: 9795 VT = MVT::i16; 9796 break; 9797 case Intrinsic::ppc_altivec_lvewx: 9798 VT = MVT::i32; 9799 break; 9800 } 9801 9802 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG); 9803 } 9804 9805 if (N->getOpcode() == ISD::INTRINSIC_VOID) { 9806 EVT VT; 9807 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { 9808 default: return false; 9809 case Intrinsic::ppc_qpx_qvstfd: 9810 case Intrinsic::ppc_qpx_qvstfda: 9811 VT = MVT::v4f64; 9812 break; 9813 case 
Intrinsic::ppc_qpx_qvstfs: 9814 case Intrinsic::ppc_qpx_qvstfsa: 9815 VT = MVT::v4f32; 9816 break; 9817 case Intrinsic::ppc_qpx_qvstfcd: 9818 case Intrinsic::ppc_qpx_qvstfcda: 9819 VT = MVT::v2f64; 9820 break; 9821 case Intrinsic::ppc_qpx_qvstfcs: 9822 case Intrinsic::ppc_qpx_qvstfcsa: 9823 VT = MVT::v2f32; 9824 break; 9825 case Intrinsic::ppc_qpx_qvstfiw: 9826 case Intrinsic::ppc_qpx_qvstfiwa: 9827 case Intrinsic::ppc_altivec_stvx: 9828 case Intrinsic::ppc_altivec_stvxl: 9829 case Intrinsic::ppc_vsx_stxvw4x: 9830 VT = MVT::v4i32; 9831 break; 9832 case Intrinsic::ppc_vsx_stxvd2x: 9833 VT = MVT::v2f64; 9834 break; 9835 case Intrinsic::ppc_altivec_stvebx: 9836 VT = MVT::i8; 9837 break; 9838 case Intrinsic::ppc_altivec_stvehx: 9839 VT = MVT::i16; 9840 break; 9841 case Intrinsic::ppc_altivec_stvewx: 9842 VT = MVT::i32; 9843 break; 9844 } 9845 9846 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG); 9847 } 9848 9849 return false; 9850 } 9851 9852 // Return true if there is a nearby consecutive load to the one provided 9853 // (regardless of alignment). We search up and down the chain, looking through 9854 // token factors and other loads (but nothing else). As a result, a true result 9855 // indicates that it is safe to create a new consecutive load adjacent to the 9856 // load provided. 9857 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) { 9858 SDValue Chain = LD->getChain(); 9859 EVT VT = LD->getMemoryVT(); 9860 9861 SmallSet<SDNode *, 16> LoadRoots; 9862 SmallVector<SDNode *, 8> Queue(1, Chain.getNode()); 9863 SmallSet<SDNode *, 16> Visited; 9864 9865 // First, search up the chain, branching to follow all token-factor operands. 9866 // If we find a consecutive load, then we're done; otherwise, record all 9867 // nodes just above the top-level loads and token factors. 9868 while (!Queue.empty()) { 9869 SDNode *ChainNext = Queue.pop_back_val(); 9870 if (!Visited.insert(ChainNext).second) 9871 continue; 9872 9873 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) { 9874 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG)) 9875 return true; 9876 9877 if (!Visited.count(ChainLD->getChain().getNode())) 9878 Queue.push_back(ChainLD->getChain().getNode()); 9879 } else if (ChainNext->getOpcode() == ISD::TokenFactor) { 9880 for (const SDUse &O : ChainNext->ops()) 9881 if (!Visited.count(O.getNode())) 9882 Queue.push_back(O.getNode()); 9883 } else 9884 LoadRoots.insert(ChainNext); 9885 } 9886 9887 // Second, search down the chain, starting from the top-level nodes recorded 9888 // in the first phase. These top-level nodes are the nodes just above all 9889 // loads and token factors. Starting with their uses, recursively look through 9890 // all loads (just the chain uses) and token factors to find a consecutive 9891 // load.
9892 Visited.clear(); 9893 Queue.clear(); 9894 9895 for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(), 9896 IE = LoadRoots.end(); I != IE; ++I) { 9897 Queue.push_back(*I); 9898 9899 while (!Queue.empty()) { 9900 SDNode *LoadRoot = Queue.pop_back_val(); 9901 if (!Visited.insert(LoadRoot).second) 9902 continue; 9903 9904 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot)) 9905 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG)) 9906 return true; 9907 9908 for (SDNode::use_iterator UI = LoadRoot->use_begin(), 9909 UE = LoadRoot->use_end(); UI != UE; ++UI) 9910 if (((isa<MemSDNode>(*UI) && 9911 cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) || 9912 UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI)) 9913 Queue.push_back(*UI); 9914 } 9915 } 9916 9917 return false; 9918 } 9919 9920 SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, 9921 DAGCombinerInfo &DCI) const { 9922 SelectionDAG &DAG = DCI.DAG; 9923 SDLoc dl(N); 9924 9925 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits"); 9926 // If we're tracking CR bits, we need to be careful that we don't have: 9927 // trunc(binary-ops(zext(x), zext(y))) 9928 // or 9929 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...) 9930 // such that we're unnecessarily moving things into GPRs when it would be 9931 // better to keep them in CR bits. 9932 9933 // Note that trunc here can be an actual i1 trunc, or can be the effective 9934 // truncation that comes from a setcc or select_cc. 9935 if (N->getOpcode() == ISD::TRUNCATE && 9936 N->getValueType(0) != MVT::i1) 9937 return SDValue(); 9938 9939 if (N->getOperand(0).getValueType() != MVT::i32 && 9940 N->getOperand(0).getValueType() != MVT::i64) 9941 return SDValue(); 9942 9943 if (N->getOpcode() == ISD::SETCC || 9944 N->getOpcode() == ISD::SELECT_CC) { 9945 // If we're looking at a comparison, then we need to make sure that the 9946 // high bits (all except for the first) don't matter the result. 9947 ISD::CondCode CC = 9948 cast<CondCodeSDNode>(N->getOperand( 9949 N->getOpcode() == ISD::SETCC ? 2 : 4))->get(); 9950 unsigned OpBits = N->getOperand(0).getValueSizeInBits(); 9951 9952 if (ISD::isSignedIntSetCC(CC)) { 9953 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits || 9954 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits) 9955 return SDValue(); 9956 } else if (ISD::isUnsignedIntSetCC(CC)) { 9957 if (!DAG.MaskedValueIsZero(N->getOperand(0), 9958 APInt::getHighBitsSet(OpBits, OpBits-1)) || 9959 !DAG.MaskedValueIsZero(N->getOperand(1), 9960 APInt::getHighBitsSet(OpBits, OpBits-1))) 9961 return SDValue(); 9962 } else { 9963 // This is neither a signed nor an unsigned comparison, just make sure 9964 // that the high bits are equal. 9965 APInt Op1Zero, Op1One; 9966 APInt Op2Zero, Op2One; 9967 DAG.computeKnownBits(N->getOperand(0), Op1Zero, Op1One); 9968 DAG.computeKnownBits(N->getOperand(1), Op2Zero, Op2One); 9969 9970 // We don't really care about what is known about the first bit (if 9971 // anything), so clear it in all masks prior to comparing them. 9972 Op1Zero.clearBit(0); Op1One.clearBit(0); 9973 Op2Zero.clearBit(0); Op2One.clearBit(0); 9974 9975 if (Op1Zero != Op2Zero || Op1One != Op2One) 9976 return SDValue(); 9977 } 9978 } 9979 9980 // We now know that the higher-order bits are irrelevant, we just need to 9981 // make sure that all of the intermediate operations are bit operations, and 9982 // all inputs are extensions. 
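// A typical candidate is trunc(xor(zext(a), zext(b))) where a and b are i1:
// once every input is an extension from i1 (or a constant), the whole
// expression can be rebuilt over i1 and kept in CR bits.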
9983 if (N->getOperand(0).getOpcode() != ISD::AND && 9984 N->getOperand(0).getOpcode() != ISD::OR && 9985 N->getOperand(0).getOpcode() != ISD::XOR && 9986 N->getOperand(0).getOpcode() != ISD::SELECT && 9987 N->getOperand(0).getOpcode() != ISD::SELECT_CC && 9988 N->getOperand(0).getOpcode() != ISD::TRUNCATE && 9989 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND && 9990 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND && 9991 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND) 9992 return SDValue(); 9993 9994 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) && 9995 N->getOperand(1).getOpcode() != ISD::AND && 9996 N->getOperand(1).getOpcode() != ISD::OR && 9997 N->getOperand(1).getOpcode() != ISD::XOR && 9998 N->getOperand(1).getOpcode() != ISD::SELECT && 9999 N->getOperand(1).getOpcode() != ISD::SELECT_CC && 10000 N->getOperand(1).getOpcode() != ISD::TRUNCATE && 10001 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND && 10002 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND && 10003 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND) 10004 return SDValue(); 10005 10006 SmallVector<SDValue, 4> Inputs; 10007 SmallVector<SDValue, 8> BinOps, PromOps; 10008 SmallPtrSet<SDNode *, 16> Visited; 10009 10010 for (unsigned i = 0; i < 2; ++i) { 10011 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND || 10012 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND || 10013 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) && 10014 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) || 10015 isa<ConstantSDNode>(N->getOperand(i))) 10016 Inputs.push_back(N->getOperand(i)); 10017 else 10018 BinOps.push_back(N->getOperand(i)); 10019 10020 if (N->getOpcode() == ISD::TRUNCATE) 10021 break; 10022 } 10023 10024 // Visit all inputs, collect all binary operations (and, or, xor and 10025 // select) that are all fed by extensions. 10026 while (!BinOps.empty()) { 10027 SDValue BinOp = BinOps.back(); 10028 BinOps.pop_back(); 10029 10030 if (!Visited.insert(BinOp.getNode()).second) 10031 continue; 10032 10033 PromOps.push_back(BinOp); 10034 10035 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) { 10036 // The condition of the select is not promoted. 10037 if (BinOp.getOpcode() == ISD::SELECT && i == 0) 10038 continue; 10039 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3) 10040 continue; 10041 10042 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND || 10043 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND || 10044 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) && 10045 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) || 10046 isa<ConstantSDNode>(BinOp.getOperand(i))) { 10047 Inputs.push_back(BinOp.getOperand(i)); 10048 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || 10049 BinOp.getOperand(i).getOpcode() == ISD::OR || 10050 BinOp.getOperand(i).getOpcode() == ISD::XOR || 10051 BinOp.getOperand(i).getOpcode() == ISD::SELECT || 10052 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC || 10053 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || 10054 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND || 10055 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND || 10056 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) { 10057 BinOps.push_back(BinOp.getOperand(i)); 10058 } else { 10059 // We have an input that is not an extension or another binary 10060 // operation; we'll abort this transformation. 
10061 return SDValue(); 10062 } 10063 } 10064 } 10065 10066 // Make sure that this is a self-contained cluster of operations (which 10067 // is not quite the same thing as saying that everything has only one 10068 // use). 10069 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 10070 if (isa<ConstantSDNode>(Inputs[i])) 10071 continue; 10072 10073 for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(), 10074 UE = Inputs[i].getNode()->use_end(); 10075 UI != UE; ++UI) { 10076 SDNode *User = *UI; 10077 if (User != N && !Visited.count(User)) 10078 return SDValue(); 10079 10080 // Make sure that we're not going to promote the non-output-value 10081 // operand(s) or SELECT or SELECT_CC. 10082 // FIXME: Although we could sometimes handle this, and it does occur in 10083 // practice that one of the condition inputs to the select is also one of 10084 // the outputs, we currently can't deal with this. 10085 if (User->getOpcode() == ISD::SELECT) { 10086 if (User->getOperand(0) == Inputs[i]) 10087 return SDValue(); 10088 } else if (User->getOpcode() == ISD::SELECT_CC) { 10089 if (User->getOperand(0) == Inputs[i] || 10090 User->getOperand(1) == Inputs[i]) 10091 return SDValue(); 10092 } 10093 } 10094 } 10095 10096 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) { 10097 for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(), 10098 UE = PromOps[i].getNode()->use_end(); 10099 UI != UE; ++UI) { 10100 SDNode *User = *UI; 10101 if (User != N && !Visited.count(User)) 10102 return SDValue(); 10103 10104 // Make sure that we're not going to promote the non-output-value 10105 // operand(s) or SELECT or SELECT_CC. 10106 // FIXME: Although we could sometimes handle this, and it does occur in 10107 // practice that one of the condition inputs to the select is also one of 10108 // the outputs, we currently can't deal with this. 10109 if (User->getOpcode() == ISD::SELECT) { 10110 if (User->getOperand(0) == PromOps[i]) 10111 return SDValue(); 10112 } else if (User->getOpcode() == ISD::SELECT_CC) { 10113 if (User->getOperand(0) == PromOps[i] || 10114 User->getOperand(1) == PromOps[i]) 10115 return SDValue(); 10116 } 10117 } 10118 } 10119 10120 // Replace all inputs with the extension operand. 10121 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 10122 // Constants may have users outside the cluster of to-be-promoted nodes, 10123 // and so we need to replace those as we do the promotions. 10124 if (isa<ConstantSDNode>(Inputs[i])) 10125 continue; 10126 else 10127 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0)); 10128 } 10129 10130 std::list<HandleSDNode> PromOpHandles; 10131 for (auto &PromOp : PromOps) 10132 PromOpHandles.emplace_back(PromOp); 10133 10134 // Replace all operations (these are all the same, but have a different 10135 // (i1) return type). DAG.getNode will validate that the types of 10136 // a binary operator match, so go through the list in reverse so that 10137 // we've likely promoted both operands first. Any intermediate truncations or 10138 // extensions disappear. 
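// The HandleSDNodes built above keep the queued nodes alive, and tracked
// through ReplaceAllUsesOfValueWith, while the DAG is mutated underneath them.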
10139 while (!PromOpHandles.empty()) { 10140 SDValue PromOp = PromOpHandles.back().getValue(); 10141 PromOpHandles.pop_back(); 10142 10143 if (PromOp.getOpcode() == ISD::TRUNCATE || 10144 PromOp.getOpcode() == ISD::SIGN_EXTEND || 10145 PromOp.getOpcode() == ISD::ZERO_EXTEND || 10146 PromOp.getOpcode() == ISD::ANY_EXTEND) { 10147 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) && 10148 PromOp.getOperand(0).getValueType() != MVT::i1) { 10149 // The operand is not yet ready (see comment below). 10150 PromOpHandles.emplace_front(PromOp); 10151 continue; 10152 } 10153 10154 SDValue RepValue = PromOp.getOperand(0); 10155 if (isa<ConstantSDNode>(RepValue)) 10156 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue); 10157 10158 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue); 10159 continue; 10160 } 10161 10162 unsigned C; 10163 switch (PromOp.getOpcode()) { 10164 default: C = 0; break; 10165 case ISD::SELECT: C = 1; break; 10166 case ISD::SELECT_CC: C = 2; break; 10167 } 10168 10169 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) && 10170 PromOp.getOperand(C).getValueType() != MVT::i1) || 10171 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) && 10172 PromOp.getOperand(C+1).getValueType() != MVT::i1)) { 10173 // The to-be-promoted operands of this node have not yet been 10174 // promoted (this should be rare because we're going through the 10175 // list backward, but if one of the operands has several users in 10176 // this cluster of to-be-promoted nodes, it is possible). 10177 PromOpHandles.emplace_front(PromOp); 10178 continue; 10179 } 10180 10181 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(), 10182 PromOp.getNode()->op_end()); 10183 10184 // If there are any constant inputs, make sure they're replaced now. 10185 for (unsigned i = 0; i < 2; ++i) 10186 if (isa<ConstantSDNode>(Ops[C+i])) 10187 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]); 10188 10189 DAG.ReplaceAllUsesOfValueWith(PromOp, 10190 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops)); 10191 } 10192 10193 // Now we're left with the initial truncation itself. 10194 if (N->getOpcode() == ISD::TRUNCATE) 10195 return N->getOperand(0); 10196 10197 // Otherwise, this is a comparison. The operands to be compared have just 10198 // changed type (to i1), but everything else is the same. 10199 return SDValue(N, 0); 10200 } 10201 10202 SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, 10203 DAGCombinerInfo &DCI) const { 10204 SelectionDAG &DAG = DCI.DAG; 10205 SDLoc dl(N); 10206 10207 // If we're tracking CR bits, we need to be careful that we don't have: 10208 // zext(binary-ops(trunc(x), trunc(y))) 10209 // or 10210 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...) 10211 // such that we're unnecessarily moving things into CR bits that can more 10212 // efficiently stay in GPRs. Note that if we're not certain that the high 10213 // bits are set as required by the final extension, we still may need to do 10214 // some masking to get the proper behavior. 10215 10216 // This same functionality is important on PPC64 when dealing with 10217 // 32-to-64-bit extensions; these occur often when 32-bit values are used as 10218 // the return values of functions. Because it is so similar, it is handled 10219 // here as well. 
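// A common PPC64 instance is zext/sext(binary-ops(trunc(x), trunc(y))) with
// 32-bit intermediates, which often arises when an i32 value forms a
// function's return value; rebuilding the operations at the wider type can
// let the truncations, and often the final extension, disappear.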
10220 10221 if (N->getValueType(0) != MVT::i32 && 10222 N->getValueType(0) != MVT::i64) 10223 return SDValue(); 10224 10225 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) || 10226 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64()))) 10227 return SDValue(); 10228 10229 if (N->getOperand(0).getOpcode() != ISD::AND && 10230 N->getOperand(0).getOpcode() != ISD::OR && 10231 N->getOperand(0).getOpcode() != ISD::XOR && 10232 N->getOperand(0).getOpcode() != ISD::SELECT && 10233 N->getOperand(0).getOpcode() != ISD::SELECT_CC) 10234 return SDValue(); 10235 10236 SmallVector<SDValue, 4> Inputs; 10237 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps; 10238 SmallPtrSet<SDNode *, 16> Visited; 10239 10240 // Visit all inputs, collect all binary operations (and, or, xor and 10241 // select) that are all fed by truncations. 10242 while (!BinOps.empty()) { 10243 SDValue BinOp = BinOps.back(); 10244 BinOps.pop_back(); 10245 10246 if (!Visited.insert(BinOp.getNode()).second) 10247 continue; 10248 10249 PromOps.push_back(BinOp); 10250 10251 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) { 10252 // The condition of the select is not promoted. 10253 if (BinOp.getOpcode() == ISD::SELECT && i == 0) 10254 continue; 10255 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3) 10256 continue; 10257 10258 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || 10259 isa<ConstantSDNode>(BinOp.getOperand(i))) { 10260 Inputs.push_back(BinOp.getOperand(i)); 10261 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || 10262 BinOp.getOperand(i).getOpcode() == ISD::OR || 10263 BinOp.getOperand(i).getOpcode() == ISD::XOR || 10264 BinOp.getOperand(i).getOpcode() == ISD::SELECT || 10265 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) { 10266 BinOps.push_back(BinOp.getOperand(i)); 10267 } else { 10268 // We have an input that is not a truncation or another binary 10269 // operation; we'll abort this transformation. 10270 return SDValue(); 10271 } 10272 } 10273 } 10274 10275 // The operands of a select that must be truncated when the select is 10276 // promoted because the operand is actually part of the to-be-promoted set. 10277 DenseMap<SDNode *, EVT> SelectTruncOp[2]; 10278 10279 // Make sure that this is a self-contained cluster of operations (which 10280 // is not quite the same thing as saying that everything has only one 10281 // use). 10282 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 10283 if (isa<ConstantSDNode>(Inputs[i])) 10284 continue; 10285 10286 for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(), 10287 UE = Inputs[i].getNode()->use_end(); 10288 UI != UE; ++UI) { 10289 SDNode *User = *UI; 10290 if (User != N && !Visited.count(User)) 10291 return SDValue(); 10292 10293 // If we're going to promote the non-output-value operand(s) or SELECT or 10294 // SELECT_CC, record them for truncation. 
10295 if (User->getOpcode() == ISD::SELECT) { 10296 if (User->getOperand(0) == Inputs[i]) 10297 SelectTruncOp[0].insert(std::make_pair(User, 10298 User->getOperand(0).getValueType())); 10299 } else if (User->getOpcode() == ISD::SELECT_CC) { 10300 if (User->getOperand(0) == Inputs[i]) 10301 SelectTruncOp[0].insert(std::make_pair(User, 10302 User->getOperand(0).getValueType())); 10303 if (User->getOperand(1) == Inputs[i]) 10304 SelectTruncOp[1].insert(std::make_pair(User, 10305 User->getOperand(1).getValueType())); 10306 } 10307 } 10308 } 10309 10310 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) { 10311 for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(), 10312 UE = PromOps[i].getNode()->use_end(); 10313 UI != UE; ++UI) { 10314 SDNode *User = *UI; 10315 if (User != N && !Visited.count(User)) 10316 return SDValue(); 10317 10318 // If we're going to promote the non-output-value operand(s) or SELECT or 10319 // SELECT_CC, record them for truncation. 10320 if (User->getOpcode() == ISD::SELECT) { 10321 if (User->getOperand(0) == PromOps[i]) 10322 SelectTruncOp[0].insert(std::make_pair(User, 10323 User->getOperand(0).getValueType())); 10324 } else if (User->getOpcode() == ISD::SELECT_CC) { 10325 if (User->getOperand(0) == PromOps[i]) 10326 SelectTruncOp[0].insert(std::make_pair(User, 10327 User->getOperand(0).getValueType())); 10328 if (User->getOperand(1) == PromOps[i]) 10329 SelectTruncOp[1].insert(std::make_pair(User, 10330 User->getOperand(1).getValueType())); 10331 } 10332 } 10333 } 10334 10335 unsigned PromBits = N->getOperand(0).getValueSizeInBits(); 10336 bool ReallyNeedsExt = false; 10337 if (N->getOpcode() != ISD::ANY_EXTEND) { 10338 // If all of the inputs are not already sign/zero extended, then 10339 // we'll still need to do that at the end. 10340 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 10341 if (isa<ConstantSDNode>(Inputs[i])) 10342 continue; 10343 10344 unsigned OpBits = 10345 Inputs[i].getOperand(0).getValueSizeInBits(); 10346 assert(PromBits < OpBits && "Truncation not to a smaller bit count?"); 10347 10348 if ((N->getOpcode() == ISD::ZERO_EXTEND && 10349 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0), 10350 APInt::getHighBitsSet(OpBits, 10351 OpBits-PromBits))) || 10352 (N->getOpcode() == ISD::SIGN_EXTEND && 10353 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) < 10354 (OpBits-(PromBits-1)))) { 10355 ReallyNeedsExt = true; 10356 break; 10357 } 10358 } 10359 } 10360 10361 // Replace all inputs, either with the truncation operand, or a 10362 // truncation or extension to the final output type. 10363 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 10364 // Constant inputs need to be replaced with the to-be-promoted nodes that 10365 // use them because they might have users outside of the cluster of 10366 // promoted nodes. 
10367 if (isa<ConstantSDNode>(Inputs[i])) 10368 continue; 10369 10370 SDValue InSrc = Inputs[i].getOperand(0); 10371 if (Inputs[i].getValueType() == N->getValueType(0)) 10372 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc); 10373 else if (N->getOpcode() == ISD::SIGN_EXTEND) 10374 DAG.ReplaceAllUsesOfValueWith(Inputs[i], 10375 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0))); 10376 else if (N->getOpcode() == ISD::ZERO_EXTEND) 10377 DAG.ReplaceAllUsesOfValueWith(Inputs[i], 10378 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0))); 10379 else 10380 DAG.ReplaceAllUsesOfValueWith(Inputs[i], 10381 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0))); 10382 } 10383 10384 std::list<HandleSDNode> PromOpHandles; 10385 for (auto &PromOp : PromOps) 10386 PromOpHandles.emplace_back(PromOp); 10387 10388 // Replace all operations (these are all the same, but have a different 10389 // (promoted) return type). DAG.getNode will validate that the types of 10390 // a binary operator match, so go through the list in reverse so that 10391 // we've likely promoted both operands first. 10392 while (!PromOpHandles.empty()) { 10393 SDValue PromOp = PromOpHandles.back().getValue(); 10394 PromOpHandles.pop_back(); 10395 10396 unsigned C; 10397 switch (PromOp.getOpcode()) { 10398 default: C = 0; break; 10399 case ISD::SELECT: C = 1; break; 10400 case ISD::SELECT_CC: C = 2; break; 10401 } 10402 10403 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) && 10404 PromOp.getOperand(C).getValueType() != N->getValueType(0)) || 10405 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) && 10406 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) { 10407 // The to-be-promoted operands of this node have not yet been 10408 // promoted (this should be rare because we're going through the 10409 // list backward, but if one of the operands has several users in 10410 // this cluster of to-be-promoted nodes, it is possible). 10411 PromOpHandles.emplace_front(PromOp); 10412 continue; 10413 } 10414 10415 // For SELECT and SELECT_CC nodes, we do a similar check for any 10416 // to-be-promoted comparison inputs. 10417 if (PromOp.getOpcode() == ISD::SELECT || 10418 PromOp.getOpcode() == ISD::SELECT_CC) { 10419 if ((SelectTruncOp[0].count(PromOp.getNode()) && 10420 PromOp.getOperand(0).getValueType() != N->getValueType(0)) || 10421 (SelectTruncOp[1].count(PromOp.getNode()) && 10422 PromOp.getOperand(1).getValueType() != N->getValueType(0))) { 10423 PromOpHandles.emplace_front(PromOp); 10424 continue; 10425 } 10426 } 10427 10428 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(), 10429 PromOp.getNode()->op_end()); 10430 10431 // If this node has constant inputs, then they'll need to be promoted here. 10432 for (unsigned i = 0; i < 2; ++i) { 10433 if (!isa<ConstantSDNode>(Ops[C+i])) 10434 continue; 10435 if (Ops[C+i].getValueType() == N->getValueType(0)) 10436 continue; 10437 10438 if (N->getOpcode() == ISD::SIGN_EXTEND) 10439 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); 10440 else if (N->getOpcode() == ISD::ZERO_EXTEND) 10441 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); 10442 else 10443 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); 10444 } 10445 10446 // If we've promoted the comparison inputs of a SELECT or SELECT_CC, 10447 // truncate them again to the original value type. 
10448 if (PromOp.getOpcode() == ISD::SELECT || 10449 PromOp.getOpcode() == ISD::SELECT_CC) { 10450 auto SI0 = SelectTruncOp[0].find(PromOp.getNode()); 10451 if (SI0 != SelectTruncOp[0].end()) 10452 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]); 10453 auto SI1 = SelectTruncOp[1].find(PromOp.getNode()); 10454 if (SI1 != SelectTruncOp[1].end()) 10455 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]); 10456 } 10457 10458 DAG.ReplaceAllUsesOfValueWith(PromOp, 10459 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops)); 10460 } 10461 10462 // Now we're left with the initial extension itself. 10463 if (!ReallyNeedsExt) 10464 return N->getOperand(0); 10465 10466 // To zero extend, just mask off everything except for the first bit (in the 10467 // i1 case). 10468 if (N->getOpcode() == ISD::ZERO_EXTEND) 10469 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0), 10470 DAG.getConstant(APInt::getLowBitsSet( 10471 N->getValueSizeInBits(0), PromBits), 10472 dl, N->getValueType(0))); 10473 10474 assert(N->getOpcode() == ISD::SIGN_EXTEND && 10475 "Invalid extension type"); 10476 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout()); 10477 SDValue ShiftCst = 10478 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy); 10479 return DAG.getNode( 10480 ISD::SRA, dl, N->getValueType(0), 10481 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst), 10482 ShiftCst); 10483 } 10484 10485 SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N, 10486 DAGCombinerInfo &DCI) const { 10487 assert(N->getOpcode() == ISD::BUILD_VECTOR && 10488 "Should be called with a BUILD_VECTOR node"); 10489 10490 SelectionDAG &DAG = DCI.DAG; 10491 SDLoc dl(N); 10492 if (N->getValueType(0) != MVT::v2f64 || !Subtarget.hasVSX()) 10493 return SDValue(); 10494 10495 // Looking for: 10496 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1)) 10497 if (N->getOperand(0).getOpcode() != ISD::SINT_TO_FP && 10498 N->getOperand(0).getOpcode() != ISD::UINT_TO_FP) 10499 return SDValue(); 10500 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP && 10501 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP) 10502 return SDValue(); 10503 if (N->getOperand(0).getOpcode() != N->getOperand(1).getOpcode()) 10504 return SDValue(); 10505 10506 SDValue Ext1 = N->getOperand(0).getOperand(0); 10507 SDValue Ext2 = N->getOperand(1).getOperand(0); 10508 if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT || 10509 Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT) 10510 return SDValue(); 10511 10512 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1)); 10513 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1)); 10514 if (!Ext1Op || !Ext2Op) 10515 return SDValue(); 10516 if (Ext1.getValueType() != MVT::i32 || 10517 Ext2.getValueType() != MVT::i32) 10518 if (Ext1.getOperand(0) != Ext2.getOperand(0)) 10519 return SDValue(); 10520 10521 int FirstElem = Ext1Op->getZExtValue(); 10522 int SecondElem = Ext2Op->getZExtValue(); 10523 int SubvecIdx; 10524 if (FirstElem == 0 && SecondElem == 1) 10525 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0; 10526 else if (FirstElem == 2 && SecondElem == 3) 10527 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1; 10528 else 10529 return SDValue(); 10530 10531 SDValue SrcVec = Ext1.getOperand(0); 10532 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ? 
10533 PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP; 10534 return DAG.getNode(NodeType, dl, MVT::v2f64, 10535 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl)); 10536 } 10537 10538 SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N, 10539 DAGCombinerInfo &DCI) const { 10540 assert((N->getOpcode() == ISD::SINT_TO_FP || 10541 N->getOpcode() == ISD::UINT_TO_FP) && 10542 "Need an int -> FP conversion node here"); 10543 10544 if (useSoftFloat() || !Subtarget.has64BitSupport()) 10545 return SDValue(); 10546 10547 SelectionDAG &DAG = DCI.DAG; 10548 SDLoc dl(N); 10549 SDValue Op(N, 0); 10550 10551 SDValue FirstOperand(Op.getOperand(0)); 10552 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD && 10553 (FirstOperand.getValueType() == MVT::i8 || 10554 FirstOperand.getValueType() == MVT::i16); 10555 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) { 10556 bool Signed = N->getOpcode() == ISD::SINT_TO_FP; 10557 bool DstDouble = Op.getValueType() == MVT::f64; 10558 unsigned ConvOp = Signed ? 10559 (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) : 10560 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS); 10561 SDValue WidthConst = 10562 DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2, 10563 dl, false); 10564 LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode()); 10565 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst }; 10566 SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl, 10567 DAG.getVTList(MVT::f64, MVT::Other), 10568 Ops, MVT::i8, LDN->getMemOperand()); 10569 10570 // For signed conversion, we need to sign-extend the value in the VSR. 10571 if (Signed) { 10572 SDValue ExtOps[] = { Ld, WidthConst }; 10573 SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps); 10574 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext); 10575 } else 10576 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld); 10577 } 10578 10579 // Don't handle ppc_fp128 here or i1 conversions. 10580 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) 10581 return SDValue(); 10582 if (Op.getOperand(0).getValueType() == MVT::i1) 10583 return SDValue(); 10584 10585 // For i32 intermediate values, unfortunately, the conversion functions 10586 // leave the upper 32 bits of the value undefined. Within the set of 10587 // scalar instructions, we have no method for zero- or sign-extending the 10588 // value. Thus, we cannot handle i32 intermediate values here. 10589 if (Op.getOperand(0).getValueType() == MVT::i32) 10590 return SDValue(); 10591 10592 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) && 10593 "UINT_TO_FP is supported only with FPCVT"); 10594 10595 // If we have FCFIDS, then use it when converting to single-precision. 10596 // Otherwise, convert to double-precision and then round. 10597 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) 10598 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS 10599 : PPCISD::FCFIDS) 10600 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU 10601 : PPCISD::FCFID); 10602 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) 10603 ? MVT::f32 10604 : MVT::f64; 10605 10606 // If we're converting from a float to an int and back to a float again, 10607 // then we don't need the store/load pair at all.
10608 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT && 10609 Subtarget.hasFPCVT()) || 10610 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) { 10611 SDValue Src = Op.getOperand(0).getOperand(0); 10612 if (Src.getValueType() == MVT::f32) { 10613 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); 10614 DCI.AddToWorklist(Src.getNode()); 10615 } else if (Src.getValueType() != MVT::f64) { 10616 // Make sure that we don't pick up a ppc_fp128 source value. 10617 return SDValue(); 10618 } 10619 10620 unsigned FCTOp = 10621 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ : 10622 PPCISD::FCTIDUZ; 10623 10624 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src); 10625 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp); 10626 10627 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) { 10628 FP = DAG.getNode(ISD::FP_ROUND, dl, 10629 MVT::f32, FP, DAG.getIntPtrConstant(0, dl)); 10630 DCI.AddToWorklist(FP.getNode()); 10631 } 10632 10633 return FP; 10634 } 10635 10636 return SDValue(); 10637 } 10638 10639 // expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for 10640 // builtins) into loads with swaps. 10641 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N, 10642 DAGCombinerInfo &DCI) const { 10643 SelectionDAG &DAG = DCI.DAG; 10644 SDLoc dl(N); 10645 SDValue Chain; 10646 SDValue Base; 10647 MachineMemOperand *MMO; 10648 10649 switch (N->getOpcode()) { 10650 default: 10651 llvm_unreachable("Unexpected opcode for little endian VSX load"); 10652 case ISD::LOAD: { 10653 LoadSDNode *LD = cast<LoadSDNode>(N); 10654 Chain = LD->getChain(); 10655 Base = LD->getBasePtr(); 10656 MMO = LD->getMemOperand(); 10657 // If the MMO suggests this isn't a load of a full vector, leave 10658 // things alone. For a built-in, we have to make the change for 10659 // correctness, so if there is a size problem that will be a bug. 10660 if (MMO->getSize() < 16) 10661 return SDValue(); 10662 break; 10663 } 10664 case ISD::INTRINSIC_W_CHAIN: { 10665 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N); 10666 Chain = Intrin->getChain(); 10667 // Similarly to the store case below, Intrin->getBasePtr() doesn't get 10668 // us what we want. Get operand 2 instead. 10669 Base = Intrin->getOperand(2); 10670 MMO = Intrin->getMemOperand(); 10671 break; 10672 } 10673 } 10674 10675 MVT VecTy = N->getValueType(0).getSimpleVT(); 10676 SDValue LoadOps[] = { Chain, Base }; 10677 SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl, 10678 DAG.getVTList(MVT::v2f64, MVT::Other), 10679 LoadOps, MVT::v2f64, MMO); 10680 10681 DCI.AddToWorklist(Load.getNode()); 10682 Chain = Load.getValue(1); 10683 SDValue Swap = DAG.getNode( 10684 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load); 10685 DCI.AddToWorklist(Swap.getNode()); 10686 10687 // Add a bitcast if the resulting load type doesn't match v2f64. 10688 if (VecTy != MVT::v2f64) { 10689 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap); 10690 DCI.AddToWorklist(N.getNode()); 10691 // Package {bitcast value, swap's chain} to match Load's shape. 10692 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other), 10693 N, Swap.getValue(1)); 10694 } 10695 10696 return Swap; 10697 } 10698 10699 // expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for 10700 // builtins) into stores with swaps. 
10701 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N, 10702 DAGCombinerInfo &DCI) const { 10703 SelectionDAG &DAG = DCI.DAG; 10704 SDLoc dl(N); 10705 SDValue Chain; 10706 SDValue Base; 10707 unsigned SrcOpnd; 10708 MachineMemOperand *MMO; 10709 10710 switch (N->getOpcode()) { 10711 default: 10712 llvm_unreachable("Unexpected opcode for little endian VSX store"); 10713 case ISD::STORE: { 10714 StoreSDNode *ST = cast<StoreSDNode>(N); 10715 Chain = ST->getChain(); 10716 Base = ST->getBasePtr(); 10717 MMO = ST->getMemOperand(); 10718 SrcOpnd = 1; 10719 // If the MMO suggests this isn't a store of a full vector, leave 10720 // things alone. For a built-in, we have to make the change for 10721 // correctness, so if there is a size problem that will be a bug. 10722 if (MMO->getSize() < 16) 10723 return SDValue(); 10724 break; 10725 } 10726 case ISD::INTRINSIC_VOID: { 10727 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N); 10728 Chain = Intrin->getChain(); 10729 // Intrin->getBasePtr() oddly does not get what we want. 10730 Base = Intrin->getOperand(3); 10731 MMO = Intrin->getMemOperand(); 10732 SrcOpnd = 2; 10733 break; 10734 } 10735 } 10736 10737 SDValue Src = N->getOperand(SrcOpnd); 10738 MVT VecTy = Src.getValueType().getSimpleVT(); 10739 10740 // All stores are done as v2f64 and possible bit cast. 10741 if (VecTy != MVT::v2f64) { 10742 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src); 10743 DCI.AddToWorklist(Src.getNode()); 10744 } 10745 10746 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl, 10747 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src); 10748 DCI.AddToWorklist(Swap.getNode()); 10749 Chain = Swap.getValue(1); 10750 SDValue StoreOps[] = { Chain, Swap, Base }; 10751 SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl, 10752 DAG.getVTList(MVT::Other), 10753 StoreOps, VecTy, MMO); 10754 DCI.AddToWorklist(Store.getNode()); 10755 return Store; 10756 } 10757 10758 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, 10759 DAGCombinerInfo &DCI) const { 10760 SelectionDAG &DAG = DCI.DAG; 10761 SDLoc dl(N); 10762 switch (N->getOpcode()) { 10763 default: break; 10764 case PPCISD::SHL: 10765 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0. 10766 return N->getOperand(0); 10767 break; 10768 case PPCISD::SRL: 10769 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0. 10770 return N->getOperand(0); 10771 break; 10772 case PPCISD::SRA: 10773 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { 10774 if (C->isNullValue() || // 0 >>s V -> 0. 10775 C->isAllOnesValue()) // -1 >>s V -> -1. 10776 return N->getOperand(0); 10777 } 10778 break; 10779 case ISD::SIGN_EXTEND: 10780 case ISD::ZERO_EXTEND: 10781 case ISD::ANY_EXTEND: 10782 return DAGCombineExtBoolTrunc(N, DCI); 10783 case ISD::TRUNCATE: 10784 case ISD::SETCC: 10785 case ISD::SELECT_CC: 10786 return DAGCombineTruncBoolExt(N, DCI); 10787 case ISD::SINT_TO_FP: 10788 case ISD::UINT_TO_FP: 10789 return combineFPToIntToFP(N, DCI); 10790 case ISD::STORE: { 10791 EVT Op1VT = N->getOperand(1).getValueType(); 10792 bool ValidTypeForStoreFltAsInt = (Op1VT == MVT::i32) || 10793 (Subtarget.hasP9Vector() && (Op1VT == MVT::i8 || Op1VT == MVT::i16)); 10794 10795 // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)). 
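// On Power9 subtargets the same combine also handles i8/i16 results, taking the PPCISD::STXSIX path below with an explicit byte-width operand.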
10796 if (Subtarget.hasSTFIWX() && !cast<StoreSDNode>(N)->isTruncatingStore() && 10797 N->getOperand(1).getOpcode() == ISD::FP_TO_SINT && 10798 ValidTypeForStoreFltAsInt && 10799 N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) { 10800 SDValue Val = N->getOperand(1).getOperand(0); 10801 if (Val.getValueType() == MVT::f32) { 10802 Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val); 10803 DCI.AddToWorklist(Val.getNode()); 10804 } 10805 Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val); 10806 DCI.AddToWorklist(Val.getNode()); 10807 10808 if (Op1VT == MVT::i32) { 10809 SDValue Ops[] = { 10810 N->getOperand(0), Val, N->getOperand(2), 10811 DAG.getValueType(N->getOperand(1).getValueType()) 10812 }; 10813 10814 Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, 10815 DAG.getVTList(MVT::Other), Ops, 10816 cast<StoreSDNode>(N)->getMemoryVT(), 10817 cast<StoreSDNode>(N)->getMemOperand()); 10818 } else { 10819 unsigned WidthInBytes = 10820 N->getOperand(1).getValueType() == MVT::i8 ? 1 : 2; 10821 SDValue WidthConst = DAG.getIntPtrConstant(WidthInBytes, dl, false); 10822 10823 SDValue Ops[] = { 10824 N->getOperand(0), Val, N->getOperand(2), WidthConst, 10825 DAG.getValueType(N->getOperand(1).getValueType()) 10826 }; 10827 Val = DAG.getMemIntrinsicNode(PPCISD::STXSIX, dl, 10828 DAG.getVTList(MVT::Other), Ops, 10829 cast<StoreSDNode>(N)->getMemoryVT(), 10830 cast<StoreSDNode>(N)->getMemOperand()); 10831 } 10832 10833 DCI.AddToWorklist(Val.getNode()); 10834 return Val; 10835 } 10836 10837 // Turn STORE (BSWAP) -> sthbrx/stwbrx. 10838 if (cast<StoreSDNode>(N)->isUnindexed() && 10839 N->getOperand(1).getOpcode() == ISD::BSWAP && 10840 N->getOperand(1).getNode()->hasOneUse() && 10841 (N->getOperand(1).getValueType() == MVT::i32 || 10842 N->getOperand(1).getValueType() == MVT::i16 || 10843 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && 10844 N->getOperand(1).getValueType() == MVT::i64))) { 10845 SDValue BSwapOp = N->getOperand(1).getOperand(0); 10846 // Do an any-extend to 32-bits if this is a half-word input. 10847 if (BSwapOp.getValueType() == MVT::i16) 10848 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp); 10849 10850 SDValue Ops[] = { 10851 N->getOperand(0), BSwapOp, N->getOperand(2), 10852 DAG.getValueType(N->getOperand(1).getValueType()) 10853 }; 10854 return 10855 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other), 10856 Ops, cast<StoreSDNode>(N)->getMemoryVT(), 10857 cast<StoreSDNode>(N)->getMemOperand()); 10858 } 10859 10860 // For little endian, VSX stores require generating xxswapd/lxvd2x. 10861 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store. 10862 EVT VT = N->getOperand(1).getValueType(); 10863 if (VT.isSimple()) { 10864 MVT StoreVT = VT.getSimpleVT(); 10865 if (Subtarget.needsSwapsForVSXMemOps() && 10866 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 || 10867 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32)) 10868 return expandVSXStoreForLE(N, DCI); 10869 } 10870 break; 10871 } 10872 case ISD::LOAD: { 10873 LoadSDNode *LD = cast<LoadSDNode>(N); 10874 EVT VT = LD->getValueType(0); 10875 10876 // For little endian, VSX loads require generating lxvd2x/xxswapd. 10877 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load. 
10878 if (VT.isSimple()) { 10879 MVT LoadVT = VT.getSimpleVT(); 10880 if (Subtarget.needsSwapsForVSXMemOps() && 10881 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 || 10882 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32)) 10883 return expandVSXLoadForLE(N, DCI); 10884 } 10885 10886 // We sometimes end up with a 64-bit integer load, from which we extract 10887 // two single-precision floating-point numbers. This happens with 10888 // std::complex<float>, and other similar structures, because of the way we 10889 // canonicalize structure copies. However, if we lack direct moves, 10890 // then the final bitcasts from the extracted integer values to the 10891 // floating-point numbers turn into store/load pairs. Even with direct moves, 10892 // just loading the two floating-point numbers is likely better. 10893 auto ReplaceTwoFloatLoad = [&]() { 10894 if (VT != MVT::i64) 10895 return false; 10896 10897 if (LD->getExtensionType() != ISD::NON_EXTLOAD || 10898 LD->isVolatile()) 10899 return false; 10900 10901 // We're looking for a sequence like this: 10902 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64 10903 // t16: i64 = srl t13, Constant:i32<32> 10904 // t17: i32 = truncate t16 10905 // t18: f32 = bitcast t17 10906 // t19: i32 = truncate t13 10907 // t20: f32 = bitcast t19 10908 10909 if (!LD->hasNUsesOfValue(2, 0)) 10910 return false; 10911 10912 auto UI = LD->use_begin(); 10913 while (UI.getUse().getResNo() != 0) ++UI; 10914 SDNode *Trunc = *UI++; 10915 while (UI.getUse().getResNo() != 0) ++UI; 10916 SDNode *RightShift = *UI; 10917 if (Trunc->getOpcode() != ISD::TRUNCATE) 10918 std::swap(Trunc, RightShift); 10919 10920 if (Trunc->getOpcode() != ISD::TRUNCATE || 10921 Trunc->getValueType(0) != MVT::i32 || 10922 !Trunc->hasOneUse()) 10923 return false; 10924 if (RightShift->getOpcode() != ISD::SRL || 10925 !isa<ConstantSDNode>(RightShift->getOperand(1)) || 10926 RightShift->getConstantOperandVal(1) != 32 || 10927 !RightShift->hasOneUse()) 10928 return false; 10929 10930 SDNode *Trunc2 = *RightShift->use_begin(); 10931 if (Trunc2->getOpcode() != ISD::TRUNCATE || 10932 Trunc2->getValueType(0) != MVT::i32 || 10933 !Trunc2->hasOneUse()) 10934 return false; 10935 10936 SDNode *Bitcast = *Trunc->use_begin(); 10937 SDNode *Bitcast2 = *Trunc2->use_begin(); 10938 10939 if (Bitcast->getOpcode() != ISD::BITCAST || 10940 Bitcast->getValueType(0) != MVT::f32) 10941 return false; 10942 if (Bitcast2->getOpcode() != ISD::BITCAST || 10943 Bitcast2->getValueType(0) != MVT::f32) 10944 return false; 10945 10946 if (Subtarget.isLittleEndian()) 10947 std::swap(Bitcast, Bitcast2); 10948 10949 // Bitcast has the second float (in memory-layout order) and Bitcast2 10950 // has the first one. 
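// Replace the single i64 load with two f32 loads at byte offsets 0 and 4 from the same base address, and rewire the bitcast users to them.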
10951 10952 SDValue BasePtr = LD->getBasePtr(); 10953 if (LD->isIndexed()) { 10954 assert(LD->getAddressingMode() == ISD::PRE_INC && 10955 "Non-pre-inc AM on PPC?"); 10956 BasePtr = 10957 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, 10958 LD->getOffset()); 10959 } 10960 10961 auto MMOFlags = 10962 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile; 10963 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr, 10964 LD->getPointerInfo(), LD->getAlignment(), 10965 MMOFlags, LD->getAAInfo()); 10966 SDValue AddPtr = 10967 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), 10968 BasePtr, DAG.getIntPtrConstant(4, dl)); 10969 SDValue FloatLoad2 = DAG.getLoad( 10970 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr, 10971 LD->getPointerInfo().getWithOffset(4), 10972 MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo()); 10973 10974 if (LD->isIndexed()) { 10975 // Note that DAGCombine should re-form any pre-increment load(s) from 10976 // what is produced here if that makes sense. 10977 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr); 10978 } 10979 10980 DCI.CombineTo(Bitcast2, FloatLoad); 10981 DCI.CombineTo(Bitcast, FloatLoad2); 10982 10983 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1), 10984 SDValue(FloatLoad2.getNode(), 1)); 10985 return true; 10986 }; 10987 10988 if (ReplaceTwoFloatLoad()) 10989 return SDValue(N, 0); 10990 10991 EVT MemVT = LD->getMemoryVT(); 10992 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext()); 10993 unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty); 10994 Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext()); 10995 unsigned ScalarABIAlignment = DAG.getDataLayout().getABITypeAlignment(STy); 10996 if (LD->isUnindexed() && VT.isVector() && 10997 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) && 10998 // P8 and later hardware should just use LOAD. 10999 !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 || 11000 VT == MVT::v4i32 || VT == MVT::v4f32)) || 11001 (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) && 11002 LD->getAlignment() >= ScalarABIAlignment)) && 11003 LD->getAlignment() < ABIAlignment) { 11004 // This is a type-legal unaligned Altivec or QPX load. 11005 SDValue Chain = LD->getChain(); 11006 SDValue Ptr = LD->getBasePtr(); 11007 bool isLittleEndian = Subtarget.isLittleEndian(); 11008 11009 // This implements the loading of unaligned vectors as described in 11010 // the venerable Apple Velocity Engine overview. Specifically: 11011 // https://developer.apple.com/hardwaredrivers/ve/alignment.html 11012 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html 11013 // 11014 // The general idea is to expand a sequence of one or more unaligned 11015 // loads into an alignment-based permutation-control instruction (lvsl 11016 // or lvsr), a series of regular vector loads (which always truncate 11017 // their input address to an aligned address), and a series of 11018 // permutations. The results of these permutations are the requested 11019 // loaded values. The trick is that the last "extra" load is not taken 11020 // from the address you might suspect (sizeof(vector) bytes after the 11021 // last requested load), but rather sizeof(vector) - 1 bytes after the 11022 // last requested vector. The point of this is to avoid a page fault if 11023 // the base address happened to be aligned. 
This works because if the 11024 // base address is aligned, then adding less than a full vector length 11025 // will cause the last vector in the sequence to be (re)loaded. 11026 // Otherwise, the next vector will be fetched as you might suspect was 11027 // necessary. 11028 11029 // We might be able to reuse the permutation generation from 11030 // a different base address offset from this one by an aligned amount. 11031 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this 11032 // optimization later. 11033 Intrinsic::ID Intr, IntrLD, IntrPerm; 11034 MVT PermCntlTy, PermTy, LDTy; 11035 if (Subtarget.hasAltivec()) { 11036 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr : 11037 Intrinsic::ppc_altivec_lvsl; 11038 IntrLD = Intrinsic::ppc_altivec_lvx; 11039 IntrPerm = Intrinsic::ppc_altivec_vperm; 11040 PermCntlTy = MVT::v16i8; 11041 PermTy = MVT::v4i32; 11042 LDTy = MVT::v4i32; 11043 } else { 11044 Intr = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld : 11045 Intrinsic::ppc_qpx_qvlpcls; 11046 IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd : 11047 Intrinsic::ppc_qpx_qvlfs; 11048 IntrPerm = Intrinsic::ppc_qpx_qvfperm; 11049 PermCntlTy = MVT::v4f64; 11050 PermTy = MVT::v4f64; 11051 LDTy = MemVT.getSimpleVT(); 11052 } 11053 11054 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy); 11055 11056 // Create the new MMO for the new base load. It is like the original MMO, 11057 // but represents an area in memory almost twice the vector size centered 11058 // on the original address. If the address is unaligned, we might start 11059 // reading up to (sizeof(vector)-1) bytes below the address of the 11060 // original unaligned load. 11061 MachineFunction &MF = DAG.getMachineFunction(); 11062 MachineMemOperand *BaseMMO = 11063 MF.getMachineMemOperand(LD->getMemOperand(), 11064 -(long)MemVT.getStoreSize()+1, 11065 2*MemVT.getStoreSize()-1); 11066 11067 // Create the new base load. 11068 SDValue LDXIntID = 11069 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout())); 11070 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr }; 11071 SDValue BaseLoad = 11072 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, 11073 DAG.getVTList(PermTy, MVT::Other), 11074 BaseLoadOps, LDTy, BaseMMO); 11075 11076 // Note that the value of IncOffset (which is provided to the next 11077 // load's pointer info offset value, and thus used to calculate the 11078 // alignment), and the value of IncValue (which is actually used to 11079 // increment the pointer value) are different! This is because we 11080 // require the next load to appear to be aligned, even though it 11081 // is actually offset from the base pointer by a lesser amount. 11082 int IncOffset = VT.getSizeInBits() / 8; 11083 int IncValue = IncOffset; 11084 11085 // Walk (both up and down) the chain looking for another load at the real 11086 // (aligned) offset (the alignment of the other load does not matter in 11087 // this case). If found, then do not use the offset reduction trick, as 11088 // that will prevent the loads from being later combined (as they would 11089 // otherwise be duplicates). 
11090 if (!findConsecutiveLoad(LD, DAG)) 11091 --IncValue; 11092 11093 SDValue Increment = 11094 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout())); 11095 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); 11096 11097 MachineMemOperand *ExtraMMO = 11098 MF.getMachineMemOperand(LD->getMemOperand(), 11099 1, 2*MemVT.getStoreSize()-1); 11100 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr }; 11101 SDValue ExtraLoad = 11102 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, 11103 DAG.getVTList(PermTy, MVT::Other), 11104 ExtraLoadOps, LDTy, ExtraMMO); 11105 11106 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 11107 BaseLoad.getValue(1), ExtraLoad.getValue(1)); 11108 11109 // Because vperm has a big-endian bias, we must reverse the order 11110 // of the input vectors and complement the permute control vector 11111 // when generating little endian code. We have already handled the 11112 // latter by using lvsr instead of lvsl, so just reverse BaseLoad 11113 // and ExtraLoad here. 11114 SDValue Perm; 11115 if (isLittleEndian) 11116 Perm = BuildIntrinsicOp(IntrPerm, 11117 ExtraLoad, BaseLoad, PermCntl, DAG, dl); 11118 else 11119 Perm = BuildIntrinsicOp(IntrPerm, 11120 BaseLoad, ExtraLoad, PermCntl, DAG, dl); 11121 11122 if (VT != PermTy) 11123 Perm = Subtarget.hasAltivec() ? 11124 DAG.getNode(ISD::BITCAST, dl, VT, Perm) : 11125 DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX 11126 DAG.getTargetConstant(1, dl, MVT::i64)); 11127 // second argument is 1 because this rounding 11128 // is always exact. 11129 11130 // The output of the permutation is our loaded result, the TokenFactor is 11131 // our new chain. 11132 DCI.CombineTo(N, Perm, TF); 11133 return SDValue(N, 0); 11134 } 11135 } 11136 break; 11137 case ISD::INTRINSIC_WO_CHAIN: { 11138 bool isLittleEndian = Subtarget.isLittleEndian(); 11139 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 11140 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr 11141 : Intrinsic::ppc_altivec_lvsl); 11142 if ((IID == Intr || 11143 IID == Intrinsic::ppc_qpx_qvlpcld || 11144 IID == Intrinsic::ppc_qpx_qvlpcls) && 11145 N->getOperand(1)->getOpcode() == ISD::ADD) { 11146 SDValue Add = N->getOperand(1); 11147 11148 int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ? 11149 5 /* 32 byte alignment */ : 4 /* 16 byte alignment */; 11150 11151 if (DAG.MaskedValueIsZero(Add->getOperand(1), 11152 APInt::getAllOnesValue(Bits /* alignment */) 11153 .zext(Add.getScalarValueSizeInBits()))) { 11154 SDNode *BasePtr = Add->getOperand(0).getNode(); 11155 for (SDNode::use_iterator UI = BasePtr->use_begin(), 11156 UE = BasePtr->use_end(); 11157 UI != UE; ++UI) { 11158 if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN && 11159 cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == IID) { 11160 // We've found another LVSL/LVSR, and this address is an aligned 11161 // multiple of that one. The results will be the same, so use the 11162 // one we've just found instead. 
11163 11164 return SDValue(*UI, 0); 11165 } 11166 } 11167 } 11168 11169 if (isa<ConstantSDNode>(Add->getOperand(1))) { 11170 SDNode *BasePtr = Add->getOperand(0).getNode(); 11171 for (SDNode::use_iterator UI = BasePtr->use_begin(), 11172 UE = BasePtr->use_end(); UI != UE; ++UI) { 11173 if (UI->getOpcode() == ISD::ADD && 11174 isa<ConstantSDNode>(UI->getOperand(1)) && 11175 (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() - 11176 cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) % 11177 (1ULL << Bits) == 0) { 11178 SDNode *OtherAdd = *UI; 11179 for (SDNode::use_iterator VI = OtherAdd->use_begin(), 11180 VE = OtherAdd->use_end(); VI != VE; ++VI) { 11181 if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN && 11182 cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) { 11183 return SDValue(*VI, 0); 11184 } 11185 } 11186 } 11187 } 11188 } 11189 } 11190 } 11191 11192 break; 11193 case ISD::INTRINSIC_W_CHAIN: { 11194 // For little endian, VSX loads require generating lxvd2x/xxswapd. 11195 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load. 11196 if (Subtarget.needsSwapsForVSXMemOps()) { 11197 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { 11198 default: 11199 break; 11200 case Intrinsic::ppc_vsx_lxvw4x: 11201 case Intrinsic::ppc_vsx_lxvd2x: 11202 return expandVSXLoadForLE(N, DCI); 11203 } 11204 } 11205 break; 11206 } 11207 case ISD::INTRINSIC_VOID: { 11208 // For little endian, VSX stores require generating xxswapd/stxvd2x. 11209 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store. 11210 if (Subtarget.needsSwapsForVSXMemOps()) { 11211 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { 11212 default: 11213 break; 11214 case Intrinsic::ppc_vsx_stxvw4x: 11215 case Intrinsic::ppc_vsx_stxvd2x: 11216 return expandVSXStoreForLE(N, DCI); 11217 } 11218 } 11219 break; 11220 } 11221 case ISD::BSWAP: 11222 // Turn BSWAP (LOAD) -> lhbrx/lwbrx. 11223 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && 11224 N->getOperand(0).hasOneUse() && 11225 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 || 11226 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && 11227 N->getValueType(0) == MVT::i64))) { 11228 SDValue Load = N->getOperand(0); 11229 LoadSDNode *LD = cast<LoadSDNode>(Load); 11230 // Create the byte-swapping load. 11231 SDValue Ops[] = { 11232 LD->getChain(), // Chain 11233 LD->getBasePtr(), // Ptr 11234 DAG.getValueType(N->getValueType(0)) // VT 11235 }; 11236 SDValue BSLoad = 11237 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl, 11238 DAG.getVTList(N->getValueType(0) == MVT::i64 ? 11239 MVT::i64 : MVT::i32, MVT::Other), 11240 Ops, LD->getMemoryVT(), LD->getMemOperand()); 11241 11242 // If this is an i16 load, insert the truncate. 11243 SDValue ResVal = BSLoad; 11244 if (N->getValueType(0) == MVT::i16) 11245 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad); 11246 11247 // First, combine the bswap away. This makes the value produced by the 11248 // load dead. 11249 DCI.CombineTo(N, ResVal); 11250 11251 // Next, combine the load away, we give it a bogus result value but a real 11252 // chain result. The result value is dead because the bswap is dead. 11253 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1)); 11254 11255 // Return N so it doesn't get rechecked! 
11256 return SDValue(N, 0); 11257 } 11258 11259 break; 11260 case PPCISD::VCMP: { 11261 // If a VCMPo node already exists with exactly the same operands as this 11262 // node, use its result instead of this node (VCMPo computes both a CR6 and 11263 // a normal output). 11264 // 11265 if (!N->getOperand(0).hasOneUse() && 11266 !N->getOperand(1).hasOneUse() && 11267 !N->getOperand(2).hasOneUse()) { 11268 11269 // Scan all of the users of the LHS, looking for VCMPo's that match. 11270 SDNode *VCMPoNode = nullptr; 11271 11272 SDNode *LHSN = N->getOperand(0).getNode(); 11273 for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end(); 11274 UI != E; ++UI) 11275 if (UI->getOpcode() == PPCISD::VCMPo && 11276 UI->getOperand(1) == N->getOperand(1) && 11277 UI->getOperand(2) == N->getOperand(2) && 11278 UI->getOperand(0) == N->getOperand(0)) { 11279 VCMPoNode = *UI; 11280 break; 11281 } 11282 11283 // If there is no VCMPo node, or if the flag value has a single use, don't 11284 // transform this. 11285 if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1)) 11286 break; 11287 11288 // Look at the (necessarily single) use of the flag value. If it has a 11289 // chain, this transformation is more complex. Note that multiple things 11290 // could use the value result, which we should ignore. 11291 SDNode *FlagUser = nullptr; 11292 for (SDNode::use_iterator UI = VCMPoNode->use_begin(); 11293 FlagUser == nullptr; ++UI) { 11294 assert(UI != VCMPoNode->use_end() && "Didn't find user!"); 11295 SDNode *User = *UI; 11296 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { 11297 if (User->getOperand(i) == SDValue(VCMPoNode, 1)) { 11298 FlagUser = User; 11299 break; 11300 } 11301 } 11302 } 11303 11304 // If the user is a MFOCRF instruction, we know this is safe. 11305 // Otherwise we give up for right now. 11306 if (FlagUser->getOpcode() == PPCISD::MFOCRF) 11307 return SDValue(VCMPoNode, 0); 11308 } 11309 break; 11310 } 11311 case ISD::BRCOND: { 11312 SDValue Cond = N->getOperand(1); 11313 SDValue Target = N->getOperand(2); 11314 11315 if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN && 11316 cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() == 11317 Intrinsic::ppc_is_decremented_ctr_nonzero) { 11318 11319 // We now need to make the intrinsic dead (it cannot be instruction 11320 // selected). 11321 DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0)); 11322 assert(Cond.getNode()->hasOneUse() && 11323 "Counter decrement has more than one use"); 11324 11325 return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other, 11326 N->getOperand(0), Target); 11327 } 11328 } 11329 break; 11330 case ISD::BR_CC: { 11331 // If this is a branch on an altivec predicate comparison, lower this so 11332 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This 11333 // lowering is done pre-legalize, because the legalizer lowers the predicate 11334 // compare down to code that is difficult to reassemble. 11335 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get(); 11336 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3); 11337 11338 // Sometimes the promoted value of the intrinsic is ANDed by some non-zero 11339 // value. If so, pass-through the AND to get to the intrinsic. 
11340 if (LHS.getOpcode() == ISD::AND && 11341 LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN && 11342 cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() == 11343 Intrinsic::ppc_is_decremented_ctr_nonzero && 11344 isa<ConstantSDNode>(LHS.getOperand(1)) && 11345 !isNullConstant(LHS.getOperand(1))) 11346 LHS = LHS.getOperand(0); 11347 11348 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN && 11349 cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() == 11350 Intrinsic::ppc_is_decremented_ctr_nonzero && 11351 isa<ConstantSDNode>(RHS)) { 11352 assert((CC == ISD::SETEQ || CC == ISD::SETNE) && 11353 "Counter decrement comparison is not EQ or NE"); 11354 11355 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue(); 11356 bool isBDNZ = (CC == ISD::SETEQ && Val) || 11357 (CC == ISD::SETNE && !Val); 11358 11359 // We now need to make the intrinsic dead (it cannot be instruction 11360 // selected). 11361 DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0)); 11362 assert(LHS.getNode()->hasOneUse() && 11363 "Counter decrement has more than one use"); 11364 11365 return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other, 11366 N->getOperand(0), N->getOperand(4)); 11367 } 11368 11369 int CompareOpc; 11370 bool isDot; 11371 11372 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN && 11373 isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) && 11374 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) { 11375 assert(isDot && "Can't compare against a vector result!"); 11376 11377 // If this is a comparison against something other than 0/1, then we know 11378 // that the condition is never/always true. 11379 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue(); 11380 if (Val != 0 && Val != 1) { 11381 if (CC == ISD::SETEQ) // Cond never true, remove branch. 11382 return N->getOperand(0); 11383 // Always !=, turn it into an unconditional branch. 11384 return DAG.getNode(ISD::BR, dl, MVT::Other, 11385 N->getOperand(0), N->getOperand(4)); 11386 } 11387 11388 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0); 11389 11390 // Create the PPCISD altivec 'dot' comparison node. 11391 SDValue Ops[] = { 11392 LHS.getOperand(2), // LHS of compare 11393 LHS.getOperand(3), // RHS of compare 11394 DAG.getConstant(CompareOpc, dl, MVT::i32) 11395 }; 11396 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue }; 11397 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops); 11398 11399 // Unpack the result based on how the target uses it. 11400 PPC::Predicate CompOpc; 11401 switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) { 11402 default: // Can't happen, don't crash on invalid number though. 11403 case 0: // Branch on the value of the EQ bit of CR6. 11404 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE; 11405 break; 11406 case 1: // Branch on the inverted value of the EQ bit of CR6. 11407 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ; 11408 break; 11409 case 2: // Branch on the value of the LT bit of CR6. 11410 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE; 11411 break; 11412 case 3: // Branch on the inverted value of the LT bit of CR6. 11413 CompOpc = BranchOnWhenPredTrue ? 
PPC::PRED_GE : PPC::PRED_LT; 11414 break; 11415 } 11416 11417 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0), 11418 DAG.getConstant(CompOpc, dl, MVT::i32), 11419 DAG.getRegister(PPC::CR6, MVT::i32), 11420 N->getOperand(4), CompNode.getValue(1)); 11421 } 11422 break; 11423 } 11424 case ISD::BUILD_VECTOR: 11425 return DAGCombineBuildVector(N, DCI); 11426 } 11427 11428 return SDValue(); 11429 } 11430 11431 SDValue 11432 PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, 11433 SelectionDAG &DAG, 11434 std::vector<SDNode *> *Created) const { 11435 // fold (sdiv X, pow2) 11436 EVT VT = N->getValueType(0); 11437 if (VT == MVT::i64 && !Subtarget.isPPC64()) 11438 return SDValue(); 11439 if ((VT != MVT::i32 && VT != MVT::i64) || 11440 !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2())) 11441 return SDValue(); 11442 11443 SDLoc DL(N); 11444 SDValue N0 = N->getOperand(0); 11445 11446 bool IsNegPow2 = (-Divisor).isPowerOf2(); 11447 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros(); 11448 SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT); 11449 11450 SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt); 11451 if (Created) 11452 Created->push_back(Op.getNode()); 11453 11454 if (IsNegPow2) { 11455 Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op); 11456 if (Created) 11457 Created->push_back(Op.getNode()); 11458 } 11459 11460 return Op; 11461 } 11462 11463 //===----------------------------------------------------------------------===// 11464 // Inline Assembly Support 11465 //===----------------------------------------------------------------------===// 11466 11467 void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, 11468 APInt &KnownZero, 11469 APInt &KnownOne, 11470 const SelectionDAG &DAG, 11471 unsigned Depth) const { 11472 KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0); 11473 switch (Op.getOpcode()) { 11474 default: break; 11475 case PPCISD::LBRX: { 11476 // lhbrx is known to have the top bits cleared out. 11477 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16) 11478 KnownZero = 0xFFFF0000; 11479 break; 11480 } 11481 case ISD::INTRINSIC_WO_CHAIN: { 11482 switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) { 11483 default: break; 11484 case Intrinsic::ppc_altivec_vcmpbfp_p: 11485 case Intrinsic::ppc_altivec_vcmpeqfp_p: 11486 case Intrinsic::ppc_altivec_vcmpequb_p: 11487 case Intrinsic::ppc_altivec_vcmpequh_p: 11488 case Intrinsic::ppc_altivec_vcmpequw_p: 11489 case Intrinsic::ppc_altivec_vcmpequd_p: 11490 case Intrinsic::ppc_altivec_vcmpgefp_p: 11491 case Intrinsic::ppc_altivec_vcmpgtfp_p: 11492 case Intrinsic::ppc_altivec_vcmpgtsb_p: 11493 case Intrinsic::ppc_altivec_vcmpgtsh_p: 11494 case Intrinsic::ppc_altivec_vcmpgtsw_p: 11495 case Intrinsic::ppc_altivec_vcmpgtsd_p: 11496 case Intrinsic::ppc_altivec_vcmpgtub_p: 11497 case Intrinsic::ppc_altivec_vcmpgtuh_p: 11498 case Intrinsic::ppc_altivec_vcmpgtuw_p: 11499 case Intrinsic::ppc_altivec_vcmpgtud_p: 11500 KnownZero = ~1U; // All bits but the low one are known to be zero. 
11501 break; 11502 } 11503 } 11504 } 11505 } 11506 11507 unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { 11508 switch (Subtarget.getDarwinDirective()) { 11509 default: break; 11510 case PPC::DIR_970: 11511 case PPC::DIR_PWR4: 11512 case PPC::DIR_PWR5: 11513 case PPC::DIR_PWR5X: 11514 case PPC::DIR_PWR6: 11515 case PPC::DIR_PWR6X: 11516 case PPC::DIR_PWR7: 11517 case PPC::DIR_PWR8: 11518 case PPC::DIR_PWR9: { 11519 if (!ML) 11520 break; 11521 11522 const PPCInstrInfo *TII = Subtarget.getInstrInfo(); 11523 11524 // For small loops (between 5 and 8 instructions), align to a 32-byte 11525 // boundary so that the entire loop fits in one instruction-cache line. 11526 uint64_t LoopSize = 0; 11527 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I) 11528 for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) { 11529 LoopSize += TII->getInstSizeInBytes(*J); 11530 if (LoopSize > 32) 11531 break; 11532 } 11533 11534 if (LoopSize > 16 && LoopSize <= 32) 11535 return 5; 11536 11537 break; 11538 } 11539 } 11540 11541 return TargetLowering::getPrefLoopAlignment(ML); 11542 } 11543 11544 /// getConstraintType - Given a constraint, return the type of 11545 /// constraint it is for this target. 11546 PPCTargetLowering::ConstraintType 11547 PPCTargetLowering::getConstraintType(StringRef Constraint) const { 11548 if (Constraint.size() == 1) { 11549 switch (Constraint[0]) { 11550 default: break; 11551 case 'b': 11552 case 'r': 11553 case 'f': 11554 case 'd': 11555 case 'v': 11556 case 'y': 11557 return C_RegisterClass; 11558 case 'Z': 11559 // FIXME: While Z does indicate a memory constraint, it specifically 11560 // indicates an r+r address (used in conjunction with the 'y' modifier 11561 // in the replacement string). Currently, we're forcing the base 11562 // register to be r0 in the asm printer (which is interpreted as zero) 11563 // and forming the complete address in the second register. This is 11564 // suboptimal. 11565 return C_Memory; 11566 } 11567 } else if (Constraint == "wc") { // individual CR bits. 11568 return C_RegisterClass; 11569 } else if (Constraint == "wa" || Constraint == "wd" || 11570 Constraint == "wf" || Constraint == "ws") { 11571 return C_RegisterClass; // VSX registers. 11572 } 11573 return TargetLowering::getConstraintType(Constraint); 11574 } 11575 11576 /// Examine constraint type and operand type and determine a weight value. 11577 /// This object must already have been set up with the operand type 11578 /// and the current alternative constraint selected. 11579 TargetLowering::ConstraintWeight 11580 PPCTargetLowering::getSingleConstraintMatchWeight( 11581 AsmOperandInfo &info, const char *constraint) const { 11582 ConstraintWeight weight = CW_Invalid; 11583 Value *CallOperandVal = info.CallOperandVal; 11584 // If we don't have a value, we can't do a match, 11585 // but allow it at the lowest weight. 11586 if (!CallOperandVal) 11587 return CW_Default; 11588 Type *type = CallOperandVal->getType(); 11589 11590 // Look at the constraint type. 11591 if (StringRef(constraint) == "wc" && type->isIntegerTy(1)) 11592 return CW_Register; // an individual CR bit. 
11593 else if ((StringRef(constraint) == "wa" || 11594 StringRef(constraint) == "wd" || 11595 StringRef(constraint) == "wf") && 11596 type->isVectorTy()) 11597 return CW_Register; 11598 else if (StringRef(constraint) == "ws" && type->isDoubleTy()) 11599 return CW_Register; 11600 11601 switch (*constraint) { 11602 default: 11603 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); 11604 break; 11605 case 'b': 11606 if (type->isIntegerTy()) 11607 weight = CW_Register; 11608 break; 11609 case 'f': 11610 if (type->isFloatTy()) 11611 weight = CW_Register; 11612 break; 11613 case 'd': 11614 if (type->isDoubleTy()) 11615 weight = CW_Register; 11616 break; 11617 case 'v': 11618 if (type->isVectorTy()) 11619 weight = CW_Register; 11620 break; 11621 case 'y': 11622 weight = CW_Register; 11623 break; 11624 case 'Z': 11625 weight = CW_Memory; 11626 break; 11627 } 11628 return weight; 11629 } 11630 11631 std::pair<unsigned, const TargetRegisterClass *> 11632 PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, 11633 StringRef Constraint, 11634 MVT VT) const { 11635 if (Constraint.size() == 1) { 11636 // GCC RS6000 Constraint Letters 11637 switch (Constraint[0]) { 11638 case 'b': // R1-R31 11639 if (VT == MVT::i64 && Subtarget.isPPC64()) 11640 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass); 11641 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass); 11642 case 'r': // R0-R31 11643 if (VT == MVT::i64 && Subtarget.isPPC64()) 11644 return std::make_pair(0U, &PPC::G8RCRegClass); 11645 return std::make_pair(0U, &PPC::GPRCRegClass); 11646 // 'd' and 'f' constraints are both defined to be "the floating point 11647 // registers", where one is for 32-bit and the other for 64-bit. We don't 11648 // really care overly much here so just give them all the same reg classes. 11649 case 'd': 11650 case 'f': 11651 if (VT == MVT::f32 || VT == MVT::i32) 11652 return std::make_pair(0U, &PPC::F4RCRegClass); 11653 if (VT == MVT::f64 || VT == MVT::i64) 11654 return std::make_pair(0U, &PPC::F8RCRegClass); 11655 if (VT == MVT::v4f64 && Subtarget.hasQPX()) 11656 return std::make_pair(0U, &PPC::QFRCRegClass); 11657 if (VT == MVT::v4f32 && Subtarget.hasQPX()) 11658 return std::make_pair(0U, &PPC::QSRCRegClass); 11659 break; 11660 case 'v': 11661 if (VT == MVT::v4f64 && Subtarget.hasQPX()) 11662 return std::make_pair(0U, &PPC::QFRCRegClass); 11663 if (VT == MVT::v4f32 && Subtarget.hasQPX()) 11664 return std::make_pair(0U, &PPC::QSRCRegClass); 11665 if (Subtarget.hasAltivec()) 11666 return std::make_pair(0U, &PPC::VRRCRegClass); 11667 case 'y': // crrc 11668 return std::make_pair(0U, &PPC::CRRCRegClass); 11669 } 11670 } else if (Constraint == "wc" && Subtarget.useCRBits()) { 11671 // An individual CR bit. 11672 return std::make_pair(0U, &PPC::CRBITRCRegClass); 11673 } else if ((Constraint == "wa" || Constraint == "wd" || 11674 Constraint == "wf") && Subtarget.hasVSX()) { 11675 return std::make_pair(0U, &PPC::VSRCRegClass); 11676 } else if (Constraint == "ws" && Subtarget.hasVSX()) { 11677 if (VT == MVT::f32 && Subtarget.hasP8Vector()) 11678 return std::make_pair(0U, &PPC::VSSRCRegClass); 11679 else 11680 return std::make_pair(0U, &PPC::VSFRCRegClass); 11681 } 11682 11683 std::pair<unsigned, const TargetRegisterClass *> R = 11684 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); 11685 11686 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers 11687 // (which we call X[0-9]+). 
std::pair<unsigned, const TargetRegisterClass *>
PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                StringRef Constraint,
                                                MVT VT) const {
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
    case 'b':   // R1-R31
      if (VT == MVT::i64 && Subtarget.isPPC64())
        return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
      return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
    case 'r':   // R0-R31
      if (VT == MVT::i64 && Subtarget.isPPC64())
        return std::make_pair(0U, &PPC::G8RCRegClass);
      return std::make_pair(0U, &PPC::GPRCRegClass);
    // 'd' and 'f' constraints are both defined to be "the floating point
    // registers", where one is for 32-bit and the other for 64-bit. We don't
    // really care overly much here so just give them all the same reg classes.
    case 'd':
    case 'f':
      if (VT == MVT::f32 || VT == MVT::i32)
        return std::make_pair(0U, &PPC::F4RCRegClass);
      if (VT == MVT::f64 || VT == MVT::i64)
        return std::make_pair(0U, &PPC::F8RCRegClass);
      if (VT == MVT::v4f64 && Subtarget.hasQPX())
        return std::make_pair(0U, &PPC::QFRCRegClass);
      if (VT == MVT::v4f32 && Subtarget.hasQPX())
        return std::make_pair(0U, &PPC::QSRCRegClass);
      break;
    case 'v':
      if (VT == MVT::v4f64 && Subtarget.hasQPX())
        return std::make_pair(0U, &PPC::QFRCRegClass);
      if (VT == MVT::v4f32 && Subtarget.hasQPX())
        return std::make_pair(0U, &PPC::QSRCRegClass);
      if (Subtarget.hasAltivec())
        return std::make_pair(0U, &PPC::VRRCRegClass);
      break;
    case 'y':   // crrc
      return std::make_pair(0U, &PPC::CRRCRegClass);
    }
  } else if (Constraint == "wc" && Subtarget.useCRBits()) {
    // An individual CR bit.
    return std::make_pair(0U, &PPC::CRBITRCRegClass);
  } else if ((Constraint == "wa" || Constraint == "wd" ||
              Constraint == "wf") && Subtarget.hasVSX()) {
    return std::make_pair(0U, &PPC::VSRCRegClass);
  } else if (Constraint == "ws" && Subtarget.hasVSX()) {
    if (VT == MVT::f32 && Subtarget.hasP8Vector())
      return std::make_pair(0U, &PPC::VSSRCRegClass);
    else
      return std::make_pair(0U, &PPC::VSFRCRegClass);
  }

  std::pair<unsigned, const TargetRegisterClass *> R =
      TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);

  // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
  // (which we call X[0-9]+). If a 64-bit value has been requested, and a
  // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
  // register.
  // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
  // the AsmName field from *RegisterInfo.td, then this would not be necessary.
  if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
      PPC::GPRCRegClass.contains(R.first))
    return std::make_pair(TRI->getMatchingSuperReg(R.first,
                            PPC::sub_32, &PPC::G8RCRegClass),
                          &PPC::G8RCRegClass);

  // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
  if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
    R.first = PPC::CR0;
    R.second = &PPC::CRRCRegClass;
  }

  return R;
}

/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                     std::string &Constraint,
                                                     std::vector<SDValue> &Ops,
                                                     SelectionDAG &DAG) const {
  SDValue Result;

  // Only support length 1 constraints.
  if (Constraint.length() > 1) return;

  char Letter = Constraint[0];
  switch (Letter) {
  default: break;
  case 'I':
  case 'J':
  case 'K':
  case 'L':
  case 'M':
  case 'N':
  case 'O':
  case 'P': {
    ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
    if (!CST) return; // Must be an immediate to match.
    SDLoc dl(Op);
    int64_t Value = CST->getSExtValue();
    EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
                         // numbers are printed as such.
    switch (Letter) {
    default: llvm_unreachable("Unknown constraint letter!");
    case 'I':  // "I" is a signed 16-bit constant.
      if (isInt<16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
      if (isShiftedUInt<16, 16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
      if (isShiftedInt<16, 16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
      if (isUInt<16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'M':  // "M" is a constant that is greater than 31.
      if (Value > 31)
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'N':  // "N" is a positive constant that is an exact power of two.
      if (Value > 0 && isPowerOf2_64(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'O':  // "O" is the constant zero.
      if (Value == 0)
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
      if (isInt<16>(-Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    }
    break;
  }
  }

  if (Result.getNode()) {
    Ops.push_back(Result);
    return;
  }

  // Handle standard constraint letters.
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
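// For illustration only (the snippet is an assumption, not taken from this
// file): with the 'I' constraint handled above, something like
//   asm ("addi %0,%1,%2" : "=r"(d) : "r"(a), "I"(100));
// is accepted because 100 satisfies isInt<16>, whereas a value such as
// 100000 does not, so no operand is added to Ops and the match fails.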
// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                              const AddrMode &AM, Type *Ty,
                                              unsigned AS) const {
  // PPC does not allow r+i addressing modes for vectors!
  if (Ty->isVectorTy() && AM.BaseOffs != 0)
    return false;

  // PPC allows a sign-extended 16-bit immediate field.
  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
    return false;

  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // PPC only supports r+r addressing beyond this point.
  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
      return false;
    // Otherwise we have r+r or r+i.
    break;
  case 2:
    if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
      return false;
    // Allow 2*r as r+r.
    break;
  default:
    // No other scales are supported.
    return false;
  }

  return true;
}

SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
                                           SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  SDLoc dl(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  // Make sure the function does not optimize away the store of the RA to
  // the stack.
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setLRStoreRequired();
  bool isPPC64 = Subtarget.isPPC64();
  auto PtrVT = getPointerTy(MF.getDataLayout());

  if (Depth > 0) {
    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
    SDValue Offset =
        DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(),
                        dl, isPPC64 ? MVT::i64 : MVT::i32);
    return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
                       MachinePointerInfo());
  }

  // Just load the return address off the stack.
  SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
                     MachinePointerInfo());
}
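// Roughly speaking (an informal sketch, not normative): LowerRETURNADDR above
// services __builtin_return_address(N) by adding the link-register save
// offset to the frame address when N > 0, and LowerFRAMEADDR below services
// __builtin_frame_address(N) by starting from the frame register and loading
// through the back chain once per requested level.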
SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc dl(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setFrameAddressIsTaken(true);

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  bool isPPC64 = PtrVT == MVT::i64;

  // Naked functions never have a frame pointer, and so we use r1. For all
  // other functions, this decision must be delayed until during PEI.
  unsigned FrameReg;
  if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
    FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
  else
    FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;

  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
                                         PtrVT);
  while (Depth--)
    FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
                            FrameAddr, MachinePointerInfo());
  return FrameAddr;
}

// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT,
                                              SelectionDAG &DAG) const {
  bool isPPC64 = Subtarget.isPPC64();
  bool isDarwinABI = Subtarget.isDarwinABI();

  if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) ||
      (!isPPC64 && VT != MVT::i32))
    report_fatal_error("Invalid register global variable type");

  bool is64Bit = isPPC64 && VT == MVT::i64;
  unsigned Reg = StringSwitch<unsigned>(RegName)
                   .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
                   .Case("r2", (isDarwinABI || isPPC64) ? 0 : PPC::R2)
                   .Case("r13", (!isPPC64 && isDarwinABI) ? 0 :
                                  (is64Bit ? PPC::X13 : PPC::R13))
                   .Default(0);

  if (Reg)
    return Reg;
  report_fatal_error("Invalid register name global variable");
}

bool
PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The PowerPC target isn't yet aware of offsets.
  return false;
}
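// The intrinsic descriptions below drive alias analysis of the target's
// vector load/store intrinsics. For the lvx-style accesses, which ignore the
// low-order address bits, the reported range is widened conservatively to
// cover every byte the masked access could touch; that is why those cases
// use an offset of -StoreSize+1 and a size of 2*StoreSize-1 around the
// pointer, while the "aligned" (qvlf*a/qvstf*a) variants report an exact
// access at offset 0.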
bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                           const CallInst &I,
                                           unsigned Intrinsic) const {

  switch (Intrinsic) {
  case Intrinsic::ppc_qpx_qvlfd:
  case Intrinsic::ppc_qpx_qvlfs:
  case Intrinsic::ppc_qpx_qvlfcd:
  case Intrinsic::ppc_qpx_qvlfcs:
  case Intrinsic::ppc_qpx_qvlfiwa:
  case Intrinsic::ppc_qpx_qvlfiwz:
  case Intrinsic::ppc_altivec_lvx:
  case Intrinsic::ppc_altivec_lvxl:
  case Intrinsic::ppc_altivec_lvebx:
  case Intrinsic::ppc_altivec_lvehx:
  case Intrinsic::ppc_altivec_lvewx:
  case Intrinsic::ppc_vsx_lxvd2x:
  case Intrinsic::ppc_vsx_lxvw4x: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_altivec_lvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_lvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_lvewx:
      VT = MVT::i32;
      break;
    case Intrinsic::ppc_vsx_lxvd2x:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvlfd:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvlfs:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvlfcd:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvlfcs:
      VT = MVT::v2f32;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = -VT.getStoreSize()+1;
    Info.size = 2*VT.getStoreSize()-1;
    Info.align = 1;
    Info.vol = false;
    Info.readMem = true;
    Info.writeMem = false;
    return true;
  }
  case Intrinsic::ppc_qpx_qvlfda:
  case Intrinsic::ppc_qpx_qvlfsa:
  case Intrinsic::ppc_qpx_qvlfcda:
  case Intrinsic::ppc_qpx_qvlfcsa:
  case Intrinsic::ppc_qpx_qvlfiwaa:
  case Intrinsic::ppc_qpx_qvlfiwza: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_qpx_qvlfda:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvlfsa:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvlfcda:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvlfcsa:
      VT = MVT::v2f32;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.size = VT.getStoreSize();
    Info.align = 1;
    Info.vol = false;
    Info.readMem = true;
    Info.writeMem = false;
    return true;
  }
  case Intrinsic::ppc_qpx_qvstfd:
  case Intrinsic::ppc_qpx_qvstfs:
  case Intrinsic::ppc_qpx_qvstfcd:
  case Intrinsic::ppc_qpx_qvstfcs:
  case Intrinsic::ppc_qpx_qvstfiw:
  case Intrinsic::ppc_altivec_stvx:
  case Intrinsic::ppc_altivec_stvxl:
  case Intrinsic::ppc_altivec_stvebx:
  case Intrinsic::ppc_altivec_stvehx:
  case Intrinsic::ppc_altivec_stvewx:
  case Intrinsic::ppc_vsx_stxvd2x:
  case Intrinsic::ppc_vsx_stxvw4x: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_altivec_stvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_stvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_stvewx:
      VT = MVT::i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvstfd:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvstfs:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvstfcd:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvstfcs:
      VT = MVT::v2f32;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_VOID;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(1);
    Info.offset = -VT.getStoreSize()+1;
    Info.size = 2*VT.getStoreSize()-1;
    Info.align = 1;
    Info.vol = false;
    Info.readMem = false;
    Info.writeMem = true;
    return true;
  }
  case Intrinsic::ppc_qpx_qvstfda:
  case Intrinsic::ppc_qpx_qvstfsa:
  case Intrinsic::ppc_qpx_qvstfcda:
  case Intrinsic::ppc_qpx_qvstfcsa:
  case Intrinsic::ppc_qpx_qvstfiwa: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_qpx_qvstfda:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvstfsa:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvstfcda:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvstfcsa:
      VT = MVT::v2f32;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_VOID;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(1);
    Info.offset = 0;
    Info.size = VT.getStoreSize();
    Info.align = 1;
    Info.vol = false;
    Info.readMem = false;
    Info.writeMem = true;
    return true;
  }
  default:
    break;
  }

  return false;
}
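// As an illustrative example of the policy implemented below (the concrete
// numbers are assumptions, not measurements): when optimizing, a 32-byte
// memcpy between buffers that are at least 16-byte aligned on an
// Altivec-capable subtarget is expanded with v4i32 operations, whereas a
// misaligned copy only uses vector types once unaligned VSX accesses are
// fast (P8 and later), or when it is a memset on a VSX subtarget.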
/// getOptimalMemOpType - Returns the target specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
/// lowering. If DstAlign is zero, the destination's alignment can satisfy any
/// constraint and does not need to be checked. Similarly, if SrcAlign is zero
/// there is no need to check it against the alignment requirement, probably
/// because the source does not need to be loaded. If 'IsMemset' is true, that
/// means it's expanding a memset. If 'ZeroMemset' is true, that means it's a
/// memset of zero. 'MemcpyStrSrc' indicates whether the memcpy source is
/// constant so it does not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
                                           unsigned DstAlign, unsigned SrcAlign,
                                           bool IsMemset, bool ZeroMemset,
                                           bool MemcpyStrSrc,
                                           MachineFunction &MF) const {
  if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
    const Function *F = MF.getFunction();
    // When expanding a memset, require at least two QPX instructions to cover
    // the cost of loading the value to be stored from the constant pool.
    if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
        (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
        !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
      return MVT::v4f64;
    }

    // We should use Altivec/VSX loads and stores when available. For unaligned
    // addresses, unaligned VSX loads are only fast starting with the P8.
    if (Subtarget.hasAltivec() && Size >= 16 &&
        (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
         ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
      return MVT::v4i32;
  }

  if (Subtarget.isPPC64()) {
    return MVT::i64;
  }

  return MVT::i32;
}

/// \brief Returns true if it is beneficial to convert a load of a constant
/// to just the constant itself.
bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                                          Type *Ty) const {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  return !(BitSize == 0 || BitSize > 64);
}

bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
    return false;
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
  return NumBits1 == 64 && NumBits2 == 32;
}

bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
  if (!VT1.isInteger() || !VT2.isInteger())
    return false;
  unsigned NumBits1 = VT1.getSizeInBits();
  unsigned NumBits2 = VT2.getSizeInBits();
  return NumBits1 == 64 && NumBits2 == 32;
}
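// An informal note on the two isTruncateFree overloads above: only i64 -> i32
// is reported as free because, on PPC64, 32-bit operations simply read and
// write the low half of a 64-bit GPR, so no separate truncation instruction
// is ever required.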
bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Generally speaking, zexts are not free, but they are free when they can be
  // folded with other operations.
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
         (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  // FIXME: Add other cases...
  //  - 32-bit shifts with a zext to i64
  //  - zext after ctlz, bswap, etc.
  //  - zext after and by a constant mask

  return TargetLowering::isZExtFree(Val, VT2);
}

bool PPCTargetLowering::isFPExtFree(EVT VT) const {
  assert(VT.isFloatingPoint());
  return true;
}

bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<16>(Imm) || isUInt<16>(Imm);
}

bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<16>(Imm) || isUInt<16>(Imm);
}

bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                       unsigned,
                                                       unsigned,
                                                       bool *Fast) const {
  if (DisablePPCUnaligned)
    return false;

  // PowerPC supports unaligned memory access for simple non-vector types.
  // Although accessing unaligned addresses is not as efficient as accessing
  // aligned addresses, it is generally more efficient than manual expansion,
  // and generally only traps for software emulation when crossing page
  // boundaries.

  if (!VT.isSimple())
    return false;

  if (VT.getSimpleVT().isVector()) {
    if (Subtarget.hasVSX()) {
      if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
          VT != MVT::v4f32 && VT != MVT::v4i32)
        return false;
    } else {
      return false;
    }
  }

  if (VT == MVT::ppcf128)
    return false;

  if (Fast)
    *Fast = true;

  return true;
}

bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f32:
  case MVT::f64:
    return true;
  default:
    break;
  }

  return false;
}

const MCPhysReg *
PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
  // LR is a callee-save register, but we must treat it as clobbered by any
  // call site. Hence we include LR in the scratch registers, which are in
  // turn added as implicit-defs for stackmaps and patchpoints. The same
  // reasoning applies to CTR, which is used by any indirect call.
  static const MCPhysReg ScratchRegs[] = {
    PPC::X12, PPC::LR8, PPC::CTR8, 0
  };

  return ScratchRegs;
}

unsigned PPCTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
}

unsigned PPCTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
}

bool
PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
    EVT VT, unsigned DefinedValues) const {
  if (VT == MVT::v2i64)
    return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves

  if (Subtarget.hasVSX() || Subtarget.hasQPX())
    return true;

  return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
}

Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
  if (DisableILPPref || Subtarget.enableMachineScheduler())
    return TargetLowering::getSchedulingPreference(N);

  return Sched::ILP;
}

// Create a fast isel object.
FastISel *
PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
                                  const TargetLibraryInfo *LibInfo) const {
  return PPC::createFastISel(FuncInfo, LibInfo);
}

void PPCTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
  if (Subtarget.isDarwinABI()) return;
  if (!Subtarget.isPPC64()) return;

  // Update IsSplitCSR in PPCFunctionInfo.
  PPCFunctionInfo *PFI = Entry->getParent()->getInfo<PPCFunctionInfo>();
  PFI->setIsSplitCSR(true);
}

void PPCTargetLowering::insertCopiesSplitCSR(
    MachineBasicBlock *Entry,
    const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
  if (!IStart)
    return;

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
  MachineBasicBlock::iterator MBBI = Entry->begin();
  for (const MCPhysReg *I = IStart; *I; ++I) {
    const TargetRegisterClass *RC = nullptr;
    if (PPC::G8RCRegClass.contains(*I))
      RC = &PPC::G8RCRegClass;
    else if (PPC::F8RCRegClass.contains(*I))
      RC = &PPC::F8RCRegClass;
    else if (PPC::CRRCRegClass.contains(*I))
      RC = &PPC::CRRCRegClass;
    else if (PPC::VRRCRegClass.contains(*I))
      RC = &PPC::VRRCRegClass;
    else
      llvm_unreachable("Unexpected register class in CSRsViaCopy!");

    unsigned NewVR = MRI->createVirtualRegister(RC);
    // Create copy from CSR to a virtual register.
    // FIXME: this currently does not emit CFI pseudo-instructions; it works
    // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
    // nounwind. If we want to generalize this later, we may need to emit
    // CFI pseudo-instructions.
    assert(Entry->getParent()->getFunction()->hasFnAttribute(
             Attribute::NoUnwind) &&
           "Function should be nounwind in insertCopiesSplitCSR!");
    Entry->addLiveIn(*I);
    BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
        .addReg(*I);

    // Insert the copy-back instructions right before the terminator.
    for (auto *Exit : Exits)
      BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
              TII->get(TargetOpcode::COPY), *I)
          .addReg(NewVR);
  }
}

// Override to enable LOAD_STACK_GUARD lowering on Linux.
bool PPCTargetLowering::useLoadStackGuardNode() const {
  if (!Subtarget.isTargetLinux())
    return TargetLowering::useLoadStackGuardNode();
  return true;
}

// Override to disable global variable loading on Linux.
void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
  if (!Subtarget.isTargetLinux())
    return TargetLowering::insertSSPDeclarations(M);
}

bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
  if (!VT.isSimple() || !Subtarget.hasVSX())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  default:
    // For FP types that are currently not supported by the PPC backend, return
    // false. Examples: f16, f80.
    return false;
  case MVT::f32:
  case MVT::f64:
  case MVT::ppcf128:
    return Imm.isPosZero();
  }
}